17 years ago · e270b51df6
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 
				 VERSION = 2
			
 
				 PATCHLEVEL = 6
			
 
				 SUBLEVEL = 25
			
 
				-EXTRAVERSION =
			
 
				+EXTRAVERSION = -numa
			
 
				 NAME = Funky Weasel is Jiggy wit it
			
 
				 
			
 
				 # *DOCUMENTATION*
			
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -1409,7 +1409,6 @@ syscall_is_too_hard:
 
				 
			
 
				 	st	%o0, [%sp + STACKFRAME_SZ + PT_I0]
			
 
				 
			
 
				-	.globl	ret_sys_call
			
 
				 ret_sys_call:
			
 
				 	ld	[%curptr + TI_FLAGS], %l6
			
 
				 	cmp	%o0, -ERESTART_RESTARTBLOCK
			
--- a/arch/sparc/kernel/signal.c
+++ b/arch/sparc/kernel/signal.c
@@ -105,11 +105,6 @@ static int _sigpause_common(old_sigset_t set)
 
				 	return -ERESTARTNOHAND;
			
 
				 }
			
 
				 
			
 
				-asmlinkage int sys_sigpause(unsigned int set)
			
 
				-{
			
 
				-	return _sigpause_common(set);
			
 
				-}
			
 
				-
			
 
				 asmlinkage int sys_sigsuspend(old_sigset_t set)
			
 
				 {
			
 
				 	return _sigpause_common(set);
			
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -250,6 +250,26 @@ endchoice
 
				 
			
 
				 endmenu
			
 
				 
			
 
				+config NUMA
			
 
				+	bool "NUMA support"
			
 
				+
			
 
				+config NODES_SHIFT
			
 
				+	int
			
 
				+	default "4"
			
 
				+	depends on NEED_MULTIPLE_NODES
			
 
				+
			
 
				+# Some NUMA nodes have memory ranges that span
			
 
				+# other nodes.  Even though a pfn is valid and
			
 
				+# between a node's start and end pfns, it may not
			
 
				+# reside on that node.  See memmap_init_zone()
			
 
				+# for details.
			
 
				+config NODES_SPAN_OTHER_NODES
			
 
				+	def_bool y
			
 
				+	depends on NEED_MULTIPLE_NODES
			
 
				+
			
 
				+config ARCH_POPULATES_NODE_MAP
			
 
				+	def_bool y
			
 
				+
			
 
				 config ARCH_SELECT_MEMORY_MODEL
			
 
				 	def_bool y
			
 
				 
			
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -1,7 +1,7 @@
 
				 #
			
 
				 # Automatically generated make config: don't edit
			
 
				-# Linux kernel version: 2.6.25
			
 
				-# Sun Apr 20 01:33:21 2008
			
 
				+# Linux kernel version: 2.6.25-numa
			
 
				+# Wed Apr 23 04:49:08 2008
			
 
				 #
			
 
				 CONFIG_SPARC=y
			
 
				 CONFIG_SPARC64=y
			
@@ -152,6 +152,8 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
 
				 CONFIG_HUGETLB_PAGE_SIZE_4MB=y
			
 
				 # CONFIG_HUGETLB_PAGE_SIZE_512K is not set
			
 
				 # CONFIG_HUGETLB_PAGE_SIZE_64K is not set
			
 
				+# CONFIG_NUMA is not set
			
 
				+CONFIG_ARCH_POPULATES_NODE_MAP=y
			
 
				 CONFIG_ARCH_SELECT_MEMORY_MODEL=y
			
 
				 CONFIG_ARCH_SPARSEMEM_ENABLE=y
			
 
				 CONFIG_ARCH_SPARSEMEM_DEFAULT=y
			
@@ -787,7 +789,6 @@ CONFIG_I2C_ALGOBIT=y
 
				 # CONFIG_SENSORS_PCF8574 is not set
			
 
				 # CONFIG_PCF8575 is not set
			
 
				 # CONFIG_SENSORS_PCF8591 is not set
			
 
				-# CONFIG_TPS65010 is not set
			
 
				 # CONFIG_SENSORS_MAX6875 is not set
			
 
				 # CONFIG_SENSORS_TSL2550 is not set
			
 
				 # CONFIG_I2C_DEBUG_CORE is not set
			
@@ -869,6 +870,7 @@ CONFIG_SSB_POSSIBLE=y
 
				 # Multifunction device drivers
			
 
				 #
			
 
				 # CONFIG_MFD_SM501 is not set
			
 
				+# CONFIG_HTC_PASIC3 is not set
			
 
				 
			
 
				 #
			
 
				 # Multimedia devices
			
@@ -1219,10 +1221,6 @@ CONFIG_USB_STORAGE=m
 
				 # CONFIG_NEW_LEDS is not set
			
 
				 # CONFIG_INFINIBAND is not set
			
 
				 # CONFIG_RTC_CLASS is not set
			
 
				-
			
 
				-#
			
 
				-# Userspace I/O
			
 
				-#
			
 
				 # CONFIG_UIO is not set
			
 
				 
			
 
				 #
			
@@ -1399,6 +1397,7 @@ CONFIG_SCHEDSTATS=y
 
				 CONFIG_DEBUG_BUGVERBOSE=y
			
 
				 # CONFIG_DEBUG_INFO is not set
			
 
				 # CONFIG_DEBUG_VM is not set
			
 
				+# CONFIG_DEBUG_WRITECOUNT is not set
			
 
				 # CONFIG_DEBUG_LIST is not set
			
 
				 # CONFIG_DEBUG_SG is not set
			
 
				 # CONFIG_BOOT_PRINTK_DELAY is not set
			
@@ -1425,53 +1424,82 @@ CONFIG_ASYNC_CORE=m
 
				 CONFIG_ASYNC_MEMCPY=m
			
 
				 CONFIG_ASYNC_XOR=m
			
 
				 CONFIG_CRYPTO=y
			
 
				+
			
 
				+#
			
 
				+# Crypto core or helper
			
 
				+#
			
 
				 CONFIG_CRYPTO_ALGAPI=y
			
 
				 CONFIG_CRYPTO_AEAD=y
			
 
				 CONFIG_CRYPTO_BLKCIPHER=y
			
 
				-# CONFIG_CRYPTO_SEQIV is not set
			
 
				 CONFIG_CRYPTO_HASH=y
			
 
				 CONFIG_CRYPTO_MANAGER=y
			
 
				+CONFIG_CRYPTO_GF128MUL=m
			
 
				+CONFIG_CRYPTO_NULL=m
			
 
				+# CONFIG_CRYPTO_CRYPTD is not set
			
 
				+CONFIG_CRYPTO_AUTHENC=y
			
 
				+CONFIG_CRYPTO_TEST=m
			
 
				+
			
 
				+#
			
 
				+# Authenticated Encryption with Associated Data
			
 
				+#
			
 
				+# CONFIG_CRYPTO_CCM is not set
			
 
				+# CONFIG_CRYPTO_GCM is not set
			
 
				+# CONFIG_CRYPTO_SEQIV is not set
			
 
				+
			
 
				+#
			
 
				+# Block modes
			
 
				+#
			
 
				+CONFIG_CRYPTO_CBC=y
			
 
				+# CONFIG_CRYPTO_CTR is not set
			
 
				+# CONFIG_CRYPTO_CTS is not set
			
 
				+CONFIG_CRYPTO_ECB=m
			
 
				+CONFIG_CRYPTO_LRW=m
			
 
				+CONFIG_CRYPTO_PCBC=m
			
 
				+CONFIG_CRYPTO_XTS=m
			
 
				+
			
 
				+#
			
 
				+# Hash modes
			
 
				+#
			
 
				 CONFIG_CRYPTO_HMAC=y
			
 
				 CONFIG_CRYPTO_XCBC=y
			
 
				-CONFIG_CRYPTO_NULL=m
			
 
				+
			
 
				+#
			
 
				+# Digest
			
 
				+#
			
 
				+CONFIG_CRYPTO_CRC32C=m
			
 
				 CONFIG_CRYPTO_MD4=y
			
 
				 CONFIG_CRYPTO_MD5=y
			
 
				+CONFIG_CRYPTO_MICHAEL_MIC=m
			
 
				 CONFIG_CRYPTO_SHA1=y
			
 
				 CONFIG_CRYPTO_SHA256=m
			
 
				 CONFIG_CRYPTO_SHA512=m
			
 
				-CONFIG_CRYPTO_WP512=m
			
 
				 CONFIG_CRYPTO_TGR192=m
			
 
				-CONFIG_CRYPTO_GF128MUL=m
			
 
				-CONFIG_CRYPTO_ECB=m
			
 
				-CONFIG_CRYPTO_CBC=y
			
 
				-CONFIG_CRYPTO_PCBC=m
			
 
				-CONFIG_CRYPTO_LRW=m
			
 
				-CONFIG_CRYPTO_XTS=m
			
 
				-# CONFIG_CRYPTO_CTR is not set
			
 
				-# CONFIG_CRYPTO_GCM is not set
			
 
				-# CONFIG_CRYPTO_CCM is not set
			
 
				-# CONFIG_CRYPTO_CRYPTD is not set
			
 
				-CONFIG_CRYPTO_DES=y
			
 
				-CONFIG_CRYPTO_FCRYPT=m
			
 
				-CONFIG_CRYPTO_BLOWFISH=m
			
 
				-CONFIG_CRYPTO_TWOFISH=m
			
 
				-CONFIG_CRYPTO_TWOFISH_COMMON=m
			
 
				-CONFIG_CRYPTO_SERPENT=m
			
 
				+CONFIG_CRYPTO_WP512=m
			
 
				+
			
 
				+#
			
 
				+# Ciphers
			
 
				+#
			
 
				 CONFIG_CRYPTO_AES=m
			
 
				+CONFIG_CRYPTO_ANUBIS=m
			
 
				+CONFIG_CRYPTO_ARC4=m
			
 
				+CONFIG_CRYPTO_BLOWFISH=m
			
 
				+CONFIG_CRYPTO_CAMELLIA=m
			
 
				 CONFIG_CRYPTO_CAST5=m
			
 
				 CONFIG_CRYPTO_CAST6=m
			
 
				-CONFIG_CRYPTO_TEA=m
			
 
				-CONFIG_CRYPTO_ARC4=m
			
 
				+CONFIG_CRYPTO_DES=y
			
 
				+CONFIG_CRYPTO_FCRYPT=m
			
 
				 CONFIG_CRYPTO_KHAZAD=m
			
 
				-CONFIG_CRYPTO_ANUBIS=m
			
 
				-CONFIG_CRYPTO_SEED=m
			
 
				 # CONFIG_CRYPTO_SALSA20 is not set
			
 
				+CONFIG_CRYPTO_SEED=m
			
 
				+CONFIG_CRYPTO_SERPENT=m
			
 
				+CONFIG_CRYPTO_TEA=m
			
 
				+CONFIG_CRYPTO_TWOFISH=m
			
 
				+CONFIG_CRYPTO_TWOFISH_COMMON=m
			
 
				+
			
 
				+#
			
 
				+# Compression
			
 
				+#
			
 
				 CONFIG_CRYPTO_DEFLATE=y
			
 
				-CONFIG_CRYPTO_MICHAEL_MIC=m
			
 
				-CONFIG_CRYPTO_CRC32C=m
			
 
				-CONFIG_CRYPTO_CAMELLIA=m
			
 
				-CONFIG_CRYPTO_TEST=m
			
 
				-CONFIG_CRYPTO_AUTHENC=y
			
 
				 # CONFIG_CRYPTO_LZO is not set
			
 
				 CONFIG_CRYPTO_HW=y
			
 
				 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
			
@@ -1492,3 +1520,4 @@ CONFIG_PLIST=y
 
				 CONFIG_HAS_IOMEM=y
			
 
				 CONFIG_HAS_IOPORT=y
			
 
				 CONFIG_HAS_DMA=y
			
 
				+CONFIG_HAVE_LMB=y
			
--- a/arch/sparc64/kernel/ebus.c
+++ b/arch/sparc64/kernel/ebus.c
@@ -396,6 +396,7 @@ static void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_de
 
				 	sd->op = &dev->ofdev;
			
 
				 	sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu;
			
 
				 	sd->stc = dev->bus->ofdev.dev.parent->archdata.stc;
			
 
				+	sd->numa_node = dev->bus->ofdev.dev.parent->archdata.numa_node;
			
 
				 
			
 
				 	dev->ofdev.node = dp;
			
 
				 	dev->ofdev.dev.parent = &dev->bus->ofdev.dev;
			
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -47,7 +47,7 @@ do_fpdis:
 
				 	ba,pt		%xcc, etrap
			
 
				 109:	 or		%g7, %lo(109b), %g7
			
 
				 	add		%g0, %g0, %g0
			
 
				-	ba,a,pt		%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt		%xcc, rtrap
			
 
				 
			
 
				 1:	TRAP_LOAD_THREAD_REG(%g6, %g1)
			
 
				 	ldub		[%g6 + TI_FPSAVED], %g5
			
@@ -226,7 +226,7 @@ fp_other_bounce:
 
				 	call		do_fpother
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 	.globl		do_fpother_check_fitos
			
 
				 	.align		32
			
@@ -489,7 +489,7 @@ utrap_trap:		/* %g3=handler,%g4=level */
 
				         call		bad_trap
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 invoke_utrap:
			
 
				 	sllx		%g3, 3, %g3
			
@@ -607,7 +607,7 @@ __spitfire_cee_trap_continue:
 
				 	call		spitfire_access_error
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 	/* This is the trap handler entry point for ECC correctable
			
 
				 	 * errors.  They are corrected, but we listen for the trap
			
@@ -686,7 +686,7 @@ __spitfire_data_access_exception_tl1:
 
				 	call		spitfire_data_access_exception_tl1
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 __spitfire_data_access_exception:
			
 
				 	rdpr		%pstate, %g4
			
@@ -705,7 +705,7 @@ __spitfire_data_access_exception:
 
				 	call		spitfire_data_access_exception
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 	.globl		__spitfire_insn_access_exception
			
 
				 	.globl		__spitfire_insn_access_exception_tl1
			
@@ -725,7 +725,7 @@ __spitfire_insn_access_exception_tl1:
 
				 	call		spitfire_insn_access_exception_tl1
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 __spitfire_insn_access_exception:
			
 
				 	rdpr		%pstate, %g4
			
@@ -743,7 +743,7 @@ __spitfire_insn_access_exception:
 
				 	call		spitfire_insn_access_exception
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 	/* These get patched into the trap table at boot time
			
 
				 	 * once we know we have a cheetah processor.
			
@@ -937,7 +937,7 @@ do_dcpe_tl1_fatal:
 
				 	call		cheetah_plus_parity_error
			
 
				 	 add		%sp, PTREGS_OFF, %o1
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 do_icpe_tl1:
			
 
				 	rdpr		%tl, %g1		! Save original trap level
			
@@ -979,7 +979,7 @@ do_icpe_tl1_fatal:
 
				 	call		cheetah_plus_parity_error
			
 
				 	 add		%sp, PTREGS_OFF, %o1
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 	
			
 
				 dcpe_icpe_tl1_common:
			
 
				 	/* Flush D-cache, re-enable D/I caches in DCU and finally
			
@@ -1281,7 +1281,7 @@ __do_privact:
 
				 	call		do_privact
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 	.globl		do_mna
			
 
				 do_mna:
			
@@ -1308,7 +1308,7 @@ do_mna:
 
				 	call		mem_address_unaligned
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 	.globl		do_lddfmna
			
 
				 do_lddfmna:
			
@@ -1326,7 +1326,7 @@ do_lddfmna:
 
				 	call		handle_lddfmna
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 	.globl		do_stdfmna
			
 
				 do_stdfmna:
			
@@ -1344,7 +1344,7 @@ do_stdfmna:
 
				 	call		handle_stdfmna
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				 	ba,pt		%xcc, rtrap
			
 
				-	 clr		%l6
			
 
				+	 nop
			
 
				 
			
 
				 	.globl	breakpoint_trap
			
 
				 breakpoint_trap:
			
@@ -1424,13 +1424,13 @@ sys32_rt_sigreturn:
 
				 1:		ldx		[%curptr + TI_FLAGS], %l5
			
 
				 		andcc		%l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0
			
 
				 		be,pt		%icc, rtrap
			
 
				-		 clr		%l6
			
 
				+		 nop
			
 
				 		add		%sp, PTREGS_OFF, %o0
			
 
				 		call		syscall_trace
			
 
				 		 mov		1, %o1
			
 
				 
			
 
				 		ba,pt		%xcc, rtrap
			
 
				-		 clr		%l6
			
 
				+		 nop
			
 
				 
			
 
				 	/* This is how fork() was meant to be done, 8 instruction entry.
			
 
				 	 *
			
@@ -1559,7 +1559,7 @@ linux_sparc_syscall32:
 
				 
			
 
				 	/* Linux native system calls enter here... */
			
 
				 	.align	32
			
 
				-	.globl	linux_sparc_syscall, ret_sys_call
			
 
				+	.globl	linux_sparc_syscall
			
 
				 linux_sparc_syscall:
			
 
				 	/* Direct access to user regs, much faster. */
			
 
				 	cmp		%g1, NR_SYSCALLS			! IEU1	Group
			
@@ -1605,7 +1605,7 @@ ret_sys_call:
 
				 	bne,pn		%icc, linux_syscall_trace2
			
 
				 	 add		%l1, 0x4, %l2			! npc = npc+4
			
 
				 	stx		%l1, [%sp + PTREGS_OFF + PT_V9_TPC]
			
 
				-	ba,pt		%xcc, rtrap_clr_l6
			
 
				+	ba,pt		%xcc, rtrap
			
 
				 	 stx		%l2, [%sp + PTREGS_OFF + PT_V9_TNPC]
			
 
				 
			
 
				 1:
			
@@ -1616,7 +1616,6 @@ ret_sys_call:
 
				 	sub		%g0, %o0, %o0
			
 
				 	or		%g3, %g2, %g3
			
 
				 	stx		%o0, [%sp + PTREGS_OFF + PT_V9_I0]
			
 
				-	mov		1, %l6
			
 
				 	stx		%g3, [%sp + PTREGS_OFF + PT_V9_TSTATE]
			
 
				 	bne,pn		%icc, linux_syscall_trace2
			
 
				 	 add		%l1, 0x4, %l2			! npc = npc+4
			
--- a/arch/sparc64/kernel/entry.h
+++ b/arch/sparc64/kernel/entry.h
@@ -20,7 +20,6 @@ extern void timer_interrupt(int irq, struct pt_regs *regs);
 
				 
			
 
				 extern void do_notify_resume(struct pt_regs *regs,
			
 
				 			     unsigned long orig_i0,
			
 
				-			     int restart_syscall,
			
 
				 			     unsigned long thread_info_flags);
			
 
				 
			
 
				 extern asmlinkage void syscall_trace(struct pt_regs *regs,
			
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -53,7 +53,11 @@ etrap_irq:
 
				 		stx	%g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC]
			
 
				 		rd	%y, %g3
			
 
				 		stx	%g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC]
			
 
				+		rdpr	%tt, %g1
			
 
				 		st	%g3, [%g2 + STACKFRAME_SZ + PT_V9_Y]
			
 
				+		sethi	%hi(PT_REGS_MAGIC), %g3
			
 
				+		or	%g3, %g1, %g1
			
 
				+		st	%g1, [%g2 + STACKFRAME_SZ + PT_V9_MAGIC]
			
 
				 
			
 
				 		rdpr	%cansave, %g1
			
 
				 		brnz,pt %g1, etrap_save
			
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -173,9 +173,11 @@ void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long np
 
				 }
			
 
				 
			
 
				 int iommu_table_init(struct iommu *iommu, int tsbsize,
			
 
				-		     u32 dma_offset, u32 dma_addr_mask)
			
 
				+		     u32 dma_offset, u32 dma_addr_mask,
			
 
				+		     int numa_node)
			
 
				 {
			
 
				-	unsigned long i, tsbbase, order, sz, num_tsb_entries;
			
 
				+	unsigned long i, order, sz, num_tsb_entries;
			
 
				+	struct page *page;
			
 
				 
			
 
				 	num_tsb_entries = tsbsize / sizeof(iopte_t);
			
 
				 
			
@@ -188,11 +190,12 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
 
				 	/* Allocate and initialize the free area map.  */
			
 
				 	sz = num_tsb_entries / 8;
			
 
				 	sz = (sz + 7UL) & ~7UL;
			
 
				-	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
			
 
				+	iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
			
 
				 	if (!iommu->arena.map) {
			
 
				 		printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
			
 
				 		return -ENOMEM;
			
 
				 	}
			
 
				+	memset(iommu->arena.map, 0, sz);
			
 
				 	iommu->arena.limit = num_tsb_entries;
			
 
				 
			
 
				 	if (tlb_type != hypervisor)
			
@@ -201,21 +204,23 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
 
				 	/* Allocate and initialize the dummy page which we
			
 
				 	 * set inactive IO PTEs to point to.
			
 
				 	 */
			
 
				-	iommu->dummy_page = get_zeroed_page(GFP_KERNEL);
			
 
				-	if (!iommu->dummy_page) {
			
 
				+	page = alloc_pages_node(numa_node, GFP_KERNEL, 0);
			
 
				+	if (!page) {
			
 
				 		printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n");
			
 
				 		goto out_free_map;
			
 
				 	}
			
 
				+	iommu->dummy_page = (unsigned long) page_address(page);
			
 
				+	memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
			
 
				 	iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
			
 
				 
			
 
				 	/* Now allocate and setup the IOMMU page table itself.  */
			
 
				 	order = get_order(tsbsize);
			
 
				-	tsbbase = __get_free_pages(GFP_KERNEL, order);
			
 
				-	if (!tsbbase) {
			
 
				+	page = alloc_pages_node(numa_node, GFP_KERNEL, order);
			
 
				+	if (!page) {
			
 
				 		printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n");
			
 
				 		goto out_free_dummy_page;
			
 
				 	}
			
 
				-	iommu->page_table = (iopte_t *)tsbbase;
			
 
				+	iommu->page_table = (iopte_t *)page_address(page);
			
 
				 
			
 
				 	for (i = 0; i < num_tsb_entries; i++)
			
 
				 		iopte_make_dummy(iommu, &iommu->page_table[i]);
			
@@ -276,20 +281,24 @@ static inline void iommu_free_ctx(struct iommu *iommu, int ctx)
 
				 static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
			
 
				 				   dma_addr_t *dma_addrp, gfp_t gfp)
			
 
				 {
			
 
				+	unsigned long flags, order, first_page;
			
 
				 	struct iommu *iommu;
			
 
				+	struct page *page;
			
 
				+	int npages, nid;
			
 
				 	iopte_t *iopte;
			
 
				-	unsigned long flags, order, first_page;
			
 
				 	void *ret;
			
 
				-	int npages;
			
 
				 
			
 
				 	size = IO_PAGE_ALIGN(size);
			
 
				 	order = get_order(size);
			
 
				 	if (order >= 10)
			
 
				 		return NULL;
			
 
				 
			
 
				-	first_page = __get_free_pages(gfp, order);
			
 
				-	if (first_page == 0UL)
			
 
				+	nid = dev->archdata.numa_node;
			
 
				+	page = alloc_pages_node(nid, gfp, order);
			
 
				+	if (unlikely(!page))
			
 
				 		return NULL;
			
 
				+
			
 
				+	first_page = (unsigned long) page_address(page);
			
 
				 	memset((char *)first_page, 0, PAGE_SIZE << order);
			
 
				 
			
 
				 	iommu = dev->archdata.iommu;
			
--- a/arch/sparc64/kernel/isa.c
+++ b/arch/sparc64/kernel/isa.c
@@ -92,6 +92,7 @@ static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br)
 
				 		sd->op = &isa_dev->ofdev;
			
 
				 		sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu;
			
 
				 		sd->stc = isa_br->ofdev.dev.parent->archdata.stc;
			
 
				+		sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node;
			
 
				 
			
 
				 		isa_dev->ofdev.node = dp;
			
 
				 		isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev;
			
--- a/arch/sparc64/kernel/mdesc.c
+++ b/arch/sparc64/kernel/mdesc.c
@@ -1,10 +1,10 @@
 
				 /* mdesc.c: Sun4V machine description handling.
			
 
				  *
			
 
				- * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
			
 
				+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
			
 
				  */
			
 
				 #include <linux/kernel.h>
			
 
				 #include <linux/types.h>
			
 
				-#include <linux/bootmem.h>
			
 
				+#include <linux/lmb.h>
			
 
				 #include <linux/log2.h>
			
 
				 #include <linux/list.h>
			
 
				 #include <linux/slab.h>
			
@@ -84,24 +84,28 @@ static void mdesc_handle_init(struct mdesc_handle *hp,
 
				 	hp->handle_size = handle_size;
			
 
				 }
			
 
				 
			
 
				-static struct mdesc_handle * __init mdesc_bootmem_alloc(unsigned int mdesc_size)
			
 
				+static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size)
			
 
				 {
			
 
				-	struct mdesc_handle *hp;
			
 
				 	unsigned int handle_size, alloc_size;
			
 
				+	struct mdesc_handle *hp;
			
 
				+	unsigned long paddr;
			
 
				 
			
 
				 	handle_size = (sizeof(struct mdesc_handle) -
			
 
				 		       sizeof(struct mdesc_hdr) +
			
 
				 		       mdesc_size);
			
 
				 	alloc_size = PAGE_ALIGN(handle_size);
			
 
				 
			
 
				-	hp = __alloc_bootmem(alloc_size, PAGE_SIZE, 0UL);
			
 
				-	if (hp)
			
 
				-		mdesc_handle_init(hp, handle_size, hp);
			
 
				+	paddr = lmb_alloc(alloc_size, PAGE_SIZE);
			
 
				 
			
 
				+	hp = NULL;
			
 
				+	if (paddr) {
			
 
				+		hp = __va(paddr);
			
 
				+		mdesc_handle_init(hp, handle_size, hp);
			
 
				+	}
			
 
				 	return hp;
			
 
				 }
			
 
				 
			
 
				-static void mdesc_bootmem_free(struct mdesc_handle *hp)
			
 
				+static void mdesc_lmb_free(struct mdesc_handle *hp)
			
 
				 {
			
 
				 	unsigned int alloc_size, handle_size = hp->handle_size;
			
 
				 	unsigned long start, end;
			
@@ -124,9 +128,9 @@ static void mdesc_bootmem_free(struct mdesc_handle *hp)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static struct mdesc_mem_ops bootmem_mdesc_ops = {
			
 
				-	.alloc = mdesc_bootmem_alloc,
			
 
				-	.free  = mdesc_bootmem_free,
			
 
				+static struct mdesc_mem_ops lmb_mdesc_ops = {
			
 
				+	.alloc = mdesc_lmb_alloc,
			
 
				+	.free  = mdesc_lmb_free,
			
 
				 };
			
 
				 
			
 
				 static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
			
@@ -888,7 +892,7 @@ void __init sun4v_mdesc_init(void)
 
				 
			
 
				 	printk("MDESC: Size is %lu bytes.\n", len);
			
 
				 
			
 
				-	hp = mdesc_alloc(len, &bootmem_mdesc_ops);
			
 
				+	hp = mdesc_alloc(len, &lmb_mdesc_ops);
			
 
				 	if (hp == NULL) {
			
 
				 		prom_printf("MDESC: alloc of %lu bytes failed.\n", len);
			
 
				 		prom_halt();
			
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -6,6 +6,7 @@
 
				 #include <linux/mod_devicetable.h>
			
 
				 #include <linux/slab.h>
			
 
				 #include <linux/errno.h>
			
 
				+#include <linux/irq.h>
			
 
				 #include <linux/of_device.h>
			
 
				 #include <linux/of_platform.h>
			
 
				 
			
@@ -660,6 +661,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
 
				 	struct device_node *dp = op->node;
			
 
				 	struct device_node *pp, *ip;
			
 
				 	unsigned int orig_irq = irq;
			
 
				+	int nid;
			
 
				 
			
 
				 	if (irq == 0xffffffff)
			
 
				 		return irq;
			
@@ -672,7 +674,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
 
				 			printk("%s: direct translate %x --> %x\n",
			
 
				 			       dp->full_name, orig_irq, irq);
			
 
				 
			
 
				-		return irq;
			
 
				+		goto out;
			
 
				 	}
			
 
				 
			
 
				 	/* Something more complicated.  Walk up to the root, applying
			
@@ -744,6 +746,14 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
 
				 		printk("%s: Apply IRQ trans [%s] %x --> %x\n",
			
 
				 		       op->node->full_name, ip->full_name, orig_irq, irq);
			
 
				 
			
 
				+out:
			
 
				+	nid = of_node_to_nid(dp);
			
 
				+	if (nid != -1) {
			
 
				+		cpumask_t numa_mask = node_to_cpumask(nid);
			
 
				+
			
 
				+		irq_set_affinity(irq, numa_mask);
			
 
				+	}
			
 
				+
			
 
				 	return irq;
			
 
				 }
			
 
				 
			
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -369,10 +369,12 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 
				 	sd->host_controller = pbm;
			
 
				 	sd->prom_node = node;
			
 
				 	sd->op = of_find_device_by_node(node);
			
 
				+	sd->numa_node = pbm->numa_node;
			
 
				 
			
 
				 	sd = &sd->op->dev.archdata;
			
 
				 	sd->iommu = pbm->iommu;
			
 
				 	sd->stc = &pbm->stc;
			
 
				+	sd->numa_node = pbm->numa_node;
			
 
				 
			
 
				 	type = of_get_property(node, "device_type", NULL);
			
 
				 	if (type == NULL)
			
@@ -1159,6 +1161,16 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+#ifdef CONFIG_NUMA
			
 
				+int pcibus_to_node(struct pci_bus *pbus)
			
 
				+{
			
 
				+	struct pci_pbm_info *pbm = pbus->sysdata;
			
 
				+
			
 
				+	return pbm->numa_node;
			
 
				+}
			
 
				+EXPORT_SYMBOL(pcibus_to_node);
			
 
				+#endif
			
 
				+
			
 
				 /* Return the domain nuber for this pci bus */
			
 
				 
			
 
				 int pci_domain_nr(struct pci_bus *pbus)
			
--- a/arch/sparc64/kernel/pci_fire.c
+++ b/arch/sparc64/kernel/pci_fire.c
@@ -71,7 +71,8 @@ static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm)
 
				 	 */
			
 
				 	fire_write(iommu->iommu_flushinv, ~(u64)0);
			
 
				 
			
 
				-	err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask);
			
 
				+	err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
			
 
				+			       pbm->numa_node);
			
 
				 	if (err)
			
 
				 		return err;
			
 
				 
			
@@ -449,6 +450,8 @@ static int __init pci_fire_pbm_init(struct pci_controller_info *p,
 
				 	pbm->next = pci_pbm_root;
			
 
				 	pci_pbm_root = pbm;
			
 
				 
			
 
				+	pbm->numa_node = -1;
			
 
				+
			
 
				 	pbm->scan_bus = pci_fire_scan_bus;
			
 
				 	pbm->pci_ops = &sun4u_pci_ops;
			
 
				 	pbm->config_space_reg_bits = 12;
			
--- a/arch/sparc64/kernel/pci_impl.h
+++ b/arch/sparc64/kernel/pci_impl.h
@@ -148,6 +148,8 @@ struct pci_pbm_info {
 
				 	struct pci_bus			*pci_bus;
			
 
				 	void (*scan_bus)(struct pci_pbm_info *);
			
 
				 	struct pci_ops			*pci_ops;
			
 
				+
			
 
				+	int				numa_node;
			
 
				 };
			
 
				 
			
 
				 struct pci_controller_info {
			
@@ -161,8 +163,6 @@ extern struct pci_pbm_info *pci_pbm_root;
 
				 extern int pci_num_pbms;
			
 
				 
			
 
				 /* PCI bus scanning and fixup support. */
			
 
				-extern void pci_iommu_table_init(struct iommu *iommu, int tsbsize,
			
 
				-				 u32 dma_offset, u32 dma_addr_mask);
			
 
				 extern void pci_get_pbm_props(struct pci_pbm_info *pbm);
			
 
				 extern struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm);
			
 
				 extern void pci_determine_mem_io_space(struct pci_pbm_info *pbm);
			
--- a/arch/sparc64/kernel/pci_msi.c
+++ b/arch/sparc64/kernel/pci_msi.c
@@ -279,11 +279,17 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 
				 				 unsigned long devino)
			
 
				 {
			
 
				 	int irq = ops->msiq_build_irq(pbm, msiqid, devino);
			
 
				-	int err;
			
 
				+	int err, nid;
			
 
				 
			
 
				 	if (irq < 0)
			
 
				 		return irq;
			
 
				 
			
 
				+	nid = pbm->numa_node;
			
 
				+	if (nid != -1) {
			
 
				+		cpumask_t numa_mask = node_to_cpumask(nid);
			
 
				+
			
 
				+		irq_set_affinity(irq, numa_mask);
			
 
				+	}
			
 
				 	err = request_irq(irq, sparc64_msiq_interrupt, 0,
			
 
				 			  "MSIQ",
			
 
				 			  &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
			
--- a/arch/sparc64/kernel/pci_psycho.c
+++ b/arch/sparc64/kernel/pci_psycho.c
@@ -848,7 +848,8 @@ static int psycho_iommu_init(struct pci_pbm_info *pbm)
 
				 	/* Leave diag mode enabled for full-flushing done
			
 
				 	 * in pci_iommu.c
			
 
				 	 */
			
 
				-	err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff);
			
 
				+	err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff,
			
 
				+			       pbm->numa_node);
			
 
				 	if (err)
			
 
				 		return err;
			
 
				 
			
@@ -979,6 +980,8 @@ static void __init psycho_pbm_init(struct pci_controller_info *p,
 
				 	pbm->next = pci_pbm_root;
			
 
				 	pci_pbm_root = pbm;
			
 
				 
			
 
				+	pbm->numa_node = -1;
			
 
				+
			
 
				 	pbm->scan_bus = psycho_scan_bus;
			
 
				 	pbm->pci_ops = &sun4u_pci_ops;
			
 
				 	pbm->config_space_reg_bits = 8;
			
--- a/arch/sparc64/kernel/pci_sabre.c
+++ b/arch/sparc64/kernel/pci_sabre.c
@@ -704,7 +704,7 @@ static int sabre_iommu_init(struct pci_pbm_info *pbm,
 
				 	 * in pci_iommu.c
			
 
				 	 */
			
 
				 	err = iommu_table_init(iommu, tsbsize * 1024 * 8,
			
 
				-			       dvma_offset, dma_mask);
			
 
				+			       dvma_offset, dma_mask, pbm->numa_node);
			
 
				 	if (err)
			
 
				 		return err;
			
 
				 
			
@@ -737,6 +737,8 @@ static void __init sabre_pbm_init(struct pci_controller_info *p,
 
				 	pbm->name = dp->full_name;
			
 
				 	printk("%s: SABRE PCI Bus Module\n", pbm->name);
			
 
				 
			
 
				+	pbm->numa_node = -1;
			
 
				+
			
 
				 	pbm->scan_bus = sabre_scan_bus;
			
 
				 	pbm->pci_ops = &sun4u_pci_ops;
			
 
				 	pbm->config_space_reg_bits = 8;
			
--- a/arch/sparc64/kernel/pci_schizo.c
+++ b/arch/sparc64/kernel/pci_schizo.c
@@ -1220,7 +1220,8 @@ static int schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
 
				 	/* Leave diag mode enabled for full-flushing done
			
 
				 	 * in pci_iommu.c
			
 
				 	 */
			
 
				-	err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask);
			
 
				+	err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
			
 
				+			       pbm->numa_node);
			
 
				 	if (err)
			
 
				 		return err;
			
 
				 
			
@@ -1379,6 +1380,8 @@ static int __init schizo_pbm_init(struct pci_controller_info *p,
 
				 	pbm->next = pci_pbm_root;
			
 
				 	pci_pbm_root = pbm;
			
 
				 
			
 
				+	pbm->numa_node = -1;
			
 
				+
			
 
				 	pbm->scan_bus = schizo_scan_bus;
			
 
				 	pbm->pci_ops = &sun4u_pci_ops;
			
 
				 	pbm->config_space_reg_bits = 8;
			
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -127,10 +127,12 @@ static inline long iommu_batch_end(void)
 
				 static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
			
 
				 				   dma_addr_t *dma_addrp, gfp_t gfp)
			
 
				 {
			
 
				-	struct iommu *iommu;
			
 
				 	unsigned long flags, order, first_page, npages, n;
			
 
				+	struct iommu *iommu;
			
 
				+	struct page *page;
			
 
				 	void *ret;
			
 
				 	long entry;
			
 
				+	int nid;
			
 
				 
			
 
				 	size = IO_PAGE_ALIGN(size);
			
 
				 	order = get_order(size);
			
@@ -139,10 +141,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 
				 
			
 
				 	npages = size >> IO_PAGE_SHIFT;
			
 
				 
			
 
				-	first_page = __get_free_pages(gfp, order);
			
 
				-	if (unlikely(first_page == 0UL))
			
 
				+	nid = dev->archdata.numa_node;
			
 
				+	page = alloc_pages_node(nid, gfp, order);
			
 
				+	if (unlikely(!page))
			
 
				 		return NULL;
			
 
				 
			
 
				+	first_page = (unsigned long) page_address(page);
			
 
				 	memset((char *)first_page, 0, PAGE_SIZE << order);
			
 
				 
			
 
				 	iommu = dev->archdata.iommu;
			
@@ -899,6 +903,8 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p,
 
				 	pbm->next = pci_pbm_root;
			
 
				 	pci_pbm_root = pbm;
			
 
				 
			
 
				+	pbm->numa_node = of_node_to_nid(dp);
			
 
				+
			
 
				 	pbm->scan_bus = pci_sun4v_scan_bus;
			
 
				 	pbm->pci_ops = &sun4v_pci_ops;
			
 
				 	pbm->config_space_reg_bits = 12;
			
@@ -913,6 +919,7 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p,
 
				 	pbm->name = dp->full_name;
			
 
				 
			
 
				 	printk("%s: SUN4V PCI Bus Module\n", pbm->name);
			
 
				+	printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
			
 
				 
			
 
				 	pci_determine_mem_io_space(pbm);
			
 
				 
			
--- a/arch/sparc64/kernel/prom.c
+++ b/arch/sparc64/kernel/prom.c
@@ -19,8 +19,8 @@
 
				 #include <linux/types.h>
			
 
				 #include <linux/string.h>
			
 
				 #include <linux/mm.h>
			
 
				-#include <linux/bootmem.h>
			
 
				 #include <linux/module.h>
			
 
				+#include <linux/lmb.h>
			
 
				 
			
 
				 #include <asm/prom.h>
			
 
				 #include <asm/of_device.h>
			
@@ -122,16 +122,20 @@ int of_find_in_proplist(const char *list, const char *match, int len)
 
				 }
			
 
				 EXPORT_SYMBOL(of_find_in_proplist);
			
 
				 
			
 
				-static unsigned int prom_early_allocated;
			
 
				+static unsigned int prom_early_allocated __initdata;
			
 
				 
			
 
				 static void * __init prom_early_alloc(unsigned long size)
			
 
				 {
			
 
				+	unsigned long paddr = lmb_alloc(size, SMP_CACHE_BYTES);
			
 
				 	void *ret;
			
 
				 
			
 
				-	ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
			
 
				-	if (ret != NULL)
			
 
				-		memset(ret, 0, size);
			
 
				+	if (!paddr) {
			
 
				+		prom_printf("prom_early_alloc(%lu) failed\n", size);
			
 
				+		prom_halt();
			
 
				+	}
			
 
				 
			
 
				+	ret = __va(paddr);
			
 
				+	memset(ret, 0, size);
			
 
				 	prom_early_allocated += size;
			
 
				 
			
 
				 	return ret;
			
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -18,12 +18,6 @@
 
				 #define		RTRAP_PSTATE_IRQOFF	(PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV)
			
 
				 #define		RTRAP_PSTATE_AG_IRQOFF	(PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
			
 
				 
			
 
				-		/* Register %l6 keeps track of whether we are returning
			
 
				-		 * from a system call or not.  It is cleared if we call
			
 
				-		 * do_notify_resume, and it must not be otherwise modified
			
 
				-		 * until we fully commit to returning to userspace.
			
 
				-		 */
			
 
				-
			
 
				 		.text
			
 
				 		.align			32
			
 
				 __handle_softirq:
			
@@ -56,14 +50,12 @@ __handle_user_windows:
 
				 		be,pt			%xcc, __handle_user_windows_continue
			
 
				 		 nop
			
 
				 		mov			%l5, %o1
			
 
				-		mov			%l6, %o2
			
 
				 		add			%sp, PTREGS_OFF, %o0
			
 
				-		mov			%l0, %o3
			
 
				+		mov			%l0, %o2
			
 
				 
			
 
				 		call			do_notify_resume
			
 
				 		 wrpr			%g0, RTRAP_PSTATE, %pstate
			
 
				 		wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
			
 
				-		clr			%l6
			
 
				 		/* Signal delivery can modify pt_regs tstate, so we must
			
 
				 		 * reload it.
			
 
				 		 */
			
@@ -99,14 +91,12 @@ __handle_perfctrs:
 
				 		be,pt			%xcc, __handle_perfctrs_continue
			
 
				 		 sethi			%hi(TSTATE_PEF), %o0
			
 
				 		mov			%l5, %o1
			
 
				-		mov			%l6, %o2
			
 
				 		add			%sp, PTREGS_OFF, %o0
			
 
				-		mov			%l0, %o3
			
 
				+		mov			%l0, %o2
			
 
				 		call			do_notify_resume
			
 
				 
			
 
				 		 wrpr			%g0, RTRAP_PSTATE, %pstate
			
 
				 		wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
			
 
				-		clr			%l6
			
 
				 		/* Signal delivery can modify pt_regs tstate, so we must
			
 
				 		 * reload it.
			
 
				 		 */
			
@@ -127,13 +117,11 @@ __handle_userfpu:
 
				 
			
 
				 __handle_signal:
			
 
				 		mov			%l5, %o1
			
 
				-		mov			%l6, %o2
			
 
				 		add			%sp, PTREGS_OFF, %o0
			
 
				-		mov			%l0, %o3
			
 
				+		mov			%l0, %o2
			
 
				 		call			do_notify_resume
			
 
				 		 wrpr			%g0, RTRAP_PSTATE, %pstate
			
 
				 		wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
			
 
				-		clr			%l6
			
 
				 
			
 
				 		/* Signal delivery can modify pt_regs tstate, so we must
			
 
				 		 * reload it.
			
@@ -145,9 +133,8 @@ __handle_signal:
 
				 		 andn			%l1, %l4, %l1
			
 
				 
			
 
				 		.align			64
			
 
				-		.globl			rtrap_irq, rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall
			
 
				+		.globl			rtrap_irq, rtrap, irqsz_patchme, rtrap_xcall
			
 
				 rtrap_irq:
			
 
				-rtrap_clr_l6:	clr			%l6
			
 
				 rtrap:
			
 
				 #ifndef CONFIG_SMP
			
 
				 		sethi			%hi(per_cpu____cpu_data), %l0
			
--- a/arch/sparc64/kernel/sbus.c
+++ b/arch/sparc64/kernel/sbus.c
@@ -544,6 +544,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
 
				 
			
 
				 	sbus->ofdev.dev.archdata.iommu = iommu;
			
 
				 	sbus->ofdev.dev.archdata.stc = strbuf;
			
 
				+	sbus->ofdev.dev.archdata.numa_node = -1;
			
 
				 
			
 
				 	reg_base = regs + SYSIO_IOMMUREG_BASE;
			
 
				 	iommu->iommu_control = reg_base + IOMMU_CONTROL;
			
@@ -575,7 +576,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
 
				 	       sbus->portid, regs);
			
 
				 
			
 
				 	/* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
			
 
				-	if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff))
			
 
				+	if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff, -1))
			
 
				 		goto fatal_memory_error;
			
 
				 
			
 
				 	control = upa_readq(iommu->iommu_control);
			
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -82,7 +82,7 @@ unsigned long cmdline_memory_size = 0;
 
				 static struct console prom_early_console = {
			
 
				 	.name =		"earlyprom",
			
 
				 	.write =	prom_console_write,
			
 
				-	.flags =	CON_PRINTBUFFER | CON_BOOT,
			
 
				+	.flags =	CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME,
			
 
				 	.index =	-1,
			
 
				 };
			
 
				 
			
@@ -281,6 +281,7 @@ void __init setup_arch(char **cmdline_p)
 
				 	/* Initialize PROM console and command line. */
			
 
				 	*cmdline_p = prom_getbootargs();
			
 
				 	strcpy(boot_command_line, *cmdline_p);
			
 
				+	parse_early_param();
			
 
				 
			
 
				 	boot_flags_init(*cmdline_p);
			
 
				 	register_console(&prom_early_console);
			
--- a/arch/sparc64/kernel/signal.c
+++ b/arch/sparc64/kernel/signal.c
@@ -510,15 +510,20 @@ static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
 
				  * want to handle. Thus you cannot kill init even with a SIGKILL even by
			
 
				  * mistake.
			
 
				  */
			
 
				-static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_syscall)
			
 
				+static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
			
 
				 {
			
 
				-	siginfo_t info;
			
 
				 	struct signal_deliver_cookie cookie;
			
 
				 	struct k_sigaction ka;
			
 
				-	int signr;
			
 
				 	sigset_t *oldset;
			
 
				+	siginfo_t info;
			
 
				+	int signr, tt;
			
 
				 	
			
 
				-	cookie.restart_syscall = restart_syscall;
			
 
				+	tt = regs->magic & 0x1ff;
			
 
				+	if (tt == 0x110 || tt == 0x111 || tt == 0x16d) {
			
 
				+		regs->magic &= ~0x1ff;
			
 
				+		cookie.restart_syscall = 1;
			
 
				+	} else
			
 
				+		cookie.restart_syscall = 0;
			
 
				 	cookie.orig_i0 = orig_i0;
			
 
				 
			
 
				 	if (test_thread_flag(TIF_RESTORE_SIGMASK))
			
@@ -529,9 +534,8 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_s
 
				 #ifdef CONFIG_SPARC32_COMPAT
			
 
				 	if (test_thread_flag(TIF_32BIT)) {
			
 
				 		extern void do_signal32(sigset_t *, struct pt_regs *,
			
 
				-					unsigned long, int);
			
 
				-		do_signal32(oldset, regs, orig_i0,
			
 
				-			    cookie.restart_syscall);
			
 
				+					struct signal_deliver_cookie *);
			
 
				+		do_signal32(oldset, regs, &cookie);
			
 
				 		return;
			
 
				 	}
			
 
				 #endif	
			
@@ -539,7 +543,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_s
 
				 	signr = get_signal_to_deliver(&info, &ka, regs, &cookie);
			
 
				 	if (signr > 0) {
			
 
				 		if (cookie.restart_syscall)
			
 
				-			syscall_restart(orig_i0, regs, &ka.sa);
			
 
				+			syscall_restart(cookie.orig_i0, regs, &ka.sa);
			
 
				 		handle_signal(signr, &ka, &info, oldset, regs);
			
 
				 
			
 
				 		/* a signal was successfully delivered; the saved
			
@@ -576,11 +580,10 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_s
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, int restart_syscall,
			
 
				-		      unsigned long thread_info_flags)
			
 
				+void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
			
 
				 {
			
 
				 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
			
 
				-		do_signal(regs, orig_i0, restart_syscall);
			
 
				+		do_signal(regs, orig_i0);
			
 
				 }
			
 
				 
			
 
				 void ptrace_signal_deliver(struct pt_regs *regs, void *cookie)
			
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -982,20 +982,16 @@ static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs
 
				  * mistake.
			
 
				  */
			
 
				 void do_signal32(sigset_t *oldset, struct pt_regs * regs,
			
 
				-		 unsigned long orig_i0, int restart_syscall)
			
 
				+		 struct signal_deliver_cookie *cookie)
			
 
				 {
			
 
				-	siginfo_t info;
			
 
				-	struct signal_deliver_cookie cookie;
			
 
				 	struct k_sigaction ka;
			
 
				+	siginfo_t info;
			
 
				 	int signr;
			
 
				 	
			
 
				-	cookie.restart_syscall = restart_syscall;
			
 
				-	cookie.orig_i0 = orig_i0;
			
 
				-
			
 
				-	signr = get_signal_to_deliver(&info, &ka, regs, &cookie);
			
 
				+	signr = get_signal_to_deliver(&info, &ka, regs, cookie);
			
 
				 	if (signr > 0) {
			
 
				-		if (cookie.restart_syscall)
			
 
				-			syscall_restart32(orig_i0, regs, &ka.sa);
			
 
				+		if (cookie->restart_syscall)
			
 
				+			syscall_restart32(cookie->orig_i0, regs, &ka.sa);
			
 
				 		handle_signal32(signr, &ka, &info, oldset, regs);
			
 
				 
			
 
				 		/* a signal was successfully delivered; the saved
			
@@ -1007,16 +1003,16 @@ void do_signal32(sigset_t *oldset, struct pt_regs * regs,
 
				 			clear_thread_flag(TIF_RESTORE_SIGMASK);
			
 
				 		return;
			
 
				 	}
			
 
				-	if (cookie.restart_syscall &&
			
 
				+	if (cookie->restart_syscall &&
			
 
				 	    (regs->u_regs[UREG_I0] == ERESTARTNOHAND ||
			
 
				 	     regs->u_regs[UREG_I0] == ERESTARTSYS ||
			
 
				 	     regs->u_regs[UREG_I0] == ERESTARTNOINTR)) {
			
 
				 		/* replay the system call when we are done */
			
 
				-		regs->u_regs[UREG_I0] = cookie.orig_i0;
			
 
				+		regs->u_regs[UREG_I0] = cookie->orig_i0;
			
 
				 		regs->tpc -= 4;
			
 
				 		regs->tnpc -= 4;
			
 
				 	}
			
 
				-	if (cookie.restart_syscall &&
			
 
				+	if (cookie->restart_syscall &&
			
 
				 	    regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) {
			
 
				 		regs->u_regs[UREG_G1] = __NR_restart_syscall;
			
 
				 		regs->tpc -= 4;
			
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -20,7 +20,7 @@
 
				 #include <linux/cache.h>
			
 
				 #include <linux/jiffies.h>
			
 
				 #include <linux/profile.h>
			
 
				-#include <linux/bootmem.h>
			
 
				+#include <linux/lmb.h>
			
 
				 
			
 
				 #include <asm/head.h>
			
 
				 #include <asm/ptrace.h>
			
@@ -1431,7 +1431,7 @@ EXPORT_SYMBOL(__per_cpu_shift);
 
				 
			
 
				 void __init real_setup_per_cpu_areas(void)
			
 
				 {
			
 
				-	unsigned long goal, size, i;
			
 
				+	unsigned long paddr, goal, size, i;
			
 
				 	char *ptr;
			
 
				 
			
 
				 	/* Copy section for each CPU (we discard the original) */
			
@@ -1441,8 +1441,13 @@ void __init real_setup_per_cpu_areas(void)
 
				 	for (size = PAGE_SIZE; size < goal; size <<= 1UL)
			
 
				 		__per_cpu_shift++;
			
 
				 
			
 
				-	ptr = alloc_bootmem_pages(size * NR_CPUS);
			
 
				+	paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
			
 
				+	if (!paddr) {
			
 
				+		prom_printf("Cannot allocate per-cpu memory.\n");
			
 
				+		prom_halt();
			
 
				+	}
			
 
				 
			
 
				+	ptr = __va(paddr);
			
 
				 	__per_cpu_base = ptr - __per_cpu_start;
			
 
				 
			
 
				 	for (i = 0; i < NR_CPUS; i++, ptr += size)
			
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -68,8 +68,6 @@ extern void *__memscan_zero(void *, size_t);
 
				 extern void *__memscan_generic(void *, int, size_t);
			
 
				 extern int __memcmp(const void *, const void *, __kernel_size_t);
			
 
				 extern __kernel_size_t strlen(const char *);
			
 
				-extern void linux_sparc_syscall(void);
			
 
				-extern void rtrap(void);
			
 
				 extern void show_regs(struct pt_regs *);
			
 
				 extern void syscall_trace(struct pt_regs *, int);
			
 
				 extern void sys_sigsuspend(void);
			
--- a/arch/sparc64/kernel/stacktrace.c
+++ b/arch/sparc64/kernel/stacktrace.c
@@ -20,6 +20,8 @@ void save_stack_trace(struct stack_trace *trace)
 
				 	thread_base = (unsigned long) tp;
			
 
				 	do {
			
 
				 		struct reg_window *rw;
			
 
				+		struct pt_regs *regs;
			
 
				+		unsigned long pc;
			
 
				 
			
 
				 		/* Bogus frame pointer? */
			
 
				 		if (fp < (thread_base + sizeof(struct thread_info)) ||
			
@@ -27,11 +29,19 @@ void save_stack_trace(struct stack_trace *trace)
 
				 			break;
			
 
				 
			
 
				 		rw = (struct reg_window *) fp;
			
 
				+		regs = (struct pt_regs *) (rw + 1);
			
 
				+
			
 
				+		if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC) {
			
 
				+			pc = regs->tpc;
			
 
				+			fp = regs->u_regs[UREG_I6] + STACK_BIAS;
			
 
				+		} else {
			
 
				+			pc = rw->ins[7];
			
 
				+			fp = rw->ins[6] + STACK_BIAS;
			
 
				+		}
			
 
				+
			
 
				 		if (trace->skip > 0)
			
 
				 			trace->skip--;
			
 
				 		else
			
 
				-			trace->entries[trace->nr_entries++] = rw->ins[7];
			
 
				-
			
 
				-		fp = rw->ins[6] + STACK_BIAS;
			
 
				+			trace->entries[trace->nr_entries++] = pc;
			
 
				 	} while (trace->nr_entries < trace->max_entries);
			
 
				 }
			
--- a/arch/sparc64/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc64/kernel/sun4v_tlb_miss.S
@@ -262,7 +262,7 @@ sun4v_iacc:
 
				 	mov	%l5, %o2
			
 
				 	call	sun4v_insn_access_exception
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 	/* Instruction Access Exception, tl1. */
			
 
				 sun4v_iacc_tl1:
			
@@ -278,7 +278,7 @@ sun4v_iacc_tl1:
 
				 	mov	%l5, %o2
			
 
				 	call	sun4v_insn_access_exception_tl1
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 	/* Data Access Exception, tl0. */
			
 
				 sun4v_dacc:
			
@@ -294,7 +294,7 @@ sun4v_dacc:
 
				 	mov	%l5, %o2
			
 
				 	call	sun4v_data_access_exception
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 	/* Data Access Exception, tl1. */
			
 
				 sun4v_dacc_tl1:
			
@@ -310,7 +310,7 @@ sun4v_dacc_tl1:
 
				 	mov	%l5, %o2
			
 
				 	call	sun4v_data_access_exception_tl1
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 	/* Memory Address Unaligned.  */
			
 
				 sun4v_mna:
			
@@ -344,7 +344,7 @@ sun4v_mna:
 
				 	mov	%l5, %o2
			
 
				 	call	sun4v_do_mna
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 	/* Privileged Action.  */
			
 
				 sun4v_privact:
			
@@ -352,7 +352,7 @@ sun4v_privact:
 
				 	 rd	%pc, %g7
			
 
				 	call	do_privact
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 	/* Unaligned ldd float, tl0. */
			
 
				 sun4v_lddfmna:
			
@@ -368,7 +368,7 @@ sun4v_lddfmna:
 
				 	mov	%l5, %o2
			
 
				 	call	handle_lddfmna
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 	/* Unaligned std float, tl0. */
			
 
				 sun4v_stdfmna:
			
@@ -384,7 +384,7 @@ sun4v_stdfmna:
 
				 	mov	%l5, %o2
			
 
				 	call	handle_stdfmna
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 #define BRANCH_ALWAYS	0x10680000
			
 
				 #define NOP		0x01000000
			
--- a/arch/sparc64/kernel/sysfs.c
+++ b/arch/sparc64/kernel/sysfs.c
@@ -273,10 +273,22 @@ static void __init check_mmu_stats(void)
 
				 		mmu_stats_supported = 1;
			
 
				 }
			
 
				 
			
 
				+static void register_nodes(void)
			
 
				+{
			
 
				+#ifdef CONFIG_NUMA
			
 
				+	int i;
			
 
				+
			
 
				+	for (i = 0; i < MAX_NUMNODES; i++)
			
 
				+		register_one_node(i);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				 static int __init topology_init(void)
			
 
				 {
			
 
				 	int cpu;
			
 
				 
			
 
				+	register_nodes();
			
 
				+
			
 
				 	check_mmu_stats();
			
 
				 
			
 
				 	register_cpu_notifier(&sysfs_cpu_nb);
			
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2091,9 +2091,8 @@ static void user_instruction_dump(unsigned int __user *pc)
 
				 
			
 
				 void show_stack(struct task_struct *tsk, unsigned long *_ksp)
			
 
				 {
			
 
				-	unsigned long pc, fp, thread_base, ksp;
			
 
				+	unsigned long fp, thread_base, ksp;
			
 
				 	struct thread_info *tp;
			
 
				-	struct reg_window *rw;
			
 
				 	int count = 0;
			
 
				 
			
 
				 	ksp = (unsigned long) _ksp;
			
@@ -2117,15 +2116,27 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 
				 	printk("\n");
			
 
				 #endif
			
 
				 	do {
			
 
				+		struct reg_window *rw;
			
 
				+		struct pt_regs *regs;
			
 
				+		unsigned long pc;
			
 
				+
			
 
				 		/* Bogus frame pointer? */
			
 
				 		if (fp < (thread_base + sizeof(struct thread_info)) ||
			
 
				 		    fp >= (thread_base + THREAD_SIZE))
			
 
				 			break;
			
 
				 		rw = (struct reg_window *)fp;
			
 
				-		pc = rw->ins[7];
			
 
				+		regs = (struct pt_regs *) (rw + 1);
			
 
				+
			
 
				+		if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC) {
			
 
				+			pc = regs->tpc;
			
 
				+			fp = regs->u_regs[UREG_I6] + STACK_BIAS;
			
 
				+		} else {
			
 
				+			pc = rw->ins[7];
			
 
				+			fp = rw->ins[6] + STACK_BIAS;
			
 
				+		}
			
 
				+
			
 
				 		printk(" [%016lx] ", pc);
			
 
				 		print_symbol("%s\n", pc);
			
 
				-		fp = rw->ins[6] + STACK_BIAS;
			
 
				 	} while (++count < 16);
			
 
				 #ifndef CONFIG_KALLSYMS
			
 
				 	printk("\n");
			
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -275,7 +275,7 @@ sparc64_realfault_common:
 
				 	stx	%l5, [%g6 + TI_FAULT_ADDR]	! Save fault address
			
 
				 	call	do_sparc64_fault		! Call fault handler
			
 
				 	 add	%sp, PTREGS_OFF, %o0		! Compute pt_regs arg
			
 
				-	ba,pt	%xcc, rtrap_clr_l6		! Restore cpu state
			
 
				+	ba,pt	%xcc, rtrap			! Restore cpu state
			
 
				 	 nop					! Delay slot (fill me)
			
 
				 
			
 
				 winfix_trampoline:
			
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -32,7 +32,7 @@ fill_fixup:
 
				 	 rd	%pc, %g7
			
 
				 	call	do_sparc64_fault
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,pt	%xcc, rtrap
			
 
				 	 nop
			
 
				 
			
 
				 	/* Be very careful about usage of the trap globals here.
			
@@ -100,7 +100,7 @@ spill_fixup_dax:
 
				 	 rd	%pc, %g7
			
 
				 	call	do_sparc64_fault
			
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 winfix_mna:
			
 
				 	andn	%g3, 0x7f, %g3
			
@@ -122,12 +122,12 @@ fill_fixup_mna:
 
				 	mov	%l4, %o2
			
 
				 	call	sun4v_do_mna
			
 
				 	 mov	%l5, %o1
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 1:	mov	%l4, %o1
			
 
				 	mov	%l5, %o2
			
 
				 	call	mem_address_unaligned
			
 
				 	 nop
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 winfix_dax:
			
 
				 	andn	%g3, 0x7f, %g3
			
@@ -150,7 +150,7 @@ fill_fixup_dax:
 
				 	 add	%sp, PTREGS_OFF, %o0
			
 
				 	call	sun4v_data_access_exception
			
 
				 	 nop
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 1:	call	spitfire_data_access_exception
			
 
				 	 nop
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -24,6 +24,8 @@
 
				 #include <linux/cache.h>
			
 
				 #include <linux/sort.h>
			
 
				 #include <linux/percpu.h>
			
 
				+#include <linux/lmb.h>
			
 
				+#include <linux/mmzone.h>
			
 
				 
			
 
				 #include <asm/head.h>
			
 
				 #include <asm/system.h>
			
@@ -72,9 +74,7 @@ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 
				 #define MAX_BANKS	32
			
 
				 
			
 
				 static struct linux_prom64_registers pavail[MAX_BANKS] __initdata;
			
 
				-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
			
 
				 static int pavail_ents __initdata;
			
 
				-static int pavail_rescan_ents __initdata;
			
 
				 
			
 
				 static int cmp_p64(const void *a, const void *b)
			
 
				 {
			
@@ -715,285 +715,684 @@ void get_new_mmu_context(struct mm_struct *mm)
 
				 		smp_new_mmu_context_version();
			
 
				 }
			
 
				 
			
 
				-/* Find a free area for the bootmem map, avoiding the kernel image
			
 
				- * and the initial ramdisk.
			
 
				- */
			
 
				-static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn,
			
 
				-					       unsigned long end_pfn)
			
 
				+static int numa_enabled = 1;
			
 
				+static int numa_debug;
			
 
				+
			
 
				+static int __init early_numa(char *p)
			
 
				 {
			
 
				-	unsigned long avoid_start, avoid_end, bootmap_size;
			
 
				-	int i;
			
 
				+	if (!p)
			
 
				+		return 0;
			
 
				+
			
 
				+	if (strstr(p, "off"))
			
 
				+		numa_enabled = 0;
			
 
				+
			
 
				+	if (strstr(p, "debug"))
			
 
				+		numa_debug = 1;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+early_param("numa", early_numa);
			
 
				 
			
 
				-	bootmap_size = bootmem_bootmap_pages(end_pfn - start_pfn);
			
 
				-	bootmap_size <<= PAGE_SHIFT;
			
 
				+#define numadbg(f, a...) \
			
 
				+do {	if (numa_debug) \
			
 
				+		printk(KERN_INFO f, ## a); \
			
 
				+} while (0)
			
 
				 
			
 
				-	avoid_start = avoid_end = 0;
			
 
				+static void __init find_ramdisk(unsigned long phys_base)
			
 
				+{
			
 
				 #ifdef CONFIG_BLK_DEV_INITRD
			
 
				-	avoid_start = initrd_start;
			
 
				-	avoid_end = PAGE_ALIGN(initrd_end);
			
 
				+	if (sparc_ramdisk_image || sparc_ramdisk_image64) {
			
 
				+		unsigned long ramdisk_image;
			
 
				+
			
 
				+		/* Older versions of the bootloader only supported a
			
 
				+		 * 32-bit physical address for the ramdisk image
			
 
				+		 * location, stored at sparc_ramdisk_image.  Newer
			
 
				+		 * SILO versions set sparc_ramdisk_image to zero and
			
 
				+		 * provide a full 64-bit physical address at
			
 
				+		 * sparc_ramdisk_image64.
			
 
				+		 */
			
 
				+		ramdisk_image = sparc_ramdisk_image;
			
 
				+		if (!ramdisk_image)
			
 
				+			ramdisk_image = sparc_ramdisk_image64;
			
 
				+
			
 
				+		/* Another bootloader quirk.  The bootloader normalizes
			
 
				+		 * the physical address to KERNBASE, so we have to
			
 
				+		 * factor that back out and add in the lowest valid
			
 
				+		 * physical page address to get the true physical address.
			
 
				+		 */
			
 
				+		ramdisk_image -= KERNBASE;
			
 
				+		ramdisk_image += phys_base;
			
 
				+
			
 
				+		numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
			
 
				+			ramdisk_image, sparc_ramdisk_size);
			
 
				+
			
 
				+		initrd_start = ramdisk_image;
			
 
				+		initrd_end = ramdisk_image + sparc_ramdisk_size;
			
 
				+
			
 
				+		lmb_reserve(initrd_start, initrd_end - initrd_start);
			
 
				+	}
			
 
				 #endif
			
 
				+}
			
 
				 
			
 
				-	for (i = 0; i < pavail_ents; i++) {
			
 
				-		unsigned long start, end;
			
 
				+struct node_mem_mask {
			
 
				+	unsigned long mask;
			
 
				+	unsigned long val;
			
 
				+	unsigned long bootmem_paddr;
			
 
				+};
			
 
				+static struct node_mem_mask node_masks[MAX_NUMNODES];
			
 
				+static int num_node_masks;
			
 
				 
			
 
				-		start = pavail[i].phys_addr;
			
 
				-		end = start + pavail[i].reg_size;
			
 
				+int numa_cpu_lookup_table[NR_CPUS];
			
 
				+cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
			
 
				 
			
 
				-		while (start < end) {
			
 
				-			if (start >= kern_base &&
			
 
				-			    start < PAGE_ALIGN(kern_base + kern_size)) {
			
 
				-				start = PAGE_ALIGN(kern_base + kern_size);
			
 
				-				continue;
			
 
				-			}
			
 
				-			if (start >= avoid_start && start < avoid_end) {
			
 
				-				start = avoid_end;
			
 
				-				continue;
			
 
				-			}
			
 
				+#ifdef CONFIG_NEED_MULTIPLE_NODES
			
 
				+static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
			
 
				 
			
 
				-			if ((end - start) < bootmap_size)
			
 
				-				break;
			
 
				+struct mdesc_mblock {
			
 
				+	u64	base;
			
 
				+	u64	size;
			
 
				+	u64	offset; /* RA-to-PA */
			
 
				+};
			
 
				+static struct mdesc_mblock *mblocks;
			
 
				+static int num_mblocks;
			
 
				 
			
 
				-			if (start < kern_base &&
			
 
				-			    (start + bootmap_size) > kern_base) {
			
 
				-				start = PAGE_ALIGN(kern_base + kern_size);
			
 
				-				continue;
			
 
				-			}
			
 
				+static unsigned long ra_to_pa(unsigned long addr)
			
 
				+{
			
 
				+	int i;
			
 
				 
			
 
				-			if (start < avoid_start &&
			
 
				-			    (start + bootmap_size) > avoid_start) {
			
 
				-				start = avoid_end;
			
 
				-				continue;
			
 
				-			}
			
 
				+	for (i = 0; i < num_mblocks; i++) {
			
 
				+		struct mdesc_mblock *m = &mblocks[i];
			
 
				 
			
 
				-			/* OK, it doesn't overlap anything, use it.  */
			
 
				-			return start >> PAGE_SHIFT;
			
 
				+		if (addr >= m->base &&
			
 
				+		    addr < (m->base + m->size)) {
			
 
				+			addr += m->offset;
			
 
				+			break;
			
 
				 		}
			
 
				 	}
			
 
				-
			
 
				-	prom_printf("Cannot find free area for bootmap, aborting.\n");
			
 
				-	prom_halt();
			
 
				+	return addr;
			
 
				 }
			
 
				 
			
 
				-static void __init trim_pavail(unsigned long *cur_size_p,
			
 
				-			       unsigned long *end_of_phys_p)
			
 
				+static int find_node(unsigned long addr)
			
 
				 {
			
 
				-	unsigned long to_trim = *cur_size_p - cmdline_memory_size;
			
 
				-	unsigned long avoid_start, avoid_end;
			
 
				 	int i;
			
 
				 
			
 
				-	to_trim = PAGE_ALIGN(to_trim);
			
 
				+	addr = ra_to_pa(addr);
			
 
				+	for (i = 0; i < num_node_masks; i++) {
			
 
				+		struct node_mem_mask *p = &node_masks[i];
			
 
				 
			
 
				-	avoid_start = avoid_end = 0;
			
 
				-#ifdef CONFIG_BLK_DEV_INITRD
			
 
				-	avoid_start = initrd_start;
			
 
				-	avoid_end = PAGE_ALIGN(initrd_end);
			
 
				+		if ((addr & p->mask) == p->val)
			
 
				+			return i;
			
 
				+	}
			
 
				+	return -1;
			
 
				+}
			
 
				+
			
 
				+static unsigned long nid_range(unsigned long start, unsigned long end,
			
 
				+			       int *nid)
			
 
				+{
			
 
				+	*nid = find_node(start);
			
 
				+	start += PAGE_SIZE;
			
 
				+	while (start < end) {
			
 
				+		int n = find_node(start);
			
 
				+
			
 
				+		if (n != *nid)
			
 
				+			break;
			
 
				+		start += PAGE_SIZE;
			
 
				+	}
			
 
				+
			
 
				+	return start;
			
 
				+}
			
 
				+#else
			
 
				+static unsigned long nid_range(unsigned long start, unsigned long end,
			
 
				+			       int *nid)
			
 
				+{
			
 
				+	*nid = 0;
			
 
				+	return end;
			
 
				+}
			
 
				 #endif
			
 
				 
			
 
				-	/* Trim some pavail[] entries in order to satisfy the
			
 
				-	 * requested "mem=xxx" kernel command line specification.
			
 
				-	 *
			
 
				-	 * We must not trim off the kernel image area nor the
			
 
				-	 * initial ramdisk range (if any).  Also, we must not trim
			
 
				-	 * any pavail[] entry down to zero in order to preserve
			
 
				-	 * the invariant that all pavail[] entries have a non-zero
			
 
				-	 * size which is assumed by all of the code in here.
			
 
				-	 */
			
 
				-	for (i = 0; i < pavail_ents; i++) {
			
 
				-		unsigned long start, end, kern_end;
			
 
				-		unsigned long trim_low, trim_high, n;
			
 
				+/* This must be invoked after performing all of the necessary
			
 
				+ * add_active_range() calls for 'nid'.  We need to be able to get
			
 
				+ * correct data from get_pfn_range_for_nid().
			
 
				+ */
			
 
				+static void __init allocate_node_data(int nid)
			
 
				+{
			
 
				+	unsigned long paddr, num_pages, start_pfn, end_pfn;
			
 
				+	struct pglist_data *p;
			
 
				+
			
 
				+#ifdef CONFIG_NEED_MULTIPLE_NODES
			
 
				+	paddr = lmb_alloc_nid(sizeof(struct pglist_data),
			
 
				+			      SMP_CACHE_BYTES, nid, nid_range);
			
 
				+	if (!paddr) {
			
 
				+		prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
			
 
				+		prom_halt();
			
 
				+	}
			
 
				+	NODE_DATA(nid) = __va(paddr);
			
 
				+	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
			
 
				 
			
 
				-		kern_end = PAGE_ALIGN(kern_base + kern_size);
			
 
				+	NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
			
 
				+#endif
			
 
				 
			
 
				-		trim_low = start = pavail[i].phys_addr;
			
 
				-		trim_high = end = start + pavail[i].reg_size;
			
 
				+	p = NODE_DATA(nid);
			
 
				 
			
 
				-		if (kern_base >= start &&
			
 
				-		    kern_base < end) {
			
 
				-			trim_low = kern_base;
			
 
				-			if (kern_end >= end)
			
 
				-				continue;
			
 
				-		}
			
 
				-		if (kern_end >= start &&
			
 
				-		    kern_end < end) {
			
 
				-			trim_high = kern_end;
			
 
				-		}
			
 
				-		if (avoid_start &&
			
 
				-		    avoid_start >= start &&
			
 
				-		    avoid_start < end) {
			
 
				-			if (trim_low > avoid_start)
			
 
				-				trim_low = avoid_start;
			
 
				-			if (avoid_end >= end)
			
 
				-				continue;
			
 
				-		}
			
 
				-		if (avoid_end &&
			
 
				-		    avoid_end >= start &&
			
 
				-		    avoid_end < end) {
			
 
				-			if (trim_high < avoid_end)
			
 
				-				trim_high = avoid_end;
			
 
				+	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
			
 
				+	p->node_start_pfn = start_pfn;
			
 
				+	p->node_spanned_pages = end_pfn - start_pfn;
			
 
				+
			
 
				+	if (p->node_spanned_pages) {
			
 
				+		num_pages = bootmem_bootmap_pages(p->node_spanned_pages);
			
 
				+
			
 
				+		paddr = lmb_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid,
			
 
				+				      nid_range);
			
 
				+		if (!paddr) {
			
 
				+			prom_printf("Cannot allocate bootmap for nid[%d]\n",
			
 
				+				  nid);
			
 
				+			prom_halt();
			
 
				 		}
			
 
				+		node_masks[nid].bootmem_paddr = paddr;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void init_node_masks_nonnuma(void)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	numadbg("Initializing tables for non-numa.\n");
			
 
				+
			
 
				+	node_masks[0].mask = node_masks[0].val = 0;
			
 
				+	num_node_masks = 1;
			
 
				+
			
 
				+	for (i = 0; i < NR_CPUS; i++)
			
 
				+		numa_cpu_lookup_table[i] = 0;
			
 
				+
			
 
				+	numa_cpumask_lookup_table[0] = CPU_MASK_ALL;
			
 
				+}
			
 
				+
			
 
				+#ifdef CONFIG_NEED_MULTIPLE_NODES
			
 
				+struct pglist_data *node_data[MAX_NUMNODES];
			
 
				+
			
 
				+EXPORT_SYMBOL(numa_cpu_lookup_table);
			
 
				+EXPORT_SYMBOL(numa_cpumask_lookup_table);
			
 
				+EXPORT_SYMBOL(node_data);
			
 
				+
			
 
				+struct mdesc_mlgroup {
			
 
				+	u64	node;
			
 
				+	u64	latency;
			
 
				+	u64	match;
			
 
				+	u64	mask;
			
 
				+};
			
 
				+static struct mdesc_mlgroup *mlgroups;
			
 
				+static int num_mlgroups;
			
 
				+
			
 
				+static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
			
 
				+				   u32 cfg_handle)
			
 
				+{
			
 
				+	u64 arc;
			
 
				 
			
 
				-		if (trim_high <= trim_low)
			
 
				+	mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) {
			
 
				+		u64 target = mdesc_arc_target(md, arc);
			
 
				+		const u64 *val;
			
 
				+
			
 
				+		val = mdesc_get_property(md, target,
			
 
				+					 "cfg-handle", NULL);
			
 
				+		if (val && *val == cfg_handle)
			
 
				+			return 0;
			
 
				+	}
			
 
				+	return -ENODEV;
			
 
				+}
			
 
				+
			
 
				+static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp,
			
 
				+				    u32 cfg_handle)
			
 
				+{
			
 
				+	u64 arc, candidate, best_latency = ~(u64)0;
			
 
				+
			
 
				+	candidate = MDESC_NODE_NULL;
			
 
				+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
			
 
				+		u64 target = mdesc_arc_target(md, arc);
			
 
				+		const char *name = mdesc_node_name(md, target);
			
 
				+		const u64 *val;
			
 
				+
			
 
				+		if (strcmp(name, "pio-latency-group"))
			
 
				 			continue;
			
 
				 
			
 
				-		if (trim_low == start && trim_high == end) {
			
 
				-			/* Whole chunk is available for trimming.
			
 
				-			 * Trim all except one page, in order to keep
			
 
				-			 * entry non-empty.
			
 
				-			 */
			
 
				-			n = (end - start) - PAGE_SIZE;
			
 
				-			if (n > to_trim)
			
 
				-				n = to_trim;
			
 
				-
			
 
				-			if (n) {
			
 
				-				pavail[i].phys_addr += n;
			
 
				-				pavail[i].reg_size -= n;
			
 
				-				to_trim -= n;
			
 
				-			}
			
 
				-		} else {
			
 
				-			n = (trim_low - start);
			
 
				-			if (n > to_trim)
			
 
				-				n = to_trim;
			
 
				-
			
 
				-			if (n) {
			
 
				-				pavail[i].phys_addr += n;
			
 
				-				pavail[i].reg_size -= n;
			
 
				-				to_trim -= n;
			
 
				-			}
			
 
				-			if (to_trim) {
			
 
				-				n = end - trim_high;
			
 
				-				if (n > to_trim)
			
 
				-					n = to_trim;
			
 
				-				if (n) {
			
 
				-					pavail[i].reg_size -= n;
			
 
				-					to_trim -= n;
			
 
				-				}
			
 
				-			}
			
 
				+		val = mdesc_get_property(md, target, "latency", NULL);
			
 
				+		if (!val)
			
 
				+			continue;
			
 
				+
			
 
				+		if (*val < best_latency) {
			
 
				+			candidate = target;
			
 
				+			best_latency = *val;
			
 
				 		}
			
 
				+	}
			
 
				+
			
 
				+	if (candidate == MDESC_NODE_NULL)
			
 
				+		return -ENODEV;
			
 
				+
			
 
				+	return scan_pio_for_cfg_handle(md, candidate, cfg_handle);
			
 
				+}
			
 
				+
			
 
				+int of_node_to_nid(struct device_node *dp)
			
 
				+{
			
 
				+	const struct linux_prom64_registers *regs;
			
 
				+	struct mdesc_handle *md;
			
 
				+	u32 cfg_handle;
			
 
				+	int count, nid;
			
 
				+	u64 grp;
			
 
				 
			
 
				-		if (!to_trim)
			
 
				+	if (!mlgroups)
			
 
				+		return -1;
			
 
				+
			
 
				+	regs = of_get_property(dp, "reg", NULL);
			
 
				+	if (!regs)
			
 
				+		return -1;
			
 
				+
			
 
				+	cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff;
			
 
				+
			
 
				+	md = mdesc_grab();
			
 
				+
			
 
				+	count = 0;
			
 
				+	nid = -1;
			
 
				+	mdesc_for_each_node_by_name(md, grp, "group") {
			
 
				+		if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) {
			
 
				+			nid = count;
			
 
				 			break;
			
 
				+		}
			
 
				+		count++;
			
 
				 	}
			
 
				 
			
 
				-	/* Recalculate.  */
			
 
				-	*cur_size_p = 0UL;
			
 
				-	for (i = 0; i < pavail_ents; i++) {
			
 
				-		*end_of_phys_p = pavail[i].phys_addr +
			
 
				-			pavail[i].reg_size;
			
 
				-		*cur_size_p += pavail[i].reg_size;
			
 
				-	}
			
 
				+	mdesc_release(md);
			
 
				+
			
 
				+	return nid;
			
 
				 }
			
 
				 
			
 
				-/* About pages_avail, this is the value we will use to calculate
			
 
				- * the zholes_size[] argument given to free_area_init_node().  The
			
 
				- * page allocator uses this to calculate nr_kernel_pages,
			
 
				- * nr_all_pages and zone->present_pages.  On NUMA it is used
			
 
				- * to calculate zone->min_unmapped_pages and zone->min_slab_pages.
			
 
				- *
			
 
				- * So this number should really be set to what the page allocator
			
 
				- * actually ends up with.  This means:
			
 
				- * 1) It should include bootmem map pages, we'll release those.
			
 
				- * 2) It should not include the kernel image, except for the
			
 
				- *    __init sections which we will also release.
			
 
				- * 3) It should include the initrd image, since we'll release
			
 
				- *    that too.
			
 
				- */
			
 
				-static unsigned long __init bootmem_init(unsigned long *pages_avail,
			
 
				-					 unsigned long phys_base)
			
 
				+static void add_node_ranges(void)
			
 
				 {
			
 
				-	unsigned long bootmap_size, end_pfn;
			
 
				-	unsigned long end_of_phys_memory = 0UL;
			
 
				-	unsigned long bootmap_pfn, bytes_avail, size;
			
 
				 	int i;
			
 
				 
			
 
				-	bytes_avail = 0UL;
			
 
				-	for (i = 0; i < pavail_ents; i++) {
			
 
				-		end_of_phys_memory = pavail[i].phys_addr +
			
 
				-			pavail[i].reg_size;
			
 
				-		bytes_avail += pavail[i].reg_size;
			
 
				+	for (i = 0; i < lmb.memory.cnt; i++) {
			
 
				+		unsigned long size = lmb_size_bytes(&lmb.memory, i);
			
 
				+		unsigned long start, end;
			
 
				+
			
 
				+		start = lmb.memory.region[i].base;
			
 
				+		end = start + size;
			
 
				+		while (start < end) {
			
 
				+			unsigned long this_end;
			
 
				+			int nid;
			
 
				+
			
 
				+			this_end = nid_range(start, end, &nid);
			
 
				+
			
 
				+			numadbg("Adding active range nid[%d] "
			
 
				+				"start[%lx] end[%lx]\n",
			
 
				+				nid, start, this_end);
			
 
				+
			
 
				+			add_active_range(nid,
			
 
				+					 start >> PAGE_SHIFT,
			
 
				+					 this_end >> PAGE_SHIFT);
			
 
				+
			
 
				+			start = this_end;
			
 
				+		}
			
 
				 	}
			
 
				+}
			
 
				 
			
 
				-	/* Determine the location of the initial ramdisk before trying
			
 
				-	 * to honor the "mem=xxx" command line argument.  We must know
			
 
				-	 * where the kernel image and the ramdisk image are so that we
			
 
				-	 * do not trim those two areas from the physical memory map.
			
 
				-	 */
			
 
				+static int __init grab_mlgroups(struct mdesc_handle *md)
			
 
				+{
			
 
				+	unsigned long paddr;
			
 
				+	int count = 0;
			
 
				+	u64 node;
			
 
				+
			
 
				+	mdesc_for_each_node_by_name(md, node, "memory-latency-group")
			
 
				+		count++;
			
 
				+	if (!count)
			
 
				+		return -ENOENT;
			
 
				+
			
 
				+	paddr = lmb_alloc(count * sizeof(struct mdesc_mlgroup),
			
 
				+			  SMP_CACHE_BYTES);
			
 
				+	if (!paddr)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	mlgroups = __va(paddr);
			
 
				+	num_mlgroups = count;
			
 
				+
			
 
				+	count = 0;
			
 
				+	mdesc_for_each_node_by_name(md, node, "memory-latency-group") {
			
 
				+		struct mdesc_mlgroup *m = &mlgroups[count++];
			
 
				+		const u64 *val;
			
 
				+
			
 
				+		m->node = node;
			
 
				+
			
 
				+		val = mdesc_get_property(md, node, "latency", NULL);
			
 
				+		m->latency = *val;
			
 
				+		val = mdesc_get_property(md, node, "address-match", NULL);
			
 
				+		m->match = *val;
			
 
				+		val = mdesc_get_property(md, node, "address-mask", NULL);
			
 
				+		m->mask = *val;
			
 
				+
			
 
				+		numadbg("MLGROUP[%d]: node[%lx] latency[%lx] "
			
 
				+			"match[%lx] mask[%lx]\n",
			
 
				+			count - 1, m->node, m->latency, m->match, m->mask);
			
 
				+	}
			
 
				 
			
 
				-#ifdef CONFIG_BLK_DEV_INITRD
			
 
				-	/* Now have to check initial ramdisk, so that bootmap does not overwrite it */
			
 
				-	if (sparc_ramdisk_image || sparc_ramdisk_image64) {
			
 
				-		unsigned long ramdisk_image = sparc_ramdisk_image ?
			
 
				-			sparc_ramdisk_image : sparc_ramdisk_image64;
			
 
				-		ramdisk_image -= KERNBASE;
			
 
				-		initrd_start = ramdisk_image + phys_base;
			
 
				-		initrd_end = initrd_start + sparc_ramdisk_size;
			
 
				-		if (initrd_end > end_of_phys_memory) {
			
 
				-			printk(KERN_CRIT "initrd extends beyond end of memory "
			
 
				-		                 	 "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
			
 
				-			       initrd_end, end_of_phys_memory);
			
 
				-			initrd_start = 0;
			
 
				-			initrd_end = 0;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int __init grab_mblocks(struct mdesc_handle *md)
			
 
				+{
			
 
				+	unsigned long paddr;
			
 
				+	int count = 0;
			
 
				+	u64 node;
			
 
				+
			
 
				+	mdesc_for_each_node_by_name(md, node, "mblock")
			
 
				+		count++;
			
 
				+	if (!count)
			
 
				+		return -ENOENT;
			
 
				+
			
 
				+	paddr = lmb_alloc(count * sizeof(struct mdesc_mblock),
			
 
				+			  SMP_CACHE_BYTES);
			
 
				+	if (!paddr)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	mblocks = __va(paddr);
			
 
				+	num_mblocks = count;
			
 
				+
			
 
				+	count = 0;
			
 
				+	mdesc_for_each_node_by_name(md, node, "mblock") {
			
 
				+		struct mdesc_mblock *m = &mblocks[count++];
			
 
				+		const u64 *val;
			
 
				+
			
 
				+		val = mdesc_get_property(md, node, "base", NULL);
			
 
				+		m->base = *val;
			
 
				+		val = mdesc_get_property(md, node, "size", NULL);
			
 
				+		m->size = *val;
			
 
				+		val = mdesc_get_property(md, node,
			
 
				+					 "address-congruence-offset", NULL);
			
 
				+		m->offset = *val;
			
 
				+
			
 
				+		numadbg("MBLOCK[%d]: base[%lx] size[%lx] offset[%lx]\n",
			
 
				+			count - 1, m->base, m->size, m->offset);
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
			
 
				+					       u64 grp, cpumask_t *mask)
			
 
				+{
			
 
				+	u64 arc;
			
 
				+
			
 
				+	cpus_clear(*mask);
			
 
				+
			
 
				+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
			
 
				+		u64 target = mdesc_arc_target(md, arc);
			
 
				+		const char *name = mdesc_node_name(md, target);
			
 
				+		const u64 *id;
			
 
				+
			
 
				+		if (strcmp(name, "cpu"))
			
 
				+			continue;
			
 
				+		id = mdesc_get_property(md, target, "id", NULL);
			
 
				+		if (*id < NR_CPUS)
			
 
				+			cpu_set(*id, *mask);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static struct mdesc_mlgroup * __init find_mlgroup(u64 node)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	for (i = 0; i < num_mlgroups; i++) {
			
 
				+		struct mdesc_mlgroup *m = &mlgroups[i];
			
 
				+		if (m->node == node)
			
 
				+			return m;
			
 
				+	}
			
 
				+	return NULL;
			
 
				+}
			
 
				+
			
 
				+static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
			
 
				+				      int index)
			
 
				+{
			
 
				+	struct mdesc_mlgroup *candidate = NULL;
			
 
				+	u64 arc, best_latency = ~(u64)0;
			
 
				+	struct node_mem_mask *n;
			
 
				+
			
 
				+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
			
 
				+		u64 target = mdesc_arc_target(md, arc);
			
 
				+		struct mdesc_mlgroup *m = find_mlgroup(target);
			
 
				+		if (!m)
			
 
				+			continue;
			
 
				+		if (m->latency < best_latency) {
			
 
				+			candidate = m;
			
 
				+			best_latency = m->latency;
			
 
				 		}
			
 
				 	}
			
 
				-#endif	
			
 
				+	if (!candidate)
			
 
				+		return -ENOENT;
			
 
				+
			
 
				+	if (num_node_masks != index) {
			
 
				+		printk(KERN_ERR "Inconsistent NUMA state, "
			
 
				+		       "index[%d] != num_node_masks[%d]\n",
			
 
				+		       index, num_node_masks);
			
 
				+		return -EINVAL;
			
 
				+	}
			
 
				 
			
 
				-	if (cmdline_memory_size &&
			
 
				-	    bytes_avail > cmdline_memory_size)
			
 
				-		trim_pavail(&bytes_avail,
			
 
				-			    &end_of_phys_memory);
			
 
				+	n = &node_masks[num_node_masks++];
			
 
				 
			
 
				-	*pages_avail = bytes_avail >> PAGE_SHIFT;
			
 
				+	n->mask = candidate->mask;
			
 
				+	n->val = candidate->match;
			
 
				 
			
 
				-	end_pfn = end_of_phys_memory >> PAGE_SHIFT;
			
 
				+	numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%lx])\n",
			
 
				+		index, n->mask, n->val, candidate->latency);
			
 
				 
			
 
				-	/* Initialize the boot-time allocator. */
			
 
				-	max_pfn = max_low_pfn = end_pfn;
			
 
				-	min_low_pfn = (phys_base >> PAGE_SHIFT);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
			
 
				+					 int index)
			
 
				+{
			
 
				+	cpumask_t mask;
			
 
				+	int cpu;
			
 
				 
			
 
				-	bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn);
			
 
				+	numa_parse_mdesc_group_cpus(md, grp, &mask);
			
 
				 
			
 
				-	bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn,
			
 
				-					 min_low_pfn, end_pfn);
			
 
				+	for_each_cpu_mask(cpu, mask)
			
 
				+		numa_cpu_lookup_table[cpu] = index;
			
 
				+	numa_cpumask_lookup_table[index] = mask;
			
 
				 
			
 
				-	/* Now register the available physical memory with the
			
 
				-	 * allocator.
			
 
				-	 */
			
 
				-	for (i = 0; i < pavail_ents; i++)
			
 
				-		free_bootmem(pavail[i].phys_addr, pavail[i].reg_size);
			
 
				+	if (numa_debug) {
			
 
				+		printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
			
 
				+		for_each_cpu_mask(cpu, mask)
			
 
				+			printk("%d ", cpu);
			
 
				+		printk("]\n");
			
 
				+	}
			
 
				 
			
 
				-#ifdef CONFIG_BLK_DEV_INITRD
			
 
				-	if (initrd_start) {
			
 
				-		size = initrd_end - initrd_start;
			
 
				+	return numa_attach_mlgroup(md, grp, index);
			
 
				+}
			
 
				+
			
 
				+static int __init numa_parse_mdesc(void)
			
 
				+{
			
 
				+	struct mdesc_handle *md = mdesc_grab();
			
 
				+	int i, err, count;
			
 
				+	u64 node;
			
 
				+
			
 
				+	node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
			
 
				+	if (node == MDESC_NODE_NULL) {
			
 
				+		mdesc_release(md);
			
 
				+		return -ENOENT;
			
 
				+	}
			
 
				+
			
 
				+	err = grab_mblocks(md);
			
 
				+	if (err < 0)
			
 
				+		goto out;
			
 
				+
			
 
				+	err = grab_mlgroups(md);
			
 
				+	if (err < 0)
			
 
				+		goto out;
			
 
				+
			
 
				+	count = 0;
			
 
				+	mdesc_for_each_node_by_name(md, node, "group") {
			
 
				+		err = numa_parse_mdesc_group(md, node, count);
			
 
				+		if (err < 0)
			
 
				+			break;
			
 
				+		count++;
			
 
				+	}
			
 
				+
			
 
				+	add_node_ranges();
			
 
				+
			
 
				+	for (i = 0; i < num_node_masks; i++) {
			
 
				+		allocate_node_data(i);
			
 
				+		node_set_online(i);
			
 
				+	}
			
 
				+
			
 
				+	err = 0;
			
 
				+out:
			
 
				+	mdesc_release(md);
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+static int __init numa_parse_sun4u(void)
			
 
				+{
			
 
				+	return -1;
			
 
				+}
			
 
				 
			
 
				-		/* Reserve the initrd image area. */
			
 
				-		reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
			
 
				+static int __init bootmem_init_numa(void)
			
 
				+{
			
 
				+	int err = -1;
			
 
				 
			
 
				-		initrd_start += PAGE_OFFSET;
			
 
				-		initrd_end += PAGE_OFFSET;
			
 
				+	numadbg("bootmem_init_numa()\n");
			
 
				+
			
 
				+	if (numa_enabled) {
			
 
				+		if (tlb_type == hypervisor)
			
 
				+			err = numa_parse_mdesc();
			
 
				+		else
			
 
				+			err = numa_parse_sun4u();
			
 
				 	}
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+static int bootmem_init_numa(void)
			
 
				+{
			
 
				+	return -1;
			
 
				+}
			
 
				+
			
 
				 #endif
			
 
				-	/* Reserve the kernel text/data/bss. */
			
 
				-	reserve_bootmem(kern_base, kern_size, BOOTMEM_DEFAULT);
			
 
				-	*pages_avail -= PAGE_ALIGN(kern_size) >> PAGE_SHIFT;
			
 
				-
			
 
				-	/* Add back in the initmem pages. */
			
 
				-	size = ((unsigned long)(__init_end) & PAGE_MASK) -
			
 
				-		PAGE_ALIGN((unsigned long)__init_begin);
			
 
				-	*pages_avail += size >> PAGE_SHIFT;
			
 
				-
			
 
				-	/* Reserve the bootmem map.   We do not account for it
			
 
				-	 * in pages_avail because we will release that memory
			
 
				-	 * in free_all_bootmem.
			
 
				-	 */
			
 
				-	size = bootmap_size;
			
 
				-	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
			
 
				 
			
 
				-	for (i = 0; i < pavail_ents; i++) {
			
 
				+static void __init bootmem_init_nonnuma(void)
			
 
				+{
			
 
				+	unsigned long top_of_ram = lmb_end_of_DRAM();
			
 
				+	unsigned long total_ram = lmb_phys_mem_size();
			
 
				+	unsigned int i;
			
 
				+
			
 
				+	numadbg("bootmem_init_nonnuma()\n");
			
 
				+
			
 
				+	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
			
 
				+	       top_of_ram, total_ram);
			
 
				+	printk(KERN_INFO "Memory hole size: %ldMB\n",
			
 
				+	       (top_of_ram - total_ram) >> 20);
			
 
				+
			
 
				+	init_node_masks_nonnuma();
			
 
				+
			
 
				+	for (i = 0; i < lmb.memory.cnt; i++) {
			
 
				+		unsigned long size = lmb_size_bytes(&lmb.memory, i);
			
 
				 		unsigned long start_pfn, end_pfn;
			
 
				 
			
 
				-		start_pfn = pavail[i].phys_addr >> PAGE_SHIFT;
			
 
				-		end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT));
			
 
				-		memory_present(0, start_pfn, end_pfn);
			
 
				+		if (!size)
			
 
				+			continue;
			
 
				+
			
 
				+		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
			
 
				+		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
			
 
				+		add_active_range(0, start_pfn, end_pfn);
			
 
				+	}
			
 
				+
			
 
				+	allocate_node_data(0);
			
 
				+
			
 
				+	node_set_online(0);
			
 
				+}
			
 
				+
			
 
				+static void __init reserve_range_in_node(int nid, unsigned long start,
			
 
				+					 unsigned long end)
			
 
				+{
			
 
				+	numadbg("    reserve_range_in_node(nid[%d],start[%lx],end[%lx]\n",
			
 
				+		nid, start, end);
			
 
				+	while (start < end) {
			
 
				+		unsigned long this_end;
			
 
				+		int n;
			
 
				+
			
 
				+		this_end = nid_range(start, end, &n);
			
 
				+		if (n == nid) {
			
 
				+			numadbg("      MATCH reserving range [%lx:%lx]\n",
			
 
				+				start, this_end);
			
 
				+			reserve_bootmem_node(NODE_DATA(nid), start,
			
 
				+					     (this_end - start), BOOTMEM_DEFAULT);
			
 
				+		} else
			
 
				+			numadbg("      NO MATCH, advancing start to %lx\n",
			
 
				+				this_end);
			
 
				+
			
 
				+		start = this_end;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void __init trim_reserved_in_node(int nid)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	numadbg("  trim_reserved_in_node(%d)\n", nid);
			
 
				+
			
 
				+	for (i = 0; i < lmb.reserved.cnt; i++) {
			
 
				+		unsigned long start = lmb.reserved.region[i].base;
			
 
				+		unsigned long size = lmb_size_bytes(&lmb.reserved, i);
			
 
				+		unsigned long end = start + size;
			
 
				+
			
 
				+		reserve_range_in_node(nid, start, end);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void __init bootmem_init_one_node(int nid)
			
 
				+{
			
 
				+	struct pglist_data *p;
			
 
				+
			
 
				+	numadbg("bootmem_init_one_node(%d)\n", nid);
			
 
				+
			
 
				+	p = NODE_DATA(nid);
			
 
				+
			
 
				+	if (p->node_spanned_pages) {
			
 
				+		unsigned long paddr = node_masks[nid].bootmem_paddr;
			
 
				+		unsigned long end_pfn;
			
 
				+
			
 
				+		end_pfn = p->node_start_pfn + p->node_spanned_pages;
			
 
				+
			
 
				+		numadbg("  init_bootmem_node(%d, %lx, %lx, %lx)\n",
			
 
				+			nid, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
			
 
				+
			
 
				+		init_bootmem_node(p, paddr >> PAGE_SHIFT,
			
 
				+				  p->node_start_pfn, end_pfn);
			
 
				+
			
 
				+		numadbg("  free_bootmem_with_active_regions(%d, %lx)\n",
			
 
				+			nid, end_pfn);
			
 
				+		free_bootmem_with_active_regions(nid, end_pfn);
			
 
				+
			
 
				+		trim_reserved_in_node(nid);
			
 
				+
			
 
				+		numadbg("  sparse_memory_present_with_active_regions(%d)\n",
			
 
				+			nid);
			
 
				+		sparse_memory_present_with_active_regions(nid);
			
 
				 	}
			
 
				+}
			
 
				+
			
 
				+static unsigned long __init bootmem_init(unsigned long phys_base)
			
 
				+{
			
 
				+	unsigned long end_pfn;
			
 
				+	int nid;
			
 
				+
			
 
				+	end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
			
 
				+	max_pfn = max_low_pfn = end_pfn;
			
 
				+	min_low_pfn = (phys_base >> PAGE_SHIFT);
			
 
				+
			
 
				+	if (bootmem_init_numa() < 0)
			
 
				+		bootmem_init_nonnuma();
			
 
				+
			
 
				+	/* XXX cpu notifier XXX */
			
 
				+
			
 
				+	for_each_online_node(nid)
			
 
				+		bootmem_init_one_node(nid);
			
 
				 
			
 
				 	sparse_init();
			
 
				 
			
@@ -1289,7 +1688,7 @@ void __init setup_per_cpu_areas(void)
 
				 
			
 
				 void __init paging_init(void)
			
 
				 {
			
 
				-	unsigned long end_pfn, pages_avail, shift, phys_base;
			
 
				+	unsigned long end_pfn, shift, phys_base;
			
 
				 	unsigned long real_end, i;
			
 
				 
			
 
				 	/* These build time checkes make sure that the dcache_dirty_cpu()
			
@@ -1330,12 +1729,26 @@ void __init paging_init(void)
 
				 		sun4v_ktsb_init();
			
 
				 	}
			
 
				 
			
 
				+	lmb_init();
			
 
				+
			
 
				 	/* Find available physical memory... */
			
 
				 	read_obp_memory("available", &pavail[0], &pavail_ents);
			
 
				 
			
 
				 	phys_base = 0xffffffffffffffffUL;
			
 
				-	for (i = 0; i < pavail_ents; i++)
			
 
				+	for (i = 0; i < pavail_ents; i++) {
			
 
				 		phys_base = min(phys_base, pavail[i].phys_addr);
			
 
				+		lmb_add(pavail[i].phys_addr, pavail[i].reg_size);
			
 
				+	}
			
 
				+
			
 
				+	lmb_reserve(kern_base, kern_size);
			
 
				+
			
 
				+	find_ramdisk(phys_base);
			
 
				+
			
 
				+	if (cmdline_memory_size)
			
 
				+		lmb_enforce_memory_limit(phys_base + cmdline_memory_size);
			
 
				+
			
 
				+	lmb_analyze();
			
 
				+	lmb_dump_all();
			
 
				 
			
 
				 	set_bit(0, mmu_context_bmap);
			
 
				 
			
@@ -1371,14 +1784,10 @@ void __init paging_init(void)
 
				 	if (tlb_type == hypervisor)
			
 
				 		sun4v_ktsb_register();
			
 
				 
			
 
				-	/* Setup bootmem... */
			
 
				-	pages_avail = 0;
			
 
				-	last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base);
			
 
				-
			
 
				-	max_mapnr = last_valid_pfn;
			
 
				-
			
 
				-	kernel_physical_mapping_init();
			
 
				-
			
 
				+	/* We must setup the per-cpu areas before we pull in the
			
 
				+	 * PROM and the MDESC.  The code there fills in cpu and
			
 
				+	 * other information into per-cpu data structures.
			
 
				+	 */
			
 
				 	real_setup_per_cpu_areas();
			
 
				 
			
 
				 	prom_build_devicetree();
			
@@ -1386,20 +1795,22 @@ void __init paging_init(void)
 
				 	if (tlb_type == hypervisor)
			
 
				 		sun4v_mdesc_init();
			
 
				 
			
 
				+	/* Setup bootmem... */
			
 
				+	last_valid_pfn = end_pfn = bootmem_init(phys_base);
			
 
				+
			
 
				+#ifndef CONFIG_NEED_MULTIPLE_NODES
			
 
				+	max_mapnr = last_valid_pfn;
			
 
				+#endif
			
 
				+	kernel_physical_mapping_init();
			
 
				+
			
 
				 	{
			
 
				-		unsigned long zones_size[MAX_NR_ZONES];
			
 
				-		unsigned long zholes_size[MAX_NR_ZONES];
			
 
				-		int znum;
			
 
				+		unsigned long max_zone_pfns[MAX_NR_ZONES];
			
 
				 
			
 
				-		for (znum = 0; znum < MAX_NR_ZONES; znum++)
			
 
				-			zones_size[znum] = zholes_size[znum] = 0;
			
 
				+		memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
			
 
				 
			
 
				-		zones_size[ZONE_NORMAL] = end_pfn;
			
 
				-		zholes_size[ZONE_NORMAL] = end_pfn - pages_avail;
			
 
				+		max_zone_pfns[ZONE_NORMAL] = end_pfn;
			
 
				 
			
 
				-		free_area_init_node(0, &contig_page_data, zones_size,
			
 
				-				    __pa(PAGE_OFFSET) >> PAGE_SHIFT,
			
 
				-				    zholes_size);
			
 
				+		free_area_init_nodes(max_zone_pfns);
			
 
				 	}
			
 
				 
			
 
				 	printk("Booting Linux...\n");
			
@@ -1408,21 +1819,52 @@ void __init paging_init(void)
 
				 	cpu_probe();
			
 
				 }
			
 
				 
			
 
				-static void __init taint_real_pages(void)
			
 
				+int __init page_in_phys_avail(unsigned long paddr)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	paddr &= PAGE_MASK;
			
 
				+
			
 
				+	for (i = 0; i < pavail_ents; i++) {
			
 
				+		unsigned long start, end;
			
 
				+
			
 
				+		start = pavail[i].phys_addr;
			
 
				+		end = start + pavail[i].reg_size;
			
 
				+
			
 
				+		if (paddr >= start && paddr < end)
			
 
				+			return 1;
			
 
				+	}
			
 
				+	if (paddr >= kern_base && paddr < (kern_base + kern_size))
			
 
				+		return 1;
			
 
				+#ifdef CONFIG_BLK_DEV_INITRD
			
 
				+	if (paddr >= __pa(initrd_start) &&
			
 
				+	    paddr < __pa(PAGE_ALIGN(initrd_end)))
			
 
				+		return 1;
			
 
				+#endif
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
			
 
				+static int pavail_rescan_ents __initdata;
			
 
				+
			
 
				+/* Certain OBP calls, such as fetching "available" properties, can
			
 
				+ * claim physical memory.  So, along with initializing the valid
			
 
				+ * address bitmap, what we do here is refetch the physical available
			
 
				+ * memory list again, and make sure it provides at least as much
			
 
				+ * memory as 'pavail' does.
			
 
				+ */
			
 
				+static void setup_valid_addr_bitmap_from_pavail(void)
			
 
				 {
			
 
				 	int i;
			
 
				 
			
 
				 	read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
			
 
				 
			
 
				-	/* Find changes discovered in the physmem available rescan and
			
 
				-	 * reserve the lost portions in the bootmem maps.
			
 
				-	 */
			
 
				 	for (i = 0; i < pavail_ents; i++) {
			
 
				 		unsigned long old_start, old_end;
			
 
				 
			
 
				 		old_start = pavail[i].phys_addr;
			
 
				-		old_end = old_start +
			
 
				-			pavail[i].reg_size;
			
 
				+		old_end = old_start + pavail[i].reg_size;
			
 
				 		while (old_start < old_end) {
			
 
				 			int n;
			
 
				 
			
@@ -1440,7 +1882,16 @@ static void __init taint_real_pages(void)
 
				 					goto do_next_page;
			
 
				 				}
			
 
				 			}
			
 
				-			reserve_bootmem(old_start, PAGE_SIZE, BOOTMEM_DEFAULT);
			
 
				+
			
 
				+			prom_printf("mem_init: Lost memory in pavail\n");
			
 
				+			prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
			
 
				+				    pavail[i].phys_addr,
			
 
				+				    pavail[i].reg_size);
			
 
				+			prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
			
 
				+				    pavail_rescan[i].phys_addr,
			
 
				+				    pavail_rescan[i].reg_size);
			
 
				+			prom_printf("mem_init: Cannot continue, aborting.\n");
			
 
				+			prom_halt();
			
 
				 
			
 
				 		do_next_page:
			
 
				 			old_start += PAGE_SIZE;
			
@@ -1448,32 +1899,6 @@ static void __init taint_real_pages(void)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-int __init page_in_phys_avail(unsigned long paddr)
			
 
				-{
			
 
				-	int i;
			
 
				-
			
 
				-	paddr &= PAGE_MASK;
			
 
				-
			
 
				-	for (i = 0; i < pavail_rescan_ents; i++) {
			
 
				-		unsigned long start, end;
			
 
				-
			
 
				-		start = pavail_rescan[i].phys_addr;
			
 
				-		end = start + pavail_rescan[i].reg_size;
			
 
				-
			
 
				-		if (paddr >= start && paddr < end)
			
 
				-			return 1;
			
 
				-	}
			
 
				-	if (paddr >= kern_base && paddr < (kern_base + kern_size))
			
 
				-		return 1;
			
 
				-#ifdef CONFIG_BLK_DEV_INITRD
			
 
				-	if (paddr >= __pa(initrd_start) &&
			
 
				-	    paddr < __pa(PAGE_ALIGN(initrd_end)))
			
 
				-		return 1;
			
 
				-#endif
			
 
				-
			
 
				-	return 0;
			
 
				-}
			
 
				-
			
 
				 void __init mem_init(void)
			
 
				 {
			
 
				 	unsigned long codepages, datapages, initpages;
			
@@ -1496,14 +1921,26 @@ void __init mem_init(void)
 
				 		addr += PAGE_SIZE;
			
 
				 	}
			
 
				 
			
 
				-	taint_real_pages();
			
 
				+	setup_valid_addr_bitmap_from_pavail();
			
 
				 
			
 
				 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
			
 
				 
			
 
				+#ifdef CONFIG_NEED_MULTIPLE_NODES
			
 
				+	for_each_online_node(i) {
			
 
				+		if (NODE_DATA(i)->node_spanned_pages != 0) {
			
 
				+			totalram_pages +=
			
 
				+				free_all_bootmem_node(NODE_DATA(i));
			
 
				+		}
			
 
				+	}
			
 
				+#else
			
 
				+	totalram_pages = free_all_bootmem();
			
 
				+#endif
			
 
				+
			
 
				 	/* We subtract one to account for the mem_map_zero page
			
 
				 	 * allocated below.
			
 
				 	 */
			
 
				-	totalram_pages = num_physpages = free_all_bootmem() - 1;
			
 
				+	totalram_pages -= 1;
			
 
				+	num_physpages = totalram_pages;
			
 
				 
			
 
				 	/*
			
 
				 	 * Set up the zero page, mark it reserved, so that page count
			
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -321,7 +321,8 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
 
				 	if (new_size > (PAGE_SIZE * 2))
			
 
				 		gfp_flags = __GFP_NOWARN | __GFP_NORETRY;
			
 
				 
			
 
				-	new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags);
			
 
				+	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
			
 
				+					gfp_flags, numa_node_id());
			
 
				 	if (unlikely(!new_tsb)) {
			
 
				 		/* Not being able to fork due to a high-order TSB
			
 
				 		 * allocation failure is very bad behavior.  Just back
			
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -476,7 +476,6 @@ xcall_sync_tick:
 
				 #endif
			
 
				 	call		smp_synchronize_tick_client
			
 
				 	 nop
			
 
				-	clr		%l6
			
 
				 	b		rtrap_xcall
			
 
				 	 ldx		[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
			
 
				 
			
@@ -511,7 +510,6 @@ xcall_report_regs:
 
				 #endif
			
 
				 	call		__show_regs
			
 
				 	 add		%sp, PTREGS_OFF, %o0
			
 
				-	clr		%l6
			
 
				 	/* Has to be a non-v9 branch due to the large distance. */
			
 
				 	b		rtrap_xcall
			
 
				 	 ldx		[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
			
@@ -576,7 +574,7 @@ __hypervisor_tlb_xcall_error:
 
				 	mov	%l4, %o0
			
 
				 	call	hypervisor_tlbop_error_xcall
			
 
				 	 mov	%l5, %o1
			
 
				-	ba,a,pt	%xcc, rtrap_clr_l6
			
 
				+	ba,a,pt	%xcc, rtrap
			
 
				 
			
 
				 	.globl		__hypervisor_xcall_flush_tlb_mm
			
 
				 __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
			
--- a/drivers/serial/sunzilog.c
+++ b/drivers/serial/sunzilog.c
@@ -1015,6 +1015,7 @@ static struct uart_ops sunzilog_pops = {
 
				 	.verify_port	=	sunzilog_verify_port,
			
 
				 };
			
 
				 
			
 
				+static int uart_chip_count;
			
 
				 static struct uart_sunzilog_port *sunzilog_port_table;
			
 
				 static struct zilog_layout __iomem **sunzilog_chip_regs;
			
 
				 
			
@@ -1350,16 +1351,22 @@ static int zilog_irq = -1;
 
				 
			
 
				 static int __devinit zs_probe(struct of_device *op, const struct of_device_id *match)
			
 
				 {
			
 
				-	static int inst;
			
 
				+	static int kbm_inst, uart_inst;
			
 
				+	int inst;
			
 
				 	struct uart_sunzilog_port *up;
			
 
				 	struct zilog_layout __iomem *rp;
			
 
				-	int keyboard_mouse;
			
 
				+	int keyboard_mouse = 0;
			
 
				 	int err;
			
 
				 
			
 
				-	keyboard_mouse = 0;
			
 
				 	if (of_find_property(op->node, "keyboard", NULL))
			
 
				 		keyboard_mouse = 1;
			
 
				 
			
 
				+	/* uarts must come before keyboards/mice */
			
 
				+	if (keyboard_mouse)
			
 
				+		inst = uart_chip_count + kbm_inst;
			
 
				+	else
			
 
				+		inst = uart_inst;
			
 
				+
			
 
				 	sunzilog_chip_regs[inst] = of_ioremap(&op->resource[0], 0,
			
 
				 					      sizeof(struct zilog_layout),
			
 
				 					      "zs");
			
@@ -1427,6 +1434,7 @@ static int __devinit zs_probe(struct of_device *op, const struct of_device_id *m
 
				 				   rp, sizeof(struct zilog_layout));
			
 
				 			return err;
			
 
				 		}
			
 
				+		uart_inst++;
			
 
				 	} else {
			
 
				 		printk(KERN_INFO "%s: Keyboard at MMIO 0x%llx (irq = %d) "
			
 
				 		       "is a %s\n",
			
@@ -1438,12 +1446,11 @@ static int __devinit zs_probe(struct of_device *op, const struct of_device_id *m
 
				 		       op->dev.bus_id,
			
 
				 		       (unsigned long long) up[1].port.mapbase,
			
 
				 		       op->irqs[0], sunzilog_type(&up[1].port));
			
 
				+		kbm_inst++;
			
 
				 	}
			
 
				 
			
 
				 	dev_set_drvdata(&op->dev, &up[0]);
			
 
				 
			
 
				-	inst++;
			
 
				-
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -1491,28 +1498,25 @@ static struct of_platform_driver zs_driver = {
 
				 static int __init sunzilog_init(void)
			
 
				 {
			
 
				 	struct device_node *dp;
			
 
				-	int err, uart_count;
			
 
				-	int num_keybms;
			
 
				+	int err;
			
 
				+	int num_keybms = 0;
			
 
				 	int num_sunzilog = 0;
			
 
				 
			
 
				-	num_keybms = 0;
			
 
				 	for_each_node_by_name(dp, "zs") {
			
 
				 		num_sunzilog++;
			
 
				 		if (of_find_property(dp, "keyboard", NULL))
			
 
				 			num_keybms++;
			
 
				 	}
			
 
				 
			
 
				-	uart_count = 0;
			
 
				 	if (num_sunzilog) {
			
 
				-		int uart_count;
			
 
				-
			
 
				 		err = sunzilog_alloc_tables(num_sunzilog);
			
 
				 		if (err)
			
 
				 			goto out;
			
 
				 
			
 
				-		uart_count = (num_sunzilog * 2) - (2 * num_keybms);
			
 
				+		uart_chip_count = num_sunzilog - num_keybms;
			
 
				 
			
 
				-		err = sunserial_register_minors(&sunzilog_reg, uart_count);
			
 
				+		err = sunserial_register_minors(&sunzilog_reg,
			
 
				+						uart_chip_count * 2);
			
 
				 		if (err)
			
 
				 			goto out_free_tables;
			
 
				 	}
			
--- a/include/asm-sparc/device.h
+++ b/include/asm-sparc/device.h
@@ -16,6 +16,8 @@ struct dev_archdata {
 
				 
			
 
				 	struct device_node	*prom_node;
			
 
				 	struct of_device	*op;
			
 
				+
			
 
				+	int			numa_node;
			
 
				 };
			
 
				 
			
 
				 #endif /* _ASM_SPARC_DEVICE_H */
			
--- a/include/asm-sparc/prom.h
+++ b/include/asm-sparc/prom.h
@@ -77,6 +77,11 @@ extern int of_getintprop_default(struct device_node *np,
 
				 				 const char *name,
			
 
				 				 int def);
			
 
				 extern int of_find_in_proplist(const char *list, const char *match, int len);
			
 
				+#ifdef CONFIG_NUMA
			
 
				+extern int of_node_to_nid(struct device_node *dp);
			
 
				+#else
			
 
				+#define of_node_to_nid(dp)	(-1)
			
 
				+#endif
			
 
				 
			
 
				 extern void prom_build_devicetree(void);
			
 
				 
			
--- a/include/asm-sparc64/iommu.h
+++ b/include/asm-sparc64/iommu.h
@@ -56,6 +56,7 @@ struct strbuf {
 
				 };
			
 
				 
			
 
				 extern int iommu_table_init(struct iommu *iommu, int tsbsize,
			
 
				-			    u32 dma_offset, u32 dma_addr_mask);
			
 
				+			    u32 dma_offset, u32 dma_addr_mask,
			
 
				+			    int numa_node);
			
 
				 
			
 
				 #endif /* !(_SPARC64_IOMMU_H) */
			
--- a/include/asm-sparc64/mmzone.h
+++ b/include/asm-sparc64/mmzone.h
@@ -0,0 +1,17 @@
 
				+#ifndef _SPARC64_MMZONE_H
			
 
				+#define _SPARC64_MMZONE_H
			
 
				+
			
 
				+#ifdef CONFIG_NEED_MULTIPLE_NODES
			
 
				+
			
 
				+extern struct pglist_data *node_data[];
			
 
				+
			
 
				+#define NODE_DATA(nid)		(node_data[nid])
			
 
				+#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
			
 
				+#define node_end_pfn(nid)	(NODE_DATA(nid)->node_end_pfn)
			
 
				+
			
 
				+extern int numa_cpu_lookup_table[];
			
 
				+extern cpumask_t numa_cpumask_lookup_table[];
			
 
				+
			
 
				+#endif /* CONFIG_NEED_MULTIPLE_NODES */
			
 
				+
			
 
				+#endif /* _SPARC64_MMZONE_H */
			
--- a/include/asm-sparc64/numnodes.h
+++ b/include/asm-sparc64/numnodes.h
@@ -1,6 +0,0 @@
 
				-#ifndef _SPARC64_NUMNODES_H
			
 
				-#define _SPARC64_NUMNODES_H
			
 
				-
			
 
				-#define NODES_SHIFT	0
			
 
				-
			
 
				-#endif /* !(_SPARC64_NUMNODES_H) */
			
--- a/include/asm-sparc64/ptrace.h
+++ b/include/asm-sparc64/ptrace.h
@@ -8,6 +8,8 @@
 
				  * stack during a system call and basically all traps.
			
 
				  */
			
 
				 
			
 
				+#define PT_REGS_MAGIC 0x57ac6c00
			
 
				+
			
 
				 #ifndef __ASSEMBLY__
			
 
				 
			
 
				 struct pt_regs {
			
@@ -16,7 +18,19 @@ struct pt_regs {
 
				 	unsigned long tpc;
			
 
				 	unsigned long tnpc;
			
 
				 	unsigned int y;
			
 
				-	unsigned int fprs;
			
 
				+
			
 
				+	/* We encode a magic number, PT_REGS_MAGIC, along
			
 
				+	 * with the %tt (trap type) register value at trap
			
 
				+	 * entry time.  The magic number allows us to identify
			
 
				+	 * accurately a trap stack frame in the stack
			
 
				+	 * unwinder, and the %tt value allows us to test
			
 
				+	 * things like "in a system call" etc. for an arbitrary
			
 
				+	 * process.
			
 
				+	 *
			
 
				+	 * The PT_REGS_MAGIC is chosen such that it can be
			
 
				+	 * loaded completely using just a sethi instruction.
			
 
				+	 */
			
 
				+	unsigned int magic;
			
 
				 };
			
 
				 
			
 
				 struct pt_regs32 {
			
@@ -147,7 +161,7 @@ extern void __show_regs(struct pt_regs *);
 
				 #define PT_V9_TPC    0x88
			
 
				 #define PT_V9_TNPC   0x90
			
 
				 #define PT_V9_Y      0x98
			
 
				-#define PT_V9_FPRS   0x9c
			
 
				+#define PT_V9_MAGIC  0x9c
			
 
				 #define PT_TSTATE	PT_V9_TSTATE
			
 
				 #define PT_TPC		PT_V9_TPC
			
 
				 #define PT_TNPC		PT_V9_TNPC
			
--- a/include/asm-sparc64/sparsemem.h
+++ b/include/asm-sparc64/sparsemem.h
@@ -3,7 +3,7 @@
 
				 
			
 
				 #ifdef __KERNEL__
			
 
				 
			
 
				-#define SECTION_SIZE_BITS       31
			
 
				+#define SECTION_SIZE_BITS       30
			
 
				 #define MAX_PHYSADDR_BITS       42
			
 
				 #define MAX_PHYSMEM_BITS        42
			
 
				 
			
--- a/include/asm-sparc64/topology.h
+++ b/include/asm-sparc64/topology.h
@@ -1,6 +1,77 @@
 
				 #ifndef _ASM_SPARC64_TOPOLOGY_H
			
 
				 #define _ASM_SPARC64_TOPOLOGY_H
			
 
				 
			
 
				+#ifdef CONFIG_NUMA
			
 
				+
			
 
				+#include <asm/mmzone.h>
			
 
				+
			
 
				+static inline int cpu_to_node(int cpu)
			
 
				+{
			
 
				+	return numa_cpu_lookup_table[cpu];
			
 
				+}
			
 
				+
			
 
				+#define parent_node(node)	(node)
			
 
				+
			
 
				+static inline cpumask_t node_to_cpumask(int node)
			
 
				+{
			
 
				+	return numa_cpumask_lookup_table[node];
			
 
				+}
			
 
				+
			
 
				+/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
			
 
				+#define node_to_cpumask_ptr(v, node)		\
			
 
				+		cpumask_t *v = &(numa_cpumask_lookup_table[node])
			
 
				+
			
 
				+#define node_to_cpumask_ptr_next(v, node)	\
			
 
				+			   v = &(numa_cpumask_lookup_table[node])
			
 
				+
			
 
				+static inline int node_to_first_cpu(int node)
			
 
				+{
			
 
				+	cpumask_t tmp;
			
 
				+	tmp = node_to_cpumask(node);
			
 
				+	return first_cpu(tmp);
			
 
				+}
			
 
				+
			
 
				+struct pci_bus;
			
 
				+#ifdef CONFIG_PCI
			
 
				+extern int pcibus_to_node(struct pci_bus *pbus);
			
 
				+#else
			
 
				+static inline int pcibus_to_node(struct pci_bus *pbus)
			
 
				+{
			
 
				+	return -1;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#define pcibus_to_cpumask(bus)	\
			
 
				+	(pcibus_to_node(bus) == -1 ? \
			
 
				+	 CPU_MASK_ALL : \
			
 
				+	 node_to_cpumask(pcibus_to_node(bus)))
			
 
				+
			
 
				+#define SD_NODE_INIT (struct sched_domain) {		\
			
 
				+	.min_interval		= 8,			\
			
 
				+	.max_interval		= 32,			\
			
 
				+	.busy_factor		= 32,			\
			
 
				+	.imbalance_pct		= 125,			\
			
 
				+	.cache_nice_tries	= 2,			\
			
 
				+	.busy_idx		= 3,			\
			
 
				+	.idle_idx		= 2,			\
			
 
				+	.newidle_idx		= 0, 			\
			
 
				+	.wake_idx		= 1,			\
			
 
				+	.forkexec_idx		= 1,			\
			
 
				+	.flags			= SD_LOAD_BALANCE	\
			
 
				+				| SD_BALANCE_FORK	\
			
 
				+				| SD_BALANCE_EXEC	\
			
 
				+				| SD_SERIALIZE		\
			
 
				+				| SD_WAKE_BALANCE,	\
			
 
				+	.last_balance		= jiffies,		\
			
 
				+	.balance_interval	= 1,			\
			
 
				+}
			
 
				+
			
 
				+#else /* CONFIG_NUMA */
			
 
				+
			
 
				+#include <asm-generic/topology.h>
			
 
				+
			
 
				+#endif /* !(CONFIG_NUMA) */
			
 
				+
			
 
				 #ifdef CONFIG_SMP
			
 
				 #define topology_physical_package_id(cpu)	(cpu_data(cpu).proc_id)
			
 
				 #define topology_core_id(cpu)			(cpu_data(cpu).core_id)
			
@@ -10,8 +81,6 @@
 
				 #define smt_capable()				(sparc64_multi_core)
			
 
				 #endif /* CONFIG_SMP */
			
 
				 
			
 
				-#include <asm-generic/topology.h>
			
 
				-
			
 
				 #define cpu_coregroup_map(cpu)			(cpu_core_map[cpu])
			
 
				 
			
 
				 #endif /* _ASM_SPARC64_TOPOLOGY_H */
			
--- a/include/asm-sparc64/ttable.h
+++ b/include/asm-sparc64/ttable.h
@@ -28,7 +28,7 @@
 
				 	call	routine;				\
			
 
				 	 add	%sp, PTREGS_OFF, %o0;			\
			
 
				 	ba,pt	%xcc, rtrap;				\
			
 
				-	 clr	%l6;					\
			
 
				+	 nop;						\
			
 
				 	nop;
			
 
				 
			
 
				 #define TRAP_7INSNS(routine)				\
			
@@ -38,7 +38,7 @@
 
				 	call	routine;				\
			
 
				 	 add	%sp, PTREGS_OFF, %o0;			\
			
 
				 	ba,pt	%xcc, rtrap;				\
			
 
				-	 clr	%l6;
			
 
				+	 nop;
			
 
				 
			
 
				 #define TRAP_SAVEFPU(routine)				\
			
 
				 	sethi	%hi(109f), %g7;				\
			
@@ -47,7 +47,7 @@
 
				 	call	routine;				\
			
 
				 	 add	%sp, PTREGS_OFF, %o0;			\
			
 
				 	ba,pt	%xcc, rtrap;				\
			
 
				-	 clr	%l6;					\
			
 
				+	 nop;						\
			
 
				 	nop;
			
 
				 
			
 
				 #define TRAP_NOSAVE(routine)				\
			
@@ -67,7 +67,7 @@
 
				 	call	routine;				\
			
 
				 	 add	%sp, PTREGS_OFF, %o0;			\
			
 
				 	ba,pt	%xcc, rtrap;				\
			
 
				-	 clr	%l6;					\
			
 
				+	 nop;						\
			
 
				 	nop;
			
 
				 	
			
 
				 #define TRAP_ARG(routine, arg)				\
			
@@ -78,7 +78,7 @@
 
				 	call	routine;				\
			
 
				 	 mov	arg, %o1;				\
			
 
				 	ba,pt	%xcc, rtrap;				\
			
 
				-	 clr	%l6;
			
 
				+	 nop;
			
 
				 	
			
 
				 #define TRAPTL1_ARG(routine, arg)			\
			
 
				 	sethi	%hi(109f), %g7;				\
			
@@ -88,7 +88,7 @@
 
				 	call	routine;				\
			
 
				 	 mov	arg, %o1;				\
			
 
				 	ba,pt	%xcc, rtrap;				\
			
 
				-	 clr	%l6;
			
 
				+	 nop;
			
 
				 	
			
 
				 #define SYSCALL_TRAP(routine, systbl)			\
			
 
				 	sethi	%hi(109f), %g7;				\
			
@@ -166,7 +166,7 @@
 
				 	ldx	[%sp + PTREGS_OFF + PT_V9_TNPC], %l1;			\
			
 
				 	add	%l1, 4, %l2;						\
			
 
				 	stx	%l1, [%sp + PTREGS_OFF + PT_V9_TPC];			\
			
 
				-	ba,pt	%xcc, rtrap_clr_l6;					\
			
 
				+	ba,pt	%xcc, rtrap;						\
			
 
				 	 stx	%l2, [%sp + PTREGS_OFF + PT_V9_TNPC];
			
 
				 	        
			
 
				 #ifdef CONFIG_KPROBES
			
--- a/lib/lmb.c
+++ b/lib/lmb.c
@@ -346,7 +346,7 @@ u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr)
 
				 			if (j < 0) {
			
 
				 				/* this area isn't reserved, take it */
			
 
				 				if (lmb_add_region(&lmb.reserved, base,
			
 
				-						   size) < 0)
			
 
				+						   lmb_align_up(size, align)) < 0)
			
 
				 					return 0;
			
 
				 				return base;
			
 
				 			}