소스 검색

Merge branch 'xip_zdata' of http://git.linaro.org/people/nicolas.pitre/linux into devel-testing

This contains important fixes to the XIP linker script, some more linker
script cleanups, .bss clearing and .data copying speedups related to the
above, and an opt-in config option for XIP kernels that allows for
compressing .data in ROM that depend on those other patches to work
properly.
Russell King 7 년 전
부모
커밋
476242482b

+ 11 - 0
arch/arm/Kconfig

@@ -2005,6 +2005,17 @@ config XIP_PHYS_ADDR
 	  be linked for and stored to.  This address is dependent on your
 	  own flash usage.
 
+config XIP_DEFLATED_DATA
+	bool "Store kernel .data section compressed in ROM"
+	depends on XIP_KERNEL
+	select ZLIB_INFLATE
+	help
+	  Before the kernel is actually executed, its .data section has to be
+	  copied to RAM from ROM. This option allows for storing that data
+	  in compressed form and decompressed to RAM rather than merely being
+	  copied, saving some precious ROM space. A possible drawback is a
+	  slightly longer boot delay.
+
 config KEXEC
 	bool "Kexec system call (EXPERIMENTAL)"
 	depends on (!SMP || PM_SLEEP_SMP)

+ 12 - 1
arch/arm/boot/Makefile

@@ -31,8 +31,19 @@ targets := Image zImage xipImage bootpImage uImage
 
 ifeq ($(CONFIG_XIP_KERNEL),y)
 
+cmd_deflate_xip_data = $(CONFIG_SHELL) -c \
+	'$(srctree)/$(src)/deflate_xip_data.sh $< $@ || { rm -f $@; false; }'
+
+ifeq ($(CONFIG_XIP_DEFLATED_DATA),y)
+quiet_cmd_mkxip = XIPZ    $@
+cmd_mkxip = $(cmd_objcopy) && $(cmd_deflate_xip_data)
+else
+quiet_cmd_mkxip = $(quiet_cmd_objcopy)
+cmd_mkxip = $(cmd_objcopy)
+endif
+
 $(obj)/xipImage: vmlinux FORCE
-	$(call if_changed,objcopy)
+	$(call if_changed,mkxip)
 	@$(kecho) '  Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
 
 $(obj)/Image $(obj)/zImage: FORCE

+ 64 - 0
arch/arm/boot/deflate_xip_data.sh

@@ -0,0 +1,64 @@
+#!/bin/sh
+
+# XIP kernel .data segment compressor
+#
+# Created by:	Nicolas Pitre, August 2017
+# Copyright:	(C) 2017  Linaro Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+# This script locates the start of the .data section in xipImage and
+# substitutes it with a compressed version. The needed offsets are obtained
+# from symbol addresses in vmlinux. It is expected that .data extends to
+# the end of xipImage.
+
+set -e
+
+VMLINUX="$1"
+XIPIMAGE="$2"
+
+DD="dd status=none"
+
+# Use "make V=1" to debug this script.
+case "$KBUILD_VERBOSE" in
+*1*)
+	set -x
+	;;
+esac
+
+sym_val() {
+	# extract hex value for symbol in $1
+	local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}")
+	[ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; }
+	# convert from hex to decimal
+	echo $((0x$val))
+}
+
+__data_loc=$(sym_val __data_loc)
+_edata_loc=$(sym_val _edata_loc)
+base_offset=$(sym_val _xiprom)
+
+# convert to file based offsets
+data_start=$(($__data_loc - $base_offset))
+data_end=$(($_edata_loc - $base_offset))
+
+# Make sure data occupies the last part of the file.
+file_end=$(stat -c "%s" "$XIPIMAGE")
+if [ "$file_end" != "$data_end" ]; then
+	printf "end of xipImage doesn't match with _edata_loc (%#x vs %#x)\n" \
+	       $(($file_end + $base_offset)) $_edata_loc 2>&1
+	exit 1;
+fi
+
+# be ready to clean up
+trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3
+
+# substitute the data section by a compressed version
+$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp"
+$DD if="$XIPIMAGE"  skip=$data_start iflag=skip_bytes |
+gzip -9 >> "$XIPIMAGE.tmp"
+
+# replace kernel binary
+mv -f "$XIPIMAGE.tmp" "$XIPIMAGE"

+ 5 - 0
arch/arm/kernel/Makefile

@@ -87,6 +87,11 @@ head-y			:= head$(MMUEXT).o
 obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
+# This is executed very early using a temporary stack when no memory allocator
+# nor global data is available. Everything has to be allocated on the stack.
+CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240)
+obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o
+
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
 AFLAGS_hyp-stub.o		:=-Wa,-march=armv7-a
 ifeq ($(CONFIG_ARM_PSCI),y)

+ 53 - 32
arch/arm/kernel/head-common.S

@@ -79,47 +79,68 @@ ENDPROC(__vet_atags)
  */
 	__INIT
 __mmap_switched:
-	adr	r3, __mmap_switched_data
-
-	ldmia	r3!, {r4, r5, r6, r7}
-	cmp	r4, r5				@ Copy data segment if needed
-1:	cmpne	r5, r6
-	ldrne	fp, [r4], #4
-	strne	fp, [r5], #4
-	bne	1b
-
-	mov	fp, #0				@ Clear BSS (and zero fp)
-1:	cmp	r6, r7
-	strcc	fp, [r6],#4
-	bcc	1b
-
- ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
- THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
- THUMB(	ldr	sp, [r3, #16]		)
-	str	r9, [r4]			@ Save processor ID
-	str	r1, [r5]			@ Save machine type
-	str	r2, [r6]			@ Save atags pointer
-	cmp	r7, #0
-	strne	r0, [r7]			@ Save control register values
+
+	mov	r7, r1
+	mov	r8, r2
+	mov	r10, r0
+
+	adr	r4, __mmap_switched_data
+	mov	fp, #0
+
+#if defined(CONFIG_XIP_DEFLATED_DATA)
+   ARM(	ldr	sp, [r4], #4 )
+ THUMB(	ldr	sp, [r4] )
+ THUMB(	add	r4, #4 )
+	bl	__inflate_kernel_data		@ decompress .data to RAM
+	teq	r0, #0
+	bne	__error
+#elif defined(CONFIG_XIP_KERNEL)
+   ARM(	ldmia	r4!, {r0, r1, r2, sp} )
+ THUMB(	ldmia	r4!, {r0, r1, r2, r3} )
+ THUMB(	mov	sp, r3 )
+	sub	r2, r2, r1
+	bl	memcpy				@ copy .data to RAM
+#endif
+
+   ARM(	ldmia	r4!, {r0, r1, sp} )
+ THUMB(	ldmia	r4!, {r0, r1, r3} )
+ THUMB(	mov	sp, r3 )
+	sub	r1, r1, r0
+	bl	__memzero			@ clear .bss
+
+	ldmia	r4, {r0, r1, r2, r3}
+	str	r9, [r0]			@ Save processor ID
+	str	r7, [r1]			@ Save machine type
+	str	r8, [r2]			@ Save atags pointer
+	cmp	r3, #0
+	strne	r10, [r3]			@ Save control register values
 	b	start_kernel
 ENDPROC(__mmap_switched)
 
 	.align	2
 	.type	__mmap_switched_data, %object
 __mmap_switched_data:
-	.long	__data_loc			@ r4
-	.long	_sdata				@ r5
-	.long	__bss_start			@ r6
-	.long	_end				@ r7
-	.long	processor_id			@ r4
-	.long	__machine_arch_type		@ r5
-	.long	__atags_pointer			@ r6
+#ifdef CONFIG_XIP_KERNEL
+#ifndef CONFIG_XIP_DEFLATED_DATA
+	.long	_sdata				@ r0
+	.long	__data_loc			@ r1
+	.long	_edata_loc			@ r2
+#endif
+	.long	__bss_stop			@ sp (temporary stack in .bss)
+#endif
+
+	.long	__bss_start			@ r0
+	.long	__bss_stop			@ r1
+	.long	init_thread_union + THREAD_START_SP @ sp
+
+	.long	processor_id			@ r0
+	.long	__machine_arch_type		@ r1
+	.long	__atags_pointer			@ r2
 #ifdef CONFIG_CPU_CP15
-	.long	cr_alignment			@ r7
+	.long	cr_alignment			@ r3
 #else
-	.long	0				@ r7
+	.long	0				@ r3
 #endif
-	.long	init_thread_union + THREAD_START_SP @ sp
 	.size	__mmap_switched_data, . - __mmap_switched_data
 
 /*

+ 62 - 0
arch/arm/kernel/head-inflate-data.c

@@ -0,0 +1,62 @@
+/*
+ * XIP kernel .data segment decompressor
+ *
+ * Created by:	Nicolas Pitre, August 2017
+ * Copyright:	(C) 2017  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/zutil.h>
+
+/* for struct inflate_state */
+#include "../../../lib/zlib_inflate/inftrees.h"
+#include "../../../lib/zlib_inflate/inflate.h"
+#include "../../../lib/zlib_inflate/infutil.h"
+
+extern char __data_loc[];
+extern char _edata_loc[];
+extern char _sdata[];
+
+/*
+ * This code is called very early during the boot process to decompress
+ * the .data segment stored compressed in ROM. Therefore none of the global
+ * variables are valid yet, hence no kernel services such as memory
+ * allocation is available. Everything must be allocated on the stack and
+ * we must avoid any global data access. We use a temporary stack located
+ * in the .bss area. The linker script makes sure the .bss is big enough
+ * to hold our stack frame plus some room for called functions.
+ *
+ * We mimic the code in lib/decompress_inflate.c to use the smallest work
+ * area possible. And because everything is statically allocated on the
+ * stack then there is no need to clean up before returning.
+ */
+
+int __init __inflate_kernel_data(void)
+{
+	struct z_stream_s stream, *strm = &stream;
+	struct inflate_state state;
+	char *in = __data_loc;
+	int rc;
+
+	/* Check and skip gzip header (assume no filename) */
+	if (in[0] != 0x1f || in[1] != 0x8b || in[2] != 0x08 || in[3] & ~3)
+		return -1;
+	in += 10;
+
+	strm->workspace = &state;
+	strm->next_in = in;
+	strm->avail_in = _edata_loc - __data_loc;  /* upper bound */
+	strm->next_out = _sdata;
+	strm->avail_out = _edata_loc - __data_loc;
+	zlib_inflateInit2(strm, -MAX_WBITS);
+	WS(strm)->inflate_state.wsize = 0;
+	WS(strm)->inflate_state.window = NULL;
+	rc = zlib_inflate(strm, Z_FINISH);
+	if (rc == Z_OK || rc == Z_STREAM_END)
+		rc = strm->avail_out;  /* should be 0 */
+	return rc;
+}

+ 43 - 51
arch/arm/kernel/vmlinux-xip.lds.S

@@ -77,9 +77,7 @@ SECTIONS
 		*(.text.fixup)
 		*(__ex_table)
 #endif
-#ifndef CONFIG_SMP_ON_UP
 		*(.alt.smp.init)
-#endif
 		*(.discard)
 		*(.discard.*)
 	}
@@ -181,19 +179,7 @@ SECTIONS
 		*(.taglist.init)
 		__tagtable_end = .;
 	}
-#ifdef CONFIG_SMP_ON_UP
-	.init.smpalt : {
-		__smpalt_begin = .;
-		*(.alt.smp.init)
-		__smpalt_end = .;
-	}
-#endif
-	.init.pv_table : {
-		__pv_table_begin = .;
-		*(.pv_table)
-		__pv_table_end = .;
-	}
-	.init.data : {
+	.init.rodata : {
 		INIT_SETUP(16)
 		INIT_CALLS
 		CON_INITCALL
@@ -201,48 +187,46 @@ SECTIONS
 		INIT_RAM_FS
 	}
 
-#ifdef CONFIG_SMP
-	PERCPU_SECTION(L1_CACHE_BYTES)
-#endif
-
 	_exiprom = .;			/* End of XIP ROM area */
-	__data_loc = ALIGN(4);		/* location in binary */
-	. = PAGE_OFFSET + TEXT_OFFSET;
-
-	.data : AT(__data_loc) {
-		_data = .;		/* address in memory */
-		_sdata = .;
 
-		/*
-		 * first, the init task union, aligned
-		 * to an 8192 byte boundary.
-		 */
-		INIT_TASK_DATA(THREAD_SIZE)
+/*
+ * From this point, stuff is considered writable and will be copied to RAM
+ */
+	__data_loc = ALIGN(4);		/* location in file */
+	. = PAGE_OFFSET + TEXT_OFFSET;	/* location in memory */
+#undef LOAD_OFFSET
+#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc)
+
+	. = ALIGN(THREAD_SIZE);
+	_sdata = .;
+	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	.data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
+		*(.data..ro_after_init)
+	}
+	_edata = .;
 
-		. = ALIGN(PAGE_SIZE);
-		__init_begin = .;
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
 		INIT_DATA
+	}
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
 		ARM_EXIT_KEEP(EXIT_DATA)
-		. = ALIGN(PAGE_SIZE);
-		__init_end = .;
-
-		*(.data..ro_after_init)
-
-		NOSAVE_DATA
-		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
-		READ_MOSTLY_DATA(L1_CACHE_BYTES)
-
-		/*
-		 * and the usual data section
-		 */
-		DATA_DATA
-		CONSTRUCTORS
-
-		_edata = .;
 	}
-	_edata_loc = __data_loc + SIZEOF(.data);
+#ifdef CONFIG_SMP
+	PERCPU_SECTION(L1_CACHE_BYTES)
+#endif
+
+	/*
+	 * End of copied data. We need a dummy section to get its LMA.
+	 * Also located before final ALIGN() as trailing padding is not stored
+	 * in the resulting binary file and useless to copy.
+	 */
+	.data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { }
+	_edata_loc = LOADADDR(.data.endmark);
 
-	BUG_TABLE
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
 
 #ifdef CONFIG_HAVE_TCM
         /*
@@ -301,7 +285,7 @@ SECTIONS
 	}
 #endif
 
-	BSS_SECTION(0, 0, 0)
+	BSS_SECTION(0, 0, 8)
 	_end = .;
 
 	STABS_DEBUG
@@ -322,3 +306,11 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
  */
 ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
 	"HYP init code too big or misaligned")
+
+#ifdef CONFIG_XIP_DEFLATED_DATA
+/*
+ * The .bss is used as a stack area for __inflate_kernel_data() whose stack
+ * frame is 9568 bytes. Make sure it has extra room left.
+ */
+ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
+#endif

+ 6 - 34
arch/arm/kernel/vmlinux.lds.S

@@ -214,14 +214,9 @@ SECTIONS
 		*(.pv_table)
 		__pv_table_end = .;
 	}
-	.init.data : {
-		INIT_DATA
-		INIT_SETUP(16)
-		INIT_CALLS
-		CON_INITCALL
-		SECURITY_INITCALL
-		INIT_RAM_FS
-	}
+
+	INIT_DATA_SECTION(16)
+
 	.exit.data : {
 		ARM_EXIT_KEEP(EXIT_DATA)
 	}
@@ -236,33 +231,10 @@ SECTIONS
 	. = ALIGN(THREAD_SIZE);
 #endif
 	__init_end = .;
-	__data_loc = .;
-
-	.data : AT(__data_loc) {
-		_data = .;		/* address in memory */
-		_sdata = .;
-
-		/*
-		 * first, the init task union, aligned
-		 * to an 8192 byte boundary.
-		 */
-		INIT_TASK_DATA(THREAD_SIZE)
-
-		NOSAVE_DATA
-		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
-		READ_MOSTLY_DATA(L1_CACHE_BYTES)
-
-		/*
-		 * and the usual data section
-		 */
-		DATA_DATA
-		CONSTRUCTORS
-
-		_edata = .;
-	}
-	_edata_loc = __data_loc + SIZEOF(.data);
 
-	BUG_TABLE
+	_sdata = .;
+	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	_edata = .;
 
 #ifdef CONFIG_HAVE_TCM
         /*