Эх сурвалжийг харах

Merge branch 'xip_zdata' of http://git.linaro.org/people/nicolas.pitre/linux into devel-testing

This contains important fixes to the XIP linker script, some more linker
script cleanups, .bss clearing and .data copying speedups related to the
above, and an opt-in config option for XIP kernels that allows for
compressing .data in ROM; that option depends on those other patches to
work properly.
Russell King 7 жил өмнө
parent
commit
476242482b

+ 11 - 0
arch/arm/Kconfig

@@ -2005,6 +2005,17 @@ config XIP_PHYS_ADDR
 	  be linked for and stored to.  This address is dependent on your
 	  be linked for and stored to.  This address is dependent on your
 	  own flash usage.
 	  own flash usage.
 
 
+config XIP_DEFLATED_DATA
+	bool "Store kernel .data section compressed in ROM"
+	depends on XIP_KERNEL
+	select ZLIB_INFLATE
+	help
+	  Before the kernel is actually executed, its .data section has to be
+	  copied to RAM from ROM. This option allows for storing that data
+	  in compressed form and decompressing it to RAM rather than merely
+	  copying it, saving some precious ROM space. A possible drawback is a
+	  slightly longer boot delay.
+
 config KEXEC
 config KEXEC
 	bool "Kexec system call (EXPERIMENTAL)"
 	bool "Kexec system call (EXPERIMENTAL)"
 	depends on (!SMP || PM_SLEEP_SMP)
 	depends on (!SMP || PM_SLEEP_SMP)

+ 12 - 1
arch/arm/boot/Makefile

@@ -31,8 +31,19 @@ targets := Image zImage xipImage bootpImage uImage
 
 
 ifeq ($(CONFIG_XIP_KERNEL),y)
 ifeq ($(CONFIG_XIP_KERNEL),y)
 
 
+# Run deflate_xip_data.sh on the first prerequisite ($<) and the target ($@).
+# If the script fails, the target is removed and the command reports failure.
+cmd_deflate_xip_data = $(CONFIG_SHELL) -c \
+	'$(srctree)/$(src)/deflate_xip_data.sh $< $@ || { rm -f $@; false; }'
+
+# mkxip is the command used by the xipImage rule: plain objcopy, or objcopy
+# followed by the deflate step when CONFIG_XIP_DEFLATED_DATA=y.
+ifeq ($(CONFIG_XIP_DEFLATED_DATA),y)
+quiet_cmd_mkxip = XIPZ    $@
+cmd_mkxip = $(cmd_objcopy) && $(cmd_deflate_xip_data)
+else
+quiet_cmd_mkxip = $(quiet_cmd_objcopy)
+cmd_mkxip = $(cmd_objcopy)
+endif
+
 $(obj)/xipImage: vmlinux FORCE
 $(obj)/xipImage: vmlinux FORCE
-	$(call if_changed,objcopy)
+	$(call if_changed,mkxip)
 	@$(kecho) '  Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
 	@$(kecho) '  Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
 
 
 $(obj)/Image $(obj)/zImage: FORCE
 $(obj)/Image $(obj)/zImage: FORCE

+ 64 - 0
arch/arm/boot/deflate_xip_data.sh

@@ -0,0 +1,64 @@
+#!/bin/sh
+
+# XIP kernel .data segment compressor
+#
+# Created by:	Nicolas Pitre, August 2017
+# Copyright:	(C) 2017  Linaro Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+# This script locates the start of the .data section in xipImage and
+# substitutes it with a compressed version. The needed offsets are obtained
+# from symbol addresses in vmlinux. It is expected that .data extends to
+# the end of xipImage.
+
+set -e
+
+VMLINUX="$1"
+XIPIMAGE="$2"
+
+DD="dd status=none"
+
+# Use "make V=1" to debug this script.
+case "$KBUILD_VERBOSE" in
+*1*)
+	set -x
+	;;
+esac
+
+sym_val() {
+	# Print the value of symbol $1 from $VMLINUX as a decimal number.
+	# The first "$NM" output line ending in " $1" is used; everything from
+	# its first space onward is dropped, leaving the leading hex value.
+	# $NM is not set in this script and is expected from the environment.
+	local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}")
+	# no match: report on stderr and exit with status 1
+	[ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; }
+	# convert from hex to decimal
+	echo $((0x$val))
+}
+
+__data_loc=$(sym_val __data_loc)
+_edata_loc=$(sym_val _edata_loc)
+base_offset=$(sym_val _xiprom)
+
+# convert to file based offsets
+data_start=$(($__data_loc - $base_offset))
+data_end=$(($_edata_loc - $base_offset))
+
+# Make sure data occupies the last part of the file.
+# The compressor below replaces everything from $data_start to end of file,
+# so the image must end exactly where .data ends.
+file_end=$(stat -c "%s" "$XIPIMAGE")
+if [ "$file_end" != "$data_end" ]; then
+	# Fatal diagnostic: send it to stderr, like sym_val() does.
+	printf "end of xipImage doesn't match with _edata_loc (%#x vs %#x)\n" \
+	       $(($file_end + $base_offset)) $_edata_loc 1>&2
+	exit 1;
+fi
+
+# be ready to clean up
+trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3
+
+# substitute the data section by a compressed version
+$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp"
+$DD if="$XIPIMAGE"  skip=$data_start iflag=skip_bytes |
+gzip -9 >> "$XIPIMAGE.tmp"
+
+# replace kernel binary
+mv -f "$XIPIMAGE.tmp" "$XIPIMAGE"

+ 5 - 0
arch/arm/kernel/Makefile

@@ -87,6 +87,11 @@ head-y			:= head$(MMUEXT).o
 obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 
+# This is executed very early using a temporary stack when no memory allocator
+# nor global data is available. Everything has to be allocated on the stack.
+CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240)
+obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o
+
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
 AFLAGS_hyp-stub.o		:=-Wa,-march=armv7-a
 AFLAGS_hyp-stub.o		:=-Wa,-march=armv7-a
 ifeq ($(CONFIG_ARM_PSCI),y)
 ifeq ($(CONFIG_ARM_PSCI),y)

+ 53 - 32
arch/arm/kernel/head-common.S

@@ -79,47 +79,68 @@ ENDPROC(__vet_atags)
  */
  */
 	__INIT
 	__INIT
 __mmap_switched:
 __mmap_switched:
-	adr	r3, __mmap_switched_data
-
-	ldmia	r3!, {r4, r5, r6, r7}
-	cmp	r4, r5				@ Copy data segment if needed
-1:	cmpne	r5, r6
-	ldrne	fp, [r4], #4
-	strne	fp, [r5], #4
-	bne	1b
-
-	mov	fp, #0				@ Clear BSS (and zero fp)
-1:	cmp	r6, r7
-	strcc	fp, [r6],#4
-	bcc	1b
-
- ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
- THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
- THUMB(	ldr	sp, [r3, #16]		)
-	str	r9, [r4]			@ Save processor ID
-	str	r1, [r5]			@ Save machine type
-	str	r2, [r6]			@ Save atags pointer
-	cmp	r7, #0
-	strne	r0, [r7]			@ Save control register values
+
+	@ r0-r2 are reloaded as call arguments below, so keep the incoming
+	@ machine type (r1), atags pointer (r2) and control register value
+	@ (r0) in r7, r8 and r10 until they are stored at the end.
+	mov	r7, r1
+	mov	r8, r2
+	mov	r10, r0
+
+	@ r4 walks __mmap_switched_data; each load below consumes the next
+	@ group of words from that table.
+	adr	r4, __mmap_switched_data
+	mov	fp, #0
+
+#if defined(CONFIG_XIP_DEFLATED_DATA)
+	@ First table word is the temporary stack top (__bss_stop) needed by
+	@ the C decompressor. THUMB loads sp and advances r4 in two steps.
+   ARM(	ldr	sp, [r4], #4 )
+ THUMB(	ldr	sp, [r4] )
+ THUMB(	add	r4, #4 )
+	bl	__inflate_kernel_data		@ decompress .data to RAM
+	@ any non-zero return value is treated as a failure
+	teq	r0, #0
+	bne	__error
+#elif defined(CONFIG_XIP_KERNEL)
+	@ r0 = _sdata (destination), r1 = __data_loc (source),
+	@ r2 = _edata_loc, sp = __bss_stop (temporary stack).
+	@ THUMB loads the stack pointer through r3.
+   ARM(	ldmia	r4!, {r0, r1, r2, sp} )
+ THUMB(	ldmia	r4!, {r0, r1, r2, r3} )
+ THUMB(	mov	sp, r3 )
+	@ r2 = number of bytes to copy
+	sub	r2, r2, r1
+	bl	memcpy				@ copy .data to RAM
+#endif
+
+	@ r0 = __bss_start, r1 = __bss_stop,
+	@ sp = init_thread_union + THREAD_START_SP (the final stack)
+   ARM(	ldmia	r4!, {r0, r1, sp} )
+ THUMB(	ldmia	r4!, {r0, r1, r3} )
+ THUMB(	mov	sp, r3 )
+	@ r1 = size of .bss in bytes
+	sub	r1, r1, r0
+	bl	__memzero			@ clear .bss
+
+	@ r0-r3 = addresses of processor_id, __machine_arch_type,
+	@ __atags_pointer and cr_alignment (0 without CONFIG_CPU_CP15)
+	ldmia	r4, {r0, r1, r2, r3}
+	str	r9, [r0]			@ Save processor ID
+	str	r7, [r1]			@ Save machine type
+	str	r8, [r2]			@ Save atags pointer
+	@ the control register store is skipped when the table holds 0
+	cmp	r3, #0
+	strne	r10, [r3]			@ Save control register values
 	b	start_kernel
 	b	start_kernel
 ENDPROC(__mmap_switched)
 ENDPROC(__mmap_switched)
 
 
 	.align	2
 	.align	2
 	.type	__mmap_switched_data, %object
 	.type	__mmap_switched_data, %object
 __mmap_switched_data:
 __mmap_switched_data:
-	.long	__data_loc			@ r4
-	.long	_sdata				@ r5
-	.long	__bss_start			@ r6
-	.long	_end				@ r7
-	.long	processor_id			@ r4
-	.long	__machine_arch_type		@ r5
-	.long	__atags_pointer			@ r6
+#ifdef CONFIG_XIP_KERNEL
+#ifndef CONFIG_XIP_DEFLATED_DATA
+	.long	_sdata				@ r0
+	.long	__data_loc			@ r1
+	.long	_edata_loc			@ r2
+#endif
+	.long	__bss_stop			@ sp (temporary stack in .bss)
+#endif
+
+	.long	__bss_start			@ r0
+	.long	__bss_stop			@ r1
+	.long	init_thread_union + THREAD_START_SP @ sp
+
+	.long	processor_id			@ r0
+	.long	__machine_arch_type		@ r1
+	.long	__atags_pointer			@ r2
 #ifdef CONFIG_CPU_CP15
 #ifdef CONFIG_CPU_CP15
-	.long	cr_alignment			@ r7
+	.long	cr_alignment			@ r3
 #else
 #else
-	.long	0				@ r7
+	.long	0				@ r3
 #endif
 #endif
-	.long	init_thread_union + THREAD_START_SP @ sp
 	.size	__mmap_switched_data, . - __mmap_switched_data
 	.size	__mmap_switched_data, . - __mmap_switched_data
 
 
 /*
 /*

+ 62 - 0
arch/arm/kernel/head-inflate-data.c

@@ -0,0 +1,62 @@
+/*
+ * XIP kernel .data segment decompressor
+ *
+ * Created by:	Nicolas Pitre, August 2017
+ * Copyright:	(C) 2017  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/zutil.h>
+
+/* for struct inflate_state */
+#include "../../../lib/zlib_inflate/inftrees.h"
+#include "../../../lib/zlib_inflate/inflate.h"
+#include "../../../lib/zlib_inflate/infutil.h"
+
+extern char __data_loc[];
+extern char _edata_loc[];
+extern char _sdata[];
+
+/*
+ * This code is called very early during the boot process to decompress
+ * the .data segment stored compressed in ROM. Therefore none of the global
+ * variables are valid yet, hence no kernel services such as memory
+ * allocation is available. Everything must be allocated on the stack and
+ * we must avoid any global data access. We use a temporary stack located
+ * in the .bss area. The linker script makes sure the .bss is big enough
+ * to hold our stack frame plus some room for called functions.
+ *
+ * We mimic the code in lib/decompress_inflate.c to use the smallest work
+ * area possible. And because everything is statically allocated on the
+ * stack then there is no need to clean up before returning.
+ */
+
+/*
+ * Inflate the gzip'ed .data image stored at __data_loc into RAM at _sdata.
+ *
+ * Returns 0 on success. Returns -1 when the gzip header is not the expected
+ * one. Otherwise returns the leftover strm->avail_out when zlib_inflate()
+ * reports Z_OK or Z_STREAM_END (expected to be 0), or the zlib_inflate()
+ * result itself. The caller in __mmap_switched branches to __error on any
+ * non-zero value.
+ */
+int __init __inflate_kernel_data(void)
+{
+	struct z_stream_s stream, *strm = &stream;
+	struct inflate_state state;
+	char *in = __data_loc;
+	int rc;
+
+	/*
+	 * Check and skip gzip header (assume no filename):
+	 * bytes 0-1 must be 0x1f 0x8b, byte 2 must be 0x08 and byte 3 may
+	 * only have its two lowest bits set. The 10 bytes skipped below are
+	 * the fixed-size gzip header (see RFC 1952).
+	 */
+	if (in[0] != 0x1f || in[1] != 0x8b || in[2] != 0x08 || in[3] & ~3)
+		return -1;
+	in += 10;
+
+	/* the inflate state lives on this stack frame, nothing is allocated */
+	strm->workspace = &state;
+	strm->next_in = in;
+	strm->avail_in = _edata_loc - __data_loc;  /* upper bound */
+	strm->next_out = _sdata;
+	strm->avail_out = _edata_loc - __data_loc;
+	/* negative window bits: presumably raw deflate without zlib header - TODO confirm */
+	zlib_inflateInit2(strm, -MAX_WBITS);
+	/*
+	 * No separate sliding window is provided, mimicking
+	 * lib/decompress_inflate.c as described above.
+	 * NOTE(review): presumably inflate then refers back into the output
+	 * buffer directly - confirm against lib/zlib_inflate.
+	 */
+	WS(strm)->inflate_state.wsize = 0;
+	WS(strm)->inflate_state.window = NULL;
+	rc = zlib_inflate(strm, Z_FINISH);
+	if (rc == Z_OK || rc == Z_STREAM_END)
+		rc = strm->avail_out;  /* should be 0 */
+	return rc;
+}

+ 43 - 51
arch/arm/kernel/vmlinux-xip.lds.S

@@ -77,9 +77,7 @@ SECTIONS
 		*(.text.fixup)
 		*(.text.fixup)
 		*(__ex_table)
 		*(__ex_table)
 #endif
 #endif
-#ifndef CONFIG_SMP_ON_UP
 		*(.alt.smp.init)
 		*(.alt.smp.init)
-#endif
 		*(.discard)
 		*(.discard)
 		*(.discard.*)
 		*(.discard.*)
 	}
 	}
@@ -181,19 +179,7 @@ SECTIONS
 		*(.taglist.init)
 		*(.taglist.init)
 		__tagtable_end = .;
 		__tagtable_end = .;
 	}
 	}
-#ifdef CONFIG_SMP_ON_UP
-	.init.smpalt : {
-		__smpalt_begin = .;
-		*(.alt.smp.init)
-		__smpalt_end = .;
-	}
-#endif
-	.init.pv_table : {
-		__pv_table_begin = .;
-		*(.pv_table)
-		__pv_table_end = .;
-	}
-	.init.data : {
+	.init.rodata : {
 		INIT_SETUP(16)
 		INIT_SETUP(16)
 		INIT_CALLS
 		INIT_CALLS
 		CON_INITCALL
 		CON_INITCALL
@@ -201,48 +187,46 @@ SECTIONS
 		INIT_RAM_FS
 		INIT_RAM_FS
 	}
 	}
 
 
-#ifdef CONFIG_SMP
-	PERCPU_SECTION(L1_CACHE_BYTES)
-#endif
-
 	_exiprom = .;			/* End of XIP ROM area */
 	_exiprom = .;			/* End of XIP ROM area */
-	__data_loc = ALIGN(4);		/* location in binary */
-	. = PAGE_OFFSET + TEXT_OFFSET;
-
-	.data : AT(__data_loc) {
-		_data = .;		/* address in memory */
-		_sdata = .;
 
 
-		/*
-		 * first, the init task union, aligned
-		 * to an 8192 byte boundary.
-		 */
-		INIT_TASK_DATA(THREAD_SIZE)
+/*
+ * From this point, stuff is considered writable and will be copied to RAM
+ */
+	__data_loc = ALIGN(4);		/* location in file */
+	. = PAGE_OFFSET + TEXT_OFFSET;	/* location in memory */
+#undef LOAD_OFFSET
+#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc)
+
+	. = ALIGN(THREAD_SIZE);
+	_sdata = .;
+	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	.data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
+		*(.data..ro_after_init)
+	}
+	_edata = .;
 
 
-		. = ALIGN(PAGE_SIZE);
-		__init_begin = .;
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
 		INIT_DATA
 		INIT_DATA
+	}
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
 		ARM_EXIT_KEEP(EXIT_DATA)
 		ARM_EXIT_KEEP(EXIT_DATA)
-		. = ALIGN(PAGE_SIZE);
-		__init_end = .;
-
-		*(.data..ro_after_init)
-
-		NOSAVE_DATA
-		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
-		READ_MOSTLY_DATA(L1_CACHE_BYTES)
-
-		/*
-		 * and the usual data section
-		 */
-		DATA_DATA
-		CONSTRUCTORS
-
-		_edata = .;
 	}
 	}
-	_edata_loc = __data_loc + SIZEOF(.data);
+#ifdef CONFIG_SMP
+	PERCPU_SECTION(L1_CACHE_BYTES)
+#endif
+
+	/*
+	 * End of copied data. We need a dummy section to get its LMA.
+	 * Also located before final ALIGN() as trailing padding is not stored
+	 * in the resulting binary file and useless to copy.
+	 */
+	.data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { }
+	_edata_loc = LOADADDR(.data.endmark);
 
 
-	BUG_TABLE
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
 
 
 #ifdef CONFIG_HAVE_TCM
 #ifdef CONFIG_HAVE_TCM
         /*
         /*
@@ -301,7 +285,7 @@ SECTIONS
 	}
 	}
 #endif
 #endif
 
 
-	BSS_SECTION(0, 0, 0)
+	BSS_SECTION(0, 0, 8)
 	_end = .;
 	_end = .;
 
 
 	STABS_DEBUG
 	STABS_DEBUG
@@ -322,3 +306,11 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
  */
  */
 ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
 ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
 	"HYP init code too big or misaligned")
 	"HYP init code too big or misaligned")
+
+#ifdef CONFIG_XIP_DEFLATED_DATA
+/*
+ * The .bss is used as a stack area for __inflate_kernel_data() whose stack
+ * frame is 9568 bytes. Make sure it has extra room left.
+ */
+ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
+#endif

+ 6 - 34
arch/arm/kernel/vmlinux.lds.S

@@ -214,14 +214,9 @@ SECTIONS
 		*(.pv_table)
 		*(.pv_table)
 		__pv_table_end = .;
 		__pv_table_end = .;
 	}
 	}
-	.init.data : {
-		INIT_DATA
-		INIT_SETUP(16)
-		INIT_CALLS
-		CON_INITCALL
-		SECURITY_INITCALL
-		INIT_RAM_FS
-	}
+
+	INIT_DATA_SECTION(16)
+
 	.exit.data : {
 	.exit.data : {
 		ARM_EXIT_KEEP(EXIT_DATA)
 		ARM_EXIT_KEEP(EXIT_DATA)
 	}
 	}
@@ -236,33 +231,10 @@ SECTIONS
 	. = ALIGN(THREAD_SIZE);
 	. = ALIGN(THREAD_SIZE);
 #endif
 #endif
 	__init_end = .;
 	__init_end = .;
-	__data_loc = .;
-
-	.data : AT(__data_loc) {
-		_data = .;		/* address in memory */
-		_sdata = .;
-
-		/*
-		 * first, the init task union, aligned
-		 * to an 8192 byte boundary.
-		 */
-		INIT_TASK_DATA(THREAD_SIZE)
-
-		NOSAVE_DATA
-		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
-		READ_MOSTLY_DATA(L1_CACHE_BYTES)
-
-		/*
-		 * and the usual data section
-		 */
-		DATA_DATA
-		CONSTRUCTORS
-
-		_edata = .;
-	}
-	_edata_loc = __data_loc + SIZEOF(.data);
 
 
-	BUG_TABLE
+	_sdata = .;
+	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	_edata = .;
 
 
 #ifdef CONFIG_HAVE_TCM
 #ifdef CONFIG_HAVE_TCM
         /*
         /*