Merge branch 'bpf-fixes'

Daniel Borkmann says:

====================
The first patch fixes a panic I ran into while testing
the second one, for which we got several syzkaller
reports. The series here fixes both.

Thanks!
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
commit b5518c7051
3 files changed, 106 insertions(+), 38 deletions(-):

 include/linux/filter.h | +41 -22
 kernel/bpf/core.c      | +62  -7
 kernel/bpf/syscall.c   |  +3  -9

diff --git a/include/linux/filter.h b/include/linux/filter.h
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -469,7 +469,8 @@ struct sock_fprog_kern {
 };
 
 struct bpf_binary_header {
-	unsigned int pages;
+	u16 pages;
+	u16 locked:1;
 	u8 image[];
 };
 
@@ -671,15 +672,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 	fp->locked = 1;
-	WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
+	if (set_memory_ro((unsigned long)fp, fp->pages))
+		fp->locked = 0;
+#endif
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 	if (fp->locked) {
 		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
 		/* In case set_memory_rw() fails, we want to be the first
@@ -687,34 +691,30 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 		 */
 		fp->locked = 0;
 	}
+#endif
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-	WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
-}
-
-static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
-{
-	WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
-}
-#else
-static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
-{
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+	hdr->locked = 1;
+	if (set_memory_ro((unsigned long)hdr, hdr->pages))
+		hdr->locked = 0;
+#endif
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+	if (hdr->locked) {
+		WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
+		/* In case set_memory_rw() fails, we want to be the first
+		 * to crash here instead of some random place later on.
+		 */
+		hdr->locked = 0;
+	}
+#endif
 }
-#endif /* CONFIG_ARCH_HAS_SET_MEMORY */
 
 static inline struct bpf_binary_header *
 bpf_jit_binary_hdr(const struct bpf_prog *fp)
@@ -725,6 +725,22 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
 	return (void *)addr;
 }
 
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
+{
+	if (!fp->locked)
+		return -ENOLCK;
+	if (fp->jited) {
+		const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
+
+		if (!hdr->locked)
+			return -ENOLCK;
+	}
+
+	return 0;
+}
+#endif
+
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
@@ -961,6 +977,9 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 }
 #endif /* CONFIG_BPF_JIT */
 
+void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
+void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
+
 #define BPF_ANC		BIT(15)
 
 static inline bool bpf_needs_clear_a(const struct sock_filter *first)
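
A note on the bpf_binary_header change at the top of this file: shrinking
pages from unsigned int to u16 makes room for the locked bit without
growing the header, and a 16-bit page count is still ample for JIT images.
A standalone sketch of the layout argument, assuming a typical ABI with a
4-byte int (not kernel code, builds with -std=c11):

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

/* Mirrors the old and new bpf_binary_header layouts from the hunk above. */
struct hdr_old { unsigned int pages; uint8_t image[]; };
struct hdr_new { uint16_t pages; uint16_t locked:1; uint8_t image[]; };

/* The two u16 fields pack into the 4 bytes the unsigned int occupied,
 * so the flexible image[] member keeps its offset.
 */
static_assert(sizeof(struct hdr_old) == sizeof(struct hdr_new),
	      "header size unchanged");
static_assert(offsetof(struct hdr_old, image) ==
	      offsetof(struct hdr_new, image),
	      "image[] offset unchanged");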

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -350,6 +350,20 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	return prog_adj;
 }
 
+void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
+{
+	int i;
+
+	for (i = 0; i < fp->aux->func_cnt; i++)
+		bpf_prog_kallsyms_del(fp->aux->func[i]);
+}
+
+void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
+{
+	bpf_prog_kallsyms_del_subprogs(fp);
+	bpf_prog_kallsyms_del(fp);
+}
+
 #ifdef CONFIG_BPF_JIT
 /* All BPF JIT sysctl knobs here. */
 int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
@@ -584,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	bpf_fill_ill_insns(hdr, size);
 
 	hdr->pages = size / PAGE_SIZE;
+	hdr->locked = 0;
+
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1434,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
 	return 0;
 }
 
+static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
+{
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+	int i, err;
+
+	for (i = 0; i < fp->aux->func_cnt; i++) {
+		err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
+		if (err)
+			return err;
+	}
+
+	return bpf_prog_check_pages_ro_single(fp);
+#endif
+	return 0;
+}
+
+static void bpf_prog_select_func(struct bpf_prog *fp)
+{
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+
+	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+	fp->bpf_func = __bpf_prog_ret0_warn;
+#endif
+}
+
 /**
  *	bpf_prog_select_runtime - select exec runtime for BPF program
  *	@fp: bpf_prog populated with internal BPF program
@@ -1444,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+	/* In case of BPF to BPF calls, verifier did all the prep
+	 * work with regards to JITing, etc.
+	 */
+	if (fp->bpf_func)
+		goto finalize;
 
-	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
-#else
-	fp->bpf_func = __bpf_prog_ret0_warn;
-#endif
+	bpf_prog_select_func(fp);
 
 	/* eBPF JITs can rewrite the program in case constant
 	 * blinding is active. However, in case of error during
@@ -1471,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 		if (*err)
 			return fp;
 	}
+
+finalize:
 	bpf_prog_lock_ro(fp);
 
 	/* The tail call compatibility check can only be done at
@@ -1479,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 * all eBPF JITs might immediately support all features.
 	 */
 	*err = bpf_check_tail_call(fp);
-
+	if (*err)
+		return fp;
+
+	/* Checkpoint: at this point onwards any cBPF -> eBPF or
+	 * native eBPF program is read-only. If we failed to change
+	 * the page attributes (e.g. allocation failure from
+	 * splitting large pages), then reject the whole program
+	 * in order to guarantee not ending up with any W+X pages
+	 * from BPF side in kernel.
+	 */
+	*err = bpf_prog_check_pages_ro_locked(fp);
 	return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
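
One detail of the factored-out bpf_prog_select_func() worth spelling out:
the interpreter table is indexed by the program's stack depth in 32-byte
steps, and since the verifier caps stack_depth at MAX_BPF_STACK (512),
valid indices run 0..15. A standalone sketch of that arithmetic, with
ROUND_UP standing in for the kernel's round_up():

#include <stdio.h>

#define ROUND_UP(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned int depths[] = { 1, 32, 33, 64, 512 };
	unsigned int i;

	for (i = 0; i < sizeof(depths) / sizeof(depths[0]); i++)
		/* Depths 1..32 map to interpreters[0], 33..64 to [1], ... */
		printf("stack_depth %3u -> interpreters[%u]\n", depths[i],
		       ROUND_UP(depths[i], 32) / 32 - 1);
	return 0;
}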

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1034,14 +1034,9 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
 	if (atomic_dec_and_test(&prog->aux->refcnt)) {
-		int i;
-
 		/* bpf_prog_free_id() must be called first */
 		bpf_prog_free_id(prog, do_idr_lock);
-
-		for (i = 0; i < prog->aux->func_cnt; i++)
-			bpf_prog_kallsyms_del(prog->aux->func[i]);
-		bpf_prog_kallsyms_del(prog);
+		bpf_prog_kallsyms_del_all(prog);
 
 		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 	}
@@ -1358,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (err < 0)
 		goto free_used_maps;
 
-	/* eBPF program is ready to be JITed */
-	if (!prog->bpf_func)
-		prog = bpf_prog_select_runtime(prog, &err);
+	prog = bpf_prog_select_runtime(prog, &err);
 	if (err < 0)
 		goto free_used_maps;
 
@@ -1384,6 +1377,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	return err;
 
 free_used_maps:
+	bpf_prog_kallsyms_del_subprogs(prog);
 	free_used_maps(prog->aux);
 free_prog:
 	bpf_prog_uncharge_memlock(prog);
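
The net effect visible from user space: bpf(BPF_PROG_LOAD, ...) now fails
when the program's pages could not be locked read-only, instead of the
kernel keeping W+X pages around. A hedged user-space sketch of what a
loader sees; load_prog() is a hypothetical wrapper, and attr is assumed
to be an already-populated load attribute (not part of the patch):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Thin wrapper around bpf(2); fd ownership passes to the caller. */
int load_prog(union bpf_attr *attr)
{
	int fd = syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(*attr));

	if (fd < 0)
		/* With this series applied, ENOLCK indicates the read-only
		 * protection on the program pages could not be set.
		 */
		fprintf(stderr, "prog load failed: %s\n", strerror(errno));
	return fd;
}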