6 年之前 · cb86d0f878
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -2,6 +2,7 @@
 
				 #ifndef _BPF_CGROUP_H
			
 
				 #define _BPF_CGROUP_H
			
 
				 
			
 
				+#include <linux/bpf.h>
			
 
				 #include <linux/errno.h>
			
 
				 #include <linux/jump_label.h>
			
 
				 #include <linux/percpu.h>
			
@@ -22,7 +23,11 @@ struct bpf_cgroup_storage;
 
				 extern struct static_key_false cgroup_bpf_enabled_key;
			
 
				 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
			
 
				 
			
 
				-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
			
 
				+DECLARE_PER_CPU(struct bpf_cgroup_storage*,
			
 
				+		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
			
 
				+
			
 
				+#define for_each_cgroup_storage_type(stype) \
			
 
				+	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
			
 
				 
			
 
				 struct bpf_cgroup_storage_map;
			
 
				 
			
@@ -32,7 +37,10 @@ struct bpf_storage_buffer {
 
				 };
			
 
				 
			
 
				 struct bpf_cgroup_storage {
			
 
				-	struct bpf_storage_buffer *buf;
			
 
				+	union {
			
 
				+		struct bpf_storage_buffer *buf;
			
 
				+		void __percpu *percpu_buf;
			
 
				+	};
			
 
				 	struct bpf_cgroup_storage_map *map;
			
 
				 	struct bpf_cgroup_storage_key key;
			
 
				 	struct list_head list;
			
@@ -43,7 +51,7 @@ struct bpf_cgroup_storage {
 
				 struct bpf_prog_list {
			
 
				 	struct list_head node;
			
 
				 	struct bpf_prog *prog;
			
 
				-	struct bpf_cgroup_storage *storage;
			
 
				+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
			
 
				 };
			
 
				 
			
 
				 struct bpf_prog_array;
			
@@ -101,18 +109,26 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 
				 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
			
 
				 				      short access, enum bpf_attach_type type);
			
 
				 
			
 
				-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
			
 
				+static inline enum bpf_cgroup_storage_type cgroup_storage_type(
			
 
				+	struct bpf_map *map)
			
 
				 {
			
 
				-	struct bpf_storage_buffer *buf;
			
 
				+	if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
			
 
				+		return BPF_CGROUP_STORAGE_PERCPU;
			
 
				+
			
 
				+	return BPF_CGROUP_STORAGE_SHARED;
			
 
				+}
			
 
				 
			
 
				-	if (!storage)
			
 
				-		return;
			
 
				+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
			
 
				+					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
			
 
				+{
			
 
				+	enum bpf_cgroup_storage_type stype;
			
 
				 
			
 
				-	buf = READ_ONCE(storage->buf);
			
 
				-	this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
			
 
				+	for_each_cgroup_storage_type(stype)
			
 
				+		this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
			
 
				 }
			
 
				 
			
 
				-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
			
 
				+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
			
 
				+					enum bpf_cgroup_storage_type stype);
			
 
				 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
			
 
				 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			
 
				 			     struct cgroup *cgroup,
			
@@ -121,6 +137,10 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
 
				 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
			
 
				 void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
			
 
				 
			
 
				+int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
			
 
				+int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
			
 
				+				     void *value, u64 flags);
			
 
				+
			
 
				 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
			
 
				 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
			
 
				 ({									      \
			
@@ -265,15 +285,24 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 
				 	return -EINVAL;
			
 
				 }
			
 
				 
			
 
				-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
			
 
				+static inline void bpf_cgroup_storage_set(
			
 
				+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
			
 
				 static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
			
 
				 					    struct bpf_map *map) { return 0; }
			
 
				 static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
			
 
				 					      struct bpf_map *map) {}
			
 
				 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
			
 
				-	struct bpf_prog *prog) { return 0; }
			
 
				+	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
			
 
				 static inline void bpf_cgroup_storage_free(
			
 
				 	struct bpf_cgroup_storage *storage) {}
			
 
				+static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
			
 
				+						 void *value) {
			
 
				+	return 0;
			
 
				+}
			
 
				+static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
			
 
				+					void *key, void *value, u64 flags) {
			
 
				+	return 0;
			
 
				+}
			
 
				 
			
 
				 #define cgroup_bpf_enabled (0)
			
 
				 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
			
@@ -293,6 +322,8 @@ static inline void bpf_cgroup_storage_free(
 
				 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
			
 
				 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
			
 
				 
			
 
				+#define for_each_cgroup_storage_type(stype) for (; false; )
			
 
				+
			
 
				 #endif /* CONFIG_CGROUP_BPF */
			
 
				 
			
 
				 #endif /* _BPF_CGROUP_H */
			
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -272,6 +272,14 @@ struct bpf_prog_offload {
 
				 	u32			jited_len;
			
 
				 };
			
 
				 
			
 
				+enum bpf_cgroup_storage_type {
			
 
				+	BPF_CGROUP_STORAGE_SHARED,
			
 
				+	BPF_CGROUP_STORAGE_PERCPU,
			
 
				+	__BPF_CGROUP_STORAGE_MAX
			
 
				+};
			
 
				+
			
 
				+#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
			
 
				+
			
 
				 struct bpf_prog_aux {
			
 
				 	atomic_t refcnt;
			
 
				 	u32 used_map_cnt;
			
@@ -289,7 +297,7 @@ struct bpf_prog_aux {
 
				 	struct bpf_prog *prog;
			
 
				 	struct user_struct *user;
			
 
				 	u64 load_time; /* ns since boottime */
			
 
				-	struct bpf_map *cgroup_storage;
			
 
				+	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
			
 
				 	char name[BPF_OBJ_NAME_LEN];
			
 
				 #ifdef CONFIG_SECURITY
			
 
				 	void *security;
			
@@ -358,7 +366,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 
				  */
			
 
				 struct bpf_prog_array_item {
			
 
				 	struct bpf_prog *prog;
			
 
				-	struct bpf_cgroup_storage *cgroup_storage;
			
 
				+	struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
			
 
				 };
			
 
				 
			
 
				 struct bpf_prog_array {
			
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -43,6 +43,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
 
				 #endif
			
 
				 #ifdef CONFIG_CGROUP_BPF
			
 
				 BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
			
 
				+BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
			
 
				 #endif
			
 
				 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
			
 
				 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
			
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -127,6 +127,7 @@ enum bpf_map_type {
 
				 	BPF_MAP_TYPE_SOCKHASH,
			
 
				 	BPF_MAP_TYPE_CGROUP_STORAGE,
			
 
				 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
			
 
				+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
			
 
				 };
			
 
				 
			
 
				 enum bpf_prog_type {
			
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -25,6 +25,7 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 
				  */
			
 
				 void cgroup_bpf_put(struct cgroup *cgrp)
			
 
				 {
			
 
				+	enum bpf_cgroup_storage_type stype;
			
 
				 	unsigned int type;
			
 
				 
			
 
				 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
			
@@ -34,8 +35,10 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 
				 		list_for_each_entry_safe(pl, tmp, progs, node) {
			
 
				 			list_del(&pl->node);
			
 
				 			bpf_prog_put(pl->prog);
			
 
				-			bpf_cgroup_storage_unlink(pl->storage);
			
 
				-			bpf_cgroup_storage_free(pl->storage);
			
 
				+			for_each_cgroup_storage_type(stype) {
			
 
				+				bpf_cgroup_storage_unlink(pl->storage[stype]);
			
 
				+				bpf_cgroup_storage_free(pl->storage[stype]);
			
 
				+			}
			
 
				 			kfree(pl);
			
 
				 			static_branch_dec(&cgroup_bpf_enabled_key);
			
 
				 		}
			
@@ -97,6 +100,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
 
				 				   enum bpf_attach_type type,
			
 
				 				   struct bpf_prog_array __rcu **array)
			
 
				 {
			
 
				+	enum bpf_cgroup_storage_type stype;
			
 
				 	struct bpf_prog_array *progs;
			
 
				 	struct bpf_prog_list *pl;
			
 
				 	struct cgroup *p = cgrp;
			
@@ -125,7 +129,9 @@ static int compute_effective_progs(struct cgroup *cgrp,
 
				 				continue;
			
 
				 
			
 
				 			progs->items[cnt].prog = pl->prog;
			
 
				-			progs->items[cnt].cgroup_storage = pl->storage;
			
 
				+			for_each_cgroup_storage_type(stype)
			
 
				+				progs->items[cnt].cgroup_storage[stype] =
			
 
				+					pl->storage[stype];
			
 
				 			cnt++;
			
 
				 		}
			
 
				 	} while ((p = cgroup_parent(p)));
			
@@ -232,7 +238,9 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 
				 {
			
 
				 	struct list_head *progs = &cgrp->bpf.progs[type];
			
 
				 	struct bpf_prog *old_prog = NULL;
			
 
				-	struct bpf_cgroup_storage *storage, *old_storage = NULL;
			
 
				+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
			
 
				+		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
			
 
				+	enum bpf_cgroup_storage_type stype;
			
 
				 	struct bpf_prog_list *pl;
			
 
				 	bool pl_was_allocated;
			
 
				 	int err;
			
@@ -254,34 +262,44 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 
				 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
			
 
				 		return -E2BIG;
			
 
				 
			
 
				-	storage = bpf_cgroup_storage_alloc(prog);
			
 
				-	if (IS_ERR(storage))
			
 
				-		return -ENOMEM;
			
 
				+	for_each_cgroup_storage_type(stype) {
			
 
				+		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
			
 
				+		if (IS_ERR(storage[stype])) {
			
 
				+			storage[stype] = NULL;
			
 
				+			for_each_cgroup_storage_type(stype)
			
 
				+				bpf_cgroup_storage_free(storage[stype]);
			
 
				+			return -ENOMEM;
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	if (flags & BPF_F_ALLOW_MULTI) {
			
 
				 		list_for_each_entry(pl, progs, node) {
			
 
				 			if (pl->prog == prog) {
			
 
				 				/* disallow attaching the same prog twice */
			
 
				-				bpf_cgroup_storage_free(storage);
			
 
				+				for_each_cgroup_storage_type(stype)
			
 
				+					bpf_cgroup_storage_free(storage[stype]);
			
 
				 				return -EINVAL;
			
 
				 			}
			
 
				 		}
			
 
				 
			
 
				 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
			
 
				 		if (!pl) {
			
 
				-			bpf_cgroup_storage_free(storage);
			
 
				+			for_each_cgroup_storage_type(stype)
			
 
				+				bpf_cgroup_storage_free(storage[stype]);
			
 
				 			return -ENOMEM;
			
 
				 		}
			
 
				 
			
 
				 		pl_was_allocated = true;
			
 
				 		pl->prog = prog;
			
 
				-		pl->storage = storage;
			
 
				+		for_each_cgroup_storage_type(stype)
			
 
				+			pl->storage[stype] = storage[stype];
			
 
				 		list_add_tail(&pl->node, progs);
			
 
				 	} else {
			
 
				 		if (list_empty(progs)) {
			
 
				 			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
			
 
				 			if (!pl) {
			
 
				-				bpf_cgroup_storage_free(storage);
			
 
				+				for_each_cgroup_storage_type(stype)
			
 
				+					bpf_cgroup_storage_free(storage[stype]);
			
 
				 				return -ENOMEM;
			
 
				 			}
			
 
				 			pl_was_allocated = true;
			
@@ -289,12 +307,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 
				 		} else {
			
 
				 			pl = list_first_entry(progs, typeof(*pl), node);
			
 
				 			old_prog = pl->prog;
			
 
				-			old_storage = pl->storage;
			
 
				-			bpf_cgroup_storage_unlink(old_storage);
			
 
				+			for_each_cgroup_storage_type(stype) {
			
 
				+				old_storage[stype] = pl->storage[stype];
			
 
				+				bpf_cgroup_storage_unlink(old_storage[stype]);
			
 
				+			}
			
 
				 			pl_was_allocated = false;
			
 
				 		}
			
 
				 		pl->prog = prog;
			
 
				-		pl->storage = storage;
			
 
				+		for_each_cgroup_storage_type(stype)
			
 
				+			pl->storage[stype] = storage[stype];
			
 
				 	}
			
 
				 
			
 
				 	cgrp->bpf.flags[type] = flags;
			
@@ -304,21 +325,27 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 
				 		goto cleanup;
			
 
				 
			
 
				 	static_branch_inc(&cgroup_bpf_enabled_key);
			
 
				-	if (old_storage)
			
 
				-		bpf_cgroup_storage_free(old_storage);
			
 
				+	for_each_cgroup_storage_type(stype) {
			
 
				+		if (!old_storage[stype])
			
 
				+			continue;
			
 
				+		bpf_cgroup_storage_free(old_storage[stype]);
			
 
				+	}
			
 
				 	if (old_prog) {
			
 
				 		bpf_prog_put(old_prog);
			
 
				 		static_branch_dec(&cgroup_bpf_enabled_key);
			
 
				 	}
			
 
				-	bpf_cgroup_storage_link(storage, cgrp, type);
			
 
				+	for_each_cgroup_storage_type(stype)
			
 
				+		bpf_cgroup_storage_link(storage[stype], cgrp, type);
			
 
				 	return 0;
			
 
				 
			
 
				 cleanup:
			
 
				 	/* and cleanup the prog list */
			
 
				 	pl->prog = old_prog;
			
 
				-	bpf_cgroup_storage_free(pl->storage);
			
 
				-	pl->storage = old_storage;
			
 
				-	bpf_cgroup_storage_link(old_storage, cgrp, type);
			
 
				+	for_each_cgroup_storage_type(stype) {
			
 
				+		bpf_cgroup_storage_free(pl->storage[stype]);
			
 
				+		pl->storage[stype] = old_storage[stype];
			
 
				+		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
			
 
				+	}
			
 
				 	if (pl_was_allocated) {
			
 
				 		list_del(&pl->node);
			
 
				 		kfree(pl);
			
@@ -339,6 +366,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 
				 			enum bpf_attach_type type, u32 unused_flags)
			
 
				 {
			
 
				 	struct list_head *progs = &cgrp->bpf.progs[type];
			
 
				+	enum bpf_cgroup_storage_type stype;
			
 
				 	u32 flags = cgrp->bpf.flags[type];
			
 
				 	struct bpf_prog *old_prog = NULL;
			
 
				 	struct bpf_prog_list *pl;
			
@@ -385,8 +413,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 
				 
			
 
				 	/* now can actually delete it from this cgroup list */
			
 
				 	list_del(&pl->node);
			
 
				-	bpf_cgroup_storage_unlink(pl->storage);
			
 
				-	bpf_cgroup_storage_free(pl->storage);
			
 
				+	for_each_cgroup_storage_type(stype) {
			
 
				+		bpf_cgroup_storage_unlink(pl->storage[stype]);
			
 
				+		bpf_cgroup_storage_free(pl->storage[stype]);
			
 
				+	}
			
 
				 	kfree(pl);
			
 
				 	if (list_empty(progs))
			
 
				 		/* last program was detached, reset flags to zero */
			
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -194,16 +194,28 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
 
				 	.ret_type	= RET_INTEGER,
			
 
				 };
			
 
				 
			
 
				-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
			
 
				+#ifdef CONFIG_CGROUP_BPF
			
 
				+DECLARE_PER_CPU(struct bpf_cgroup_storage*,
			
 
				+		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
			
 
				 
			
 
				 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
			
 
				 {
			
 
				-	/* map and flags arguments are not used now,
			
 
				-	 * but provide an ability to extend the API
			
 
				-	 * for other types of local storages.
			
 
				-	 * verifier checks that their values are correct.
			
 
				+	/* flags argument is not used now,
			
 
				+	 * but provides an ability to extend the API.
			
 
				+	 * verifier checks that its value is correct.
			
 
				 	 */
			
 
				-	return (unsigned long) this_cpu_read(bpf_cgroup_storage);
			
 
				+	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
			
 
				+	struct bpf_cgroup_storage *storage;
			
 
				+	void *ptr;
			
 
				+
			
 
				+	storage = this_cpu_read(bpf_cgroup_storage[stype]);
			
 
				+
			
 
				+	if (stype == BPF_CGROUP_STORAGE_SHARED)
			
 
				+		ptr = &READ_ONCE(storage->buf)->data[0];
			
 
				+	else
			
 
				+		ptr = this_cpu_ptr(storage->percpu_buf);
			
 
				+
			
 
				+	return (unsigned long)ptr;
			
 
				 }
			
 
				 
			
 
				 const struct bpf_func_proto bpf_get_local_storage_proto = {
			
@@ -214,3 +226,4 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
 
				 	.arg2_type	= ARG_ANYTHING,
			
 
				 };
			
 
				 #endif
			
 
				+#endif
			
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -7,7 +7,8 @@
 
				 #include <linux/rbtree.h>
			
 
				 #include <linux/slab.h>
			
 
				 
			
 
				-DEFINE_PER_CPU(void*, bpf_cgroup_storage);
			
 
				+DEFINE_PER_CPU(struct bpf_cgroup_storage*,
			
 
				+	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
			
 
				 
			
 
				 #ifdef CONFIG_CGROUP_BPF
			
 
				 
			
@@ -151,6 +152,71 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
			
 
				+				   void *value)
			
 
				+{
			
 
				+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
			
 
				+	struct bpf_cgroup_storage_key *key = _key;
			
 
				+	struct bpf_cgroup_storage *storage;
			
 
				+	int cpu, off = 0;
			
 
				+	u32 size;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	storage = cgroup_storage_lookup(map, key, false);
			
 
				+	if (!storage) {
			
 
				+		rcu_read_unlock();
			
 
				+		return -ENOENT;
			
 
				+	}
			
 
				+
			
 
				+	/* per_cpu areas are zero-filled and bpf programs can only
			
 
				+	 * access 'value_size' of them, so copying rounded areas
			
 
				+	 * will not leak any kernel data
			
 
				+	 */
			
 
				+	size = round_up(_map->value_size, 8);
			
 
				+	for_each_possible_cpu(cpu) {
			
 
				+		bpf_long_memcpy(value + off,
			
 
				+				per_cpu_ptr(storage->percpu_buf, cpu), size);
			
 
				+		off += size;
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
			
 
				+				     void *value, u64 map_flags)
			
 
				+{
			
 
				+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
			
 
				+	struct bpf_cgroup_storage_key *key = _key;
			
 
				+	struct bpf_cgroup_storage *storage;
			
 
				+	int cpu, off = 0;
			
 
				+	u32 size;
			
 
				+
			
 
				+	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	storage = cgroup_storage_lookup(map, key, false);
			
 
				+	if (!storage) {
			
 
				+		rcu_read_unlock();
			
 
				+		return -ENOENT;
			
 
				+	}
			
 
				+
			
 
				+	/* the user space will provide round_up(value_size, 8) bytes that
			
 
				+	 * will be copied into per-cpu area. bpf programs can only access
			
 
				+	 * value_size of it. During lookup the same extra bytes will be
			
 
				+	 * returned or zeros which were zero-filled by percpu_alloc,
			
 
				+	 * so no kernel data leaks possible
			
 
				+	 */
			
 
				+	size = round_up(_map->value_size, 8);
			
 
				+	for_each_possible_cpu(cpu) {
			
 
				+		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
			
 
				+				value + off, size);
			
 
				+		off += size;
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
			
 
				 				       void *_next_key)
			
 
				 {
			
@@ -251,6 +317,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
 
				 
			
 
				 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
			
 
				 {
			
 
				+	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
			
 
				 	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
			
 
				 	int ret = -EBUSY;
			
 
				 
			
@@ -258,11 +325,12 @@ int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
 
				 
			
 
				 	if (map->prog && map->prog != prog)
			
 
				 		goto unlock;
			
 
				-	if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
			
 
				+	if (prog->aux->cgroup_storage[stype] &&
			
 
				+	    prog->aux->cgroup_storage[stype] != _map)
			
 
				 		goto unlock;
			
 
				 
			
 
				 	map->prog = prog;
			
 
				-	prog->aux->cgroup_storage = _map;
			
 
				+	prog->aux->cgroup_storage[stype] = _map;
			
 
				 	ret = 0;
			
 
				 unlock:
			
 
				 	spin_unlock_bh(&map->lock);
			
@@ -272,70 +340,117 @@ unlock:
 
				 
			
 
				 void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
			
 
				 {
			
 
				+	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
			
 
				 	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
			
 
				 
			
 
				 	spin_lock_bh(&map->lock);
			
 
				 	if (map->prog == prog) {
			
 
				-		WARN_ON(prog->aux->cgroup_storage != _map);
			
 
				+		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
			
 
				 		map->prog = NULL;
			
 
				-		prog->aux->cgroup_storage = NULL;
			
 
				+		prog->aux->cgroup_storage[stype] = NULL;
			
 
				 	}
			
 
				 	spin_unlock_bh(&map->lock);
			
 
				 }
			
 
				 
			
 
				-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
			
 
				+static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
			
 
				+{
			
 
				+	size_t size;
			
 
				+
			
 
				+	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
			
 
				+		size = sizeof(struct bpf_storage_buffer) + map->value_size;
			
 
				+		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
			
 
				+				  PAGE_SIZE) >> PAGE_SHIFT;
			
 
				+	} else {
			
 
				+		size = map->value_size;
			
 
				+		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
			
 
				+				  PAGE_SIZE) >> PAGE_SHIFT;
			
 
				+	}
			
 
				+
			
 
				+	return size;
			
 
				+}
			
 
				+
			
 
				+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
			
 
				+					enum bpf_cgroup_storage_type stype)
			
 
				 {
			
 
				 	struct bpf_cgroup_storage *storage;
			
 
				 	struct bpf_map *map;
			
 
				+	gfp_t flags;
			
 
				+	size_t size;
			
 
				 	u32 pages;
			
 
				 
			
 
				-	map = prog->aux->cgroup_storage;
			
 
				+	map = prog->aux->cgroup_storage[stype];
			
 
				 	if (!map)
			
 
				 		return NULL;
			
 
				 
			
 
				-	pages = round_up(sizeof(struct bpf_cgroup_storage) +
			
 
				-			 sizeof(struct bpf_storage_buffer) +
			
 
				-			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
			
 
				+	size = bpf_cgroup_storage_calculate_size(map, &pages);
			
 
				+
			
 
				 	if (bpf_map_charge_memlock(map, pages))
			
 
				 		return ERR_PTR(-EPERM);
			
 
				 
			
 
				 	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			
 
				 			       __GFP_ZERO | GFP_USER, map->numa_node);
			
 
				-	if (!storage) {
			
 
				-		bpf_map_uncharge_memlock(map, pages);
			
 
				-		return ERR_PTR(-ENOMEM);
			
 
				-	}
			
 
				+	if (!storage)
			
 
				+		goto enomem;
			
 
				 
			
 
				-	storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			
 
				-				    map->value_size, __GFP_ZERO | GFP_USER,
			
 
				-				    map->numa_node);
			
 
				-	if (!storage->buf) {
			
 
				-		bpf_map_uncharge_memlock(map, pages);
			
 
				-		kfree(storage);
			
 
				-		return ERR_PTR(-ENOMEM);
			
 
				+	flags = __GFP_ZERO | GFP_USER;
			
 
				+
			
 
				+	if (stype == BPF_CGROUP_STORAGE_SHARED) {
			
 
				+		storage->buf = kmalloc_node(size, flags, map->numa_node);
			
 
				+		if (!storage->buf)
			
 
				+			goto enomem;
			
 
				+	} else {
			
 
				+		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
			
 
				+		if (!storage->percpu_buf)
			
 
				+			goto enomem;
			
 
				 	}
			
 
				 
			
 
				 	storage->map = (struct bpf_cgroup_storage_map *)map;
			
 
				 
			
 
				 	return storage;
			
 
				+
			
 
				+enomem:
			
 
				+	bpf_map_uncharge_memlock(map, pages);
			
 
				+	kfree(storage);
			
 
				+	return ERR_PTR(-ENOMEM);
			
 
				+}
			
 
				+
			
 
				+static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
			
 
				+{
			
 
				+	struct bpf_cgroup_storage *storage =
			
 
				+		container_of(rcu, struct bpf_cgroup_storage, rcu);
			
 
				+
			
 
				+	kfree(storage->buf);
			
 
				+	kfree(storage);
			
 
				+}
			
 
				+
			
 
				+static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
			
 
				+{
			
 
				+	struct bpf_cgroup_storage *storage =
			
 
				+		container_of(rcu, struct bpf_cgroup_storage, rcu);
			
 
				+
			
 
				+	free_percpu(storage->percpu_buf);
			
 
				+	kfree(storage);
			
 
				 }
			
 
				 
			
 
				 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
			
 
				 {
			
 
				-	u32 pages;
			
 
				+	enum bpf_cgroup_storage_type stype;
			
 
				 	struct bpf_map *map;
			
 
				+	u32 pages;
			
 
				 
			
 
				 	if (!storage)
			
 
				 		return;
			
 
				 
			
 
				 	map = &storage->map->map;
			
 
				-	pages = round_up(sizeof(struct bpf_cgroup_storage) +
			
 
				-			 sizeof(struct bpf_storage_buffer) +
			
 
				-			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
			
 
				+
			
 
				+	bpf_cgroup_storage_calculate_size(map, &pages);
			
 
				 	bpf_map_uncharge_memlock(map, pages);
			
 
				 
			
 
				-	kfree_rcu(storage->buf, rcu);
			
 
				-	kfree_rcu(storage, rcu);
			
 
				+	stype = cgroup_storage_type(map);
			
 
				+	if (stype == BPF_CGROUP_STORAGE_SHARED)
			
 
				+		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
			
 
				+	else
			
 
				+		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
			
 
				 }
			
 
				 
			
 
				 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -24,7 +24,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 
				 	 * in the verifier is not enough.
			
 
				 	 */
			
 
				 	if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
			
 
				-	    inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE) {
			
 
				+	    inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
			
 
				+	    inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
			
 
				 		fdput(f);
			
 
				 		return ERR_PTR(-ENOTSUPP);
			
 
				 	}
			
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -686,7 +686,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 
				 
			
 
				 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
			
 
				 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
			
 
				-	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
			
 
				+	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
			
 
				+	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
			
 
				 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
			
 
				 	else if (IS_FD_MAP(map))
			
 
				 		value_size = sizeof(u32);
			
@@ -705,6 +706,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 
				 		err = bpf_percpu_hash_copy(map, key, value);
			
 
				 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
			
 
				 		err = bpf_percpu_array_copy(map, key, value);
			
 
				+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
			
 
				+		err = bpf_percpu_cgroup_storage_copy(map, key, value);
			
 
				 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
			
 
				 		err = bpf_stackmap_copy(map, key, value);
			
 
				 	} else if (IS_FD_ARRAY(map)) {
			
@@ -774,7 +777,8 @@ static int map_update_elem(union bpf_attr *attr)
 
				 
			
 
				 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
			
 
				 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
			
 
				-	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
			
 
				+	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
			
 
				+	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
			
 
				 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
			
 
				 	else
			
 
				 		value_size = map->value_size;
			
@@ -809,6 +813,9 @@ static int map_update_elem(union bpf_attr *attr)
 
				 		err = bpf_percpu_hash_update(map, key, value, attr->flags);
			
 
				 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
			
 
				 		err = bpf_percpu_array_update(map, key, value, attr->flags);
			
 
				+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
			
 
				+		err = bpf_percpu_cgroup_storage_update(map, key, value,
			
 
				+						       attr->flags);
			
 
				 	} else if (IS_FD_ARRAY(map)) {
			
 
				 		rcu_read_lock();
			
 
				 		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
			
@@ -988,10 +995,15 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 
				 /* drop refcnt on maps used by eBPF program and free auxilary data */
			
 
				 static void free_used_maps(struct bpf_prog_aux *aux)
			
 
				 {
			
 
				+	enum bpf_cgroup_storage_type stype;
			
 
				 	int i;
			
 
				 
			
 
				-	if (aux->cgroup_storage)
			
 
				-		bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
			
 
				+	for_each_cgroup_storage_type(stype) {
			
 
				+		if (!aux->cgroup_storage[stype])
			
 
				+			continue;
			
 
				+		bpf_cgroup_storage_release(aux->prog,
			
 
				+					   aux->cgroup_storage[stype]);
			
 
				+	}
			
 
				 
			
 
				 	for (i = 0; i < aux->used_map_cnt; i++)
			
 
				 		bpf_map_put(aux->used_maps[i]);
			
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2074,6 +2074,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 
				 			goto error;
			
 
				 		break;
			
 
				 	case BPF_MAP_TYPE_CGROUP_STORAGE:
			
 
				+	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
			
 
				 		if (func_id != BPF_FUNC_get_local_storage)
			
 
				 			goto error;
			
 
				 		break;
			
@@ -2164,7 +2165,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 
				 			goto error;
			
 
				 		break;
			
 
				 	case BPF_FUNC_get_local_storage:
			
 
				-		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
			
 
				+		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
			
 
				+		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
			
 
				 			goto error;
			
 
				 		break;
			
 
				 	case BPF_FUNC_sk_select_reuseport:
			
@@ -5049,6 +5051,12 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
			
 
				+{
			
 
				+	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
			
 
				+		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
			
 
				+}
			
 
				+
			
 
				 /* look for pseudo eBPF instructions that access map FDs and
			
 
				  * replace them with actual map pointers
			
 
				  */
			
@@ -5139,10 +5147,9 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 
				 			}
			
 
				 			env->used_maps[env->used_map_cnt++] = map;
			
 
				 
			
 
				-			if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE &&
			
 
				+			if (bpf_map_is_cgroup_storage(map) &&
			
 
				 			    bpf_cgroup_storage_assign(env->prog, map)) {
			
 
				-				verbose(env,
			
 
				-					"only one cgroup storage is allowed\n");
			
 
				+				verbose(env, "only one cgroup storage of each type is allowed\n");
			
 
				 				fdput(f);
			
 
				 				return -EBUSY;
			
 
				 			}
			
@@ -5171,11 +5178,15 @@ next_insn:
 
				 /* drop refcnt of maps used by the rejected program */
			
 
				 static void release_maps(struct bpf_verifier_env *env)
			
 
				 {
			
 
				+	enum bpf_cgroup_storage_type stype;
			
 
				 	int i;
			
 
				 
			
 
				-	if (env->prog->aux->cgroup_storage)
			
 
				+	for_each_cgroup_storage_type(stype) {
			
 
				+		if (!env->prog->aux->cgroup_storage[stype])
			
 
				+			continue;
			
 
				 		bpf_cgroup_storage_release(env->prog,
			
 
				-					   env->prog->aux->cgroup_storage);
			
 
				+			env->prog->aux->cgroup_storage[stype]);
			
 
				+	}
			
 
				 
			
 
				 	for (i = 0; i < env->used_map_cnt; i++)
			
 
				 		bpf_map_put(env->used_maps[i]);
			
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -12,7 +12,7 @@
 
				 #include <linux/sched/signal.h>
			
 
				 
			
 
				 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
			
 
				-					    struct bpf_cgroup_storage *storage)
			
 
				+		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
			
 
				 {
			
 
				 	u32 ret;
			
 
				 
			
@@ -28,13 +28,20 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
 
				 
			
 
				 static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
			
 
				 {
			
 
				-	struct bpf_cgroup_storage *storage = NULL;
			
 
				+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
			
 
				+	enum bpf_cgroup_storage_type stype;
			
 
				 	u64 time_start, time_spent = 0;
			
 
				 	u32 ret = 0, i;
			
 
				 
			
 
				-	storage = bpf_cgroup_storage_alloc(prog);
			
 
				-	if (IS_ERR(storage))
			
 
				-		return PTR_ERR(storage);
			
 
				+	for_each_cgroup_storage_type(stype) {
			
 
				+		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
			
 
				+		if (IS_ERR(storage[stype])) {
			
 
				+			storage[stype] = NULL;
			
 
				+			for_each_cgroup_storage_type(stype)
			
 
				+				bpf_cgroup_storage_free(storage[stype]);
			
 
				+			return -ENOMEM;
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	if (!repeat)
			
 
				 		repeat = 1;
			
@@ -53,7 +60,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
 
				 	do_div(time_spent, repeat);
			
 
				 	*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
			
 
				 
			
 
				-	bpf_cgroup_storage_free(storage);
			
 
				+	for_each_cgroup_storage_type(stype)
			
 
				+		bpf_cgroup_storage_free(storage[stype]);
			
 
				 
			
 
				 	return ret;
			
 
				 }
			
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/samples/bpf/test_cgrp2_attach2.c
@@ -209,7 +209,7 @@ static int map_fd = -1;
 
				 
			
 
				 static int prog_load_cnt(int verdict, int val)
			
 
				 {
			
 
				-	int cgroup_storage_fd;
			
 
				+	int cgroup_storage_fd, percpu_cgroup_storage_fd;
			
 
				 
			
 
				 	if (map_fd < 0)
			
 
				 		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
			
@@ -225,6 +225,14 @@ static int prog_load_cnt(int verdict, int val)
 
				 		return -1;
			
 
				 	}
			
 
				 
			
 
				+	percpu_cgroup_storage_fd = bpf_create_map(
			
 
				+		BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
			
 
				+		sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
			
 
				+	if (percpu_cgroup_storage_fd < 0) {
			
 
				+		printf("failed to create map '%s'\n", strerror(errno));
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				 	struct bpf_insn prog[] = {
			
 
				 		BPF_MOV32_IMM(BPF_REG_0, 0),
			
 
				 		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
			
@@ -235,11 +243,20 @@ static int prog_load_cnt(int verdict, int val)
 
				 		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
			
 
				 		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
			
 
				 		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
			
 
				+
			
 
				 		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
			
 
				 		BPF_MOV64_IMM(BPF_REG_2, 0),
			
 
				 		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
			
 
				 		BPF_MOV64_IMM(BPF_REG_1, val),
			
 
				 		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
			
 
				+
			
 
				+		BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
			
 
				+		BPF_MOV64_IMM(BPF_REG_2, 0),
			
 
				+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
			
 
				+		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
			
 
				+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
			
 
				+		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
			
 
				+
			
 
				 		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
			
 
				 		BPF_EXIT_INSN(),
			
 
				 	};
			
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -72,13 +72,15 @@ static const char * const map_type_name[] = {
 
				 	[BPF_MAP_TYPE_SOCKHASH]		= "sockhash",
			
 
				 	[BPF_MAP_TYPE_CGROUP_STORAGE]	= "cgroup_storage",
			
 
				 	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
			
 
				+	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
			
 
				 };
			
 
				 
			
 
				 static bool map_is_per_cpu(__u32 type)
			
 
				 {
			
 
				 	return type == BPF_MAP_TYPE_PERCPU_HASH ||
			
 
				 	       type == BPF_MAP_TYPE_PERCPU_ARRAY ||
			
 
				-	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
			
 
				+	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
			
 
				+	       type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
			
 
				 }
			
 
				 
			
 
				 static bool map_is_map_of_maps(__u32 type)
			
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -127,6 +127,7 @@ enum bpf_map_type {
 
				 	BPF_MAP_TYPE_SOCKHASH,
			
 
				 	BPF_MAP_TYPE_CGROUP_STORAGE,
			
 
				 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
			
 
				+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
			
 
				 };
			
 
				 
			
 
				 enum bpf_prog_type {
			
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,8 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
 
				 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
			
 
				 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
			
 
				 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
			
 
				-	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names
			
 
				+	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
			
 
				+	test_netcnt
			
 
				 
			
 
				 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
			
 
				 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
			
@@ -35,7 +36,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 
				 	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
			
 
				 	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
			
 
				 	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
			
 
				-	test_skb_cgroup_id_kern.o bpf_flow.o
			
 
				+	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o
			
 
				 
			
 
				 # Order correspond to 'make run_tests' order
			
 
				 TEST_PROGS := test_kmod.sh \
			
@@ -72,6 +73,7 @@ $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
 
				 $(OUTPUT)/test_progs: trace_helpers.c
			
 
				 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
			
 
				 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
			
 
				+$(OUTPUT)/test_netcnt: cgroup_helpers.c
			
 
				 
			
 
				 .PHONY: force
			
 
				 
			
--- a/tools/testing/selftests/bpf/netcnt_common.h
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -0,0 +1,24 @@
 
				+// SPDX-License-Identifier: GPL-2.0
			
 
				+#ifndef __NETCNT_COMMON_H
			
 
				+#define __NETCNT_COMMON_H
			
 
				+
			
 
				+#include <linux/types.h>
			
 
				+
			
 
				+#define MAX_PERCPU_PACKETS 32
			
 
				+
			
 
				+struct percpu_net_cnt {
			
 
				+	__u64 packets;
			
 
				+	__u64 bytes;
			
 
				+
			
 
				+	__u64 prev_ts;
			
 
				+
			
 
				+	__u64 prev_packets;
			
 
				+	__u64 prev_bytes;
			
 
				+};
			
 
				+
			
 
				+struct net_cnt {
			
 
				+	__u64 packets;
			
 
				+	__u64 bytes;
			
 
				+};
			
 
				+
			
 
				+#endif
			
--- a/tools/testing/selftests/bpf/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/netcnt_prog.c
@@ -0,0 +1,71 @@
 
				+// SPDX-License-Identifier: GPL-2.0
			
 
				+#include <linux/bpf.h>
			
 
				+#include <linux/version.h>
			
 
				+
			
 
				+#include "bpf_helpers.h"
			
 
				+#include "netcnt_common.h"
			
 
				+
			
 
				+#define MAX_BPS	(3 * 1024 * 1024)
			
 
				+
			
 
				+#define REFRESH_TIME_NS	100000000
			
 
				+#define NS_PER_SEC	1000000000
			
 
				+
			
 
				+struct bpf_map_def SEC("maps") percpu_netcnt = {
			
 
				+	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
			
 
				+	.key_size = sizeof(struct bpf_cgroup_storage_key),
			
 
				+	.value_size = sizeof(struct percpu_net_cnt),
			
 
				+};
			
 
				+
			
 
				+struct bpf_map_def SEC("maps") netcnt = {
			
 
				+	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
			
 
				+	.key_size = sizeof(struct bpf_cgroup_storage_key),
			
 
				+	.value_size = sizeof(struct net_cnt),
			
 
				+};
			
 
				+
			
 
				+SEC("cgroup/skb")
			
 
				+int bpf_nextcnt(struct __sk_buff *skb)
			
 
				+{
			
 
				+	struct percpu_net_cnt *percpu_cnt;
			
 
				+	char fmt[] = "%d %llu %llu\n";
			
 
				+	struct net_cnt *cnt;
			
 
				+	__u64 ts, dt;
			
 
				+	int ret;
			
 
				+
			
 
				+	cnt = bpf_get_local_storage(&netcnt, 0);
			
 
				+	percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0);
			
 
				+
			
 
				+	percpu_cnt->packets++;
			
 
				+	percpu_cnt->bytes += skb->len;
			
 
				+
			
 
				+	if (percpu_cnt->packets > MAX_PERCPU_PACKETS) {
			
 
				+		__sync_fetch_and_add(&cnt->packets,
			
 
				+				     percpu_cnt->packets);
			
 
				+		percpu_cnt->packets = 0;
			
 
				+
			
 
				+		__sync_fetch_and_add(&cnt->bytes,
			
 
				+				     percpu_cnt->bytes);
			
 
				+		percpu_cnt->bytes = 0;
			
 
				+	}
			
 
				+
			
 
				+	ts = bpf_ktime_get_ns();
			
 
				+	dt = ts - percpu_cnt->prev_ts;
			
 
				+
			
 
				+	dt *= MAX_BPS;
			
 
				+	dt /= NS_PER_SEC;
			
 
				+
			
 
				+	if (cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt)
			
 
				+		ret = 1;
			
 
				+	else
			
 
				+		ret = 0;
			
 
				+
			
 
				+	if (dt > REFRESH_TIME_NS) {
			
 
				+		percpu_cnt->prev_ts = ts;
			
 
				+		percpu_cnt->prev_packets = cnt->packets;
			
 
				+		percpu_cnt->prev_bytes = cnt->bytes;
			
 
				+	}
			
 
				+
			
 
				+	return !!ret;
			
 
				+}
			
 
				+
			
 
				+char _license[] SEC("license") = "GPL";
			
 
				+__u32 _version SEC("version") = LINUX_VERSION_CODE;
			
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -4,6 +4,7 @@
 
				 #include <linux/filter.h>
			
 
				 #include <stdio.h>
			
 
				 #include <stdlib.h>
			
 
				+#include <sys/sysinfo.h>
			
 
				 
			
 
				 #include "bpf_rlimit.h"
			
 
				 #include "cgroup_helpers.h"
			
@@ -15,6 +16,14 @@ char bpf_log_buf[BPF_LOG_BUF_SIZE];
 
				 int main(int argc, char **argv)
			
 
				 {
			
 
				 	struct bpf_insn prog[] = {
			
 
				+		BPF_LD_MAP_FD(BPF_REG_1, 0), /* percpu map fd */
			
 
				+		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
			
 
				+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			
 
				+			     BPF_FUNC_get_local_storage),
			
 
				+		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
			
 
				+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
			
 
				+		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
			
 
				+
			
 
				 		BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
			
 
				 		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
			
 
				 		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			
@@ -28,9 +37,18 @@ int main(int argc, char **argv)
 
				 	};
			
 
				 	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
			
 
				 	int error = EXIT_FAILURE;
			
 
				-	int map_fd, prog_fd, cgroup_fd;
			
 
				+	int map_fd, percpu_map_fd, prog_fd, cgroup_fd;
			
 
				 	struct bpf_cgroup_storage_key key;
			
 
				 	unsigned long long value;
			
 
				+	unsigned long long *percpu_value;
			
 
				+	int cpu, nproc;
			
 
				+
			
 
				+	nproc = get_nprocs_conf();
			
 
				+	percpu_value = malloc(sizeof(*percpu_value) * nproc);
			
 
				+	if (!percpu_value) {
			
 
				+		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
			
 
				+		goto err;
			
 
				+	}
			
 
				 
			
 
				 	map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
			
 
				 				sizeof(value), 0, 0);
			
@@ -39,7 +57,15 @@ int main(int argc, char **argv)
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	prog[0].imm = map_fd;
			
 
				+	percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
			
 
				+				       sizeof(key), sizeof(value), 0, 0);
			
 
				+	if (percpu_map_fd < 0) {
			
 
				+		printf("Failed to create map: %s\n", strerror(errno));
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	prog[0].imm = percpu_map_fd;
			
 
				+	prog[7].imm = map_fd;
			
 
				 	prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
			
 
				 				   prog, insns_cnt, "GPL", 0,
			
 
				 				   bpf_log_buf, BPF_LOG_BUF_SIZE);
			
@@ -77,7 +103,15 @@ int main(int argc, char **argv)
 
				 	}
			
 
				 
			
 
				 	if (bpf_map_lookup_elem(map_fd, &key, &value)) {
			
 
				-		printf("Failed to lookup cgroup storage\n");
			
 
				+		printf("Failed to lookup cgroup storage 0\n");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	for (cpu = 0; cpu < nproc; cpu++)
			
 
				+		percpu_value[cpu] = 1000;
			
 
				+
			
 
				+	if (bpf_map_update_elem(percpu_map_fd, &key, percpu_value, 0)) {
			
 
				+		printf("Failed to update the data in the cgroup storage\n");
			
 
				 		goto err;
			
 
				 	}
			
 
				 
			
@@ -120,11 +154,31 @@ int main(int argc, char **argv)
 
				 		goto err;
			
 
				 	}
			
 
				 
			
 
				+	/* Check the final value of the counter in the percpu local storage */
			
 
				+
			
 
				+	for (cpu = 0; cpu < nproc; cpu++)
			
 
				+		percpu_value[cpu] = 0;
			
 
				+
			
 
				+	if (bpf_map_lookup_elem(percpu_map_fd, &key, percpu_value)) {
			
 
				+		printf("Failed to lookup the per-cpu cgroup storage\n");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	value = 0;
			
 
				+	for (cpu = 0; cpu < nproc; cpu++)
			
 
				+		value += percpu_value[cpu];
			
 
				+
			
 
				+	if (value != nproc * 1000 + 6) {
			
 
				+		printf("Unexpected data in the per-cpu cgroup storage\n");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				 	error = 0;
			
 
				 	printf("test_cgroup_storage:PASS\n");
			
 
				 
			
 
				 err:
			
 
				 	cleanup_cgroup_environment();
			
 
				+	free(percpu_value);
			
 
				 
			
 
				 out:
			
 
				 	return error;
			
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -0,0 +1,158 @@
 
				+// SPDX-License-Identifier: GPL-2.0
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+#include <string.h>
			
 
				+#include <errno.h>
			
 
				+#include <assert.h>
			
 
				+#include <sys/sysinfo.h>
			
 
				+#include <sys/time.h>
			
 
				+
			
 
				+#include <linux/bpf.h>
			
 
				+#include <bpf/bpf.h>
			
 
				+#include <bpf/libbpf.h>
			
 
				+
			
 
				+#include "cgroup_helpers.h"
			
 
				+#include "bpf_rlimit.h"
			
 
				+#include "netcnt_common.h"
			
 
				+
			
 
				+#define BPF_PROG "./netcnt_prog.o"
			
 
				+#define TEST_CGROUP "/test-network-counters/"
			
 
				+
			
 
				+static int bpf_find_map(const char *test, struct bpf_object *obj,
			
 
				+			const char *name)
			
 
				+{
			
 
				+	struct bpf_map *map;
			
 
				+
			
 
				+	map = bpf_object__find_map_by_name(obj, name);
			
 
				+	if (!map) {
			
 
				+		printf("%s:FAIL:map '%s' not found\n", test, name);
			
 
				+		return -1;
			
 
				+	}
			
 
				+	return bpf_map__fd(map);
			
 
				+}
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	struct percpu_net_cnt *percpu_netcnt;
			
 
				+	struct bpf_cgroup_storage_key key;
			
 
				+	int map_fd, percpu_map_fd;
			
 
				+	int error = EXIT_FAILURE;
			
 
				+	struct net_cnt netcnt;
			
 
				+	struct bpf_object *obj;
			
 
				+	int prog_fd, cgroup_fd;
			
 
				+	unsigned long packets;
			
 
				+	unsigned long bytes;
			
 
				+	int cpu, nproc;
			
 
				+	__u32 prog_cnt;
			
 
				+
			
 
				+	nproc = get_nprocs_conf();
			
 
				+	percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
			
 
				+	if (!percpu_netcnt) {
			
 
				+		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
			
 
				+			  &obj, &prog_fd)) {
			
 
				+		printf("Failed to load bpf program\n");
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	if (setup_cgroup_environment()) {
			
 
				+		printf("Failed to load bpf program\n");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	/* Create a cgroup, get fd, and join it */
			
 
				+	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
			
 
				+	if (!cgroup_fd) {
			
 
				+		printf("Failed to create test cgroup\n");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	if (join_cgroup(TEST_CGROUP)) {
			
 
				+		printf("Failed to join cgroup\n");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	/* Attach bpf program */
			
 
				+	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
			
 
				+		printf("Failed to attach bpf program");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	assert(system("ping localhost -6 -c 10000 -f -q > /dev/null") == 0);
			
 
				+
			
 
				+	if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
			
 
				+			   &prog_cnt)) {
			
 
				+		printf("Failed to query attached programs");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	map_fd = bpf_find_map(__func__, obj, "netcnt");
			
 
				+	if (map_fd < 0) {
			
 
				+		printf("Failed to find bpf map with net counters");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
			
 
				+	if (percpu_map_fd < 0) {
			
 
				+		printf("Failed to find bpf map with percpu net counters");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	if (bpf_map_get_next_key(map_fd, NULL, &key)) {
			
 
				+		printf("Failed to get key in cgroup storage\n");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
			
 
				+		printf("Failed to lookup cgroup storage\n");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
			
 
				+		printf("Failed to lookup percpu cgroup storage\n");
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	/* Some packets can be still in per-cpu cache, but not more than
			
 
				+	 * MAX_PERCPU_PACKETS.
			
 
				+	 */
			
 
				+	packets = netcnt.packets;
			
 
				+	bytes = netcnt.bytes;
			
 
				+	for (cpu = 0; cpu < nproc; cpu++) {
			
 
				+		if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
			
 
				+			printf("Unexpected percpu value: %llu\n",
			
 
				+			       percpu_netcnt[cpu].packets);
			
 
				+			goto err;
			
 
				+		}
			
 
				+
			
 
				+		packets += percpu_netcnt[cpu].packets;
			
 
				+		bytes += percpu_netcnt[cpu].bytes;
			
 
				+	}
			
 
				+
			
 
				+	/* No packets should be lost */
			
 
				+	if (packets != 10000) {
			
 
				+		printf("Unexpected packet count: %lu\n", packets);
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	/* Let's check that bytes counter matches the number of packets
			
 
				+	 * multiplied by the size of ipv6 ICMP packet.
			
 
				+	 */
			
 
				+	if (bytes != packets * 104) {
			
 
				+		printf("Unexpected bytes count: %lu\n", bytes);
			
 
				+		goto err;
			
 
				+	}
			
 
				+
			
 
				+	error = 0;
			
 
				+	printf("test_netcnt:PASS\n");
			
 
				+
			
 
				+err:
			
 
				+	cleanup_cgroup_environment();
			
 
				+	free(percpu_netcnt);
			
 
				+
			
 
				+out:
			
 
				+	return error;
			
 
				+}
			
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -68,6 +68,7 @@ struct bpf_test {
 
				 	int fixup_prog2[MAX_FIXUPS];
			
 
				 	int fixup_map_in_map[MAX_FIXUPS];
			
 
				 	int fixup_cgroup_storage[MAX_FIXUPS];
			
 
				+	int fixup_percpu_cgroup_storage[MAX_FIXUPS];
			
 
				 	const char *errstr;
			
 
				 	const char *errstr_unpriv;
			
 
				 	uint32_t retval;
			
@@ -4676,7 +4677,7 @@ static struct bpf_test tests[] = {
 
				 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
			
 
				 	},
			
 
				 	{
			
 
				-		"invalid per-cgroup storage access 3",
			
 
				+		"invalid cgroup storage access 3",
			
 
				 		.insns = {
			
 
				 			BPF_MOV64_IMM(BPF_REG_2, 0),
			
 
				 			BPF_LD_MAP_FD(BPF_REG_1, 0),
			
@@ -4743,6 +4744,121 @@ static struct bpf_test tests[] = {
 
				 		.errstr = "get_local_storage() doesn't support non-zero flags",
			
 
				 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
			
 
				 	},
			
 
				+	{
			
 
				+		"valid per-cpu cgroup storage access",
			
 
				+		.insns = {
			
 
				+			BPF_MOV64_IMM(BPF_REG_2, 0),
			
 
				+			BPF_LD_MAP_FD(BPF_REG_1, 0),
			
 
				+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			
 
				+				     BPF_FUNC_get_local_storage),
			
 
				+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
			
 
				+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
			
 
				+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
			
 
				+			BPF_EXIT_INSN(),
			
 
				+		},
			
 
				+		.fixup_percpu_cgroup_storage = { 1 },
			
 
				+		.result = ACCEPT,
			
 
				+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
			
 
				+	},
			
 
				+	{
			
 
				+		"invalid per-cpu cgroup storage access 1",
			
 
				+		.insns = {
			
 
				+			BPF_MOV64_IMM(BPF_REG_2, 0),
			
 
				+			BPF_LD_MAP_FD(BPF_REG_1, 0),
			
 
				+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			
 
				+				     BPF_FUNC_get_local_storage),
			
 
				+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
			
 
				+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
			
 
				+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
			
 
				+			BPF_EXIT_INSN(),
			
 
				+		},
			
 
				+		.fixup_map1 = { 1 },
			
 
				+		.result = REJECT,
			
 
				+		.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
			
 
				+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
			
 
				+	},
			
 
				+	{
			
 
				+		"invalid per-cpu cgroup storage access 2",
			
 
				+		.insns = {
			
 
				+			BPF_MOV64_IMM(BPF_REG_2, 0),
			
 
				+			BPF_LD_MAP_FD(BPF_REG_1, 1),
			
 
				+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			
 
				+				     BPF_FUNC_get_local_storage),
			
 
				+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
			
 
				+			BPF_EXIT_INSN(),
			
 
				+		},
			
 
				+		.result = REJECT,
			
 
				+		.errstr = "fd 1 is not pointing to valid bpf_map",
			
 
				+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
			
 
				+	},
			
 
				+	{
			
 
				+		"invalid per-cpu cgroup storage access 3",
			
 
				+		.insns = {
			
 
				+			BPF_MOV64_IMM(BPF_REG_2, 0),
			
 
				+			BPF_LD_MAP_FD(BPF_REG_1, 0),
			
 
				+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			
 
				+				     BPF_FUNC_get_local_storage),
			
 
				+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
			
 
				+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
			
 
				+			BPF_MOV64_IMM(BPF_REG_0, 0),
			
 
				+			BPF_EXIT_INSN(),
			
 
				+		},
			
 
				+		.fixup_percpu_cgroup_storage = { 1 },
			
 
				+		.result = REJECT,
			
 
				+		.errstr = "invalid access to map value, value_size=64 off=256 size=4",
			
 
				+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
			
 
				+	},
			
 
				+	{
			
 
				+		"invalid per-cpu cgroup storage access 4",
			
 
				+		.insns = {
			
 
				+			BPF_MOV64_IMM(BPF_REG_2, 0),
			
 
				+			BPF_LD_MAP_FD(BPF_REG_1, 0),
			
 
				+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			
 
				+				     BPF_FUNC_get_local_storage),
			
 
				+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
			
 
				+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
			
 
				+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
			
 
				+			BPF_EXIT_INSN(),
			
 
				+		},
			
 
				+		.fixup_cgroup_storage = { 1 },
			
 
				+		.result = REJECT,
			
 
				+		.errstr = "invalid access to map value, value_size=64 off=-2 size=4",
			
 
				+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
			
 
				+	},
			
 
				+	{
			
 
				+		"invalid per-cpu cgroup storage access 5",
			
 
				+		.insns = {
			
 
				+			BPF_MOV64_IMM(BPF_REG_2, 7),
			
 
				+			BPF_LD_MAP_FD(BPF_REG_1, 0),
			
 
				+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			
 
				+				     BPF_FUNC_get_local_storage),
			
 
				+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
			
 
				+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
			
 
				+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
			
 
				+			BPF_EXIT_INSN(),
			
 
				+		},
			
 
				+		.fixup_percpu_cgroup_storage = { 1 },
			
 
				+		.result = REJECT,
			
 
				+		.errstr = "get_local_storage() doesn't support non-zero flags",
			
 
				+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
			
 
				+	},
			
 
				+	{
			
 
				+		"invalid per-cpu cgroup storage access 6",
			
 
				+		.insns = {
			
 
				+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
			
 
				+			BPF_LD_MAP_FD(BPF_REG_1, 0),
			
 
				+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
			
 
				+				     BPF_FUNC_get_local_storage),
			
 
				+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
			
 
				+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
			
 
				+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
			
 
				+			BPF_EXIT_INSN(),
			
 
				+		},
			
 
				+		.fixup_percpu_cgroup_storage = { 1 },
			
 
				+		.result = REJECT,
			
 
				+		.errstr = "get_local_storage() doesn't support non-zero flags",
			
 
				+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
			
 
				+	},
			
 
				 	{
			
 
				 		"multiple registers share map_lookup_elem result",
			
 
				 		.insns = {
			
@@ -12615,15 +12731,17 @@ static int create_map_in_map(void)
 
				 	return outer_map_fd;
			
 
				 }
			
 
				 
			
 
				-static int create_cgroup_storage(void)
			
 
				+static int create_cgroup_storage(bool percpu)
			
 
				 {
			
 
				+	enum bpf_map_type type = percpu ? BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE :
			
 
				+		BPF_MAP_TYPE_CGROUP_STORAGE;
			
 
				 	int fd;
			
 
				 
			
 
				-	fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
			
 
				-			    sizeof(struct bpf_cgroup_storage_key),
			
 
				+	fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key),
			
 
				 			    TEST_DATA_LEN, 0, 0);
			
 
				 	if (fd < 0)
			
 
				-		printf("Failed to create array '%s'!\n", strerror(errno));
			
 
				+		printf("Failed to create cgroup storage '%s'!\n",
			
 
				+		       strerror(errno));
			
 
				 
			
 
				 	return fd;
			
 
				 }
			
@@ -12641,6 +12759,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
 
				 	int *fixup_prog2 = test->fixup_prog2;
			
 
				 	int *fixup_map_in_map = test->fixup_map_in_map;
			
 
				 	int *fixup_cgroup_storage = test->fixup_cgroup_storage;
			
 
				+	int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
			
 
				 
			
 
				 	if (test->fill_helper)
			
 
				 		test->fill_helper(test);
			
@@ -12710,12 +12829,20 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
 
				 	}
			
 
				 
			
 
				 	if (*fixup_cgroup_storage) {
			
 
				-		map_fds[7] = create_cgroup_storage();
			
 
				+		map_fds[7] = create_cgroup_storage(false);
			
 
				 		do {
			
 
				 			prog[*fixup_cgroup_storage].imm = map_fds[7];
			
 
				 			fixup_cgroup_storage++;
			
 
				 		} while (*fixup_cgroup_storage);
			
 
				 	}
			
 
				+
			
 
				+	if (*fixup_percpu_cgroup_storage) {
			
 
				+		map_fds[8] = create_cgroup_storage(true);
			
 
				+		do {
			
 
				+			prog[*fixup_percpu_cgroup_storage].imm = map_fds[8];
			
 
				+			fixup_percpu_cgroup_storage++;
			
 
				+		} while (*fixup_percpu_cgroup_storage);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void do_test_single(struct bpf_test *test, bool unpriv,