|
@@ -544,31 +544,107 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
|
|
|
|
|
|
#ifdef CONFIG_SOCK_CGROUP_DATA
|
|
#ifdef CONFIG_SOCK_CGROUP_DATA
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
|
|
|
|
+ * per-socket cgroup information except for memcg association.
|
|
|
|
+ *
|
|
|
|
+ * On legacy hierarchies, net_prio and net_cls controllers directly set
|
|
|
|
+ * attributes on each sock which can then be tested by the network layer.
|
|
|
|
+ * On the default hierarchy, each sock is associated with the cgroup it was
|
|
|
|
+ * created in and the networking layer can match the cgroup directly.
|
|
|
|
+ *
|
|
|
|
+ * To avoid carrying all three cgroup related fields separately in sock,
|
|
|
|
+ * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
|
|
|
|
+ * On boot, sock_cgroup_data records the cgroup that the sock was created
|
|
|
|
+ * in so that cgroup2 matches can be made; however, once either net_prio or
|
|
|
|
+ * net_cls starts being used, the area is overridden to carry prioidx and/or
|
|
|
|
+ * classid. The two modes are distinguished by whether the lowest bit is
|
|
|
|
+ * set. A clear bit indicates a cgroup pointer; a set bit indicates prioidx and
|
|
|
|
+ * classid.
|
|
|
|
+ *
|
|
|
|
+ * While userland may start using net_prio or net_cls at any time, once
|
|
|
|
+ * either is used, cgroup2 matching no longer works. There is no reason to
|
|
|
|
+ * mix the two and this is in line with how legacy and v2 compatibility is
|
|
|
|
+ * handled. On mode switch, cgroup references which are already being
|
|
|
|
+ * pointed to by socks may be leaked. While this can be remedied by adding
|
|
|
|
+ * synchronization around sock_cgroup_data, given that the number of leaked
|
|
|
|
+ * cgroups is bounded and highly unlikely to be high, this seems to be the
|
|
|
|
+ * better trade-off.
|
|
|
|
+ */
|
|
struct sock_cgroup_data {
|
|
struct sock_cgroup_data {
|
|
- u16 prioidx;
|
|
|
|
- u32 classid;
|
|
|
|
|
|
+ union {
|
|
|
|
+#ifdef __LITTLE_ENDIAN
|
|
|
|
+ struct {
|
|
|
|
+ u8 is_data;
|
|
|
|
+ u8 padding;
|
|
|
|
+ u16 prioidx;
|
|
|
|
+ u32 classid;
|
|
|
|
+ } __packed;
|
|
|
|
+#else
|
|
|
|
+ struct {
|
|
|
|
+ u32 classid;
|
|
|
|
+ u16 prioidx;
|
|
|
|
+ u8 padding;
|
|
|
|
+ u8 is_data;
|
|
|
|
+ } __packed;
|
|
|
|
+#endif
|
|
|
|
+ u64 val;
|
|
|
|
+ };
|
|
};
|
|
};
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * There's a theoretical window where the following accessors race with
|
|
|
|
+ * updaters and return part of the previous pointer as the prioidx or
|
|
|
|
+ * classid. Such races are short-lived and the result isn't critical.
|
|
|
|
+ */
|
|
static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd)
|
|
static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd)
|
|
{
|
|
{
|
|
- return skcd->prioidx;
|
|
|
|
|
|
+ /* fall back to 1, which is always the ID of the root cgroup */
|
|
|
|
+ return (skcd->is_data & 1) ? skcd->prioidx : 1;
|
|
}
|
|
}
|
|
|
|
|
|
static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd)
|
|
static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd)
|
|
{
|
|
{
|
|
- return skcd->classid;
|
|
|
|
|
|
+ /* fall back to 0, which is the unconfigured default classid */
|
|
|
|
+ return (skcd->is_data & 1) ? skcd->classid : 0;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * If invoked concurrently, the updaters may clobber each other. The
|
|
|
|
+ * caller is responsible for synchronization.
|
|
|
|
+ */
|
|
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
|
|
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
|
|
u16 prioidx)
|
|
u16 prioidx)
|
|
{
|
|
{
|
|
- skcd->prioidx = prioidx;
|
|
|
|
|
|
+ struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) };
|
|
|
|
+
|
|
|
|
+ if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ if (!(skcd_buf.is_data & 1)) {
|
|
|
|
+ skcd_buf.val = 0;
|
|
|
|
+ skcd_buf.is_data = 1;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ skcd_buf.prioidx = prioidx;
|
|
|
|
+ WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
|
|
}
|
|
}
|
|
|
|
|
|
static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
|
|
static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
|
|
u32 classid)
|
|
u32 classid)
|
|
{
|
|
{
|
|
- skcd->classid = classid;
|
|
|
|
|
|
+ struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) };
|
|
|
|
+
|
|
|
|
+ if (sock_cgroup_classid(&skcd_buf) == classid)
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ if (!(skcd_buf.is_data & 1)) {
|
|
|
|
+ skcd_buf.val = 0;
|
|
|
|
+ skcd_buf.is_data = 1;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ skcd_buf.classid = classid;
|
|
|
|
+ WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
|
|
}
|
|
}
|
|
|
|
|
|
#else /* CONFIG_SOCK_CGROUP_DATA */
|
|
#else /* CONFIG_SOCK_CGROUP_DATA */
|