@@ -17,7 +17,7 @@
 #define BPF_ALU64	0x07	/* alu mode in double word width */
 
 /* ld/ldx fields */
-#define BPF_DW		0x18	/* double word */
+#define BPF_DW		0x18	/* double word (64-bit) */
 #define BPF_XADD	0xc0	/* exclusive add */
 
 /* alu/jmp fields */
@@ -642,6 +642,14 @@ union bpf_attr {
  *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
  *
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ *     Set callback flags for sock_ops
+ *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ *     @flags: flags value
+ *     Return: 0 for no error
+ *             -EINVAL if there is no full tcp socket
+ *             bits in flags that are not supported by current kernel
+ *
  * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
  *     Grow or shrink room in sk_buff.
  *     @skb: pointer to skb
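
Example (not part of the patch): a minimal sock_ops program that calls the new helper to enable the callbacks added later in this patch once a connection is established. This is only a sketch; it assumes a libbpf-style build where <bpf/bpf_helpers.h> provides SEC() and the helper declaration, uses the BPF_SOCK_OPS_*_CB_FLAG constants defined further down in this patch, and the program/section names are illustrative.

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("sockops")
    int set_cb_flags(struct bpf_sock_ops *skops)
    {
        /* Once the connection is established, ask the kernel to also
         * invoke this program on RTOs, retransmits and state changes.
         */
        if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
            skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
            bpf_sock_ops_cb_flags_set(skops,
                                      BPF_SOCK_OPS_RTO_CB_FLAG |
                                      BPF_SOCK_OPS_RETRANS_CB_FLAG |
                                      BPF_SOCK_OPS_STATE_CB_FLAG);
        return 1;
    }

    char _license[] SEC("license") = "GPL";
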
@@ -748,7 +756,8 @@ union bpf_attr {
 	FN(perf_event_read_value),	\
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
-	FN(override_return),
+	FN(override_return),		\
+	FN(sock_ops_cb_flags_set),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -952,8 +961,9 @@ struct bpf_map_info {
 struct bpf_sock_ops {
 	__u32 op;
 	union {
-		__u32 reply;
-		__u32 replylong[4];
+		__u32 args[4];		/* Optionally passed to bpf program */
+		__u32 reply;		/* Returned by bpf program */
+		__u32 replylong[4];	/* Optionally returned by bpf prog */
 	};
 	__u32 family;
 	__u32 remote_ip4;	/* Stored in network byte order */
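
For reference (not part of the patch): the convention around this union is that a program answers an op by writing reply (or replylong) and returning 1, while the kernel fills args[] before invoking the program for the callbacks added later in this patch. A sketch, with the same includes and license as the sketch above and a purely illustrative window value:

    SEC("sockops")
    int union_usage(struct bpf_sock_ops *skops)
    {
        if (skops->op == BPF_SOCK_OPS_RWND_INIT) {
            /* Answer is passed back through the union. */
            skops->reply = 40;    /* illustrative initial window */
            return 1;
        }
        /* For the new *_CB ops, inputs arrive in skops->args[0..3]. */
        return 1;
    }
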
@@ -968,8 +978,39 @@ struct bpf_sock_ops {
 	 */
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
+	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+	__u32 state;
+	__u32 rtt_min;
+	__u32 snd_ssthresh;
+	__u32 rcv_nxt;
+	__u32 snd_nxt;
+	__u32 snd_una;
+	__u32 mss_cache;
+	__u32 ecn_flags;
+	__u32 rate_delivered;
+	__u32 rate_interval_us;
+	__u32 packets_out;
+	__u32 retrans_out;
+	__u32 total_retrans;
+	__u32 segs_in;
+	__u32 data_segs_in;
+	__u32 segs_out;
+	__u32 data_segs_out;
+	__u32 lost_out;
+	__u32 sacked_out;
+	__u32 sk_txhash;
+	__u64 bytes_received;
+	__u64 bytes_acked;
 };
 
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS	0x7	/* Mask of all currently
+						 * supported cb flags
+						 */
+
 /* List of known BPF sock_ops operators.
  * New entries can only be added at the end
  */
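
Example (not part of the patch): bpf_sock_ops_cb_flags is readable from the program context, and the helper replaces the whole mask with the value passed in (limited to BPF_SOCK_OPS_ALL_CB_FLAGS), so a program can turn one callback off while keeping the others. A sketch with the same includes/license as above; it uses the RTO callback defined in the next hunk, and the retransmit threshold is arbitrary:

    SEC("sockops")
    int drop_rto_cb(struct bpf_sock_ops *skops)
    {
        /* After a few RTOs, stop asking for RTO callbacks but keep
         * whatever other callback flags are currently enabled.
         */
        if (skops->op == BPF_SOCK_OPS_RTO_CB &&
            skops->args[0] > 3 /* icsk_retransmits */)
            bpf_sock_ops_cb_flags_set(skops,
                                      skops->bpf_sock_ops_cb_flags &
                                      ~BPF_SOCK_OPS_RTO_CB_FLAG);
        return 1;
    }
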
@@ -1003,6 +1044,43 @@ enum {
 					 * a congestion threshold. RTTs above
 					 * this indicate congestion
 					 */
+	BPF_SOCK_OPS_RTO_CB,		/* Called when an RTO has triggered.
+					 * Arg1: value of icsk_retransmits
+					 * Arg2: value of icsk_rto
+					 * Arg3: whether RTO has expired
+					 */
+	BPF_SOCK_OPS_RETRANS_CB,	/* Called when skb is retransmitted.
+					 * Arg1: sequence number of 1st byte
+					 * Arg2: # segments
+					 * Arg3: return value of
+					 *       tcp_transmit_skb (0 => success)
+					 */
+	BPF_SOCK_OPS_STATE_CB,		/* Called when TCP changes state.
+					 * Arg1: old_state
+					 * Arg2: new_state
+					 */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+	BPF_TCP_ESTABLISHED = 1,
+	BPF_TCP_SYN_SENT,
+	BPF_TCP_SYN_RECV,
+	BPF_TCP_FIN_WAIT1,
+	BPF_TCP_FIN_WAIT2,
+	BPF_TCP_TIME_WAIT,
+	BPF_TCP_CLOSE,
+	BPF_TCP_CLOSE_WAIT,
+	BPF_TCP_LAST_ACK,
+	BPF_TCP_LISTEN,
+	BPF_TCP_CLOSING,	/* Now a valid state */
+	BPF_TCP_NEW_SYN_RECV,
+
+	BPF_TCP_MAX_STATES	/* Leave at the end! */
 };
 
 #define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
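
Example (not part of the patch): a sketch of consuming the three new callbacks, reading the arguments documented above from args[] and comparing states against the BPF_TCP_* values. Same includes/license as the earlier sketches; bpf_printk() from bpf_helpers.h is used only for illustration.

    SEC("sockops")
    int trace_tcp_events(struct bpf_sock_ops *skops)
    {
        switch (skops->op) {
        case BPF_SOCK_OPS_RTO_CB:
            /* args: icsk_retransmits, icsk_rto, RTO expired? */
            bpf_printk("RTO: retransmits=%u rto=%u\n",
                       skops->args[0], skops->args[1]);
            break;
        case BPF_SOCK_OPS_RETRANS_CB:
            /* args: seq of 1st byte, # segments, tcp_transmit_skb() rc */
            bpf_printk("retrans: seq=%u segs=%u\n",
                       skops->args[0], skops->args[1]);
            break;
        case BPF_SOCK_OPS_STATE_CB:
            /* args: old state, new state (BPF_TCP_* values) */
            if (skops->args[1] == BPF_TCP_CLOSE)
                bpf_printk("connection closed\n");
            break;
        }
        return 1;
    }

These callbacks are only delivered on sockets where the corresponding BPF_SOCK_OPS_*_CB_FLAG has been enabled with bpf_sock_ops_cb_flags_set(), as in the first sketch.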