meter.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. /*
  2. * Copyright (c) 2017 Nicira, Inc.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of version 2 of the GNU General Public
  6. * License as published by the Free Software Foundation.
  7. */
  8. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  9. #include <linux/if.h>
  10. #include <linux/skbuff.h>
  11. #include <linux/ip.h>
  12. #include <linux/kernel.h>
  13. #include <linux/openvswitch.h>
  14. #include <linux/netlink.h>
  15. #include <linux/rculist.h>
  16. #include <net/netlink.h>
  17. #include <net/genetlink.h>
  18. #include "datapath.h"
  19. #include "meter.h"
  20. #define METER_HASH_BUCKETS 1024
  21. static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
  22. [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
  23. [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
  24. [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
  25. [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
  26. [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
  27. [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
  28. [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
  29. [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
  30. };
  31. static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
  32. [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
  33. [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
  34. [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
  35. [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
  36. };
  37. static void ovs_meter_free(struct dp_meter *meter)
  38. {
  39. if (!meter)
  40. return;
  41. kfree_rcu(meter, rcu);
  42. }
  43. static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
  44. u32 meter_id)
  45. {
  46. return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
  47. }
  48. /* Call with ovs_mutex or RCU read lock. */
  49. static struct dp_meter *lookup_meter(const struct datapath *dp,
  50. u32 meter_id)
  51. {
  52. struct dp_meter *meter;
  53. struct hlist_head *head;
  54. head = meter_hash_bucket(dp, meter_id);
  55. hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
  56. if (meter->id == meter_id)
  57. return meter;
  58. }
  59. return NULL;
  60. }
  61. static void attach_meter(struct datapath *dp, struct dp_meter *meter)
  62. {
  63. struct hlist_head *head = meter_hash_bucket(dp, meter->id);
  64. hlist_add_head_rcu(&meter->dp_hash_node, head);
  65. }
  66. static void detach_meter(struct dp_meter *meter)
  67. {
  68. ASSERT_OVSL();
  69. if (meter)
  70. hlist_del_rcu(&meter->dp_hash_node);
  71. }
  72. static struct sk_buff *
  73. ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
  74. struct ovs_header **ovs_reply_header)
  75. {
  76. struct sk_buff *skb;
  77. struct ovs_header *ovs_header = info->userhdr;
  78. skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
  79. if (!skb)
  80. return ERR_PTR(-ENOMEM);
  81. *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
  82. info->snd_seq,
  83. &dp_meter_genl_family, 0, cmd);
  84. if (!*ovs_reply_header) {
  85. nlmsg_free(skb);
  86. return ERR_PTR(-EMSGSIZE);
  87. }
  88. (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
  89. return skb;
  90. }
  91. static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
  92. struct dp_meter *meter)
  93. {
  94. struct nlattr *nla;
  95. struct dp_meter_band *band;
  96. u16 i;
  97. if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
  98. goto error;
  99. if (!meter)
  100. return 0;
  101. if (nla_put(reply, OVS_METER_ATTR_STATS,
  102. sizeof(struct ovs_flow_stats), &meter->stats) ||
  103. nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
  104. OVS_METER_ATTR_PAD))
  105. goto error;
  106. nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
  107. if (!nla)
  108. goto error;
  109. band = meter->bands;
  110. for (i = 0; i < meter->n_bands; ++i, ++band) {
  111. struct nlattr *band_nla;
  112. band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
  113. if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
  114. sizeof(struct ovs_flow_stats),
  115. &band->stats))
  116. goto error;
  117. nla_nest_end(reply, band_nla);
  118. }
  119. nla_nest_end(reply, nla);
  120. return 0;
  121. error:
  122. return -EMSGSIZE;
  123. }
  124. static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
  125. {
  126. struct sk_buff *reply;
  127. struct ovs_header *ovs_reply_header;
  128. struct nlattr *nla, *band_nla;
  129. int err;
  130. reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
  131. &ovs_reply_header);
  132. if (IS_ERR(reply))
  133. return PTR_ERR(reply);
  134. if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
  135. nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
  136. goto nla_put_failure;
  137. nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
  138. if (!nla)
  139. goto nla_put_failure;
  140. band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
  141. if (!band_nla)
  142. goto nla_put_failure;
  143. /* Currently only DROP band type is supported. */
  144. if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
  145. goto nla_put_failure;
  146. nla_nest_end(reply, band_nla);
  147. nla_nest_end(reply, nla);
  148. genlmsg_end(reply, ovs_reply_header);
  149. return genlmsg_reply(reply, info);
  150. nla_put_failure:
  151. nlmsg_free(reply);
  152. err = -EMSGSIZE;
  153. return err;
  154. }
  155. static struct dp_meter *dp_meter_create(struct nlattr **a)
  156. {
  157. struct nlattr *nla;
  158. int rem;
  159. u16 n_bands = 0;
  160. struct dp_meter *meter;
  161. struct dp_meter_band *band;
  162. int err;
  163. /* Validate attributes, count the bands. */
  164. if (!a[OVS_METER_ATTR_BANDS])
  165. return ERR_PTR(-EINVAL);
  166. nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
  167. if (++n_bands > DP_MAX_BANDS)
  168. return ERR_PTR(-EINVAL);
  169. /* Allocate and set up the meter before locking anything. */
  170. meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
  171. sizeof(*meter), GFP_KERNEL);
  172. if (!meter)
  173. return ERR_PTR(-ENOMEM);
  174. meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
  175. meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
  176. meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
  177. spin_lock_init(&meter->lock);
  178. if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
  179. meter->stats = *(struct ovs_flow_stats *)
  180. nla_data(a[OVS_METER_ATTR_STATS]);
  181. }
  182. meter->n_bands = n_bands;
  183. /* Set up meter bands. */
  184. band = meter->bands;
  185. nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
  186. struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
  187. u32 band_max_delta_t;
  188. err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
  189. nla_data(nla), nla_len(nla), band_policy,
  190. NULL);
  191. if (err)
  192. goto exit_free_meter;
  193. if (!attr[OVS_BAND_ATTR_TYPE] ||
  194. !attr[OVS_BAND_ATTR_RATE] ||
  195. !attr[OVS_BAND_ATTR_BURST]) {
  196. err = -EINVAL;
  197. goto exit_free_meter;
  198. }
  199. band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
  200. band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
  201. band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
  202. /* Figure out max delta_t that is enough to fill any bucket.
  203. * Keep max_delta_t size to the bucket units:
  204. * pkts => 1/1000 packets, kilobits => bits.
  205. */
  206. band_max_delta_t = (band->burst_size + band->rate) * 1000;
  207. /* Start with a full bucket. */
  208. band->bucket = band_max_delta_t;
  209. if (band_max_delta_t > meter->max_delta_t)
  210. meter->max_delta_t = band_max_delta_t;
  211. band++;
  212. }
  213. return meter;
  214. exit_free_meter:
  215. kfree(meter);
  216. return ERR_PTR(err);
  217. }
  218. static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
  219. {
  220. struct nlattr **a = info->attrs;
  221. struct dp_meter *meter, *old_meter;
  222. struct sk_buff *reply;
  223. struct ovs_header *ovs_reply_header;
  224. struct ovs_header *ovs_header = info->userhdr;
  225. struct datapath *dp;
  226. int err;
  227. u32 meter_id;
  228. bool failed;
  229. meter = dp_meter_create(a);
  230. if (IS_ERR_OR_NULL(meter))
  231. return PTR_ERR(meter);
  232. reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
  233. &ovs_reply_header);
  234. if (IS_ERR(reply)) {
  235. err = PTR_ERR(reply);
  236. goto exit_free_meter;
  237. }
  238. ovs_lock();
  239. dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
  240. if (!dp) {
  241. err = -ENODEV;
  242. goto exit_unlock;
  243. }
  244. if (!a[OVS_METER_ATTR_ID]) {
  245. err = -ENODEV;
  246. goto exit_unlock;
  247. }
  248. meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
  249. /* Cannot fail after this. */
  250. old_meter = lookup_meter(dp, meter_id);
  251. detach_meter(old_meter);
  252. attach_meter(dp, meter);
  253. ovs_unlock();
  254. /* Build response with the meter_id and stats from
  255. * the old meter, if any.
  256. */
  257. failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
  258. WARN_ON(failed);
  259. if (old_meter) {
  260. spin_lock_bh(&old_meter->lock);
  261. if (old_meter->keep_stats) {
  262. err = ovs_meter_cmd_reply_stats(reply, meter_id,
  263. old_meter);
  264. WARN_ON(err);
  265. }
  266. spin_unlock_bh(&old_meter->lock);
  267. ovs_meter_free(old_meter);
  268. }
  269. genlmsg_end(reply, ovs_reply_header);
  270. return genlmsg_reply(reply, info);
  271. exit_unlock:
  272. ovs_unlock();
  273. nlmsg_free(reply);
  274. exit_free_meter:
  275. kfree(meter);
  276. return err;
  277. }
  278. static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
  279. {
  280. struct nlattr **a = info->attrs;
  281. u32 meter_id;
  282. struct ovs_header *ovs_header = info->userhdr;
  283. struct ovs_header *ovs_reply_header;
  284. struct datapath *dp;
  285. int err;
  286. struct sk_buff *reply;
  287. struct dp_meter *meter;
  288. if (!a[OVS_METER_ATTR_ID])
  289. return -EINVAL;
  290. meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
  291. reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
  292. &ovs_reply_header);
  293. if (IS_ERR(reply))
  294. return PTR_ERR(reply);
  295. ovs_lock();
  296. dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
  297. if (!dp) {
  298. err = -ENODEV;
  299. goto exit_unlock;
  300. }
  301. /* Locate meter, copy stats. */
  302. meter = lookup_meter(dp, meter_id);
  303. if (!meter) {
  304. err = -ENOENT;
  305. goto exit_unlock;
  306. }
  307. spin_lock_bh(&meter->lock);
  308. err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
  309. spin_unlock_bh(&meter->lock);
  310. if (err)
  311. goto exit_unlock;
  312. ovs_unlock();
  313. genlmsg_end(reply, ovs_reply_header);
  314. return genlmsg_reply(reply, info);
  315. exit_unlock:
  316. ovs_unlock();
  317. nlmsg_free(reply);
  318. return err;
  319. }
  320. static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
  321. {
  322. struct nlattr **a = info->attrs;
  323. u32 meter_id;
  324. struct ovs_header *ovs_header = info->userhdr;
  325. struct ovs_header *ovs_reply_header;
  326. struct datapath *dp;
  327. int err;
  328. struct sk_buff *reply;
  329. struct dp_meter *old_meter;
  330. if (!a[OVS_METER_ATTR_ID])
  331. return -EINVAL;
  332. meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
  333. reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
  334. &ovs_reply_header);
  335. if (IS_ERR(reply))
  336. return PTR_ERR(reply);
  337. ovs_lock();
  338. dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
  339. if (!dp) {
  340. err = -ENODEV;
  341. goto exit_unlock;
  342. }
  343. old_meter = lookup_meter(dp, meter_id);
  344. if (old_meter) {
  345. spin_lock_bh(&old_meter->lock);
  346. err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
  347. WARN_ON(err);
  348. spin_unlock_bh(&old_meter->lock);
  349. detach_meter(old_meter);
  350. }
  351. ovs_unlock();
  352. ovs_meter_free(old_meter);
  353. genlmsg_end(reply, ovs_reply_header);
  354. return genlmsg_reply(reply, info);
  355. exit_unlock:
  356. ovs_unlock();
  357. nlmsg_free(reply);
  358. return err;
  359. }
  360. /* Meter action execution.
  361. *
  362. * Return true 'meter_id' drop band is triggered. The 'skb' should be
  363. * dropped by the caller'.
  364. */
  365. bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
  366. struct sw_flow_key *key, u32 meter_id)
  367. {
  368. struct dp_meter *meter;
  369. struct dp_meter_band *band;
  370. long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
  371. long long int long_delta_ms;
  372. u32 delta_ms;
  373. u32 cost;
  374. int i, band_exceeded_max = -1;
  375. u32 band_exceeded_rate = 0;
  376. meter = lookup_meter(dp, meter_id);
  377. /* Do not drop the packet when there is no meter. */
  378. if (!meter)
  379. return false;
  380. /* Lock the meter while using it. */
  381. spin_lock(&meter->lock);
  382. long_delta_ms = (now_ms - meter->used); /* ms */
  383. /* Make sure delta_ms will not be too large, so that bucket will not
  384. * wrap around below.
  385. */
  386. delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
  387. ? meter->max_delta_t : (u32)long_delta_ms;
  388. /* Update meter statistics.
  389. */
  390. meter->used = now_ms;
  391. meter->stats.n_packets += 1;
  392. meter->stats.n_bytes += skb->len;
  393. /* Bucket rate is either in kilobits per second, or in packets per
  394. * second. We maintain the bucket in the units of either bits or
  395. * 1/1000th of a packet, correspondingly.
  396. * Then, when rate is multiplied with milliseconds, we get the
  397. * bucket units:
  398. * msec * kbps = bits, and
  399. * msec * packets/sec = 1/1000 packets.
  400. *
  401. * 'cost' is the number of bucket units in this packet.
  402. */
  403. cost = (meter->kbps) ? skb->len * 8 : 1000;
  404. /* Update all bands and find the one hit with the highest rate. */
  405. for (i = 0; i < meter->n_bands; ++i) {
  406. long long int max_bucket_size;
  407. band = &meter->bands[i];
  408. max_bucket_size = (band->burst_size + band->rate) * 1000;
  409. band->bucket += delta_ms * band->rate;
  410. if (band->bucket > max_bucket_size)
  411. band->bucket = max_bucket_size;
  412. if (band->bucket >= cost) {
  413. band->bucket -= cost;
  414. } else if (band->rate > band_exceeded_rate) {
  415. band_exceeded_rate = band->rate;
  416. band_exceeded_max = i;
  417. }
  418. }
  419. if (band_exceeded_max >= 0) {
  420. /* Update band statistics. */
  421. band = &meter->bands[band_exceeded_max];
  422. band->stats.n_packets += 1;
  423. band->stats.n_bytes += skb->len;
  424. /* Drop band triggered, let the caller drop the 'skb'. */
  425. if (band->type == OVS_METER_BAND_TYPE_DROP) {
  426. spin_unlock(&meter->lock);
  427. return true;
  428. }
  429. }
  430. spin_unlock(&meter->lock);
  431. return false;
  432. }
  433. static struct genl_ops dp_meter_genl_ops[] = {
  434. { .cmd = OVS_METER_CMD_FEATURES,
  435. .flags = 0, /* OK for unprivileged users. */
  436. .policy = meter_policy,
  437. .doit = ovs_meter_cmd_features
  438. },
  439. { .cmd = OVS_METER_CMD_SET,
  440. .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
  441. * privilege.
  442. */
  443. .policy = meter_policy,
  444. .doit = ovs_meter_cmd_set,
  445. },
  446. { .cmd = OVS_METER_CMD_GET,
  447. .flags = 0, /* OK for unprivileged users. */
  448. .policy = meter_policy,
  449. .doit = ovs_meter_cmd_get,
  450. },
  451. { .cmd = OVS_METER_CMD_DEL,
  452. .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
  453. * privilege.
  454. */
  455. .policy = meter_policy,
  456. .doit = ovs_meter_cmd_del
  457. },
  458. };
  459. static const struct genl_multicast_group ovs_meter_multicast_group = {
  460. .name = OVS_METER_MCGROUP,
  461. };
  462. struct genl_family dp_meter_genl_family __ro_after_init = {
  463. .hdrsize = sizeof(struct ovs_header),
  464. .name = OVS_METER_FAMILY,
  465. .version = OVS_METER_VERSION,
  466. .maxattr = OVS_METER_ATTR_MAX,
  467. .netnsok = true,
  468. .parallel_ops = true,
  469. .ops = dp_meter_genl_ops,
  470. .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
  471. .mcgrps = &ovs_meter_multicast_group,
  472. .n_mcgrps = 1,
  473. .module = THIS_MODULE,
  474. };
  475. int ovs_meters_init(struct datapath *dp)
  476. {
  477. int i;
  478. dp->meters = kmalloc_array(METER_HASH_BUCKETS,
  479. sizeof(struct hlist_head), GFP_KERNEL);
  480. if (!dp->meters)
  481. return -ENOMEM;
  482. for (i = 0; i < METER_HASH_BUCKETS; i++)
  483. INIT_HLIST_HEAD(&dp->meters[i]);
  484. return 0;
  485. }
  486. void ovs_meters_exit(struct datapath *dp)
  487. {
  488. int i;
  489. for (i = 0; i < METER_HASH_BUCKETS; i++) {
  490. struct hlist_head *head = &dp->meters[i];
  491. struct dp_meter *meter;
  492. struct hlist_node *n;
  493. hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
  494. kfree(meter);
  495. }
  496. kfree(dp->meters);
  497. }