/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. A virtio 0.9 compatible event index is used
 * for signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* next  - where the next entry will be written.
 * prev  - the value of "next" when the event last triggered.
 * event - the peer asked to be notified after this entry is written.
 */
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
        return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
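/* Example, with the indices reduced mod 2^16: if prev = 2, next = 5 and the
 * peer asked for an event after entry 3 (event = 3), then
 * (5 - 3 - 1) = 1 < (5 - 2) = 3, so a notification is needed.  If instead
 * event = 5, the first subtraction wraps to 65535, the comparison fails and
 * the notification is suppressed until entry 5 has actually been written.
 */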
/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

struct desc {
        unsigned short flags;
        unsigned short index;
        unsigned len;
        unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
        unsigned short kick_index;
        unsigned char reserved0[HOST_GUEST_PADDING - 2];
        unsigned short call_index;
        unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
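/* kick_index is written by the host in enable_kick() and read by the guest in
 * kick_available(); call_index is written by the guest in enable_call() and
 * read by the host in call_used().  The padding keeps the two fields on
 * separate cache lines so the two sides do not bounce a line between them.
 */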
struct data {
        void *buf; /* descriptor is writeable, we can't get buf from there */
        void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
        unsigned avail_idx;
        unsigned last_used_idx;
        unsigned num_free;
        unsigned kicked_avail_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
        /* we do not need to track last avail index
         * unless we have more than one in flight.
         */
        unsigned used_idx;
        unsigned called_used_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;
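/* guest and host above hold each side's private counters; the padding keeps
 * them from sharing a cache line.  Only ring[] and *event are touched by both
 * sides - data[] is used by the guest alone to remember the buf/data
 * pointers, since the descriptor itself may be overwritten by the host.
 */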
/* implemented by ring */
void alloc_ring(void)
{
        int ret;
        int i;

        ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
        if (ret) {
                perror("Unable to allocate ring buffer.\n");
                exit(3);
        }
        event = calloc(1, sizeof(*event));
        if (!event) {
                perror("Unable to allocate event buffer.\n");
                exit(3);
        }
        guest.avail_idx = 0;
        guest.kicked_avail_idx = -1;
        guest.last_used_idx = 0;
        host.used_idx = 0;
        host.called_used_idx = -1;
        for (i = 0; i < ring_size; ++i) {
                struct desc desc = {
                        .index = i,
                };
                ring[i] = desc;
        }
        guest.num_free = ring_size;
        data = calloc(ring_size, sizeof(*data));
        if (!data) {
                perror("Unable to allocate data buffer.\n");
                exit(3);
        }
}
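/* Note: ring_size, kick(), call() and the barrier()/smp_*() helpers are not
 * defined in this file; they are expected to come from main.h.  All head
 * calculations below mask with (ring_size - 1), so ring_size must be a power
 * of two.
 */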
/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
        unsigned head, index;

        if (!guest.num_free)
                return -1;

        guest.num_free--;
        head = (ring_size - 1) & (guest.avail_idx++);

        /* Start with a write. On MESI architectures this helps avoid a shared
         * state with the consumer that is polling this descriptor.
         */
        ring[head].addr = (unsigned long)(void *)buf;
        ring[head].len = len;
        /* The read below might bypass the write above. That is OK because it's
         * just an optimization. If this happens, we will get the cache line in
         * a shared state which is unfortunate, but probably not worth adding
         * an explicit full barrier to avoid it.
         */
        barrier();
        index = ring[head].index;
        data[index].buf = buf;
        data[index].data = datap;
        /* Barrier A (for pairing) */
        smp_release();
        ring[head].flags = DESC_HW;
        return 0;
}
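/* The publishing order above is what makes the scheme work: addr, len and the
 * data[] entry are all written before Barrier A, and DESC_HW is set only
 * afterwards.  The smp_release() here pairs with the smp_acquire() at
 * Barrier A in use_buf(), so once the host observes DESC_HW it is guaranteed
 * to see a fully initialised descriptor.
 */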
void *get_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;
        unsigned index;
        void *datap;

        if (ring[head].flags & DESC_HW)
                return NULL;
        /* Barrier B (for pairing) */
        smp_acquire();
        *lenp = ring[head].len;
        index = ring[head].index & (ring_size - 1);
        datap = data[index].data;
        *bufp = data[index].buf;
        data[index].buf = NULL;
        data[index].data = NULL;
        guest.num_free++;
        guest.last_used_idx++;
        return datap;
}
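/* A cleared DESC_HW flag is the only "used" signal from the host.  The
 * smp_acquire() at Barrier B pairs with the smp_release() at Barrier B in
 * use_buf(), so the len written back by the host is visible before it is
 * read here.
 */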
bool used_empty()
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;

        return (ring[head].flags & DESC_HW);
}

void disable_call()
{
        /* Doing nothing to disable calls might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_call()
{
        event->call_index = guest.last_used_idx;
        /* Flush call index write */
        /* Barrier D (for pairing) */
        smp_mb();
        return used_empty();
}
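/* The smp_mb() in enable_call() orders the call_index write before the
 * re-read of the ring: if the host completed a buffer and tested the old
 * call_index just before our write became visible, used_empty() returns
 * false and the caller can keep polling instead of waiting for a call that
 * may never arrive.
 */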
void kick_available(void)
{
        bool need;

        /* Flush out the previous flags write */
        /* Barrier C (for pairing) */
        smp_mb();
        need = need_event(event->kick_index,
                          guest.avail_idx,
                          guest.kicked_avail_idx);

        guest.kicked_avail_idx = guest.avail_idx;
        if (need)
                kick();
}
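/* Barrier C pairs with the smp_mb() in enable_kick() on the host side: the
 * host publishes kick_index and then re-checks the ring, while the guest
 * publishes DESC_HW and then reads kick_index.  need_event() then suppresses
 * the kick unless the host asked to be woken for one of the entries added
 * since the last kick (those between kicked_avail_idx and avail_idx).
 */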
/* host side */
void disable_kick()
{
        /* Doing nothing to disable kicks might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_kick()
{
        event->kick_index = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
        return avail_empty();
}
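/* Same pattern as enable_call(): write the event index, force it out with a
 * full barrier, then re-check the ring so a descriptor posted concurrently by
 * the guest is not missed.
 */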
bool avail_empty()
{
        unsigned head = (ring_size - 1) & host.used_idx;

        return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        if (!(ring[head].flags & DESC_HW))
                return false;

        /* make sure the length read below is not speculated */
        /* Barrier A (for pairing) */
        smp_acquire();

        /* simple in-order completion: we don't need
         * to touch index at all. This also means we
         * can just modify the descriptor in-place.
         */
        ring[head].len--;
        /* Make sure len is valid before flags.
         * Note: an alternative is to write len and flags in one access -
         * possible on 64-bit architectures, but wmb is free on Intel anyway,
         * so I have no way to test whether it's a gain.
         */
        /* Barrier B (for pairing) */
        smp_release();
        ring[head].flags = 0;
        host.used_idx++;
        return true;
}
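/* In-order completion means host.used_idx and guest.last_used_idx advance in
 * lockstep, so neither side needs a separate used index in shared memory.
 * The len-- above stands in for the "correct len" write-back described in the
 * design comment; a real device would report the length actually used.
 */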
void call_used(void)
{
        bool need;

        /* Flush out the previous flags write */
        /* Barrier D (for pairing) */
        smp_mb();
        need = need_event(event->call_index,
                          host.used_idx,
                          host.called_used_idx);

        host.called_used_idx = host.used_idx;
        if (need)
                call();
}
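/* Barrier D pairs with the smp_mb() in enable_call(): the guest publishes
 * call_index and re-checks the ring, while the host clears DESC_HW and then
 * reads call_index.  The call is issued only if the guest asked to be woken
 * for one of the entries completed since the previous call.
 */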