evtchn.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677
  1. /******************************************************************************
  2. * evtchn.c
  3. *
  4. * Driver for receiving and demuxing event-channel signals.
  5. *
  6. * Copyright (c) 2004-2005, K A Fraser
  7. * Multi-process extensions Copyright (c) 2004, Steven Smith
  8. *
  9. * This program is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU General Public License version 2
  11. * as published by the Free Software Foundation; or, when distributed
  12. * separately from the Linux kernel or incorporated into other
  13. * software packages, subject to the following license:
  14. *
  15. * Permission is hereby granted, free of charge, to any person obtaining a copy
  16. * of this source file (the "Software"), to deal in the Software without
  17. * restriction, including without limitation the rights to use, copy, modify,
  18. * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  19. * and to permit persons to whom the Software is furnished to do so, subject to
  20. * the following conditions:
  21. *
  22. * The above copyright notice and this permission notice shall be included in
  23. * all copies or substantial portions of the Software.
  24. *
  25. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  26. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  27. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  28. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  29. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  30. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  31. * IN THE SOFTWARE.
  32. */
  33. #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
  34. #include <linux/module.h>
  35. #include <linux/kernel.h>
  36. #include <linux/sched.h>
  37. #include <linux/slab.h>
  38. #include <linux/string.h>
  39. #include <linux/errno.h>
  40. #include <linux/fs.h>
  41. #include <linux/miscdevice.h>
  42. #include <linux/major.h>
  43. #include <linux/proc_fs.h>
  44. #include <linux/stat.h>
  45. #include <linux/poll.h>
  46. #include <linux/irq.h>
  47. #include <linux/init.h>
  48. #include <linux/mutex.h>
  49. #include <linux/cpu.h>
  50. #include <linux/mm.h>
  51. #include <linux/vmalloc.h>
  52. #include <xen/xen.h>
  53. #include <xen/events.h>
  54. #include <xen/evtchn.h>
  55. #include <asm/xen/hypervisor.h>
  56. struct per_user_data {
  57. struct mutex bind_mutex; /* serialize bind/unbind operations */
  58. struct rb_root evtchns;
  59. unsigned int nr_evtchns;
  60. /* Notification ring, accessed via /dev/xen/evtchn. */
  61. unsigned int ring_size;
  62. evtchn_port_t *ring;
  63. unsigned int ring_cons, ring_prod, ring_overflow;
  64. struct mutex ring_cons_mutex; /* protect against concurrent readers */
  65. spinlock_t ring_prod_lock; /* product against concurrent interrupts */
  66. /* Processes wait on this queue when ring is empty. */
  67. wait_queue_head_t evtchn_wait;
  68. struct fasync_struct *evtchn_async_queue;
  69. const char *name;
  70. };
  71. struct user_evtchn {
  72. struct rb_node node;
  73. struct per_user_data *user;
  74. unsigned port;
  75. bool enabled;
  76. };
  77. static evtchn_port_t *evtchn_alloc_ring(unsigned int size)
  78. {
  79. evtchn_port_t *ring;
  80. size_t s = size * sizeof(*ring);
  81. ring = kmalloc(s, GFP_KERNEL);
  82. if (!ring)
  83. ring = vmalloc(s);
  84. return ring;
  85. }
  86. static void evtchn_free_ring(evtchn_port_t *ring)
  87. {
  88. kvfree(ring);
  89. }
  90. static unsigned int evtchn_ring_offset(struct per_user_data *u,
  91. unsigned int idx)
  92. {
  93. return idx & (u->ring_size - 1);
  94. }
  95. static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
  96. unsigned int idx)
  97. {
  98. return u->ring + evtchn_ring_offset(u, idx);
  99. }
  100. static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
  101. {
  102. struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
  103. u->nr_evtchns++;
  104. while (*new) {
  105. struct user_evtchn *this;
  106. this = container_of(*new, struct user_evtchn, node);
  107. parent = *new;
  108. if (this->port < evtchn->port)
  109. new = &((*new)->rb_left);
  110. else if (this->port > evtchn->port)
  111. new = &((*new)->rb_right);
  112. else
  113. return -EEXIST;
  114. }
  115. /* Add new node and rebalance tree. */
  116. rb_link_node(&evtchn->node, parent, new);
  117. rb_insert_color(&evtchn->node, &u->evtchns);
  118. return 0;
  119. }
  120. static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
  121. {
  122. u->nr_evtchns--;
  123. rb_erase(&evtchn->node, &u->evtchns);
  124. kfree(evtchn);
  125. }
  126. static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
  127. {
  128. struct rb_node *node = u->evtchns.rb_node;
  129. while (node) {
  130. struct user_evtchn *evtchn;
  131. evtchn = container_of(node, struct user_evtchn, node);
  132. if (evtchn->port < port)
  133. node = node->rb_left;
  134. else if (evtchn->port > port)
  135. node = node->rb_right;
  136. else
  137. return evtchn;
  138. }
  139. return NULL;
  140. }
  141. static irqreturn_t evtchn_interrupt(int irq, void *data)
  142. {
  143. struct user_evtchn *evtchn = data;
  144. struct per_user_data *u = evtchn->user;
  145. WARN(!evtchn->enabled,
  146. "Interrupt for port %d, but apparently not enabled; per-user %p\n",
  147. evtchn->port, u);
  148. disable_irq_nosync(irq);
  149. evtchn->enabled = false;
  150. spin_lock(&u->ring_prod_lock);
  151. if ((u->ring_prod - u->ring_cons) < u->ring_size) {
  152. *evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
  153. wmb(); /* Ensure ring contents visible */
  154. if (u->ring_cons == u->ring_prod++) {
  155. wake_up_interruptible(&u->evtchn_wait);
  156. kill_fasync(&u->evtchn_async_queue,
  157. SIGIO, POLL_IN);
  158. }
  159. } else
  160. u->ring_overflow = 1;
  161. spin_unlock(&u->ring_prod_lock);
  162. return IRQ_HANDLED;
  163. }
  164. static ssize_t evtchn_read(struct file *file, char __user *buf,
  165. size_t count, loff_t *ppos)
  166. {
  167. int rc;
  168. unsigned int c, p, bytes1 = 0, bytes2 = 0;
  169. struct per_user_data *u = file->private_data;
  170. /* Whole number of ports. */
  171. count &= ~(sizeof(evtchn_port_t)-1);
  172. if (count == 0)
  173. return 0;
  174. if (count > PAGE_SIZE)
  175. count = PAGE_SIZE;
  176. for (;;) {
  177. mutex_lock(&u->ring_cons_mutex);
  178. rc = -EFBIG;
  179. if (u->ring_overflow)
  180. goto unlock_out;
  181. c = u->ring_cons;
  182. p = u->ring_prod;
  183. if (c != p)
  184. break;
  185. mutex_unlock(&u->ring_cons_mutex);
  186. if (file->f_flags & O_NONBLOCK)
  187. return -EAGAIN;
  188. rc = wait_event_interruptible(u->evtchn_wait,
  189. u->ring_cons != u->ring_prod);
  190. if (rc)
  191. return rc;
  192. }
  193. /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
  194. if (((c ^ p) & u->ring_size) != 0) {
  195. bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
  196. sizeof(evtchn_port_t);
  197. bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
  198. } else {
  199. bytes1 = (p - c) * sizeof(evtchn_port_t);
  200. bytes2 = 0;
  201. }
  202. /* Truncate chunks according to caller's maximum byte count. */
  203. if (bytes1 > count) {
  204. bytes1 = count;
  205. bytes2 = 0;
  206. } else if ((bytes1 + bytes2) > count) {
  207. bytes2 = count - bytes1;
  208. }
  209. rc = -EFAULT;
  210. rmb(); /* Ensure that we see the port before we copy it. */
  211. if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
  212. ((bytes2 != 0) &&
  213. copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
  214. goto unlock_out;
  215. u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
  216. rc = bytes1 + bytes2;
  217. unlock_out:
  218. mutex_unlock(&u->ring_cons_mutex);
  219. return rc;
  220. }
  221. static ssize_t evtchn_write(struct file *file, const char __user *buf,
  222. size_t count, loff_t *ppos)
  223. {
  224. int rc, i;
  225. evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
  226. struct per_user_data *u = file->private_data;
  227. if (kbuf == NULL)
  228. return -ENOMEM;
  229. /* Whole number of ports. */
  230. count &= ~(sizeof(evtchn_port_t)-1);
  231. rc = 0;
  232. if (count == 0)
  233. goto out;
  234. if (count > PAGE_SIZE)
  235. count = PAGE_SIZE;
  236. rc = -EFAULT;
  237. if (copy_from_user(kbuf, buf, count) != 0)
  238. goto out;
  239. mutex_lock(&u->bind_mutex);
  240. for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
  241. unsigned port = kbuf[i];
  242. struct user_evtchn *evtchn;
  243. evtchn = find_evtchn(u, port);
  244. if (evtchn && !evtchn->enabled) {
  245. evtchn->enabled = true;
  246. enable_irq(irq_from_evtchn(port));
  247. }
  248. }
  249. mutex_unlock(&u->bind_mutex);
  250. rc = count;
  251. out:
  252. free_page((unsigned long)kbuf);
  253. return rc;
  254. }
  255. static int evtchn_resize_ring(struct per_user_data *u)
  256. {
  257. unsigned int new_size;
  258. evtchn_port_t *new_ring, *old_ring;
  259. unsigned int p, c;
  260. /*
  261. * Ensure the ring is large enough to capture all possible
  262. * events. i.e., one free slot for each bound event.
  263. */
  264. if (u->nr_evtchns <= u->ring_size)
  265. return 0;
  266. if (u->ring_size == 0)
  267. new_size = 64;
  268. else
  269. new_size = 2 * u->ring_size;
  270. new_ring = evtchn_alloc_ring(new_size);
  271. if (!new_ring)
  272. return -ENOMEM;
  273. old_ring = u->ring;
  274. /*
  275. * Access to the ring contents is serialized by either the
  276. * prod /or/ cons lock so take both when resizing.
  277. */
  278. mutex_lock(&u->ring_cons_mutex);
  279. spin_lock_irq(&u->ring_prod_lock);
  280. /*
  281. * Copy the old ring contents to the new ring.
  282. *
  283. * If the ring contents crosses the end of the current ring,
  284. * it needs to be copied in two chunks.
  285. *
  286. * +---------+ +------------------+
  287. * |34567 12| -> | 1234567 |
  288. * +-----p-c-+ +------------------+
  289. */
  290. p = evtchn_ring_offset(u, u->ring_prod);
  291. c = evtchn_ring_offset(u, u->ring_cons);
  292. if (p < c) {
  293. memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring));
  294. memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring));
  295. } else
  296. memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring));
  297. u->ring = new_ring;
  298. u->ring_size = new_size;
  299. spin_unlock_irq(&u->ring_prod_lock);
  300. mutex_unlock(&u->ring_cons_mutex);
  301. evtchn_free_ring(old_ring);
  302. return 0;
  303. }
  304. static int evtchn_bind_to_user(struct per_user_data *u, int port)
  305. {
  306. struct user_evtchn *evtchn;
  307. struct evtchn_close close;
  308. int rc = 0;
  309. /*
  310. * Ports are never reused, so every caller should pass in a
  311. * unique port.
  312. *
  313. * (Locking not necessary because we haven't registered the
  314. * interrupt handler yet, and our caller has already
  315. * serialized bind operations.)
  316. */
  317. evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
  318. if (!evtchn)
  319. return -ENOMEM;
  320. evtchn->user = u;
  321. evtchn->port = port;
  322. evtchn->enabled = true; /* start enabled */
  323. rc = add_evtchn(u, evtchn);
  324. if (rc < 0)
  325. goto err;
  326. rc = evtchn_resize_ring(u);
  327. if (rc < 0)
  328. goto err;
  329. rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
  330. u->name, evtchn);
  331. if (rc < 0)
  332. goto err;
  333. rc = evtchn_make_refcounted(port);
  334. return rc;
  335. err:
  336. /* bind failed, should close the port now */
  337. close.port = port;
  338. if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
  339. BUG();
  340. del_evtchn(u, evtchn);
  341. return rc;
  342. }
  343. static void evtchn_unbind_from_user(struct per_user_data *u,
  344. struct user_evtchn *evtchn)
  345. {
  346. int irq = irq_from_evtchn(evtchn->port);
  347. BUG_ON(irq < 0);
  348. unbind_from_irqhandler(irq, evtchn);
  349. del_evtchn(u, evtchn);
  350. }
  351. static long evtchn_ioctl(struct file *file,
  352. unsigned int cmd, unsigned long arg)
  353. {
  354. int rc;
  355. struct per_user_data *u = file->private_data;
  356. void __user *uarg = (void __user *) arg;
  357. /* Prevent bind from racing with unbind */
  358. mutex_lock(&u->bind_mutex);
  359. switch (cmd) {
  360. case IOCTL_EVTCHN_BIND_VIRQ: {
  361. struct ioctl_evtchn_bind_virq bind;
  362. struct evtchn_bind_virq bind_virq;
  363. rc = -EFAULT;
  364. if (copy_from_user(&bind, uarg, sizeof(bind)))
  365. break;
  366. bind_virq.virq = bind.virq;
  367. bind_virq.vcpu = 0;
  368. rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
  369. &bind_virq);
  370. if (rc != 0)
  371. break;
  372. rc = evtchn_bind_to_user(u, bind_virq.port);
  373. if (rc == 0)
  374. rc = bind_virq.port;
  375. break;
  376. }
  377. case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
  378. struct ioctl_evtchn_bind_interdomain bind;
  379. struct evtchn_bind_interdomain bind_interdomain;
  380. rc = -EFAULT;
  381. if (copy_from_user(&bind, uarg, sizeof(bind)))
  382. break;
  383. bind_interdomain.remote_dom = bind.remote_domain;
  384. bind_interdomain.remote_port = bind.remote_port;
  385. rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
  386. &bind_interdomain);
  387. if (rc != 0)
  388. break;
  389. rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
  390. if (rc == 0)
  391. rc = bind_interdomain.local_port;
  392. break;
  393. }
  394. case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
  395. struct ioctl_evtchn_bind_unbound_port bind;
  396. struct evtchn_alloc_unbound alloc_unbound;
  397. rc = -EFAULT;
  398. if (copy_from_user(&bind, uarg, sizeof(bind)))
  399. break;
  400. alloc_unbound.dom = DOMID_SELF;
  401. alloc_unbound.remote_dom = bind.remote_domain;
  402. rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
  403. &alloc_unbound);
  404. if (rc != 0)
  405. break;
  406. rc = evtchn_bind_to_user(u, alloc_unbound.port);
  407. if (rc == 0)
  408. rc = alloc_unbound.port;
  409. break;
  410. }
  411. case IOCTL_EVTCHN_UNBIND: {
  412. struct ioctl_evtchn_unbind unbind;
  413. struct user_evtchn *evtchn;
  414. rc = -EFAULT;
  415. if (copy_from_user(&unbind, uarg, sizeof(unbind)))
  416. break;
  417. rc = -EINVAL;
  418. if (unbind.port >= xen_evtchn_nr_channels())
  419. break;
  420. rc = -ENOTCONN;
  421. evtchn = find_evtchn(u, unbind.port);
  422. if (!evtchn)
  423. break;
  424. disable_irq(irq_from_evtchn(unbind.port));
  425. evtchn_unbind_from_user(u, evtchn);
  426. rc = 0;
  427. break;
  428. }
  429. case IOCTL_EVTCHN_NOTIFY: {
  430. struct ioctl_evtchn_notify notify;
  431. struct user_evtchn *evtchn;
  432. rc = -EFAULT;
  433. if (copy_from_user(&notify, uarg, sizeof(notify)))
  434. break;
  435. rc = -ENOTCONN;
  436. evtchn = find_evtchn(u, notify.port);
  437. if (evtchn) {
  438. notify_remote_via_evtchn(notify.port);
  439. rc = 0;
  440. }
  441. break;
  442. }
  443. case IOCTL_EVTCHN_RESET: {
  444. /* Initialise the ring to empty. Clear errors. */
  445. mutex_lock(&u->ring_cons_mutex);
  446. spin_lock_irq(&u->ring_prod_lock);
  447. u->ring_cons = u->ring_prod = u->ring_overflow = 0;
  448. spin_unlock_irq(&u->ring_prod_lock);
  449. mutex_unlock(&u->ring_cons_mutex);
  450. rc = 0;
  451. break;
  452. }
  453. default:
  454. rc = -ENOSYS;
  455. break;
  456. }
  457. mutex_unlock(&u->bind_mutex);
  458. return rc;
  459. }
  460. static unsigned int evtchn_poll(struct file *file, poll_table *wait)
  461. {
  462. unsigned int mask = POLLOUT | POLLWRNORM;
  463. struct per_user_data *u = file->private_data;
  464. poll_wait(file, &u->evtchn_wait, wait);
  465. if (u->ring_cons != u->ring_prod)
  466. mask |= POLLIN | POLLRDNORM;
  467. if (u->ring_overflow)
  468. mask = POLLERR;
  469. return mask;
  470. }
  471. static int evtchn_fasync(int fd, struct file *filp, int on)
  472. {
  473. struct per_user_data *u = filp->private_data;
  474. return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
  475. }
  476. static int evtchn_open(struct inode *inode, struct file *filp)
  477. {
  478. struct per_user_data *u;
  479. u = kzalloc(sizeof(*u), GFP_KERNEL);
  480. if (u == NULL)
  481. return -ENOMEM;
  482. u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
  483. if (u->name == NULL) {
  484. kfree(u);
  485. return -ENOMEM;
  486. }
  487. init_waitqueue_head(&u->evtchn_wait);
  488. mutex_init(&u->bind_mutex);
  489. mutex_init(&u->ring_cons_mutex);
  490. spin_lock_init(&u->ring_prod_lock);
  491. filp->private_data = u;
  492. return nonseekable_open(inode, filp);
  493. }
  494. static int evtchn_release(struct inode *inode, struct file *filp)
  495. {
  496. struct per_user_data *u = filp->private_data;
  497. struct rb_node *node;
  498. while ((node = u->evtchns.rb_node)) {
  499. struct user_evtchn *evtchn;
  500. evtchn = rb_entry(node, struct user_evtchn, node);
  501. disable_irq(irq_from_evtchn(evtchn->port));
  502. evtchn_unbind_from_user(u, evtchn);
  503. }
  504. evtchn_free_ring(u->ring);
  505. kfree(u->name);
  506. kfree(u);
  507. return 0;
  508. }
  509. static const struct file_operations evtchn_fops = {
  510. .owner = THIS_MODULE,
  511. .read = evtchn_read,
  512. .write = evtchn_write,
  513. .unlocked_ioctl = evtchn_ioctl,
  514. .poll = evtchn_poll,
  515. .fasync = evtchn_fasync,
  516. .open = evtchn_open,
  517. .release = evtchn_release,
  518. .llseek = no_llseek,
  519. };
  520. static struct miscdevice evtchn_miscdev = {
  521. .minor = MISC_DYNAMIC_MINOR,
  522. .name = "xen/evtchn",
  523. .fops = &evtchn_fops,
  524. };
  525. static int __init evtchn_init(void)
  526. {
  527. int err;
  528. if (!xen_domain())
  529. return -ENODEV;
  530. /* Create '/dev/xen/evtchn'. */
  531. err = misc_register(&evtchn_miscdev);
  532. if (err != 0) {
  533. pr_err("Could not register /dev/xen/evtchn\n");
  534. return err;
  535. }
  536. pr_info("Event-channel device installed\n");
  537. return 0;
  538. }
  539. static void __exit evtchn_cleanup(void)
  540. {
  541. misc_deregister(&evtchn_miscdev);
  542. }
/* Hook evtchn_init()/evtchn_cleanup() up as the module's entry and exit. */
module_init(evtchn_init);
module_exit(evtchn_cleanup);

MODULE_LICENSE("GPL");