/* drivers/virtio/virtio_balloon.c */
  1. /*
  2. * Virtio balloon implementation, inspired by Dor Laor and Marcelo
  3. * Tosatti's implementations.
  4. *
  5. * Copyright 2008 Rusty Russell IBM Corporation
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include <linux/virtio.h>
  22. #include <linux/virtio_balloon.h>
  23. #include <linux/swap.h>
  24. #include <linux/workqueue.h>
  25. #include <linux/delay.h>
  26. #include <linux/slab.h>
  27. #include <linux/module.h>
  28. #include <linux/balloon_compaction.h>
  29. #include <linux/wait.h>
  30. #include <linux/mm.h>
  31. #include <linux/mount.h>
  32. #include <linux/magic.h>
  33. /*
  34. * Balloon device works in 4K page units. So each page is pointed to by
  35. * multiple balloon pages. All memory counters in this driver are in balloon
  36. * page units.
  37. */
  38. #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
  39. #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
  40. #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
  41. #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
  42. __GFP_NOMEMALLOC)
  43. /* The order of free page blocks to report to host */
  44. #define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
  45. /* The size of a free page block in bytes */
  46. #define VIRTIO_BALLOON_FREE_PAGE_SIZE \
  47. (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
  48. #ifdef CONFIG_BALLOON_COMPACTION
  49. static struct vfsmount *balloon_mnt;
  50. #endif
  51. enum virtio_balloon_vq {
  52. VIRTIO_BALLOON_VQ_INFLATE,
  53. VIRTIO_BALLOON_VQ_DEFLATE,
  54. VIRTIO_BALLOON_VQ_STATS,
  55. VIRTIO_BALLOON_VQ_FREE_PAGE,
  56. VIRTIO_BALLOON_VQ_MAX
  57. };
  58. struct virtio_balloon {
  59. struct virtio_device *vdev;
  60. struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
  61. /* Balloon's own wq for cpu-intensive work items */
  62. struct workqueue_struct *balloon_wq;
  63. /* The free page reporting work item submitted to the balloon wq */
  64. struct work_struct report_free_page_work;
  65. /* The balloon servicing is delegated to a freezable workqueue. */
  66. struct work_struct update_balloon_stats_work;
  67. struct work_struct update_balloon_size_work;
  68. /* Prevent updating balloon when it is being canceled. */
  69. spinlock_t stop_update_lock;
  70. bool stop_update;
  71. /* The list of allocated free pages, waiting to be given back to mm */
  72. struct list_head free_page_list;
  73. spinlock_t free_page_list_lock;
  74. /* The number of free page blocks on the above list */
  75. unsigned long num_free_page_blocks;
  76. /* The cmd id received from host */
  77. u32 cmd_id_received;
  78. /* The cmd id that is actively in use */
  79. __virtio32 cmd_id_active;
  80. /* Buffer to store the stop sign */
  81. __virtio32 cmd_id_stop;
  82. /* Waiting for host to ack the pages we released. */
  83. wait_queue_head_t acked;
  84. /* Number of balloon pages we've told the Host we're not using. */
  85. unsigned int num_pages;
  86. /*
  87. * The pages we've told the Host we're not using are enqueued
  88. * at vb_dev_info->pages list.
  89. * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
  90. * to num_pages above.
  91. */
  92. struct balloon_dev_info vb_dev_info;
  93. /* Synchronize access/update to this struct virtio_balloon elements */
  94. struct mutex balloon_lock;
  95. /* The array of pfns we tell the Host about. */
  96. unsigned int num_pfns;
  97. __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
  98. /* Memory statistics */
  99. struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
  100. /* To register a shrinker to shrink memory upon memory pressure */
  101. struct shrinker shrinker;
  102. };
  103. static struct virtio_device_id id_table[] = {
  104. { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
  105. { 0 },
  106. };
  107. static u32 page_to_balloon_pfn(struct page *page)
  108. {
  109. unsigned long pfn = page_to_pfn(page);
  110. BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
  111. /* Convert pfn from Linux page size to balloon page size. */
  112. return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
  113. }
/*
 * Virtqueue callback for the inflate/deflate queues: the host consumed
 * our buffer, so wake whoever is sleeping in tell_host().
 */
static void balloon_ack(struct virtqueue *vq)
{
	struct virtio_balloon *vb = vq->vdev->priv;

	wake_up(&vb->acked);
}
/*
 * Post the current pfn array (vb->pfns[0..vb->num_pfns-1]) to @vq and
 * sleep until the host acknowledges it (completion arrives through
 * balloon_ack()). Callers in this file hold vb->balloon_lock, which
 * keeps vb->pfns stable while the host reads it.
 */
static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
{
	struct scatterlist sg;
	unsigned int len;

	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);

	/* We should always be able to add one buffer to an empty queue. */
	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
	virtqueue_kick(vq);

	/* When host has read buffer, this completes via balloon_ack */
	wait_event(vb->acked, virtqueue_get_buf(vq, &len));
}
  130. static void set_page_pfns(struct virtio_balloon *vb,
  131. __virtio32 pfns[], struct page *page)
  132. {
  133. unsigned int i;
  134. /*
  135. * Set balloon pfns pointing at this page.
  136. * Note that the first pfn points at start of the page.
  137. */
  138. for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
  139. pfns[i] = cpu_to_virtio32(vb->vdev,
  140. page_to_balloon_pfn(page) + i);
  141. }
  142. static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
  143. {
  144. unsigned num_allocated_pages;
  145. unsigned num_pfns;
  146. struct page *page;
  147. LIST_HEAD(pages);
  148. /* We can only do one array worth at a time. */
  149. num = min(num, ARRAY_SIZE(vb->pfns));
  150. for (num_pfns = 0; num_pfns < num;
  151. num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
  152. struct page *page = balloon_page_alloc();
  153. if (!page) {
  154. dev_info_ratelimited(&vb->vdev->dev,
  155. "Out of puff! Can't get %u pages\n",
  156. VIRTIO_BALLOON_PAGES_PER_PAGE);
  157. /* Sleep for at least 1/5 of a second before retry. */
  158. msleep(200);
  159. break;
  160. }
  161. balloon_page_push(&pages, page);
  162. }
  163. mutex_lock(&vb->balloon_lock);
  164. vb->num_pfns = 0;
  165. while ((page = balloon_page_pop(&pages))) {
  166. balloon_page_enqueue(&vb->vb_dev_info, page);
  167. set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
  168. vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
  169. if (!virtio_has_feature(vb->vdev,
  170. VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
  171. adjust_managed_page_count(page, -1);
  172. vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
  173. }
  174. num_allocated_pages = vb->num_pfns;
  175. /* Did we get any? */
  176. if (vb->num_pfns != 0)
  177. tell_host(vb, vb->inflate_vq);
  178. mutex_unlock(&vb->balloon_lock);
  179. return num_allocated_pages;
  180. }
/*
 * Give the deflated pages on @pages back to the page allocator,
 * restoring the managed page count that fill_balloon() subtracted
 * (unless DEFLATE_ON_OOM is negotiated, in which case it was never
 * subtracted).
 */
static void release_pages_balloon(struct virtio_balloon *vb,
				  struct list_head *pages)
{
	struct page *page, *next;

	list_for_each_entry_safe(page, next, pages, lru) {
		if (!virtio_has_feature(vb->vdev,
					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
			adjust_managed_page_count(page, 1);
		list_del(&page->lru);
		put_page(page); /* balloon reference */
	}
}
/*
 * leak_balloon - deflate the balloon by up to @num 4K balloon pages.
 * @vb:  the balloon device.
 * @num: requested number of balloon pages to give back to the guest.
 *
 * Returns the number of balloon pages actually released, at most one
 * pfns[] array worth per call.
 */
static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
{
	unsigned num_freed_pages;
	struct page *page;
	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
	LIST_HEAD(pages);

	/* We can only do one array worth at a time. */
	num = min(num, ARRAY_SIZE(vb->pfns));

	mutex_lock(&vb->balloon_lock);
	/* We can't release more pages than taken */
	num = min(num, (size_t)vb->num_pages);
	for (vb->num_pfns = 0; vb->num_pfns < num;
	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
		page = balloon_page_dequeue(vb_dev_info);
		if (!page)
			break;
		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
		/* Keep dequeued pages aside until the host has been told. */
		list_add(&page->lru, &pages);
		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
	}

	num_freed_pages = vb->num_pfns;
	/*
	 * Note that if
	 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
	 * is true, we *have* to do it in this order
	 */
	if (vb->num_pfns != 0)
		tell_host(vb, vb->deflate_vq);
	release_pages_balloon(vb, &pages);
	mutex_unlock(&vb->balloon_lock);
	return num_freed_pages;
}
/*
 * Store one (tag, value) statistic into slot @idx of vb->stats[] in the
 * device's endianness.
 */
static inline void update_stat(struct virtio_balloon *vb, int idx,
			       u16 tag, u64 val)
{
	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
	vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag);
	vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val);
}
#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)

/*
 * Refresh vb->stats[] with current guest memory statistics and return
 * the number of slots filled. The count varies with kernel config
 * (CONFIG_VM_EVENT_COUNTERS, CONFIG_HUGETLB_PAGE), so callers size the
 * buffer they post from the return value.
 */
static unsigned int update_balloon_stats(struct virtio_balloon *vb)
{
	unsigned long events[NR_VM_EVENT_ITEMS];
	struct sysinfo i;
	unsigned int idx = 0;
	long available;
	unsigned long caches;

	all_vm_events(events);
	si_meminfo(&i);

	available = si_mem_available();
	caches = global_node_page_state(NR_FILE_PAGES);

#ifdef CONFIG_VM_EVENT_COUNTERS
	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
		    pages_to_bytes(events[PSWPIN]));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
		    pages_to_bytes(events[PSWPOUT]));
	/* Fault statistics are raw event counts, not byte quantities. */
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
#ifdef CONFIG_HUGETLB_PAGE
	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
		    events[HTLB_BUDDY_PGALLOC]);
	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL,
		    events[HTLB_BUDDY_PGALLOC_FAIL]);
#endif
#endif
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
		    pages_to_bytes(i.freeram));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
		    pages_to_bytes(i.totalram));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL,
		    pages_to_bytes(available));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_CACHES,
		    pages_to_bytes(caches));

	return idx;
}
/*
 * While most virtqueues communicate guest-initiated requests to the hypervisor,
 * the stats queue operates in reverse. The driver initializes the virtqueue
 * with a single buffer. From that point forward, all conversations consist of
 * a hypervisor request (a call to this function) which directs us to refill
 * the virtqueue with a fresh stats buffer. Since stats collection can sleep,
 * we delegate the job to a freezable workqueue that will do the actual work via
 * stats_handle_request().
 */
static void stats_request(struct virtqueue *vq)
{
	struct virtio_balloon *vb = vq->vdev->priv;

	/* stop_update is set during remove/freeze; don't queue work then. */
	spin_lock(&vb->stop_update_lock);
	if (!vb->stop_update)
		queue_work(system_freezable_wq, &vb->update_balloon_stats_work);
	spin_unlock(&vb->stop_update_lock);
}
/*
 * Process-context half of a stats request: collect fresh statistics,
 * reclaim the buffer the host just consumed, and repost it.
 */
static void stats_handle_request(struct virtio_balloon *vb)
{
	struct virtqueue *vq;
	struct scatterlist sg;
	unsigned int len, num_stats;

	num_stats = update_balloon_stats(vb);

	vq = vb->stats_vq;
	/* If the host hasn't actually consumed the buffer, nothing to do. */
	if (!virtqueue_get_buf(vq, &len))
		return;
	sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
	virtqueue_kick(vq);
}
/*
 * Return how far the balloon is from the host-requested size, in 4K
 * balloon pages: positive means inflate, negative means deflate.
 */
static inline s64 towards_target(struct virtio_balloon *vb)
{
	s64 target;
	u32 num_pages;

	virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages,
		     &num_pages);

	/* Legacy balloon config space is LE, unlike all other devices. */
	if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
		num_pages = le32_to_cpu((__force __le32)num_pages);

	target = num_pages;
	return target - vb->num_pages;
}
/*
 * Gives back @num_to_return blocks of free pages to mm.
 * Each block is VIRTIO_BALLOON_FREE_PAGE_ORDER sized. Pass ULONG_MAX to
 * drain the whole free_page_list. Returns the number of blocks freed
 * (stops early when the list empties).
 */
static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
					     unsigned long num_to_return)
{
	struct page *page;
	unsigned long num_returned;

	spin_lock_irq(&vb->free_page_list_lock);
	for (num_returned = 0; num_returned < num_to_return; num_returned++) {
		page = balloon_page_pop(&vb->free_page_list);
		if (!page)
			break;
		free_pages((unsigned long)page_address(page),
			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
	}
	vb->num_free_page_blocks -= num_returned;
	spin_unlock_irq(&vb->free_page_list_lock);

	return num_returned;
}
/*
 * Config-change handler. Two independent duties:
 *  1) if the host changed the target size, schedule a resize on the
 *     freezable workqueue;
 *  2) if free-page hinting is negotiated, act on the new command id:
 *     DONE returns all hinted pages to mm, a genuinely new id kicks off
 *     a reporting run on the balloon workqueue.
 * Runs in interrupt-ish context, hence irqsave on stop_update_lock.
 */
static void virtballoon_changed(struct virtio_device *vdev)
{
	struct virtio_balloon *vb = vdev->priv;
	unsigned long flags;
	s64 diff = towards_target(vb);

	if (diff) {
		spin_lock_irqsave(&vb->stop_update_lock, flags);
		if (!vb->stop_update)
			queue_work(system_freezable_wq,
				   &vb->update_balloon_size_work);
		spin_unlock_irqrestore(&vb->stop_update_lock, flags);
	}

	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
		virtio_cread(vdev, struct virtio_balloon_config,
			     free_page_report_cmd_id, &vb->cmd_id_received);
		if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
			/* Pass ULONG_MAX to give back all the free pages */
			return_free_pages_to_mm(vb, ULONG_MAX);
		} else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
			   vb->cmd_id_received !=
				virtio32_to_cpu(vdev, vb->cmd_id_active)) {
			spin_lock_irqsave(&vb->stop_update_lock, flags);
			if (!vb->stop_update) {
				queue_work(vb->balloon_wq,
					   &vb->report_free_page_work);
			}
			spin_unlock_irqrestore(&vb->stop_update_lock, flags);
		}
	}
}
/*
 * Publish the balloon's current size (in 4K balloon pages) to the
 * host via the 'actual' config field.
 */
static void update_balloon_size(struct virtio_balloon *vb)
{
	u32 actual = vb->num_pages;

	/* Legacy balloon config space is LE, unlike all other devices. */
	if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
		actual = (__force u32)cpu_to_le32(actual);

	virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
		      &actual);
}
/* Work item: refill the stats virtqueue (queued by stats_request()). */
static void update_balloon_stats_func(struct work_struct *work)
{
	struct virtio_balloon *vb;

	vb = container_of(work, struct virtio_balloon,
			  update_balloon_stats_work);
	stats_handle_request(vb);
}
/*
 * Work item: move the balloon towards the host's target size. Each pass
 * can inflate/deflate at most one pfns[] array worth, so requeue
 * ourselves until the target is reached.
 */
static void update_balloon_size_func(struct work_struct *work)
{
	struct virtio_balloon *vb;
	s64 diff;

	vb = container_of(work, struct virtio_balloon,
			  update_balloon_size_work);
	diff = towards_target(vb);

	if (diff > 0)
		diff -= fill_balloon(vb, diff);
	else if (diff < 0)
		diff += leak_balloon(vb, -diff);
	update_balloon_size(vb);

	if (diff)
		queue_work(system_freezable_wq, work);
}
/*
 * Discover the device's virtqueues and prime the stats queue.
 * Inflate/deflate are mandatory; stats and free-page queues exist only
 * when their feature bits were negotiated. Returns 0 or a negative
 * errno from find_vqs()/virtqueue_add_outbuf().
 */
static int init_vqs(struct virtio_balloon *vb)
{
	struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
	vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
	const char *names[VIRTIO_BALLOON_VQ_MAX];
	int err;

	/*
	 * Inflateq and deflateq are used unconditionally. The names[]
	 * will be NULL if the related feature is not enabled, which will
	 * cause no allocation for the corresponding virtqueue in find_vqs.
	 */
	callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
	names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
	callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
		callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
	}

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
		/* Free-page reporting is driven by work items, no callback. */
		names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
	}

	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
					 vqs, callbacks, names, NULL, NULL);
	if (err)
		return err;

	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
		struct scatterlist sg;
		unsigned int num_stats;
		vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];

		/*
		 * Prime this virtqueue with one buffer so the hypervisor can
		 * use it to signal us later (it can't be broken yet!).
		 */
		num_stats = update_balloon_stats(vb);

		sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
		err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb,
					   GFP_KERNEL);
		if (err) {
			dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n",
				 __func__);
			return err;
		}
		virtqueue_kick(vb->stats_vq);
	}

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
		vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];

	return 0;
}
/*
 * Echo the host's latest free-page command id back on the free-page vq,
 * marking the start of a reporting run. Also records it as the active
 * id that send_free_pages() compares against. Returns 0 or a negative
 * errno from virtqueue_add_outbuf().
 */
static int send_cmd_id_start(struct virtio_balloon *vb)
{
	struct scatterlist sg;
	struct virtqueue *vq = vb->free_page_vq;
	int err, unused;

	/* Detach all the used buffers from the vq */
	while (virtqueue_get_buf(vq, &unused))
		;

	vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received);
	sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
	if (!err)
		virtqueue_kick(vq);
	return err;
}
/*
 * Send the pre-initialized stop id on the free-page vq, marking the end
 * of a reporting run. Returns 0 or a negative errno from
 * virtqueue_add_outbuf().
 */
static int send_cmd_id_stop(struct virtio_balloon *vb)
{
	struct scatterlist sg;
	struct virtqueue *vq = vb->free_page_vq;
	int err, unused;

	/* Detach all the used buffers from the vq */
	while (virtqueue_get_buf(vq, &unused))
		;

	sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
	if (!err)
		virtqueue_kick(vq);
	return err;
}
/*
 * Allocate one MAX_ORDER-1 free page block and report it to the host
 * as a hint. The block is kept on vb->free_page_list (not freed) until
 * the host says DONE or the shrinker reclaims it.
 *
 * Returns 0 on success or when the vq is full (block silently freed),
 * -EINTR when no more free blocks can be allocated (normal end of a
 * run), or a negative errno from virtqueue_add_inbuf().
 */
static int get_free_page_and_send(struct virtio_balloon *vb)
{
	struct virtqueue *vq = vb->free_page_vq;
	struct page *page;
	struct scatterlist sg;
	int err, unused;
	void *p;

	/* Detach all the used buffers from the vq */
	while (virtqueue_get_buf(vq, &unused))
		;

	/* NORETRY/NOMEMALLOC: only take genuinely free memory, never reclaim. */
	page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
	/*
	 * When the allocation returns NULL, it indicates that we have got all
	 * the possible free pages, so return -EINTR to stop.
	 */
	if (!page)
		return -EINTR;

	p = page_address(page);
	sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE);
	/* There is always 1 entry reserved for the cmd id to use. */
	if (vq->num_free > 1) {
		err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
		if (unlikely(err)) {
			free_pages((unsigned long)p,
				   VIRTIO_BALLOON_FREE_PAGE_ORDER);
			return err;
		}
		virtqueue_kick(vq);
		spin_lock_irq(&vb->free_page_list_lock);
		balloon_page_push(&vb->free_page_list, page);
		vb->num_free_page_blocks++;
		spin_unlock_irq(&vb->free_page_list_lock);
	} else {
		/*
		 * The vq has no available entry to add this page block, so
		 * just free it.
		 */
		free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER);
	}

	return 0;
}
/*
 * Report free page blocks to the host one by one until either the host
 * changes the command id (stop or a new run) or the guest runs out of
 * free blocks. Returns 0 on a normal stop, or a negative errno
 * propagated from get_free_page_and_send().
 */
static int send_free_pages(struct virtio_balloon *vb)
{
	int err;
	u32 cmd_id_active;

	while (1) {
		/*
		 * If a stop id or a new cmd id was just received from host,
		 * stop the reporting.
		 */
		cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
		if (cmd_id_active != vb->cmd_id_received)
			break;

		/*
		 * The free page blocks are allocated and sent to host one by
		 * one.
		 */
		err = get_free_page_and_send(vb);
		if (err == -EINTR)
			break;
		else if (unlikely(err))
			return err;
	}

	return 0;
}
/*
 * Work item running one complete free-page reporting cycle on the
 * balloon workqueue: start id, the free page blocks, stop id. Errors
 * are logged but not propagated — the host simply sees a shorter run.
 */
static void report_free_page_func(struct work_struct *work)
{
	int err;
	struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
						 report_free_page_work);
	struct device *dev = &vb->vdev->dev;

	/* Start by sending the received cmd id to host with an outbuf. */
	err = send_cmd_id_start(vb);
	if (unlikely(err))
		dev_err(dev, "Failed to send a start id, err = %d\n", err);

	err = send_free_pages(vb);
	if (unlikely(err))
		dev_err(dev, "Failed to send a free page, err = %d\n", err);

	/* End by sending a stop id to host with an outbuf. */
	err = send_cmd_id_stop(vb);
	if (unlikely(err))
		dev_err(dev, "Failed to send a stop id, err = %d\n", err);
}
  556. #ifdef CONFIG_BALLOON_COMPACTION
/*
 * virtballoon_migratepage - perform the balloon page migration on behalf of
 *			     a compaction thread.            (called under page lock)
 * @vb_dev_info: the balloon device
 * @newpage: page that will replace the isolated page after migration finishes.
 * @page   : the isolated (old) page that is about to be migrated to newpage.
 * @mode   : compaction mode -- not used for balloon page migration.
 *
 * After a ballooned page gets isolated by compaction procedures, this is the
 * function that performs the page migration on behalf of a compaction thread
 * The page migration for virtio balloon is done in a simple swap fashion which
 * follows these two macro steps:
 *  1) insert newpage into vb->pages list and update the host about it;
 *  2) update the host about the old page removed from vb->pages list;
 *
 * This function performs the balloon page migration task.
 * Called through balloon_mapping->a_ops->migratepage
 */
static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	struct virtio_balloon *vb = container_of(vb_dev_info,
			struct virtio_balloon, vb_dev_info);
	unsigned long flags;

	/*
	 * In order to avoid lock contention while migrating pages concurrently
	 * to leak_balloon() or fill_balloon() we just give up the balloon_lock
	 * this turn, as it is easier to retry the page migration later.
	 * This also prevents fill_balloon() getting stuck into a mutex
	 * recursion in the case it ends up triggering memory compaction
	 * while it is attempting to inflate the ballon.
	 */
	if (!mutex_trylock(&vb->balloon_lock))
		return -EAGAIN;

	get_page(newpage); /* balloon reference */

	/* balloon's page migration 1st step  -- inflate "newpage" */
	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
	balloon_page_insert(vb_dev_info, newpage);
	vb_dev_info->isolated_pages--;
	__count_vm_event(BALLOON_MIGRATE);
	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
	/* Tell the host about exactly one page. */
	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
	set_page_pfns(vb, vb->pfns, newpage);
	tell_host(vb, vb->inflate_vq);

	/* balloon's page migration 2nd step -- deflate "page" */
	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
	balloon_page_delete(page);
	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
	set_page_pfns(vb, vb->pfns, page);
	tell_host(vb, vb->deflate_vq);

	mutex_unlock(&vb->balloon_lock);

	put_page(page); /* balloon reference */

	return MIGRATEPAGE_SUCCESS;
}
/*
 * Mount callback for the balloon pseudo-filesystem; it only exists to
 * provide the anonymous inode that backs balloon page migration.
 */
static struct dentry *balloon_mount(struct file_system_type *fs_type,
				    int flags, const char *dev_name, void *data)
{
	static const struct dentry_operations ops = {
		.d_dname = simple_dname,
	};

	return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops,
			    BALLOON_KVM_MAGIC);
}
/* Pseudo-filesystem backing the balloon's migration inode. */
static struct file_system_type balloon_fs = {
	.name           = "balloon-kvm",
	.mount          = balloon_mount,
	.kill_sb        = kill_anon_super,
};
  626. #endif /* CONFIG_BALLOON_COMPACTION */
/*
 * Shrinker helper: reclaim hinted free-page blocks back to mm.
 * @pages_to_free is rounded up to a whole number of blocks; returns the
 * number of pages actually freed (may be less if the list empties).
 */
static unsigned long shrink_free_pages(struct virtio_balloon *vb,
				       unsigned long pages_to_free)
{
	unsigned long blocks_to_free, blocks_freed;

	pages_to_free = round_up(pages_to_free,
				 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER);
	blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
	blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);

	return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
}
  637. static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
  638. unsigned long pages_to_free)
  639. {
  640. unsigned long pages_freed = 0;
  641. /*
  642. * One invocation of leak_balloon can deflate at most
  643. * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
  644. * multiple times to deflate pages till reaching pages_to_free.
  645. */
  646. while (vb->num_pages && pages_to_free) {
  647. pages_freed += leak_balloon(vb, pages_to_free) /
  648. VIRTIO_BALLOON_PAGES_PER_PAGE;
  649. pages_to_free -= pages_freed;
  650. }
  651. update_balloon_size(vb);
  652. return pages_freed;
  653. }
/*
 * Shrinker scan callback: free hinted free-page blocks first (cheap,
 * host only loses hints), then deflate the balloon for the remainder.
 * Works in 4K balloon-page units derived from sc->nr_to_scan.
 */
static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
						  struct shrink_control *sc)
{
	unsigned long pages_to_free, pages_freed = 0;
	struct virtio_balloon *vb = container_of(shrinker,
					struct virtio_balloon, shrinker);

	pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
		pages_freed = shrink_free_pages(vb, pages_to_free);

	if (pages_freed >= pages_to_free)
		return pages_freed;

	pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);

	return pages_freed;
}
  668. static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
  669. struct shrink_control *sc)
  670. {
  671. struct virtio_balloon *vb = container_of(shrinker,
  672. struct virtio_balloon, shrinker);
  673. unsigned long count;
  674. count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
  675. count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
  676. return count;
  677. }
/* Tear down the shrinker registered by virtio_balloon_register_shrinker(). */
static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
{
	unregister_shrinker(&vb->shrinker);
}
/*
 * Register the shrinker that deflates the balloon under memory
 * pressure. Returns 0 or a negative errno from register_shrinker().
 */
static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
{
	vb->shrinker.scan_objects = virtio_balloon_shrinker_scan;
	vb->shrinker.count_objects = virtio_balloon_shrinker_count;
	vb->shrinker.seeks = DEFAULT_SEEKS;

	return register_shrinker(&vb->shrinker);
}
  689. static int virtballoon_probe(struct virtio_device *vdev)
  690. {
  691. struct virtio_balloon *vb;
  692. __u32 poison_val;
  693. int err;
  694. if (!vdev->config->get) {
  695. dev_err(&vdev->dev, "%s failure: config access disabled\n",
  696. __func__);
  697. return -EINVAL;
  698. }
  699. vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
  700. if (!vb) {
  701. err = -ENOMEM;
  702. goto out;
  703. }
  704. INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
  705. INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
  706. spin_lock_init(&vb->stop_update_lock);
  707. mutex_init(&vb->balloon_lock);
  708. init_waitqueue_head(&vb->acked);
  709. vb->vdev = vdev;
  710. balloon_devinfo_init(&vb->vb_dev_info);
  711. err = init_vqs(vb);
  712. if (err)
  713. goto out_free_vb;
  714. #ifdef CONFIG_BALLOON_COMPACTION
  715. balloon_mnt = kern_mount(&balloon_fs);
  716. if (IS_ERR(balloon_mnt)) {
  717. err = PTR_ERR(balloon_mnt);
  718. goto out_del_vqs;
  719. }
  720. vb->vb_dev_info.migratepage = virtballoon_migratepage;
  721. vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
  722. if (IS_ERR(vb->vb_dev_info.inode)) {
  723. err = PTR_ERR(vb->vb_dev_info.inode);
  724. kern_unmount(balloon_mnt);
  725. goto out_del_vqs;
  726. }
  727. vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
  728. #endif
  729. if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
  730. /*
  731. * There is always one entry reserved for cmd id, so the ring
  732. * size needs to be at least two to report free page hints.
  733. */
  734. if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
  735. err = -ENOSPC;
  736. goto out_del_vqs;
  737. }
  738. vb->balloon_wq = alloc_workqueue("balloon-wq",
  739. WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
  740. if (!vb->balloon_wq) {
  741. err = -ENOMEM;
  742. goto out_del_vqs;
  743. }
  744. INIT_WORK(&vb->report_free_page_work, report_free_page_func);
  745. vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP;
  746. vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
  747. VIRTIO_BALLOON_CMD_ID_STOP);
  748. vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
  749. VIRTIO_BALLOON_CMD_ID_STOP);
  750. vb->num_free_page_blocks = 0;
  751. spin_lock_init(&vb->free_page_list_lock);
  752. INIT_LIST_HEAD(&vb->free_page_list);
  753. if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
  754. memset(&poison_val, PAGE_POISON, sizeof(poison_val));
  755. virtio_cwrite(vb->vdev, struct virtio_balloon_config,
  756. poison_val, &poison_val);
  757. }
  758. }
  759. /*
  760. * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
  761. * shrinker needs to be registered to relieve memory pressure.
  762. */
  763. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
  764. err = virtio_balloon_register_shrinker(vb);
  765. if (err)
  766. goto out_del_balloon_wq;
  767. }
  768. virtio_device_ready(vdev);
  769. if (towards_target(vb))
  770. virtballoon_changed(vdev);
  771. return 0;
  772. out_del_balloon_wq:
  773. if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
  774. destroy_workqueue(vb->balloon_wq);
  775. out_del_vqs:
  776. vdev->config->del_vqs(vdev);
  777. out_free_vb:
  778. kfree(vb);
  779. out:
  780. return err;
  781. }
  782. static void remove_common(struct virtio_balloon *vb)
  783. {
  784. /* There might be pages left in the balloon: free them. */
  785. while (vb->num_pages)
  786. leak_balloon(vb, vb->num_pages);
  787. update_balloon_size(vb);
  788. /* Now we reset the device so we can clean up the queues. */
  789. vb->vdev->config->reset(vb->vdev);
  790. vb->vdev->config->del_vqs(vb->vdev);
  791. }
/*
 * Device removal.  The teardown order is deliberate: stop new work from
 * being queued, flush the work that may already be running, deflate and
 * reset the device, and only then release the compaction inode/mount
 * and the per-device state.
 */
static void virtballoon_remove(struct virtio_device *vdev)
{
	struct virtio_balloon *vb = vdev->priv;

	/* Stop OOM-triggered deflation before dismantling anything else. */
	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
		virtio_balloon_unregister_shrinker(vb);

	/* Prevent the config-change handler from queueing further work. */
	spin_lock_irq(&vb->stop_update_lock);
	vb->stop_update = true;
	spin_unlock_irq(&vb->stop_update_lock);

	/* Wait for any in-flight size/stats work to finish. */
	cancel_work_sync(&vb->update_balloon_size_work);
	cancel_work_sync(&vb->update_balloon_stats_work);

	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
		cancel_work_sync(&vb->report_free_page_work);
		destroy_workqueue(vb->balloon_wq);
	}

	/* Deflate fully, reset the device and delete the virtqueues. */
	remove_common(vb);
#ifdef CONFIG_BALLOON_COMPACTION
	if (vb->vb_dev_info.inode)
		iput(vb->vb_dev_info.inode);

	kern_unmount(balloon_mnt);
#endif
	kfree(vb);
}
  814. #ifdef CONFIG_PM_SLEEP
  815. static int virtballoon_freeze(struct virtio_device *vdev)
  816. {
  817. struct virtio_balloon *vb = vdev->priv;
  818. /*
  819. * The workqueue is already frozen by the PM core before this
  820. * function is called.
  821. */
  822. remove_common(vb);
  823. return 0;
  824. }
  825. static int virtballoon_restore(struct virtio_device *vdev)
  826. {
  827. struct virtio_balloon *vb = vdev->priv;
  828. int ret;
  829. ret = init_vqs(vdev->priv);
  830. if (ret)
  831. return ret;
  832. virtio_device_ready(vdev);
  833. if (towards_target(vb))
  834. virtballoon_changed(vdev);
  835. update_balloon_size(vb);
  836. return 0;
  837. }
  838. #endif
  839. static int virtballoon_validate(struct virtio_device *vdev)
  840. {
  841. if (!page_poisoning_enabled())
  842. __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
  843. __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
  844. return 0;
  845. }
/* Feature bits this driver understands and may negotiate with the host. */
static unsigned int features[] = {
	VIRTIO_BALLOON_F_MUST_TELL_HOST,
	VIRTIO_BALLOON_F_STATS_VQ,
	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
	VIRTIO_BALLOON_F_PAGE_POISON,
};
/* Virtio driver glue: callbacks the virtio core invokes for this device. */
static struct virtio_driver virtio_balloon_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.validate =	virtballoon_validate,
	.probe =	virtballoon_probe,
	.remove =	virtballoon_remove,
	.config_changed = virtballoon_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze	=	virtballoon_freeze,
	.restore =	virtballoon_restore,
#endif
};

module_virtio_driver(virtio_balloon_driver);
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio balloon driver");
MODULE_LICENSE("GPL");