vmw_balloon.c

// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is the VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests. The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */
//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");
/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait
 * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN, to suppress page
 * allocation failure warnings. Disallow access to emergency low-memory pools.
 */
#define VMW_HUGE_PAGE_ALLOC_FLAGS	(__GFP_HIGHMEM|__GFP_NOWARN|	\
					 __GFP_NOMEMALLOC)

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight
 * reclamation (__GFP_NORETRY). Use __GFP_NOWARN, to suppress page allocation
 * failure warnings. Disallow access to emergency low-memory pools.
 */
#define VMW_PAGE_ALLOC_FLAGS		(__GFP_HIGHMEM|__GFP_NOWARN|	\
					 __GFP_NOMEMALLOC|__GFP_NORETRY)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16
/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
};

#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

#define VMW_BALLOON_2M_ORDER		(PMD_SHIFT - PAGE_SHIFT)
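
/*
 * On x86 with 4KB base pages, PMD_SHIFT is 21, so VMW_BALLOON_2M_ORDER
 * evaluates to 9: a "2M page" spans 1 << 9 = 512 basic 4KB frames.
 */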

enum vmballoon_page_size_type {
	VMW_BALLOON_4K_PAGE,
	VMW_BALLOON_2M_PAGE,
	VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE
};

#define VMW_BALLOON_NUM_PAGE_SIZES	(VMW_BALLOON_LAST_SIZE + 1)

static const char * const vmballoon_page_size_names[] = {
	[VMW_BALLOON_4K_PAGE]			= "4k",
	[VMW_BALLOON_2M_PAGE]			= "2M"
};

enum vmballoon_op {
	VMW_BALLOON_INFLATE,
	VMW_BALLOON_DEFLATE
};

enum vmballoon_op_stat_type {
	VMW_BALLOON_OP_STAT,
	VMW_BALLOON_OP_FAIL_STAT
};

#define VMW_BALLOON_OP_STAT_TYPES	(VMW_BALLOON_OP_FAIL_STAT + 1)
/**
 * enum vmballoon_cmd_type - backdoor commands.
 *
 * Availability of the commands is as follows:
 *
 * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
 * %VMW_BALLOON_CMD_GUEST_ID are always available.
 *
 * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then
 * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available.
 *
 * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then
 * %VMW_BALLOON_CMD_BATCHED_LOCK and %VMW_BALLOON_CMD_BATCHED_UNLOCK commands
 * are available.
 *
 * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then
 * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK
 * are supported.
 *
 * If the host reports %VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then
 * %VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported.
 *
 * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor.
 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
 *			    to be deflated from the balloon.
 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
 *			      runs in the VM.
 * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of
 *				  ballooned pages (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of
 *				    pages that are about to be deflated from the
 *				    balloon (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
 *				     for 2MB pages.
 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
 *				       @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB
 *				       pages.
 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification
 *				       that would be invoked when the balloon
 *				       size changes.
 * @VMW_BALLOON_CMD_LAST: Value of the last command.
 */
enum vmballoon_cmd_type {
	VMW_BALLOON_CMD_START,
	VMW_BALLOON_CMD_GET_TARGET,
	VMW_BALLOON_CMD_LOCK,
	VMW_BALLOON_CMD_UNLOCK,
	VMW_BALLOON_CMD_GUEST_ID,
	/* No command 5 */
	VMW_BALLOON_CMD_BATCHED_LOCK = 6,
	VMW_BALLOON_CMD_BATCHED_UNLOCK,
	VMW_BALLOON_CMD_BATCHED_2M_LOCK,
	VMW_BALLOON_CMD_BATCHED_2M_UNLOCK,
	VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
	VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
};

#define VMW_BALLOON_CMD_NUM	(VMW_BALLOON_CMD_LAST + 1)
enum vmballoon_error_codes {
	VMW_BALLOON_SUCCESS,
	VMW_BALLOON_ERROR_CMD_INVALID,
	VMW_BALLOON_ERROR_PPN_INVALID,
	VMW_BALLOON_ERROR_PPN_LOCKED,
	VMW_BALLOON_ERROR_PPN_UNLOCKED,
	VMW_BALLOON_ERROR_PPN_PINNED,
	VMW_BALLOON_ERROR_PPN_NOTNEEDED,
	VMW_BALLOON_ERROR_RESET,
	VMW_BALLOON_ERROR_BUSY
};

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

#define VMW_BALLOON_CMD_WITH_TARGET_MASK			\
	((1UL << VMW_BALLOON_CMD_GET_TARGET)		|	\
	 (1UL << VMW_BALLOON_CMD_LOCK)			|	\
	 (1UL << VMW_BALLOON_CMD_UNLOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))

static const char * const vmballoon_cmd_names[] = {
	[VMW_BALLOON_CMD_START]			= "start",
	[VMW_BALLOON_CMD_GET_TARGET]		= "target",
	[VMW_BALLOON_CMD_LOCK]			= "lock",
	[VMW_BALLOON_CMD_UNLOCK]		= "unlock",
	[VMW_BALLOON_CMD_GUEST_ID]		= "guestType",
	[VMW_BALLOON_CMD_BATCHED_LOCK]		= "batchLock",
	[VMW_BALLOON_CMD_BATCHED_UNLOCK]	= "batchUnlock",
	[VMW_BALLOON_CMD_BATCHED_2M_LOCK]	= "2m-lock",
	[VMW_BALLOON_CMD_BATCHED_2M_UNLOCK]	= "2m-unlock",
	[VMW_BALLOON_CMD_VMCI_DOORBELL_SET]	= "doorbellSet"
};
enum vmballoon_stat_page {
	VMW_BALLOON_PAGE_STAT_ALLOC,
	VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
	VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
	VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
	VMW_BALLOON_PAGE_STAT_FREE,
	VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE
};

#define VMW_BALLOON_PAGE_STAT_NUM	(VMW_BALLOON_PAGE_STAT_LAST + 1)

enum vmballoon_stat_general {
	VMW_BALLOON_STAT_TIMER,
	VMW_BALLOON_STAT_DOORBELL,
	VMW_BALLOON_STAT_RESET,
	VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_RESET
};

#define VMW_BALLOON_STAT_NUM		(VMW_BALLOON_STAT_LAST + 1)

static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);

struct vmballoon_ctl {
	struct list_head pages;
	struct list_head refused_pages;
	unsigned int n_refused_pages;
	unsigned int n_pages;
	enum vmballoon_page_size_type page_size;
	enum vmballoon_op op;
};

struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;
};
/**
 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
 *
 * @status: the status of the operation, which is written by the hypervisor.
 * @reserved: reserved for future use. Must be set to zero.
 * @pfn: the physical frame number of the page to be locked or unlocked.
 */
struct vmballoon_batch_entry {
	u64 status : 5;
	u64 reserved : PAGE_SHIFT - 5;
	u64 pfn : 52;
} __packed;
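
/*
 * Layout note: with 4KB pages (PAGE_SHIFT == 12) the bitfields above add up
 * to 5 + 7 + 52 = 64 bits, so each entry is exactly one u64. The 52-bit PFN
 * sits in the bits that would hold a page-aligned physical address, leaving
 * the low PAGE_SHIFT bits for the hypervisor-written status.
 */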

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/**
	 * @max_page_size: maximum supported page size for ballooning.
	 *
	 * Protected by @conf_sem
	 */
	enum vmballoon_page_size_type max_page_size;

	/**
	 * @size: balloon actual size in basic page size (frames).
	 *
	 * While we currently do not support a size that is bigger than
	 * 32-bit, in preparation for future support, use 64-bits.
	 */
	atomic64_t size;

	/**
	 * @target: balloon target size in basic page size (frames).
	 *
	 * We do not protect the target under the assumption that setting the
	 * value is always done through a single write. If this assumption ever
	 * breaks, we would have to use X_ONCE for accesses, and suffer the less
	 * optimized code. Although we may read stale target value if multiple
	 * accesses happen at once, the performance impact should be minor.
	 */
	unsigned long target;

	/**
	 * @reset_required: reset flag
	 *
	 * Setting this flag may introduce races, but the code is expected to
	 * handle them gracefully. In the worst case, another operation will
	 * fail as reset did not take place. Clearing the flag is done while
	 * holding @conf_sem for write.
	 */
	bool reset_required;

	/**
	 * @capabilities: hypervisor balloon capabilities.
	 *
	 * Protected by @conf_sem.
	 */
	unsigned long capabilities;

	/**
	 * @batch_page: pointer to communication batch page.
	 *
	 * When batching is used, batch_page points to a page, which holds up to
	 * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
	 */
	struct vmballoon_batch_entry *batch_page;

	/**
	 * @batch_max_pages: maximum pages that can be locked/unlocked.
	 *
	 * Indicates the number of pages that the hypervisor can lock or unlock
	 * at once, according to whether batching is enabled. If batching is
	 * disabled, only a single page can be locked/unlocked on each
	 * operation.
	 *
	 * Protected by @conf_sem.
	 */
	unsigned int batch_max_pages;

	/**
	 * @page: page to be locked/unlocked by the hypervisor
	 *
	 * @page is only used when batching is disabled and a single page is
	 * reclaimed on each iteration.
	 *
	 * Protected by @comm_lock.
	 */
	struct page *page;

	/* statistics */
	struct vmballoon_stats *stats;

#ifdef CONFIG_DEBUG_FS
	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct delayed_work dwork;

	/**
	 * @vmci_doorbell: handle of the VMCI doorbell notification.
	 *
	 * Protected by @conf_sem.
	 */
	struct vmci_handle vmci_doorbell;

	/**
	 * @conf_sem: semaphore to protect the configuration and the statistics.
	 */
	struct rw_semaphore conf_sem;

	/**
	 * @comm_lock: lock to protect the communication with the host.
	 *
	 * Lock ordering: @conf_sem -> @comm_lock .
	 */
	spinlock_t comm_lock;
};

static struct vmballoon balloon;

struct vmballoon_stats {
	/* timer / doorbell operations */
	atomic64_t general_stat[VMW_BALLOON_STAT_NUM];

	/* allocation statistics for huge and small pages */
	atomic64_t
	       page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES];

	/* Monitor operations: total operations, and failures */
	atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES];
};

static inline bool is_vmballoon_stats_on(void)
{
	return IS_ENABLED(CONFIG_DEBUG_FS) &&
		static_branch_unlikely(&balloon_stat_enabled);
}

static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op,
					  enum vmballoon_op_stat_type type)
{
	if (is_vmballoon_stats_on())
		atomic64_inc(&b->stats->ops[op][type]);
}

static inline void vmballoon_stats_gen_inc(struct vmballoon *b,
					   enum vmballoon_stat_general stat)
{
	if (is_vmballoon_stats_on())
		atomic64_inc(&b->stats->general_stat[stat]);
}

static inline void vmballoon_stats_gen_add(struct vmballoon *b,
					   enum vmballoon_stat_general stat,
					   unsigned int val)
{
	if (is_vmballoon_stats_on())
		atomic64_add(val, &b->stats->general_stat[stat]);
}

static inline void vmballoon_stats_page_inc(struct vmballoon *b,
					    enum vmballoon_stat_page stat,
					    enum vmballoon_page_size_type size)
{
	if (is_vmballoon_stats_on())
		atomic64_inc(&b->stats->page_stat[stat][size]);
}

static inline void vmballoon_stats_page_add(struct vmballoon *b,
					    enum vmballoon_stat_page stat,
					    enum vmballoon_page_size_type size,
					    unsigned int val)
{
	if (is_vmballoon_stats_on())
		atomic64_add(val, &b->stats->page_stat[stat][size]);
}
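
/*
 * Backdoor call convention used by the inline asm below: the "inl" to port
 * VMW_BALLOON_HV_PORT traps to the hypervisor with the magic number in
 * %eax, the command in %ecx and the two arguments in %ebx and %esi. The
 * hypervisor returns the status in %eax and a command-specific result in
 * %ebx (except for VMW_BALLOON_CMD_START, whose capabilities come back in
 * %ecx, hence the special-casing of @result below).
 */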
static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
		unsigned long arg2, unsigned long *result)
{
	unsigned long status, dummy1, dummy2, dummy3, local_result;

	vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT);

	asm volatile ("inl %%dx" :
		"=a"(status),
		"=c"(dummy1),
		"=d"(dummy2),
		"=b"(local_result),
		"=S"(dummy3) :
		"0"(VMW_BALLOON_HV_MAGIC),
		"1"(cmd),
		"2"(VMW_BALLOON_HV_PORT),
		"3"(arg1),
		"4"(arg2) :
		"memory");

	/* update the result if needed */
	if (result)
		*result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
							   local_result;

	/* update target when applicable */
	if (status == VMW_BALLOON_SUCCESS &&
	    ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
		WRITE_ONCE(b->target, local_result);

	if (status != VMW_BALLOON_SUCCESS &&
	    status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
		vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT);
		pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
			 __func__, vmballoon_cmd_names[cmd], arg1, arg2,
			 status);
	}

	/* mark reset required accordingly */
	if (status == VMW_BALLOON_ERROR_RESET)
		b->reset_required = true;

	return status;
}

static __always_inline unsigned long
vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
	      unsigned long arg2)
{
	unsigned long dummy;

	return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
}

/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities;

	status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
				 &capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		break;
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		break;
	default:
		return -EIO;
	}

	/*
	 * 2MB pages are only supported with batching. If batching is for some
	 * reason disabled, do not use 2MB pages, since otherwise the legacy
	 * mechanism is used with 2MB pages, causing a failure.
	 */
	b->max_page_size = VMW_BALLOON_4K_PAGE;
	if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
	    (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
		b->max_page_size = VMW_BALLOON_2M_PAGE;

	return 0;
}

/**
 * vmballoon_send_guest_id - communicate guest type to the host.
 *
 * @b: pointer to the balloon.
 *
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 *
 * Return: zero on success or appropriate error code.
 */
static int vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
			       VMW_BALLOON_GUEST_ID, 0);

	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/**
 * vmballoon_page_order() - return the order of the page
 * @page_size: the size of the page.
 *
 * Return: the allocation order.
 */
static inline
unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size)
{
	return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0;
}

/**
 * vmballoon_page_in_frames() - returns the number of frames in a page.
 * @page_size: the size of the page.
 *
 * Return: the number of 4k frames.
 */
static inline unsigned int
vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
{
	return 1 << vmballoon_page_order(page_size);
}

/**
 * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
 *
 * @b: pointer to the balloon.
 *
 * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required
 * by the host-guest protocol and EIO if an error occurred in communicating with
 * the host.
 */
static int vmballoon_send_get_target(struct vmballoon *b)
{
	unsigned long status;
	unsigned long limit;

	limit = totalram_pages;

	/* Ensure limit fits in 32-bits */
	if (limit != (u32)limit)
		return -EINVAL;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);

	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
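
/*
 * Note: the current number of RAM pages is passed as the command argument,
 * which presumably lets the host bound the target by the guest's memory
 * size. Since VMW_BALLOON_CMD_GET_TARGET is part of
 * VMW_BALLOON_CMD_WITH_TARGET_MASK, the returned target is stored into
 * b->target by __vmballoon_cmd() itself.
 */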

/**
 * vmballoon_alloc_page_list - allocates a list of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 * @req_n_pages: the number of requested pages.
 *
 * Tries to allocate @req_n_pages. Adds them to the list of balloon pages in
 * @ctl.pages and updates @ctl.n_pages to reflect the number of pages.
 *
 * Return: zero on success or error code otherwise.
 */
static int vmballoon_alloc_page_list(struct vmballoon *b,
				     struct vmballoon_ctl *ctl,
				     unsigned int req_n_pages)
{
	struct page *page;
	unsigned int i;

	for (i = 0; i < req_n_pages; i++) {
		if (ctl->page_size == VMW_BALLOON_2M_PAGE)
			page = alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS,
					   VMW_BALLOON_2M_ORDER);
		else
			page = alloc_page(VMW_PAGE_ALLOC_FLAGS);

		/* Update statistics */
		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
					 ctl->page_size);

		if (page) {
			/* Success. Add the page to the list and continue. */
			list_add(&page->lru, &ctl->pages);
			continue;
		}

		/* Allocation failed. Update statistics and stop. */
		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
					 ctl->page_size);
		break;
	}

	ctl->n_pages = i;

	return req_n_pages == ctl->n_pages ? 0 : -ENOMEM;
}

/**
 * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
 *
 * @b: pointer for %struct vmballoon.
 * @page: pointer for the page whose result should be handled.
 * @page_size: size of the page.
 * @status: status of the operation as provided by the hypervisor.
 *
 * Return: zero on success, -EIO if the hypervisor refused the page.
 */
static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
				       enum vmballoon_page_size_type page_size,
				       unsigned long status)
{
	/* On success do nothing. The page is already on the balloon list. */
	if (likely(status == VMW_BALLOON_SUCCESS))
		return 0;

	pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
		 page_to_pfn(page), status,
		 vmballoon_page_size_names[page_size]);

	/* Error occurred */
	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
				 page_size);

	return -EIO;
}

/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation for
 * an individual page. Provides the page that the operation was performed on in
 * the @p argument.
 *
 * Returns: The status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
					   struct page **p)
{
	if (static_branch_likely(&vmw_balloon_batching)) {
		/* batching mode */
		*p = pfn_to_page(b->batch_page[idx].pfn);
		return b->batch_page[idx].status;
	}

	/* non-batching mode */
	*p = b->page;

	/*
	 * If a failure occurs, the indication will be provided in the status
	 * of the entire operation, which is considered before the individual
	 * page status. So for non-batching mode, the indication is always of
	 * success.
	 */
	return VMW_BALLOON_SUCCESS;
}

/**
 * vmballoon_lock_op - notifies the host about inflated/deflated pages.
 * @b: pointer to the balloon.
 * @num_pages: number of inflated/deflated pages.
 * @page_size: size of the page.
 * @op: the type of operation (lock or unlock).
 *
 * Notify the host about page(s) that were ballooned (or removed from the
 * balloon) so that host can use it without fear that guest will need it (or
 * stop using them since the VM does). Host may reject some pages, we need to
 * check the return value and maybe submit a different page. The pages that are
 * inflated/deflated are pointed to by @b->page.
 *
 * Return: result as provided by the hypervisor.
 */
static unsigned long vmballoon_lock_op(struct vmballoon *b,
				       unsigned int num_pages,
				       enum vmballoon_page_size_type page_size,
				       enum vmballoon_op op)
{
	unsigned long cmd, pfn;

	lockdep_assert_held(&b->comm_lock);

	if (static_branch_likely(&vmw_balloon_batching)) {
		if (op == VMW_BALLOON_INFLATE)
			cmd = page_size == VMW_BALLOON_2M_PAGE ?
				VMW_BALLOON_CMD_BATCHED_2M_LOCK :
				VMW_BALLOON_CMD_BATCHED_LOCK;
		else
			cmd = page_size == VMW_BALLOON_2M_PAGE ?
				VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
				VMW_BALLOON_CMD_BATCHED_UNLOCK;

		pfn = PHYS_PFN(virt_to_phys(b->batch_page));
	} else {
		cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK :
						  VMW_BALLOON_CMD_UNLOCK;
		pfn = page_to_pfn(b->page);

		/* In non-batching mode, PFNs must fit in 32-bit */
		if (unlikely(pfn != (u32)pfn))
			return VMW_BALLOON_ERROR_PPN_INVALID;
	}

	return vmballoon_cmd(b, cmd, pfn, num_pages);
}
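
/*
 * In batching mode the single PFN passed above is that of the communication
 * batch page itself: the hypervisor reads @num_pages struct
 * vmballoon_batch_entry items from it and writes the per-page status back
 * into each entry (later read by vmballoon_status_page()).
 */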

/**
 * vmballoon_add_page - adds a page towards lock/unlock operation.
 *
 * @b: pointer to the balloon.
 * @idx: index of the page to be ballooned in this batch.
 * @p: pointer to the page that is about to be ballooned.
 *
 * Adds the page to be ballooned. Must be called while holding @comm_lock.
 */
static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
			       struct page *p)
{
	lockdep_assert_held(&b->comm_lock);

	if (static_branch_likely(&vmw_balloon_batching))
		b->batch_page[idx] = (struct vmballoon_batch_entry)
					{ .pfn = page_to_pfn(p) };
	else
		b->page = p;
}

/**
 * vmballoon_lock - lock or unlock a batch of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 *
 * Notifies the host about ballooned pages (after inflation or deflation,
 * according to @ctl). If the host rejects the page put it on the
 * @ctl refuse list. These refused pages are then released when moving to the
 * next size of pages.
 *
 * Note that we neither free any @page here nor put them back on the ballooned
 * pages list. Instead we queue them for later processing. We do that for
 * several reasons. First, we do not want to free the page under the lock.
 * Second, it allows us to unify the handling of lock and unlock. In the
 * inflate case, the caller will check if there are too many refused pages and
 * release them. Although it is not identical to the past behavior, it should
 * not affect performance.
 *
 * Return: zero on success, -EIO if the batch operation failed.
 */
static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
{
	unsigned long batch_status;
	struct page *page;
	unsigned int i, num_pages;

	num_pages = ctl->n_pages;
	if (num_pages == 0)
		return 0;

	/* communication with the host is done under the communication lock */
	spin_lock(&b->comm_lock);

	i = 0;
	list_for_each_entry(page, &ctl->pages, lru)
		vmballoon_add_page(b, i++, page);

	batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size,
					 ctl->op);

	/*
	 * Iterate over the pages in the provided list. Since we are changing
	 * @ctl->n_pages we are saving the original value in @num_pages and
	 * use this value to bound the loop.
	 */
	for (i = 0; i < num_pages; i++) {
		unsigned long status;

		status = vmballoon_status_page(b, i, &page);

		/*
		 * Failure of the whole batch overrides the result of a
		 * single operation.
		 */
		if (batch_status != VMW_BALLOON_SUCCESS)
			status = batch_status;

		/* Continue if no error happened */
		if (!vmballoon_handle_one_result(b, page, ctl->page_size,
						 status))
			continue;

		/*
		 * Error happened. Move the page to the refused list and
		 * update the counters.
		 */
		list_move(&page->lru, &ctl->refused_pages);
		ctl->n_pages--;
		ctl->n_refused_pages++;
	}

	spin_unlock(&b->comm_lock);

	return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/**
 * vmballoon_release_page_list() - Releases a page list
 *
 * @page_list: list of pages to release.
 * @n_pages: pointer to the number of pages.
 * @page_size: whether the pages in the list are 2MB (or else 4KB).
 *
 * Releases the list of pages and zeros the number of pages.
 */
static void vmballoon_release_page_list(struct list_head *page_list,
				       int *n_pages,
				       enum vmballoon_page_size_type page_size)
{
	struct page *page, *tmp;

	list_for_each_entry_safe(page, tmp, page_list, lru) {
		list_del(&page->lru);
		__free_pages(page, vmballoon_page_order(page_size));
	}

	*n_pages = 0;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
					    struct vmballoon_ctl *ctl)
{
	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
				 ctl->page_size);

	vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages,
				    ctl->page_size);
}

/**
 * vmballoon_change - retrieve the required balloon change
 *
 * @b: pointer for the balloon.
 *
 * Return: the required change for the balloon size. A positive number
 * indicates inflation, a negative number indicates a deflation.
 */
static int64_t vmballoon_change(struct vmballoon *b)
{
	int64_t size, target;

	size = atomic64_read(&b->size);
	target = READ_ONCE(b->target);

	/*
	 * We must cast first because of int sizes
	 * Otherwise we might get huge positives instead of negatives
	 */

	if (b->reset_required)
		return 0;

	/* consider a 2MB slack on deflate, unless the balloon is emptied */
	if (target < size && target != 0 &&
	    size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
		return 0;

	return target - size;
}
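
/*
 * Worked example of the slack rule above: a 2MB page spans 512 4KB frames,
 * so with size == 10240 and target == 10000 the 240-frame difference is
 * within the slack and no deflation happens (returns 0). A target of zero
 * bypasses the slack check and returns -10240, i.e. a full deflate.
 */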

/**
 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
 *
 * @b: pointer to balloon.
 * @pages: list of pages to enqueue.
 * @n_pages: pointer to number of pages in list. The value is zeroed.
 * @page_size: whether the pages are 2MB or 4KB pages.
 *
 * Enqueues the provided list of pages in the ballooned page list, clears the
 * list and zeroes the number of pages that was provided.
 */
static void vmballoon_enqueue_page_list(struct vmballoon *b,
					struct list_head *pages,
					unsigned int *n_pages,
					enum vmballoon_page_size_type page_size)
{
	struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];

	list_splice_init(pages, &page_size_info->pages);
	*n_pages = 0;
}

/**
 * vmballoon_dequeue_page_list() - Dequeues page lists for deflation.
 *
 * @b: pointer to balloon.
 * @pages: list to which the dequeued pages are added.
 * @n_pages: pointer to number of pages in list. The value is zeroed.
 * @page_size: whether the pages are 2MB or 4KB pages.
 * @n_req_pages: the number of requested pages.
 *
 * Dequeues the number of requested pages from the balloon for deflation. The
 * number of dequeued pages may be lower, if not enough pages in the requested
 * size are available.
 */
static void vmballoon_dequeue_page_list(struct vmballoon *b,
					struct list_head *pages,
					unsigned int *n_pages,
					enum vmballoon_page_size_type page_size,
					unsigned int n_req_pages)
{
	struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];
	struct page *page, *tmp;
	unsigned int i = 0;

	list_for_each_entry_safe(page, tmp, &page_size_info->pages, lru) {
		list_move(&page->lru, pages);
		if (++i == n_req_pages)
			break;
	}

	*n_pages = i;
}
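
/*
 * Inflation strategy, in short: each loop iteration below inflates at most
 * @batch_max_pages pages of the current size, starting from the largest
 * supported page size (2MB when available). When huge-page allocation fails
 * or the host refuses more than VMW_BALLOON_MAX_REFUSED pages, the loop
 * falls back to 4KB pages and keeps going until the target is reached.
 */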
/**
 * vmballoon_inflate() - Inflate the balloon towards its target size.
 *
 * @b: pointer to the balloon.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
	int64_t to_inflate_frames;
	struct vmballoon_ctl ctl = {
		.pages = LIST_HEAD_INIT(ctl.pages),
		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
		.page_size = b->max_page_size,
		.op = VMW_BALLOON_INFLATE
	};

	while ((to_inflate_frames = vmballoon_change(b)) > 0) {
		unsigned int to_inflate_pages, page_in_frames;
		int alloc_error, lock_error = 0;

		VM_BUG_ON(!list_empty(&ctl.pages));
		VM_BUG_ON(ctl.n_pages != 0);

		page_in_frames = vmballoon_page_in_frames(ctl.page_size);

		to_inflate_pages = min_t(unsigned long, b->batch_max_pages,
					 DIV_ROUND_UP_ULL(to_inflate_frames,
							  page_in_frames));

		/* Start by allocating */
		alloc_error = vmballoon_alloc_page_list(b, &ctl,
							to_inflate_pages);

		/* Actually lock the pages by telling the hypervisor */
		lock_error = vmballoon_lock(b, &ctl);

		/*
		 * If an error indicates that something serious went wrong,
		 * stop the inflation.
		 */
		if (lock_error)
			break;

		/* Update the balloon size */
		atomic64_add(ctl.n_pages * page_in_frames, &b->size);

		vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages,
					    ctl.page_size);

		/*
		 * If allocation failed or the number of refused pages exceeds
		 * the maximum allowed, move to the next page size.
		 */
		if (alloc_error ||
		    ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) {
			if (ctl.page_size == VMW_BALLOON_4K_PAGE)
				break;

			/*
			 * Ignore errors from locking as we now switch to 4k
			 * pages and we might get different errors.
			 */
			vmballoon_release_refused_pages(b, &ctl);
			ctl.page_size--;
		}

		cond_resched();
	}

	/*
	 * Release pages that were allocated while attempting to inflate the
	 * balloon but were refused by the host for one reason or another,
	 * and update the statistics.
	 */
	if (ctl.n_refused_pages != 0)
		vmballoon_release_refused_pages(b, &ctl);
}

/**
 * vmballoon_deflate() - Decrease the size of the balloon.
 *
 * @b: pointer to the balloon
 * @n_frames: the number of frames to deflate. If zero, automatically
 * calculated according to the target size.
 * @coordinated: whether to coordinate with the host
 *
 * Decrease the size of the balloon allowing guest to use more memory.
 *
 * Return: The number of deflated frames (i.e., basic page size units)
 */
static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
				       bool coordinated)
{
	unsigned long deflated_frames = 0;
	unsigned long tried_frames = 0;
	struct vmballoon_ctl ctl = {
		.pages = LIST_HEAD_INIT(ctl.pages),
		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
		.page_size = VMW_BALLOON_4K_PAGE,
		.op = VMW_BALLOON_DEFLATE
	};

	/* free pages to reach target */
	while (true) {
		unsigned int to_deflate_pages, n_unlocked_frames;
		unsigned int page_in_frames;
		int64_t to_deflate_frames;
		bool deflated_all;

		page_in_frames = vmballoon_page_in_frames(ctl.page_size);

		VM_BUG_ON(!list_empty(&ctl.pages));
		VM_BUG_ON(ctl.n_pages);
		VM_BUG_ON(!list_empty(&ctl.refused_pages));
		VM_BUG_ON(ctl.n_refused_pages);

		/*
		 * If we were requested a specific number of frames, we try to
		 * deflate this number of frames. Otherwise, deflation is
		 * performed according to the target and balloon size.
		 */
		to_deflate_frames = n_frames ? n_frames - tried_frames :
					       -vmballoon_change(b);

		/* break if no work to do */
		if (to_deflate_frames <= 0)
			break;

		/*
		 * Calculate the number of frames based on current page size,
		 * but limit the deflated frames to a single chunk
		 */
		to_deflate_pages = min_t(unsigned long, b->batch_max_pages,
					 DIV_ROUND_UP_ULL(to_deflate_frames,
							  page_in_frames));

		/* First take the pages from the balloon pages. */
		vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages,
					    ctl.page_size, to_deflate_pages);

		/*
		 * Before the pages are moved to the refused list, count their
		 * frames as frames that we tried to deflate.
		 */
		tried_frames += ctl.n_pages * page_in_frames;

		/*
		 * Unlock the pages by communicating with the hypervisor if the
		 * communication is coordinated (i.e., not pop). We ignore the
		 * return code. Instead we check whether we managed to unlock
		 * all the pages. If we failed, we will move to the next page
		 * size, and would eventually try again later.
		 */
		if (coordinated)
			vmballoon_lock(b, &ctl);

		/*
		 * Check if we deflated enough. We will move to the next page
		 * size if we did not manage to do so. This calculation takes
		 * place now, as once the pages are released, the number of
		 * pages is zeroed.
		 */
		deflated_all = (ctl.n_pages == to_deflate_pages);

		/* Update local and global counters */
		n_unlocked_frames = ctl.n_pages * page_in_frames;
		atomic64_sub(n_unlocked_frames, &b->size);
		deflated_frames += n_unlocked_frames;

		vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE,
					 ctl.page_size, ctl.n_pages);

		/* free the ballooned pages */
		vmballoon_release_page_list(&ctl.pages, &ctl.n_pages,
					    ctl.page_size);

		/* Return the refused pages to the ballooned list. */
		vmballoon_enqueue_page_list(b, &ctl.refused_pages,
					    &ctl.n_refused_pages,
					    ctl.page_size);

		/* If we failed to unlock all the pages, move to next size. */
		if (!deflated_all) {
			if (ctl.page_size == b->max_page_size)
				break;
			ctl.page_size++;
		}

		cond_resched();
	}

	return deflated_frames;
}
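
/*
 * Deflation note: unlike inflation, deflation starts from the 4KB list and
 * only moves up to the 2MB list when a batch cannot be fully unlocked. When
 * invoked from vmballoon_pop() (@coordinated == false), the hypervisor is
 * not notified at all, which is the intended behavior after the host has
 * requested a reset.
 */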

/**
 * vmballoon_deinit_batching - disables batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Disables batching, by deallocating the page for communication with the
 * hypervisor and disabling the static key to indicate that batching is off.
 */
static void vmballoon_deinit_batching(struct vmballoon *b)
{
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
	static_branch_disable(&vmw_balloon_batching);
	b->batch_max_pages = 1;
}

/**
 * vmballoon_init_batching - enable batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Enables batching, by allocating a page for communication with the hypervisor
 * and enabling the static_key to use batching.
 *
 * Return: zero on success or an appropriate error-code.
 */
static int vmballoon_init_batching(struct vmballoon *b)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return -ENOMEM;

	b->batch_page = page_address(page);
	b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);

	static_branch_enable(&vmw_balloon_batching);

	return 0;
}
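
/*
 * With 4KB pages and 8-byte batch entries, batch_max_pages computes to
 * 4096 / 8 = 512, which matches the "up to 512" limit mentioned in the
 * batched-command documentation above.
 */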

/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL);

	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
		      VMCI_INVALID_ID, VMCI_INVALID_ID);

	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
		vmci_doorbell_destroy(b->vmci_doorbell);
		b->vmci_doorbell = VMCI_INVALID_HANDLE;
	}
}

/**
 * vmballoon_vmci_init - Initialize vmci doorbell.
 *
 * @b: pointer to the balloon.
 *
 * Return: zero on success or when wakeup command not supported. Error-code
 * otherwise.
 *
 * Initialize vmci doorbell, to get notified as soon as balloon changes.
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	unsigned long error;

	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
		return 0;

	error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
				     VMCI_PRIVILEGE_FLAG_RESTRICTED,
				     vmballoon_doorbell, b);

	if (error != VMCI_SUCCESS)
		goto fail;

	error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
				b->vmci_doorbell.context,
				b->vmci_doorbell.resource, NULL);

	if (error != VMW_BALLOON_SUCCESS)
		goto fail;

	return 0;
fail:
	vmballoon_vmci_cleanup(b);
	return -EIO;
}

/**
 * vmballoon_pop - Quickly release all pages allocated for the balloon.
 *
 * @b: pointer to the balloon.
 *
 * This function is called when host decides to "reset" balloon for one reason
 * or another. Unlike normal "deflate" we do not (shall not) notify host of the
 * pages being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	unsigned long size;

	while ((size = atomic64_read(&b->size)))
		vmballoon_deflate(b, size, false);
}

/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	down_write(&b->conf_sem);

	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	/*
	 * Release the semaphore on every exit path; the early returns in
	 * the error cases below used to leak @conf_sem held for write.
	 */
	if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		goto unlock;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		if (vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			goto unlock;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		vmballoon_deinit_batching(b);
	}

	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET);
	b->reset_required = false;

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

	if (vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");

unlock:
	up_write(&b->conf_sem);
}

/**
 * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
 *
 * @work: pointer to the &work_struct which is provided by the workqueue.
 *
 * Resets the protocol if needed, gets the new size and adjusts balloon as
 * needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	int64_t change = 0;

	if (b->reset_required)
		vmballoon_reset(b);

	down_read(&b->conf_sem);

	/*
	 * Update the stats while holding the semaphore to ensure that
	 * @stats_enabled is consistent with whether the stats are actually
	 * enabled
	 */
	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER);

	if (!vmballoon_send_get_target(b))
		change = vmballoon_change(b);

	if (change != 0) {
		pr_debug("%s - size: %llu, target %lu\n", __func__,
			 atomic64_read(&b->size), READ_ONCE(b->target));

		if (change > 0)
			vmballoon_inflate(b);
		else  /* (change < 0) */
			vmballoon_deflate(b, 0, true);
	}

	up_read(&b->conf_sem);

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static const char * const vmballoon_stat_page_names[] = {
	[VMW_BALLOON_PAGE_STAT_ALLOC]		= "alloc",
	[VMW_BALLOON_PAGE_STAT_ALLOC_FAIL]	= "allocFail",
	[VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC]	= "errAlloc",
	[VMW_BALLOON_PAGE_STAT_REFUSED_FREE]	= "errFree",
	[VMW_BALLOON_PAGE_STAT_FREE]		= "free"
};

static const char * const vmballoon_stat_names[] = {
	[VMW_BALLOON_STAT_TIMER]		= "timer",
	[VMW_BALLOON_STAT_DOORBELL]		= "doorbell",
	[VMW_BALLOON_STAT_RESET]		= "reset",
};

static int vmballoon_enable_stats(struct vmballoon *b)
{
	int r = 0;

	down_write(&b->conf_sem);

	/* did we somehow race with another reader which enabled stats? */
	if (b->stats)
		goto out;

	b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL);

	if (!b->stats) {
		/* allocation failed */
		r = -ENOMEM;
		goto out;
	}
	static_key_enable(&balloon_stat_enabled.key);
out:
	up_write(&b->conf_sem);
	return r;
}

/**
 * vmballoon_debug_show - shows statistics of balloon operations.
 * @f: pointer to the &struct seq_file.
 * @offset: ignored.
 *
 * Provides the statistics that can be accessed in vmmemctl in the debugfs.
 * To avoid the overhead - mainly that of memory - of collecting the statistics,
 * we only collect statistics after the first time the counters are read.
 *
 * Return: zero on success or an error code.
 */
static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	int i, j;

	/* enables stats if they are disabled */
	if (!b->stats) {
		int r = vmballoon_enable_stats(b);

		if (r)
			return r;
	}

	/* format capabilities info */
	seq_printf(f, "%-22s: %#16x\n", "balloon capabilities",
		   VMW_BALLOON_CAPABILITIES);
	seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities);
	seq_printf(f, "%-22s: %16s\n", "is resetting",
		   b->reset_required ? "y" : "n");

	/* format size info */
	seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target));
	seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size));

	for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
		if (vmballoon_cmd_names[i] == NULL)
			continue;

		seq_printf(f, "%-22s: %16llu (%llu failed)\n",
			   vmballoon_cmd_names[i],
			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]),
			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT]));
	}

	for (i = 0; i < VMW_BALLOON_STAT_NUM; i++)
		seq_printf(f, "%-22s: %16llu\n",
			   vmballoon_stat_names[i],
			   atomic64_read(&b->stats->general_stat[i]));

	for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) {
		for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++)
			seq_printf(f, "%-18s(%s): %16llu\n",
				   vmballoon_stat_page_names[i],
				   vmballoon_page_size_names[j],
				   atomic64_read(&b->stats->page_stat[i][j]));
	}

	return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
	.owner		= THIS_MODULE,
	.open		= vmballoon_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	static_key_disable(&balloon_stat_enabled.key);
	debugfs_remove(b->dbg_entry);
	kfree(b->stats);
	b->stats = NULL;
}

#else

static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */

static int __init vmballoon_init(void)
{
	enum vmballoon_page_size_type page_size;
	int error;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	for (page_size = VMW_BALLOON_4K_PAGE;
	     page_size <= VMW_BALLOON_LAST_SIZE; page_size++)
		INIT_LIST_HEAD(&balloon.page_sizes[page_size].pages);

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	spin_lock_init(&balloon.comm_lock);
	init_rwsem(&balloon.conf_sem);
	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);

static void __exit vmballoon_exit(void)
{
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);