channel_mgmt.c

/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);

static const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	},

	/* Synthetic MOUSE */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	},

	/* File copy */
	{ .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	},

	/* Unknown GUID */
	{ .dev_type = HV_UNKOWN,
	  .perf_device = false,
	},
};
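
/*
 * Map a channel offer's interface-type GUID to one of the HV_* device types
 * above by scanning vmbus_devs[]; GUIDs with no match fall through to
 * HV_UNKOWN.
 */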
static u16 hv_get_dev_type(const uuid_le *guid)
{
	u16 i;

	for (i = HV_IDE; i < HV_UNKOWN; i++) {
		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the framework version that
 * we can support and srv_version specifies the service
 * version we can support.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
				struct icmsg_negotiate *negop, u8 *buf,
				int fw_version, int srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i;
	bool found_match = false;

	icmsghdrp->icmsgsize = 0x10;
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);

	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */
	for (i = 0; i < negop->icframe_vercnt; i++) {
		if ((negop->icversion_data[i].major == fw_major) &&
		    (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = negop->icframe_vercnt;
		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
		if ((negop->icversion_data[i].major == srv_major) &&
		    (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */
fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
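
/*
 * Example (a sketch, not from this file): a util-style driver's channel
 * callback would typically negotiate versions along these lines, where
 * UTIL_FW_VERSION and SRV_VERSION are placeholder constants encoding
 * (major << 16) | minor:
 *
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *		vmbus_prep_negotiate_resp(icmsghdrp, NULL, recv_buffer,
 *					  UTIL_FW_VERSION, SRV_VERSION);
 */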

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	static atomic_t chan_num = ATOMIC_INIT(0);
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	channel->id = atomic_inc_return(&chan_num);
	channel->acquire_ring_lock = true;
	spin_lock_init(&channel->inbound_lock);
	spin_lock_init(&channel->lock);

	INIT_LIST_HEAD(&channel->sc_list);
	INIT_LIST_HEAD(&channel->percpu_list);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}
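
/*
 * percpu_channel_enq()/percpu_channel_deq() modify a per-cpu list without
 * locking, so they must run on the CPU that owns the list; callers below
 * arrange this via smp_call_function_single() when on a different CPU.
 */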
static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *channel = arg;
	int cpu = smp_processor_id();

	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}

static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *channel = arg;

	list_del(&channel->percpu_list);
}
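
/*
 * vmbus_release_relid - Notify the host that the given relid is no longer
 * in use, so it may be reassigned to a future channel offer.
 */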
static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
}

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	unsigned long flags;
	struct vmbus_channel *primary_channel;

	vmbus_release_relid(relid);

	BUG_ON(!channel->rescind);
	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));

	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}

	if (channel->primary_channel == NULL) {
		list_del(&channel->listentry);

		primary_channel = channel;
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}

	/*
	 * We need to free the bit for init_vp_index() to work in the case
	 * of sub-channel, when we reload drivers like hv_netvsc.
	 */
	cpumask_clear_cpu(channel->target_cpu,
			  &primary_channel->alloced_cpus_in_node);

	free_channel(channel);
}
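
/*
 * vmbus_free_channels - Rescind and unregister every known channel's device;
 * used when the vmbus connection is being torn down.
 */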
void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;
	u16 dev_type;
	int ret;

	/* Make sure this is a new offer */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	mutex_unlock(&vmbus_connection.channel_mutex);

	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else
			goto err_free_chan;
	}

	dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);

	init_vp_index(newchannel, dev_type);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can clean up properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver.
	 * We need to set the DeviceObject field before calling
	 * vmbus_child_dev_add().
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = dev_type;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	ret = vmbus_device_register(newchannel->device_obj);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

err_deq_chan:
	vmbus_release_relid(newchannel->offermsg.child_relid);

	mutex_lock(&vmbus_connection.channel_mutex);
	list_del(&newchannel->listentry);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

err_free_chan:
	free_channel(newchannel);
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to a VCPU.
 * We do this in a hierarchical fashion:
 * first distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
 */
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
	u32 cur_cpu;
	bool perf_chn = vmbus_devs[dev_type].perf_device;
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	struct cpumask available_mask;
	struct cpumask *alloced_mask;

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->numa_node = 0;
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * We distribute primary channels evenly across all the available
	 * NUMA nodes and within the assigned NUMA node we will assign the
	 * first available CPU to the primary channel.
	 * The sub-channels will be assigned to the CPUs available in the
	 * NUMA node evenly.
	 */
	if (!primary) {
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids)
				next_node = next_numa_node_id = 0;
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		primary = channel;
	}
	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

	if (cpumask_weight(alloced_mask) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(alloced_mask);
	}

	cpumask_xor(&available_mask, alloced_mask,
		    cpumask_of_node(primary->numa_node));

	cur_cpu = -1;

	/*
	 * Normally Hyper-V host doesn't create more subchannels than there
	 * are VCPUs on the node but it is possible when not all present VCPUs
	 * on the node are initialized by guest. Clear the alloced_cpus_in_node
	 * to start over.
	 */
	if (cpumask_equal(&primary->alloced_cpus_in_node,
			  cpumask_of_node(primary->numa_node)))
		cpumask_clear(&primary->alloced_cpus_in_node);

	while (true) {
		cur_cpu = cpumask_next(cur_cpu, &available_mask);
		if (cur_cpu >= nr_cpu_ids) {
			cur_cpu = -1;
			cpumask_copy(&available_mask,
				     cpumask_of_node(primary->numa_node));
			continue;
		}

		/*
		 * NOTE: in the case of sub-channel, we clear the sub-channel
		 * related bit(s) in primary->alloced_cpus_in_node in
		 * hv_process_channel_removal(), so when we reload drivers
		 * like hv_netvsc in SMP guest, here we're able to re-allocate
		 * bit from primary->alloced_cpus_in_node.
		 */
		if (!cpumask_test_cpu(cur_cpu,
				      &primary->alloced_cpus_in_node)) {
			cpumask_set_cpu(cur_cpu,
					&primary->alloced_cpus_in_node);
			cpumask_set_cpu(cur_cpu, alloced_mask);
			break;
		}
	}

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}

static void vmbus_wait_for_unload(void)
{
	int cpu;
	void *page_addr;
	struct hv_message *msg;
	struct vmbus_channel_message_header *hdr;
	u32 message_type;

	/*
	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
	 * used for initial contact or to CPU0 depending on host version. When
	 * we're crashing on a different CPU let's hope that the IRQ handler on
	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
	 * functional and vmbus_unload_response() will complete
	 * vmbus_connection.unload_event. If not, the last thing we can do is
	 * read message pages for all CPUs directly.
	 */
	while (1) {
		if (completion_done(&vmbus_connection.unload_event))
			break;

		for_each_online_cpu(cpu) {
			page_addr = hv_context.synic_message_page[cpu];
			msg = (struct hv_message *)page_addr +
				VMBUS_MESSAGE_SINT;

			message_type = READ_ONCE(msg->header.message_type);
			if (message_type == HVMSG_NONE)
				continue;

			hdr = (struct vmbus_channel_message_header *)
				msg->u.payload;

			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
				complete(&vmbus_connection.unload_event);

			vmbus_signal_eom(msg, message_type);
		}

		mdelay(10);
	}

	/*
	 * We're crashing and already got the UNLOAD_RESPONSE; clean up all
	 * maybe-pending messages on all CPUs to be able to receive new
	 * messages after we reconnect.
	 */
	for_each_online_cpu(cpu) {
		page_addr = hv_context.synic_message_page[cpu];
		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
		msg->header.message_type = HVMSG_NONE;
	}
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wake up the waiting thread.
	 * Once we successfully unload, we can clean up the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}
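
/*
 * vmbus_initiate_unload - Send CHANNELMSG_UNLOAD to the host and wait for
 * its CHANNELMSG_UNLOAD_RESPONSE, polling the message pages directly on the
 * crash path where we cannot sleep.
 */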
void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));

	/*
	 * vmbus_initiate_unload() is also called on crash, and the crash can
	 * happen in interrupt context, where scheduling is impossible.
	 */
	if (!crash)
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * By default we set up state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

	/*
	 * Set up state for signalling the host.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;
	struct device *dev;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);

	if (channel == NULL) {
		/*
		 * This should never happen: in vmbus_process_offer() we have
		 * already invoked vmbus_release_relid() on error.
		 */
		goto out;
	}

	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			channel->chn_rescind_callback(channel);
			goto out;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
					   channel->offermsg.child_relid);
	}
out:
	mutex_unlock(&vmbus_connection.channel_mutex);
}
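
/*
 * vmbus_hvsock_device_unregister - Mark an hv_sock channel as rescinded and
 * unregister its device; the guest-initiated counterpart of the rescind
 * path above.
 */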
void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	mutex_lock(&vmbus_connection.channel_mutex);

	BUG_ON(!is_hvsock_channel(channel));

	channel->rescind = true;
	vmbus_device_unregister(channel->device_obj);

	mutex_unlock(&vmbus_connection.channel_mutex);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			       version_response,
			       sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}
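
/*
 * A note on the table below (a sketch of the convention, which relies on
 * the handler_type field defined in hyperv_vmbus.h): entries whose second
 * field is 1 have their handler invoked directly in message-dispatch
 * (interrupt) context and must not sleep, while entries marked 0 are
 * deferred to the connection work queue, where the handler may block.
 */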
/* Channel message dispatch table */
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
	{CHANNELMSG_18,				0, NULL},
	{CHANNELMSG_19,				0, NULL},
	{CHANNELMSG_20,				0, NULL},
	{CHANNELMSG_TL_CONNECT_REQUEST,		0, NULL},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
	struct hv_message *msg = context;
	struct vmbus_channel_message_header *hdr;
	int size;

	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;

	if (hdr->msgtype >= CHANNELMSG_COUNT) {
		pr_err("Received invalid channel message type %d size %d\n",
			   hdr->msgtype, size);
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
				     (unsigned char *)msg->u.payload, size);
		return;
	}

	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
	else
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kmalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg,
			     sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
		pr_err("Unable to request offers - %d\n", ret);
		goto cleanup;
	}

cleanup:
	kfree(msginfo);

	return ret;
}

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
	int cur_cpu;
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
	int next_channel;
	int i = 1;

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

	next_channel = primary->next_oc++;

	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

		if (i == next_channel)
			return cur_channel;

		i++;
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

		primary_channel->sc_creation_callback(cur_channel);
	}
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
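
/*
 * Example (a sketch with a hypothetical driver callback): a driver that
 * requests sub-channels registers its callback on the primary channel
 * before issuing the request, e.g.
 *
 *	vmbus_set_sc_create_callback(primary, my_driver_sc_open);
 *
 * so that each new sub-channel offer is handed to my_driver_sc_open()
 * from vmbus_process_offer().
 */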
bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
		void (*chn_rescind_cb)(struct vmbus_channel *))
{
	channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);