/* hyperv_transport.c */
  1. /*
  2. * Hyper-V transport for vsock
  3. *
  4. * Hyper-V Sockets supplies a byte-stream based communication mechanism
  5. * between the host and the VM. This driver implements the necessary
  6. * support in the VM by introducing the new vsock transport.
  7. *
  8. * Copyright (c) 2017, Microsoft Corporation.
  9. *
  10. * This program is free software; you can redistribute it and/or modify it
  11. * under the terms and conditions of the GNU General Public License,
  12. * version 2, as published by the Free Software Foundation.
  13. *
  14. * This program is distributed in the hope it will be useful, but WITHOUT
  15. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  17. * more details.
  18. *
  19. */
  20. #include <linux/module.h>
  21. #include <linux/vmalloc.h>
  22. #include <linux/hyperv.h>
  23. #include <net/sock.h>
  24. #include <net/af_vsock.h>
/* The host side's design of the feature requires 6 exact 4KB pages for
 * recv/send rings respectively -- this is suboptimal considering memory
 * consumption, however unluckily we have to live with it, before the
 * host comes up with a better design in the future.
 */
#define PAGE_SIZE_4K 4096
#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)

/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE (1024 * 16)

/* How long to wait for graceful shutdown of a connection */
#define HVS_CLOSE_TIMEOUT (8 * HZ)

/* Per-payload header on the VMBus "pipe" carrying hv_sock data. */
struct vmpipe_proto_header {
	u32 pkt_type;	/* always 1 for data here: see hvs_send_data() */
	u32 data_size;	/* number of payload bytes that follow */
};
/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
 * data from the ringbuffer into the userspace buffer.
 */
struct hvs_recv_buf {
	/* The header before the payload data */
	struct vmpipe_proto_header hdr;

	/* The payload; HVS_MTU_SIZE is the most the host may send at once */
	u8 data[HVS_MTU_SIZE];
};
/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
 * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
 * buffer, because tests show there is no significant performance difference.
 *
 * Note: the buffer can be eliminated in the future when we add new VMBus
 * ringbuffer APIs that allow us to directly copy data from userspace buffer
 * to VMBus ringbuffer.
 */
#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))

/* Staging buffer for sends; sized so the whole struct is exactly one 4K
 * page (see the BUILD_BUG_ON in hvs_stream_enqueue()).
 */
struct hvs_send_buf {
	/* The header before the payload data */
	struct vmpipe_proto_header hdr;

	/* The payload */
	u8 data[HVS_SEND_BUF_SIZE];
};
/* Fixed per-packet overhead: VMBus packet descriptor + our pipe header. */
#define HVS_HEADER_LEN (sizeof(struct vmpacket_descriptor) + \
			sizeof(struct vmpipe_proto_header))

/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
 * __hv_pkt_iter_next().
 */
#define VMBUS_PKT_TRAILER_SIZE (sizeof(u64))

/* Total ringbuffer space consumed by a packet with payload_len payload
 * bytes: header + payload rounded up to 8 bytes + trailer.
 */
#define HVS_PKT_LEN(payload_len) (HVS_HEADER_LEN + \
				  ALIGN((payload_len), 8) + \
				  VMBUS_PKT_TRAILER_SIZE)

/* Overlay so the first 4 bytes of a service GUID can be accessed as the
 * vsock port number (see the mapping rule documented below).
 */
union hvs_service_id {
	uuid_le srv_id;

	struct {
		unsigned int svm_port;
		unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)];
	};
};
/* Per-socket state (accessed via vsk->trans) */
struct hvsock {
	struct vsock_sock *vsk;	/* back-pointer to the owning vsock */

	uuid_le vm_srv_id;	/* service GUID on the VM side */
	uuid_le host_srv_id;	/* service GUID on the host side */

	struct vmbus_channel *chan;	/* NULL until the channel is opened */
	struct vmpacket_descriptor *recv_desc;	/* in-flight recv packet */

	/* The length of the payload not delivered to userland yet */
	u32 recv_data_len;
	/* The offset of the payload */
	u32 recv_data_off;

	/* Have we sent the zero-length packet (FIN)? */
	bool fin_sent;
};
/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
 * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
 * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
 * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
 * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
 * as the local cid.
 *
 * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
 * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
 * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
 * the below sockaddr:
 *
 * struct SOCKADDR_HV
 * {
 *	ADDRESS_FAMILY Family;
 *	USHORT Reserved;
 *	GUID VmId;
 *	GUID ServiceId;
 * };
 * Note: VmID is not used by Linux VM and actually it isn't transmitted via
 * VMBus, because here it's obvious the host and the VM can easily identify
 * each other. Though the VmID is useful on the host, especially in the case
 * of Windows container, Linux VM doesn't need it at all.
 *
 * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit
 * the available GUID space of SOCKADDR_HV so that we can create a mapping
 * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing
 * Hyper-V Sockets apps on the host and in Linux VM is:
 *
 ****************************************************************************
 * The only valid Service GUIDs, from the perspectives of both the host and *
 * Linux VM, that can be connected by the other end, must conform to this   *
 * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in    *
 * this range [0, 0x7FFFFFFF].                                              *
 ****************************************************************************
 *
 * When we write apps on the host to connect(), the GUID ServiceID is used.
 * When we write apps in Linux VM to connect(), we only need to specify the
 * port and the driver will form the GUID and use that to request the host.
 *
 * From the perspective of Linux VM:
 * 1. the local ephemeral port (i.e. the local auto-bound port when we call
 * connect() without explicit bind()) is generated by __vsock_bind_stream(),
 * and the range is [1024, 0xFFFFFFFF).
 * 2. the remote ephemeral port (i.e. the auto-generated remote port for
 * a connect request initiated by the host's connect()) is generated by
 * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF).
 */

/* Listen ports live in [0, 0x7FFFFFFF]; the host's ephemeral ports start
 * just above that.
 */
#define MAX_LISTEN_PORT			((u32)0x7FFFFFFF)
#define MAX_VM_LISTEN_PORT		MAX_LISTEN_PORT
#define MAX_HOST_LISTEN_PORT		MAX_LISTEN_PORT
#define MIN_HOST_EPHEMERAL_PORT		(MAX_HOST_LISTEN_PORT + 1)

/* 00000000-facb-11e6-bd58-64006a7986d3 */
static const uuid_le srv_id_template =
	UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
		0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
  151. static bool is_valid_srv_id(const uuid_le *id)
  152. {
  153. return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
  154. }
  155. static unsigned int get_port_by_srv_id(const uuid_le *svr_id)
  156. {
  157. return *((unsigned int *)svr_id);
  158. }
  159. static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id)
  160. {
  161. unsigned int port = get_port_by_srv_id(svr_id);
  162. vsock_addr_init(addr, VMADDR_CID_ANY, port);
  163. }
/* Pick an unused "remote" ephemeral port for a host-initiated connection.
 * Ports are handed out from a single static counter in
 * [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF); a candidate is accepted as soon
 * as no established socket is found with that <remote, local> pair.
 */
static void hvs_remote_addr_init(struct sockaddr_vm *remote,
				 struct sockaddr_vm *local)
{
	static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
	struct sock *sk;

	vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY);

	while (1) {
		/* Wrap around ? */
		if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT ||
		    host_ephemeral_port == VMADDR_PORT_ANY)
			host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;

		remote->svm_port = host_ephemeral_port++;

		sk = vsock_find_connected_socket(remote, local);
		if (!sk) {
			/* Found an available ephemeral port */
			return;
		}

		/* Release refcnt got in vsock_find_connected_socket */
		sock_put(sk);
	}
}
/* Ask the host to notify us once at least one max-sized packet's worth of
 * ringbuffer space is writable again.
 */
static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
{
	set_channel_pending_send_size(chan,
				      HVS_PKT_LEN(HVS_SEND_BUF_SIZE));

	/* Make the pending-send-size update visible before any subsequent
	 * ringbuffer accesses.
	 */
	virt_mb();
}
  191. static bool hvs_channel_readable(struct vmbus_channel *chan)
  192. {
  193. u32 readable = hv_get_bytes_to_read(&chan->inbound);
  194. /* 0-size payload means FIN */
  195. return readable >= HVS_PKT_LEN(0);
  196. }
  197. static int hvs_channel_readable_payload(struct vmbus_channel *chan)
  198. {
  199. u32 readable = hv_get_bytes_to_read(&chan->inbound);
  200. if (readable > HVS_PKT_LEN(0)) {
  201. /* At least we have 1 byte to read. We don't need to return
  202. * the exact readable bytes: see vsock_stream_recvmsg() ->
  203. * vsock_stream_has_data().
  204. */
  205. return 1;
  206. }
  207. if (readable == HVS_PKT_LEN(0)) {
  208. /* 0-size payload means FIN */
  209. return 0;
  210. }
  211. /* No payload or FIN */
  212. return -1;
  213. }
  214. static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
  215. {
  216. u32 writeable = hv_get_bytes_to_write(&chan->outbound);
  217. size_t ret;
  218. /* The ringbuffer mustn't be 100% full, and we should reserve a
  219. * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
  220. * and hvs_shutdown().
  221. */
  222. if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0))
  223. return 0;
  224. ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0);
  225. return round_down(ret, 8);
  226. }
/* Fill in the pipe header and send header + payload as one in-band VMBus
 * packet. to_write == 0 produces the zero-length FIN packet.
 */
static int hvs_send_data(struct vmbus_channel *chan,
			 struct hvs_send_buf *send_buf, size_t to_write)
{
	send_buf->hdr.pkt_type = 1;
	send_buf->hdr.data_size = to_write;
	return vmbus_sendpacket(chan, &send_buf->hdr,
				sizeof(send_buf->hdr) + to_write,
				0, VM_PKT_DATA_INBAND, 0);
}
/* VMBus channel interrupt callback: wake readers when a complete packet is
 * available and writers when any outbound space exists. ctx is the struct
 * sock set via vmbus_open()/set_per_channel_state().
 */
static void hvs_channel_cb(void *ctx)
{
	struct sock *sk = (struct sock *)ctx;
	struct vsock_sock *vsk = vsock_sk(sk);
	struct hvsock *hvs = vsk->trans;
	struct vmbus_channel *chan = hvs->chan;

	if (hvs_channel_readable(chan))
		sk->sk_data_ready(sk);

	if (hv_get_bytes_to_write(&chan->outbound) > 0)
		sk->sk_write_space(sk);
}
/* Perform the actual close with the socket lock held: mark the socket
 * done, and if a delayed close was pending, finish it (optionally
 * cancelling its timeout) and drop the reference taken when it was
 * scheduled. Called from both the rescind path and the close-timeout work.
 */
static void hvs_do_close_lock_held(struct vsock_sock *vsk,
				   bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	/* Only move to TCP_CLOSING once there is no unread data left */
	if (vsock_stream_has_data(vsk) <= 0)
		sk->sk_state = TCP_CLOSING;
	sk->sk_state_change(sk);
	if (vsk->close_work_scheduled &&
	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
		vsk->close_work_scheduled = false;
		vsock_remove_sock(vsk);

		/* Release the reference taken while scheduling the timeout */
		sock_put(sk);
	}
}
/* Rescind callback: the host tore down the channel, so close our side.
 * Registered via vmbus_set_chn_rescind_callback() in hvs_open_connection().
 */
static void hvs_close_connection(struct vmbus_channel *chan)
{
	struct sock *sk = get_per_channel_state(chan);

	lock_sock(sk);
	hvs_do_close_lock_held(vsock_sk(sk), true);
	release_sock(sk);

	/* Release the refcnt for the channel that's opened in
	 * hvs_open_connection().
	 */
	sock_put(sk);
}
/* Handle a new VMBus channel offer for hv_sock: this runs both when the
 * host connects to a listening socket in the VM (conn_from_host) and when
 * the host accepts a connect() initiated by the VM. On success the channel
 * is opened, its rescind callback is installed, and the affected socket(s)
 * transition to TCP_ESTABLISHED.
 */
static void hvs_open_connection(struct vmbus_channel *chan)
{
	uuid_le *if_instance, *if_type;
	unsigned char conn_from_host;

	struct sockaddr_vm addr;
	struct sock *sk, *new = NULL;
	struct vsock_sock *vnew = NULL;
	struct hvsock *hvs = NULL;
	struct hvsock *hvs_new = NULL;
	int ret;

	if_type = &chan->offermsg.offer.if_type;
	if_instance = &chan->offermsg.offer.if_instance;
	/* The host sets user_def[0] when it is the connecting side */
	conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];

	/* The host or the VM should only listen on a port in
	 * [0, MAX_LISTEN_PORT]
	 */
	if (!is_valid_srv_id(if_type) ||
	    get_port_by_srv_id(if_type) > MAX_LISTEN_PORT)
		return;

	hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
	sk = vsock_find_bound_socket(&addr);
	if (!sk)
		return;

	lock_sock(sk);

	/* Sanity-check the socket state against the direction of the offer */
	if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
	    (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
		goto out;

	if (conn_from_host) {
		if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
			goto out;

		/* Create the child socket that will be handed to accept() */
		new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
				     sk->sk_type, 0);
		if (!new)
			goto out;

		new->sk_state = TCP_SYN_SENT;
		vnew = vsock_sk(new);
		hvs_new = vnew->trans;
		hvs_new->chan = chan;
	} else {
		hvs = vsock_sk(sk)->trans;
		hvs->chan = chan;
	}

	set_channel_read_mode(chan, HV_CALL_DIRECT);
	ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
			 RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
			 hvs_channel_cb, conn_from_host ? new : sk);
	if (ret != 0) {
		/* Undo the channel assignment made above */
		if (conn_from_host) {
			hvs_new->chan = NULL;
			sock_put(new);
		} else {
			hvs->chan = NULL;
		}
		goto out;
	}

	set_per_channel_state(chan, conn_from_host ? new : sk);

	/* This reference will be dropped by hvs_close_connection(). */
	sock_hold(conn_from_host ? new : sk);
	vmbus_set_chn_rescind_callback(chan, hvs_close_connection);

	/* Set the pending send size to max packet size to always get
	 * notifications from the host when there is enough writable space.
	 * The host is optimized to send notifications only when the pending
	 * size boundary is crossed, and not always.
	 */
	hvs_set_channel_pending_send_size(chan);

	if (conn_from_host) {
		new->sk_state = TCP_ESTABLISHED;
		sk->sk_ack_backlog++;

		hvs_addr_init(&vnew->local_addr, if_type);
		hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);

		hvs_new->vm_srv_id = *if_type;
		hvs_new->host_srv_id = *if_instance;

		vsock_insert_connected(vnew);
		vsock_enqueue_accept(sk, new);
	} else {
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;

		vsock_insert_connected(vsock_sk(sk));
	}

	sk->sk_state_change(sk);

out:
	/* Release refcnt obtained when we called vsock_find_bound_socket() */
	sock_put(sk);

	/* NOTE(review): sock_put() before release_sock() looks inverted;
	 * presumably safe because other references (bound/connected lists)
	 * keep sk alive here -- confirm against the vsock core.
	 */
	release_sock(sk);
}
/* The guest's cid: this transport never uses a real cid (see the comment
 * above), so report VMADDR_CID_ANY.
 */
static u32 hvs_get_local_cid(void)
{
	return VMADDR_CID_ANY;
}
  364. static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
  365. {
  366. struct hvsock *hvs;
  367. hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
  368. if (!hvs)
  369. return -ENOMEM;
  370. vsk->trans = hvs;
  371. hvs->vsk = vsk;
  372. return 0;
  373. }
/* connect() hook: build the VM-side and host-side service GUIDs by
 * substituting the local/remote vsock ports into the first 4 bytes of the
 * GUID template, then ask the host to establish the connection.
 */
static int hvs_connect(struct vsock_sock *vsk)
{
	union hvs_service_id vm, host;
	struct hvsock *h = vsk->trans;

	vm.srv_id = srv_id_template;
	vm.svm_port = vsk->local_addr.svm_port;
	h->vm_srv_id = vm.srv_id;

	host.srv_id = srv_id_template;
	host.svm_port = vsk->remote_addr.svm_port;
	h->host_srv_id = host.srv_id;

	return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
}
/* Send the zero-length packet (FIN) to the peer, at most once per socket.
 * Caller must hold the socket lock. Passing a bare header as a
 * hvs_send_buf is fine here: with a 0-byte payload hvs_send_data() only
 * writes and transmits the hdr field.
 */
static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
{
	struct vmpipe_proto_header hdr;

	if (hvs->fin_sent || !hvs->chan)
		return;

	/* It can't fail: see hvs_channel_writable_bytes(). */
	(void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
	hvs->fin_sent = true;
}
  395. static int hvs_shutdown(struct vsock_sock *vsk, int mode)
  396. {
  397. struct sock *sk = sk_vsock(vsk);
  398. if (!(mode & SEND_SHUTDOWN))
  399. return 0;
  400. lock_sock(sk);
  401. hvs_shutdown_lock_held(vsk->trans, mode);
  402. release_sock(sk);
  403. return 0;
  404. }
/* Delayed-work handler scheduled by hvs_close_lock_held(): if the close
 * has not completed within HVS_CLOSE_TIMEOUT, force it now.
 */
static void hvs_close_timeout(struct work_struct *work)
{
	struct vsock_sock *vsk =
		container_of(work, struct vsock_sock, close_work.work);
	struct sock *sk = sk_vsock(vsk);

	sock_hold(sk);
	lock_sock(sk);
	if (!sock_flag(sk, SOCK_DONE))
		hvs_do_close_lock_held(vsk, false);

	vsk->close_work_scheduled = false;
	release_sock(sk);
	sock_put(sk);
}
/* Returns true, if it is safe to remove socket; false otherwise */
static bool hvs_close_lock_held(struct vsock_sock *vsk)
{
	struct sock *sk = sk_vsock(vsk);

	/* Not connected (or already closing): nothing to wait for */
	if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true;

	/* Make sure our FIN has been sent */
	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
		hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);

	if (sock_flag(sk, SOCK_DONE))
		return true;

	/* Peer hasn't closed yet: defer removal and arm a timeout.
	 * This reference will be dropped by the delayed close routine.
	 */
	sock_hold(sk);
	INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
	vsk->close_work_scheduled = true;
	schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
	return false;
}
/* release() hook: attempt a graceful close; remove the socket immediately
 * only when hvs_close_lock_held() says it's safe (otherwise removal is
 * deferred to hvs_close_timeout()/hvs_do_close_lock_held()).
 */
static void hvs_release(struct vsock_sock *vsk)
{
	struct sock *sk = sk_vsock(vsk);
	bool remove_sock;

	/* Nested annotation: may be called with a listener's lock held */
	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	remove_sock = hvs_close_lock_held(vsk);
	release_sock(sk);
	if (remove_sock)
		vsock_remove_sock(vsk);
}
  446. static void hvs_destruct(struct vsock_sock *vsk)
  447. {
  448. struct hvsock *hvs = vsk->trans;
  449. struct vmbus_channel *chan = hvs->chan;
  450. if (chan)
  451. vmbus_hvsock_device_unregister(chan);
  452. kfree(hvs);
  453. }
/* Datagram sockets are not supported by Hyper-V Sockets. */
static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}
/* Datagram sockets are not supported by Hyper-V Sockets. */
static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
			     size_t len, int flags)
{
	return -EOPNOTSUPP;
}
/* Datagram sockets are not supported by Hyper-V Sockets. */
static int hvs_dgram_enqueue(struct vsock_sock *vsk,
			     struct sockaddr_vm *remote, struct msghdr *msg,
			     size_t dgram_len)
{
	return -EOPNOTSUPP;
}
/* Datagram sockets are not supported by Hyper-V Sockets. */
static bool hvs_dgram_allow(u32 cid, u32 port)
{
	return false;
}
/* Parse the pipe header of the packet at hvs->recv_desc and cache the
 * payload length and offset for hvs_stream_dequeue(). hvs->recv_desc must
 * be non-NULL here. Returns 0, or -EIO if the (untrusted) host reported a
 * payload larger than the MTU.
 */
static int hvs_update_recv_data(struct hvsock *hvs)
{
	struct hvs_recv_buf *recv_buf;
	u32 payload_len;

	/* The payload follows the VMBus packet descriptor */
	recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
	payload_len = recv_buf->hdr.data_size;

	if (payload_len > HVS_MTU_SIZE)
		return -EIO;

	/* A zero-length payload is the peer's FIN */
	if (payload_len == 0)
		hvs->vsk->peer_shutdown |= SEND_SHUTDOWN;

	hvs->recv_data_len = payload_len;
	hvs->recv_data_off = 0;

	return 0;
}
  487. static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
  488. size_t len, int flags)
  489. {
  490. struct hvsock *hvs = vsk->trans;
  491. bool need_refill = !hvs->recv_desc;
  492. struct hvs_recv_buf *recv_buf;
  493. u32 to_read;
  494. int ret;
  495. if (flags & MSG_PEEK)
  496. return -EOPNOTSUPP;
  497. if (need_refill) {
  498. hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
  499. ret = hvs_update_recv_data(hvs);
  500. if (ret)
  501. return ret;
  502. }
  503. recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
  504. to_read = min_t(u32, len, hvs->recv_data_len);
  505. ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
  506. if (ret != 0)
  507. return ret;
  508. hvs->recv_data_len -= to_read;
  509. if (hvs->recv_data_len == 0) {
  510. hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
  511. if (hvs->recv_desc) {
  512. ret = hvs_update_recv_data(hvs);
  513. if (ret)
  514. return ret;
  515. }
  516. } else {
  517. hvs->recv_data_off += to_read;
  518. }
  519. return to_read;
  520. }
/* stream_enqueue() hook: stage up to one HVS_SEND_BUF_SIZE chunk from the
 * user's msg in a kernel buffer and send it as a single VMBus packet.
 * Returns the number of bytes sent (may be less than len) or a negative
 * errno. The vsock core retries for the remainder.
 */
static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
				  size_t len)
{
	struct hvsock *hvs = vsk->trans;
	struct vmbus_channel *chan = hvs->chan;
	struct hvs_send_buf *send_buf;
	ssize_t to_write, max_writable, ret;

	/* The send buffer must be exactly one 4K page */
	BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);

	send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
	if (!send_buf)
		return -ENOMEM;

	/* Clamp to both the ringbuffer space and the staging buffer size.
	 * NOTE(review): presumably the core only calls this with len > 0
	 * and writable space available, so to_write > 0 and a 0-byte
	 * (FIN-looking) packet can't be sent from here -- confirm against
	 * vsock_stream_sendmsg().
	 */
	max_writable = hvs_channel_writable_bytes(chan);
	to_write = min_t(ssize_t, len, max_writable);
	to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
	ret = memcpy_from_msg(send_buf->data, msg, to_write);
	if (ret < 0)
		goto out;

	ret = hvs_send_data(hvs->chan, send_buf, to_write);
	if (ret < 0)
		goto out;

	ret = to_write;
out:
	kfree(send_buf);
	return ret;
}
  546. static s64 hvs_stream_has_data(struct vsock_sock *vsk)
  547. {
  548. struct hvsock *hvs = vsk->trans;
  549. s64 ret;
  550. if (hvs->recv_data_len > 0)
  551. return 1;
  552. switch (hvs_channel_readable_payload(hvs->chan)) {
  553. case 1:
  554. ret = 1;
  555. break;
  556. case 0:
  557. vsk->peer_shutdown |= SEND_SHUTDOWN;
  558. ret = 0;
  559. break;
  560. default: /* -1 */
  561. ret = 0;
  562. break;
  563. }
  564. return ret;
  565. }
/* stream_has_space() hook: number of payload bytes we may queue now. */
static s64 hvs_stream_has_space(struct vsock_sock *vsk)
{
	struct hvsock *hvs = vsk->trans;

	return hvs_channel_writable_bytes(hvs->chan);
}
/* Receive high-watermark: one MTU plus one byte, so the core never waits
 * for more data than a single packet can carry.
 */
static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
{
	return HVS_MTU_SIZE + 1;
}
  575. static bool hvs_stream_is_active(struct vsock_sock *vsk)
  576. {
  577. struct hvsock *hvs = vsk->trans;
  578. return hvs->chan != NULL;
  579. }
  580. static bool hvs_stream_allow(u32 cid, u32 port)
  581. {
  582. /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
  583. * reserved as ephemeral ports, which are used as the host's ports
  584. * when the host initiates connections.
  585. *
  586. * Perform this check in the guest so an immediate error is produced
  587. * instead of a timeout.
  588. */
  589. if (port > MAX_HOST_LISTEN_PORT)
  590. return false;
  591. if (cid == VMADDR_CID_HOST)
  592. return true;
  593. return false;
  594. }
/* poll(POLLIN) hook: readable once a complete packet (data or FIN) is in
 * the inbound ring.
 */
static
int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
{
	struct hvsock *hvs = vsk->trans;

	*readable = hvs_channel_readable(hvs->chan);
	return 0;
}
/* poll(POLLOUT) hook: writable once any payload space is available. */
static
int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
{
	*writable = hvs_stream_has_space(vsk) > 0;

	return 0;
}
/* No per-transport bookkeeping is needed around recv; always succeed. */
static
int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
			 struct vsock_transport_recv_notify_data *d)
{
	return 0;
}
/* No-op: nothing to do before blocking in recv. */
static
int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
			      struct vsock_transport_recv_notify_data *d)
{
	return 0;
}
/* No-op: nothing to do before dequeueing received data. */
static
int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
				struct vsock_transport_recv_notify_data *d)
{
	return 0;
}
/* No-op: no credit/flow-control update is needed after a dequeue. */
static
int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
				 ssize_t copied, bool data_read,
				 struct vsock_transport_recv_notify_data *d)
{
	return 0;
}
/* No per-transport bookkeeping is needed around send; always succeed. */
static
int hvs_notify_send_init(struct vsock_sock *vsk,
			 struct vsock_transport_send_notify_data *d)
{
	return 0;
}
/* No-op: nothing to do before blocking in send. */
static
int hvs_notify_send_pre_block(struct vsock_sock *vsk,
			      struct vsock_transport_send_notify_data *d)
{
	return 0;
}
/* No-op: nothing to do before enqueueing data to send. */
static
int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
				struct vsock_transport_send_notify_data *d)
{
	return 0;
}
/* No-op: no accounting is needed after data has been enqueued. */
static
int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
				 struct vsock_transport_send_notify_data *d)
{
	return 0;
}
/* Buffer sizes are fixed by the host-defined ring sizes; setting is a
 * no-op.
 */
static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Ignored. */
}
/* See hvs_set_buffer_size(): buffer sizes are not tunable. */
static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Ignored. */
}
/* See hvs_set_buffer_size(): buffer sizes are not tunable. */
static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Ignored. */
}
/* Buffer-size query is not supported.
 * NOTE(review): returning a negative errno through a u64 yields a huge
 * positive value; presumably callers only use this for the getsockopt
 * path where the error is intended -- confirm against the vsock core.
 */
static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
{
	return -ENOPROTOOPT;
}
/* Not supported; see the note on hvs_get_buffer_size(). */
static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
{
	return -ENOPROTOOPT;
}
/* Not supported; see the note on hvs_get_buffer_size(). */
static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
{
	return -ENOPROTOOPT;
}
/* The vsock transport ops table registered with vsock_core_init(). */
static struct vsock_transport hvs_transport = {
	.get_local_cid            = hvs_get_local_cid,

	.init                     = hvs_sock_init,
	.destruct                 = hvs_destruct,
	.release                  = hvs_release,
	.connect                  = hvs_connect,
	.shutdown                 = hvs_shutdown,

	.dgram_bind               = hvs_dgram_bind,
	.dgram_dequeue            = hvs_dgram_dequeue,
	.dgram_enqueue            = hvs_dgram_enqueue,
	.dgram_allow              = hvs_dgram_allow,

	.stream_dequeue           = hvs_stream_dequeue,
	.stream_enqueue           = hvs_stream_enqueue,
	.stream_has_data          = hvs_stream_has_data,
	.stream_has_space         = hvs_stream_has_space,
	.stream_rcvhiwat          = hvs_stream_rcvhiwat,
	.stream_is_active         = hvs_stream_is_active,
	.stream_allow             = hvs_stream_allow,

	.notify_poll_in           = hvs_notify_poll_in,
	.notify_poll_out          = hvs_notify_poll_out,
	.notify_recv_init         = hvs_notify_recv_init,
	.notify_recv_pre_block    = hvs_notify_recv_pre_block,
	.notify_recv_pre_dequeue  = hvs_notify_recv_pre_dequeue,
	.notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
	.notify_send_init         = hvs_notify_send_init,
	.notify_send_pre_block    = hvs_notify_send_pre_block,
	.notify_send_pre_enqueue  = hvs_notify_send_pre_enqueue,
	.notify_send_post_enqueue = hvs_notify_send_post_enqueue,

	.set_buffer_size          = hvs_set_buffer_size,
	.set_min_buffer_size      = hvs_set_min_buffer_size,
	.set_max_buffer_size      = hvs_set_max_buffer_size,
	.get_buffer_size          = hvs_get_buffer_size,
	.get_min_buffer_size      = hvs_get_min_buffer_size,
	.get_max_buffer_size      = hvs_get_max_buffer_size,
};
/* VMBus probe: each new hv_sock channel offer becomes a connection. */
static int hvs_probe(struct hv_device *hdev,
		     const struct hv_vmbus_device_id *dev_id)
{
	struct vmbus_channel *chan = hdev->channel;

	hvs_open_connection(chan);

	/* Always return success to suppress the unnecessary error message
	 * in vmbus_probe(): on error the host will rescind the device in
	 * 30 seconds and we can do cleanup at that time in
	 * vmbus_onoffer_rescind().
	 */
	return 0;
}
  728. static int hvs_remove(struct hv_device *hdev)
  729. {
  730. struct vmbus_channel *chan = hdev->channel;
  731. vmbus_close(chan);
  732. return 0;
  733. }
/* This isn't really used. See vmbus_match() and vmbus_probe() */
static const struct hv_vmbus_device_id id_table[] = {
	{},
};
/* The VMBus driver; .hvsock routes all hv_sock channel offers to us. */
static struct hv_driver hvs_drv = {
	.name		= "hv_sock",
	.hvsock		= true,
	.id_table	= id_table,
	.probe		= hvs_probe,
	.remove		= hvs_remove,
};
  745. static int __init hvs_init(void)
  746. {
  747. int ret;
  748. if (vmbus_proto_version < VERSION_WIN10)
  749. return -ENODEV;
  750. ret = vmbus_driver_register(&hvs_drv);
  751. if (ret != 0)
  752. return ret;
  753. ret = vsock_core_init(&hvs_transport);
  754. if (ret) {
  755. vmbus_driver_unregister(&hvs_drv);
  756. return ret;
  757. }
  758. return 0;
  759. }
/* Module exit: tear down in the reverse order of hvs_init(). */
static void __exit hvs_exit(void)
{
	vsock_core_exit();
	vmbus_driver_unregister(&hvs_drv);
}
/* Module registration and metadata. */
module_init(hvs_init);
module_exit(hvs_exit);

MODULE_DESCRIPTION("Hyper-V Sockets");
MODULE_VERSION("1.0.0");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_VSOCK);