/*
 * Hyper-V transport for vsock
 *
 * Hyper-V Sockets supplies a byte-stream based communication mechanism
 * between the host and the VM. This driver implements the necessary
 * support in the VM by introducing the new vsock transport.
 *
 * Copyright (c) 2017, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <net/sock.h>
#include <net/af_vsock.h>
/* The host side's design of the feature requires 6 exact 4KB pages for
 * recv/send rings respectively -- this is suboptimal considering memory
 * consumption, however unluckily we have to live with it, before the
 * host comes up with a better design in the future.
 */
#define PAGE_SIZE_4K		4096
#define RINGBUFFER_HVS_RCV_SIZE	(PAGE_SIZE_4K * 6)
#define RINGBUFFER_HVS_SND_SIZE	(PAGE_SIZE_4K * 6)

/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE		(1024 * 16)
/* On-the-wire header preceding every data packet payload. */
struct vmpipe_proto_header {
	u32 pkt_type;	/* set to 1 for data packets: see hvs_send_data() */
	u32 data_size;	/* payload length in bytes; 0 means FIN */
};
/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
 * data from the ringbuffer into the userspace buffer.
 */
struct hvs_recv_buf {
	/* The header before the payload data */
	struct vmpipe_proto_header hdr;

	/* The payload (bounded by the host-defined MTU) */
	u8 data[HVS_MTU_SIZE];
};
/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
 * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
 * buffer, because tests show there is no significant performance difference.
 *
 * Note: the buffer can be eliminated in the future when we add new VMBus
 * ringbuffer APIs that allow us to directly copy data from userspace buffer
 * to VMBus ringbuffer.
 */
#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))

/* Sized so that header + payload exactly fill one 4KB page:
 * see the BUILD_BUG_ON() in hvs_stream_enqueue().
 */
struct hvs_send_buf {
	/* The header before the payload data */
	struct vmpipe_proto_header hdr;

	/* The payload */
	u8 data[HVS_SEND_BUF_SIZE];
};
/* Per-packet overhead: VMBus descriptor plus our pipe header. */
#define HVS_HEADER_LEN	(sizeof(struct vmpacket_descriptor) + \
			 sizeof(struct vmpipe_proto_header))

/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
 * __hv_pkt_iter_next().
 */
#define VMBUS_PKT_TRAILER_SIZE	(sizeof(u64))

/* Total ringbuffer footprint of a packet carrying 'payload_len' bytes. */
#define HVS_PKT_LEN(payload_len)	(HVS_HEADER_LEN + \
					 ALIGN((payload_len), 8) + \
					 VMBUS_PKT_TRAILER_SIZE)
/* Overlay of a Service GUID whose leading 32 bits carry the vsock port;
 * see the port<->GUID mapping rule described in the comment further down.
 */
union hvs_service_id {
	uuid_le srv_id;

	struct {
		unsigned int svm_port;	/* the vsock port, in the GUID's first 4 bytes */
		unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)];
	};
};
/* Per-socket state (accessed via vsk->trans) */
struct hvsock {
	struct vsock_sock *vsk;		/* back-pointer to the owning socket */

	uuid_le vm_srv_id;		/* guest-side Service GUID (local port) */
	uuid_le host_srv_id;		/* host-side Service GUID (remote port) */

	struct vmbus_channel *chan;	/* NULL until the channel is opened */
	struct vmpacket_descriptor *recv_desc;	/* in-place packet being consumed */

	/* The length of the payload not delivered to userland yet */
	u32 recv_data_len;
	/* The offset of the payload */
	u32 recv_data_off;

	/* Have we sent the zero-length packet (FIN)? */
	bool fin_sent;
};
/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
 * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
 * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
 * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
 * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
 * as the local cid.
 *
 * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
 * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
 * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
 * the below sockaddr:
 *
 * struct SOCKADDR_HV
 * {
 *    ADDRESS_FAMILY Family;
 *    USHORT Reserved;
 *    GUID VmId;
 *    GUID ServiceId;
 * };
 * Note: VmID is not used by Linux VM and actually it isn't transmitted via
 * VMBus, because here it's obvious the host and the VM can easily identify
 * each other. Though the VmID is useful on the host, especially in the case
 * of Windows container, Linux VM doesn't need it at all.
 *
 * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit
 * the available GUID space of SOCKADDR_HV so that we can create a mapping
 * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing
 * Hyper-V Sockets apps on the host and in Linux VM is:
 *
 ****************************************************************************
 * The only valid Service GUIDs, from the perspectives of both the host and *
 * Linux VM, that can be connected by the other end, must conform to this   *
 * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in    *
 * this range [0, 0x7FFFFFFF].                                              *
 ****************************************************************************
 *
 * When we write apps on the host to connect(), the GUID ServiceID is used.
 * When we write apps in Linux VM to connect(), we only need to specify the
 * port and the driver will form the GUID and use that to request the host.
 *
 * From the perspective of Linux VM:
 * 1. the local ephemeral port (i.e. the local auto-bound port when we call
 * connect() without explicit bind()) is generated by __vsock_bind_stream(),
 * and the range is [1024, 0xFFFFFFFF).
 * 2. the remote ephemeral port (i.e. the auto-generated remote port for
 * a connect request initiated by the host's connect()) is generated by
 * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF).
 */
/* Listening ports live in [0, 0x7FFFFFFF]; the host's auto-generated
 * (ephemeral) ports use the remaining upper half of the u32 space.
 */
#define MAX_LISTEN_PORT			((u32)0x7FFFFFFF)
#define MAX_VM_LISTEN_PORT		MAX_LISTEN_PORT
#define MAX_HOST_LISTEN_PORT		MAX_LISTEN_PORT
#define MIN_HOST_EPHEMERAL_PORT		(MAX_HOST_LISTEN_PORT + 1)
/* 00000000-facb-11e6-bd58-64006a7986d3: template Service GUID; the first
 * 4 bytes are overwritten with the vsock port (see union hvs_service_id).
 */
static const uuid_le srv_id_template =
	UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
		0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
  149. static bool is_valid_srv_id(const uuid_le *id)
  150. {
  151. return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
  152. }
/* Extract the vsock port stored in the first 4 bytes of the Service GUID
 * (see union hvs_service_id).
 * NOTE(review): this type-puns via a pointer cast; relies on the kernel's
 * -fno-strict-aliasing build and the GUID keeping the port in its leading
 * bytes -- confirm if reused elsewhere.
 */
static unsigned int get_port_by_srv_id(const uuid_le *svr_id)
{
	return *((unsigned int *)svr_id);
}
  157. static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id)
  158. {
  159. unsigned int port = get_port_by_srv_id(svr_id);
  160. vsock_addr_init(addr, VMADDR_CID_ANY, port);
  161. }
/* Pick an unused host-side ("remote") ephemeral port for a host-initiated
 * connection. The static counter persists across calls; when it wraps out
 * of range it is reset to MIN_HOST_EPHEMERAL_PORT. Probing stops at the
 * first port with no connected socket for the <remote, local> pair.
 * NOTE(review): the static counter has no explicit locking -- presumably
 * serialized by the VMBus offer-handling path; confirm.
 */
static void hvs_remote_addr_init(struct sockaddr_vm *remote,
				 struct sockaddr_vm *local)
{
	static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
	struct sock *sk;

	vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY);

	while (1) {
		/* Wrap around ? */
		if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT ||
		    host_ephemeral_port == VMADDR_PORT_ANY)
			host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;

		remote->svm_port = host_ephemeral_port++;

		sk = vsock_find_connected_socket(remote, local);
		if (!sk) {
			/* Found an available ephemeral port */
			return;
		}

		/* Release refcnt got in vsock_find_connected_socket */
		sock_put(sk);
	}
}
/* Record a pending send size of one max-sized packet (presumably so the
 * host signals us once that much ring space frees up -- see
 * hvs_stream_has_space() and hvs_channel_cb()).
 */
static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
{
	set_channel_pending_send_size(chan,
				      HVS_PKT_LEN(HVS_SEND_BUF_SIZE));

	/* See hvs_stream_has_space(): we must make sure the host has seen
	 * the new pending send size, before we can re-check the writable
	 * bytes.
	 */
	virt_mb();
}
/* Undo hvs_set_channel_pending_send_size() once space is available. */
static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
{
	set_channel_pending_send_size(chan, 0);

	/* Ditto: the host must observe the cleared size (see the barrier
	 * rationale in hvs_set_channel_pending_send_size()).
	 */
	virt_mb();
}
  199. static bool hvs_channel_readable(struct vmbus_channel *chan)
  200. {
  201. u32 readable = hv_get_bytes_to_read(&chan->inbound);
  202. /* 0-size payload means FIN */
  203. return readable >= HVS_PKT_LEN(0);
  204. }
  205. static int hvs_channel_readable_payload(struct vmbus_channel *chan)
  206. {
  207. u32 readable = hv_get_bytes_to_read(&chan->inbound);
  208. if (readable > HVS_PKT_LEN(0)) {
  209. /* At least we have 1 byte to read. We don't need to return
  210. * the exact readable bytes: see vsock_stream_recvmsg() ->
  211. * vsock_stream_has_data().
  212. */
  213. return 1;
  214. }
  215. if (readable == HVS_PKT_LEN(0)) {
  216. /* 0-size payload means FIN */
  217. return 0;
  218. }
  219. /* No payload or FIN */
  220. return -1;
  221. }
  222. static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
  223. {
  224. u32 writeable = hv_get_bytes_to_write(&chan->outbound);
  225. size_t ret;
  226. /* The ringbuffer mustn't be 100% full, and we should reserve a
  227. * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
  228. * and hvs_shutdown().
  229. */
  230. if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0))
  231. return 0;
  232. ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0);
  233. return round_down(ret, 8);
  234. }
/* Stamp the pipe header (pkt_type 1 == data; to_write == 0 encodes FIN)
 * and push header + payload to the host in-band. Returns the
 * vmbus_sendpacket() result.
 */
static int hvs_send_data(struct vmbus_channel *chan,
			 struct hvs_send_buf *send_buf, size_t to_write)
{
	send_buf->hdr.pkt_type = 1;
	send_buf->hdr.data_size = to_write;
	return vmbus_sendpacket(chan, &send_buf->hdr,
				sizeof(send_buf->hdr) + to_write,
				0, VM_PKT_DATA_INBAND, 0);
}
/* VMBus channel interrupt callback: wake readers when a complete packet
 * is available and writers when the outbound ring has any room.
 */
static void hvs_channel_cb(void *ctx)
{
	struct sock *sk = (struct sock *)ctx;
	struct vsock_sock *vsk = vsock_sk(sk);
	struct hvsock *hvs = vsk->trans;
	struct vmbus_channel *chan = hvs->chan;

	if (hvs_channel_readable(chan))
		sk->sk_data_ready(sk);

	/* See hvs_stream_has_space(): when we reach here, the writable bytes
	 * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
	 */
	if (hv_get_bytes_to_write(&chan->outbound) > 0)
		sk->sk_write_space(sk);
}
/* Channel-rescind callback: the host closed the connection. Mark the
 * socket done and shut down in both directions, then wake any waiters.
 */
static void hvs_close_connection(struct vmbus_channel *chan)
{
	struct sock *sk = get_per_channel_state(chan);
	struct vsock_sock *vsk = vsock_sk(sk);

	lock_sock(sk);

	sk->sk_state = TCP_CLOSE;
	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;

	sk->sk_state_change(sk);

	release_sock(sk);
}
/* Handle a new VMBus channel offer. conn_from_host != 0 means the host
 * initiated the connect() (we accept into a new child socket); otherwise
 * the offer is the host's reply to a guest connect() (we complete the
 * pending TCP_SYN_SENT socket).
 */
static void hvs_open_connection(struct vmbus_channel *chan)
{
	uuid_le *if_instance, *if_type;
	unsigned char conn_from_host;

	struct sockaddr_vm addr;
	struct sock *sk, *new = NULL;
	struct vsock_sock *vnew;
	struct hvsock *hvs, *hvs_new;
	int ret;

	if_type = &chan->offermsg.offer.if_type;
	if_instance = &chan->offermsg.offer.if_instance;
	conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];

	/* The host or the VM should only listen on a port in
	 * [0, MAX_LISTEN_PORT]
	 */
	if (!is_valid_srv_id(if_type) ||
	    get_port_by_srv_id(if_type) > MAX_LISTEN_PORT)
		return;

	/* For a host-initiated connect, if_type carries the guest's
	 * listening port; otherwise if_instance carries our local port.
	 */
	hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
	sk = vsock_find_bound_socket(&addr);
	if (!sk)
		return;

	lock_sock(sk);

	/* Only a listener can accept, and only a connecting socket can be
	 * completed; anything else means a stale/unexpected offer.
	 */
	if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
	    (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
		goto out;

	if (conn_from_host) {
		if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
			goto out;

		new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
				     sk->sk_type, 0);
		if (!new)
			goto out;

		new->sk_state = TCP_SYN_SENT;
		vnew = vsock_sk(new);
		hvs_new = vnew->trans;
		hvs_new->chan = chan;
	} else {
		hvs = vsock_sk(sk)->trans;
		hvs->chan = chan;
	}

	set_channel_read_mode(chan, HV_CALL_DIRECT);
	ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
			 RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
			 hvs_channel_cb, conn_from_host ? new : sk);
	if (ret != 0) {
		if (conn_from_host) {
			/* Drop the reference from __vsock_create() */
			hvs_new->chan = NULL;
			sock_put(new);
		} else {
			hvs->chan = NULL;
		}
		goto out;
	}

	set_per_channel_state(chan, conn_from_host ? new : sk);
	vmbus_set_chn_rescind_callback(chan, hvs_close_connection);

	if (conn_from_host) {
		new->sk_state = TCP_ESTABLISHED;
		sk->sk_ack_backlog++;

		hvs_addr_init(&vnew->local_addr, if_type);
		hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);

		hvs_new->vm_srv_id = *if_type;
		hvs_new->host_srv_id = *if_instance;

		vsock_insert_connected(vnew);
		vsock_enqueue_accept(sk, new);
	} else {
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;

		vsock_insert_connected(vsock_sk(sk));
	}

	sk->sk_state_change(sk);

out:
	/* Release refcnt obtained when we called vsock_find_bound_socket() */
	sock_put(sk);

	/* NOTE(review): sock_put() before release_sock() is safe only while
	 * another reference keeps sk alive -- confirm against the vsock core.
	 */
	release_sock(sk);
}
  345. static u32 hvs_get_local_cid(void)
  346. {
  347. return VMADDR_CID_ANY;
  348. }
  349. static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
  350. {
  351. struct hvsock *hvs;
  352. hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
  353. if (!hvs)
  354. return -ENOMEM;
  355. vsk->trans = hvs;
  356. hvs->vsk = vsk;
  357. return 0;
  358. }
  359. static int hvs_connect(struct vsock_sock *vsk)
  360. {
  361. union hvs_service_id vm, host;
  362. struct hvsock *h = vsk->trans;
  363. vm.srv_id = srv_id_template;
  364. vm.svm_port = vsk->local_addr.svm_port;
  365. h->vm_srv_id = vm.srv_id;
  366. host.srv_id = srv_id_template;
  367. host.svm_port = vsk->remote_addr.svm_port;
  368. h->host_srv_id = host.srv_id;
  369. return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
  370. }
/* Send the zero-length "FIN" packet for SEND_SHUTDOWN, at most once per
 * socket. Other shutdown modes need no on-the-wire action. Always
 * returns 0.
 */
static int hvs_shutdown(struct vsock_sock *vsk, int mode)
{
	struct sock *sk = sk_vsock(vsk);
	struct vmpipe_proto_header hdr;
	struct hvs_send_buf *send_buf;
	struct hvsock *hvs;

	if (!(mode & SEND_SHUTDOWN))
		return 0;

	lock_sock(sk);

	hvs = vsk->trans;
	if (hvs->fin_sent)
		goto out;

	/* The cast is safe: with a 0-byte payload, hvs_send_data() only
	 * touches the hdr member, which is all that 'hdr' provides.
	 */
	send_buf = (struct hvs_send_buf *)&hdr;

	/* It can't fail: see hvs_channel_writable_bytes(). */
	(void)hvs_send_data(hvs->chan, send_buf, 0);

	hvs->fin_sent = true;
out:
	release_sock(sk);
	return 0;
}
/* Transport release hook: mark the socket closing, remove it from the
 * vsock tables, then send the FIN (if a channel exists).
 */
static void hvs_release(struct vsock_sock *vsk)
{
	struct sock *sk = sk_vsock(vsk);
	struct hvsock *hvs = vsk->trans;
	struct vmbus_channel *chan;

	lock_sock(sk);

	sk->sk_state = TCP_CLOSING;
	vsock_remove_sock(vsk);

	release_sock(sk);

	/* hvs_shutdown() re-takes the sock lock itself, so the FIN is sent
	 * outside the critical section above.
	 */
	chan = hvs->chan;
	if (chan)
		hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
}
  404. static void hvs_destruct(struct vsock_sock *vsk)
  405. {
  406. struct hvsock *hvs = vsk->trans;
  407. struct vmbus_channel *chan = hvs->chan;
  408. if (chan)
  409. vmbus_hvsock_device_unregister(chan);
  410. kfree(hvs);
  411. }
/* Hyper-V Sockets are stream-only: every datagram operation is rejected
 * with -EOPNOTSUPP and datagram connections are never allowed.
 */
static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}

static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
			     size_t len, int flags)
{
	return -EOPNOTSUPP;
}

static int hvs_dgram_enqueue(struct vsock_sock *vsk,
			     struct sockaddr_vm *remote, struct msghdr *msg,
			     size_t dgram_len)
{
	return -EOPNOTSUPP;
}

static bool hvs_dgram_allow(u32 cid, u32 port)
{
	return false;
}
/* Refresh the receive cursor from the packet at hvs->recv_desc: record
 * the payload length and reset the offset. A zero-length payload is the
 * host's FIN. Returns -EIO if the host advertises a payload larger than
 * the agreed MTU. The caller must ensure hvs->recv_desc is non-NULL.
 */
static int hvs_update_recv_data(struct hvsock *hvs)
{
	struct hvs_recv_buf *recv_buf;
	u32 payload_len;

	/* The payload sits immediately after the VMBus descriptor */
	recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
	payload_len = recv_buf->hdr.data_size;

	if (payload_len > HVS_MTU_SIZE)
		return -EIO;

	if (payload_len == 0)
		hvs->vsk->peer_shutdown |= SEND_SHUTDOWN;

	hvs->recv_data_len = payload_len;
	hvs->recv_data_off = 0;

	return 0;
}
  445. static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
  446. size_t len, int flags)
  447. {
  448. struct hvsock *hvs = vsk->trans;
  449. bool need_refill = !hvs->recv_desc;
  450. struct hvs_recv_buf *recv_buf;
  451. u32 to_read;
  452. int ret;
  453. if (flags & MSG_PEEK)
  454. return -EOPNOTSUPP;
  455. if (need_refill) {
  456. hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
  457. ret = hvs_update_recv_data(hvs);
  458. if (ret)
  459. return ret;
  460. }
  461. recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
  462. to_read = min_t(u32, len, hvs->recv_data_len);
  463. ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
  464. if (ret != 0)
  465. return ret;
  466. hvs->recv_data_len -= to_read;
  467. if (hvs->recv_data_len == 0) {
  468. hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
  469. if (hvs->recv_desc) {
  470. ret = hvs_update_recv_data(hvs);
  471. if (ret)
  472. return ret;
  473. }
  474. } else {
  475. hvs->recv_data_off += to_read;
  476. }
  477. return to_read;
  478. }
/* Copy at most one HVS_SEND_BUF_SIZE chunk from the msghdr into a bounce
 * buffer and push it to the host. Returns bytes written or a negative
 * error; callers are expected to retry for any remainder.
 */
static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
				  size_t len)
{
	struct hvsock *hvs = vsk->trans;
	struct vmbus_channel *chan = hvs->chan;
	struct hvs_send_buf *send_buf;
	ssize_t to_write, max_writable, ret;

	/* The bounce buffer must be exactly one 4K page */
	BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);

	send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
	if (!send_buf)
		return -ENOMEM;

	max_writable = hvs_channel_writable_bytes(chan);

	/* NOTE(review): if max_writable is 0, to_write becomes 0 and a
	 * zero-length packet (i.e. a FIN, per hvs_send_data()) would be
	 * sent -- presumably the core only calls this after
	 * stream_has_space() > 0; confirm.
	 */
	to_write = min_t(ssize_t, len, max_writable);
	to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);

	ret = memcpy_from_msg(send_buf->data, msg, to_write);
	if (ret < 0)
		goto out;

	ret = hvs_send_data(hvs->chan, send_buf, to_write);
	if (ret < 0)
		goto out;

	ret = to_write;
out:
	kfree(send_buf);
	return ret;
}
  504. static s64 hvs_stream_has_data(struct vsock_sock *vsk)
  505. {
  506. struct hvsock *hvs = vsk->trans;
  507. s64 ret;
  508. if (hvs->recv_data_len > 0)
  509. return 1;
  510. switch (hvs_channel_readable_payload(hvs->chan)) {
  511. case 1:
  512. ret = 1;
  513. break;
  514. case 0:
  515. vsk->peer_shutdown |= SEND_SHUTDOWN;
  516. ret = 0;
  517. break;
  518. default: /* -1 */
  519. ret = 0;
  520. break;
  521. }
  522. return ret;
  523. }
/* Writable payload bytes, with the pending-send-size handshake: when the
 * ring is full we publish a pending send size so the host signals us once
 * space frees up, then re-check to close the race with a concurrent
 * host-side read (see hvs_channel_cb()).
 */
static s64 hvs_stream_has_space(struct vsock_sock *vsk)
{
	struct hvsock *hvs = vsk->trans;
	struct vmbus_channel *chan = hvs->chan;
	s64 ret;

	ret = hvs_channel_writable_bytes(chan);
	if (ret > 0)  {
		hvs_clear_channel_pending_send_size(chan);
	} else {
		/* See hvs_channel_cb() */
		hvs_set_channel_pending_send_size(chan);

		/* Re-check the writable bytes to avoid race */
		ret = hvs_channel_writable_bytes(chan);
		if (ret > 0)
			hvs_clear_channel_pending_send_size(chan);
	}

	return ret;
}
  542. static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
  543. {
  544. return HVS_MTU_SIZE + 1;
  545. }
  546. static bool hvs_stream_is_active(struct vsock_sock *vsk)
  547. {
  548. struct hvsock *hvs = vsk->trans;
  549. return hvs->chan != NULL;
  550. }
  551. static bool hvs_stream_allow(u32 cid, u32 port)
  552. {
  553. /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
  554. * reserved as ephemeral ports, which are used as the host's ports
  555. * when the host initiates connections.
  556. *
  557. * Perform this check in the guest so an immediate error is produced
  558. * instead of a timeout.
  559. */
  560. if (port > MAX_HOST_LISTEN_PORT)
  561. return false;
  562. if (cid == VMADDR_CID_HOST)
  563. return true;
  564. return false;
  565. }
  566. static
  567. int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
  568. {
  569. struct hvsock *hvs = vsk->trans;
  570. *readable = hvs_channel_readable(hvs->chan);
  571. return 0;
  572. }
  573. static
  574. int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
  575. {
  576. *writable = hvs_stream_has_space(vsk) > 0;
  577. return 0;
  578. }
/* The vsock core's recv/send notification hooks. The Hyper-V transport
 * needs no extra bookkeeping around blocking recv/send, so they all just
 * report success.
 */
static
int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
			 struct vsock_transport_recv_notify_data *d)
{
	return 0;
}

static
int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
			      struct vsock_transport_recv_notify_data *d)
{
	return 0;
}

static
int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
				struct vsock_transport_recv_notify_data *d)
{
	return 0;
}

static
int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
				 ssize_t copied, bool data_read,
				 struct vsock_transport_recv_notify_data *d)
{
	return 0;
}

static
int hvs_notify_send_init(struct vsock_sock *vsk,
			 struct vsock_transport_send_notify_data *d)
{
	return 0;
}

static
int hvs_notify_send_pre_block(struct vsock_sock *vsk,
			      struct vsock_transport_send_notify_data *d)
{
	return 0;
}

static
int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
				struct vsock_transport_send_notify_data *d)
{
	return 0;
}

static
int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
				 struct vsock_transport_send_notify_data *d)
{
	return 0;
}
/* Buffer sizes are dictated by the host-defined ring sizes, so the
 * setters accept and ignore the requested value.
 */
static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Ignored. */
}

static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Ignored. */
}

static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Ignored. */
}

/* NOTE(review): -ENOPROTOOPT funneled through a u64 return reaches the
 * caller as a huge positive value -- confirm the vsock core treats this
 * as "option unsupported" rather than a real size.
 */
static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
{
	return -ENOPROTOOPT;
}

static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
{
	return -ENOPROTOOPT;
}

static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
{
	return -ENOPROTOOPT;
}
/* Transport ops handed to the AF_VSOCK core. Stream-only: all dgram ops
 * reject, the notify hooks are no-ops and the buffer-size knobs are
 * ignored (see the stubs above).
 */
static struct vsock_transport hvs_transport = {
	.get_local_cid            = hvs_get_local_cid,

	.init                     = hvs_sock_init,
	.destruct                 = hvs_destruct,
	.release                  = hvs_release,
	.connect                  = hvs_connect,
	.shutdown                 = hvs_shutdown,

	.dgram_bind               = hvs_dgram_bind,
	.dgram_dequeue            = hvs_dgram_dequeue,
	.dgram_enqueue            = hvs_dgram_enqueue,
	.dgram_allow              = hvs_dgram_allow,

	.stream_dequeue           = hvs_stream_dequeue,
	.stream_enqueue           = hvs_stream_enqueue,
	.stream_has_data          = hvs_stream_has_data,
	.stream_has_space         = hvs_stream_has_space,
	.stream_rcvhiwat          = hvs_stream_rcvhiwat,
	.stream_is_active         = hvs_stream_is_active,
	.stream_allow             = hvs_stream_allow,

	.notify_poll_in           = hvs_notify_poll_in,
	.notify_poll_out          = hvs_notify_poll_out,
	.notify_recv_init         = hvs_notify_recv_init,
	.notify_recv_pre_block    = hvs_notify_recv_pre_block,
	.notify_recv_pre_dequeue  = hvs_notify_recv_pre_dequeue,
	.notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
	.notify_send_init         = hvs_notify_send_init,
	.notify_send_pre_block    = hvs_notify_send_pre_block,
	.notify_send_pre_enqueue  = hvs_notify_send_pre_enqueue,
	.notify_send_post_enqueue = hvs_notify_send_post_enqueue,

	.set_buffer_size          = hvs_set_buffer_size,
	.set_min_buffer_size      = hvs_set_min_buffer_size,
	.set_max_buffer_size      = hvs_set_max_buffer_size,
	.get_buffer_size          = hvs_get_buffer_size,
	.get_min_buffer_size      = hvs_get_min_buffer_size,
	.get_max_buffer_size      = hvs_get_max_buffer_size,
};
/* VMBus probe: each offered channel represents one hv_sock connection. */
static int hvs_probe(struct hv_device *hdev,
		     const struct hv_vmbus_device_id *dev_id)
{
	struct vmbus_channel *chan = hdev->channel;

	hvs_open_connection(chan);

	/* Always return success to suppress the unnecessary error message
	 * in vmbus_probe(): on error the host will rescind the device in
	 * 30 seconds and we can do cleanup at that time in
	 * vmbus_onoffer_rescind().
	 */
	return 0;
}
  699. static int hvs_remove(struct hv_device *hdev)
  700. {
  701. struct vmbus_channel *chan = hdev->channel;
  702. vmbus_close(chan);
  703. return 0;
  704. }
/* This isn't really used. See vmbus_match() and vmbus_probe() */
static const struct hv_vmbus_device_id id_table[] = {
	{},
};

/* hvsock = true routes hv_sock channel offers to this driver. */
static struct hv_driver hvs_drv = {
	.name		= "hv_sock",
	.hvsock		= true,
	.id_table	= id_table,
	.probe		= hvs_probe,
	.remove		= hvs_remove,
};
  716. static int __init hvs_init(void)
  717. {
  718. int ret;
  719. if (vmbus_proto_version < VERSION_WIN10)
  720. return -ENODEV;
  721. ret = vmbus_driver_register(&hvs_drv);
  722. if (ret != 0)
  723. return ret;
  724. ret = vsock_core_init(&hvs_transport);
  725. if (ret) {
  726. vmbus_driver_unregister(&hvs_drv);
  727. return ret;
  728. }
  729. return 0;
  730. }
  731. static void __exit hvs_exit(void)
  732. {
  733. vsock_core_exit();
  734. vmbus_driver_unregister(&hvs_drv);
  735. }
module_init(hvs_init);
module_exit(hvs_exit);

MODULE_DESCRIPTION("Hyper-V Sockets");
MODULE_VERSION("1.0.0");
MODULE_LICENSE("GPL");
/* Alias for the vsock protocol family (see MODULE_ALIAS_NETPROTO) */
MODULE_ALIAS_NETPROTO(PF_VSOCK);