mpssd.c 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728
  1. /*
  2. * Intel MIC Platform Software Stack (MPSS)
  3. *
  4. * Copyright(c) 2013 Intel Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License, version 2, as
  8. * published by the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * The full GNU General Public License is included in this distribution in
  16. * the file called "COPYING".
  17. *
  18. * Intel MIC User Space Tools.
  19. */
  20. #define _GNU_SOURCE
  21. #include <stdlib.h>
  22. #include <fcntl.h>
  23. #include <getopt.h>
  24. #include <assert.h>
  25. #include <unistd.h>
  26. #include <stdbool.h>
  27. #include <signal.h>
  28. #include <poll.h>
  29. #include <features.h>
  30. #include <sys/types.h>
  31. #include <sys/stat.h>
  32. #include <sys/mman.h>
  33. #include <sys/socket.h>
  34. #include <linux/virtio_ring.h>
  35. #include <linux/virtio_net.h>
  36. #include <linux/virtio_console.h>
  37. #include <linux/virtio_blk.h>
  38. #include <linux/version.h>
  39. #include "mpssd.h"
  40. #include <linux/mic_ioctl.h>
  41. #include <linux/mic_common.h>
  42. #include <tools/endian.h>
  43. static void init_mic(struct mic_info *mic);
  44. static FILE *logfp;
  45. static struct mic_info mic_list;
  /* Number of elements in a real array (do not use on a pointer). */
  #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

  /* Minimum of x and y after converting both to 'type'; each is evaluated once. */
  #define min_t(type, x, y) ({ \
      type __min1 = (x); \
      type __min2 = (y); \
      __min1 < __min2 ? __min1 : __min2; })

  /*
   * align addr on a size boundary - adjust address up/down if needed.
   * 'size' must be a power of two. Every argument is parenthesized so that
   * expressions such as "1 << n" can be passed safely.
   */
  #define _ALIGN_DOWN(addr, size) ((addr) & (~((size) - 1)))
  #define _ALIGN_UP(addr, size) _ALIGN_DOWN((addr) + (size) - 1, size)

  /* align addr on a size boundary - adjust address up if needed */
  #define _ALIGN(addr, size) _ALIGN_UP(addr, size)

  /* to align the pointer to the (next) page boundary */
  #define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)

  /*
   * Force a single volatile access to x. __typeof__ is used instead of
   * typeof so the macro also works in strict ISO C modes.
   */
  #define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

  #define GSO_ENABLED 1
  #define MAX_GSO_SIZE (64 * 1024)
  #define ETH_H_LEN 14
  /* Largest GSO payload plus an Ethernet header, rounded up to 64 bytes. */
  #define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
  /* Offset at which the vrings start inside the mapping made by init_vr(). */
  #define MIC_DEVICE_PAGE_END 0x1000

  #ifndef VIRTIO_NET_HDR_F_DATA_VALID
  #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
  #endif
  67. static struct {
  68. struct mic_device_desc dd;
  69. struct mic_vqconfig vqconfig[2];
  70. __u32 host_features, guest_acknowledgements;
  71. struct virtio_console_config cons_config;
  72. } virtcons_dev_page = {
  73. .dd = {
  74. .type = VIRTIO_ID_CONSOLE,
  75. .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
  76. .feature_len = sizeof(virtcons_dev_page.host_features),
  77. .config_len = sizeof(virtcons_dev_page.cons_config),
  78. },
  79. .vqconfig[0] = {
  80. .num = htole16(MIC_VRING_ENTRIES),
  81. },
  82. .vqconfig[1] = {
  83. .num = htole16(MIC_VRING_ENTRIES),
  84. },
  85. };
  86. static struct {
  87. struct mic_device_desc dd;
  88. struct mic_vqconfig vqconfig[2];
  89. __u32 host_features, guest_acknowledgements;
  90. struct virtio_net_config net_config;
  91. } virtnet_dev_page = {
  92. .dd = {
  93. .type = VIRTIO_ID_NET,
  94. .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
  95. .feature_len = sizeof(virtnet_dev_page.host_features),
  96. .config_len = sizeof(virtnet_dev_page.net_config),
  97. },
  98. .vqconfig[0] = {
  99. .num = htole16(MIC_VRING_ENTRIES),
  100. },
  101. .vqconfig[1] = {
  102. .num = htole16(MIC_VRING_ENTRIES),
  103. },
  104. #if GSO_ENABLED
  105. .host_features = htole32(
  106. 1 << VIRTIO_NET_F_CSUM |
  107. 1 << VIRTIO_NET_F_GSO |
  108. 1 << VIRTIO_NET_F_GUEST_TSO4 |
  109. 1 << VIRTIO_NET_F_GUEST_TSO6 |
  110. 1 << VIRTIO_NET_F_GUEST_ECN |
  111. 1 << VIRTIO_NET_F_GUEST_UFO),
  112. #else
  113. .host_features = 0,
  114. #endif
  115. };
  116. static const char *mic_config_dir = "/etc/sysconfig/mic";
  117. static const char *virtblk_backend = "VIRTBLK_BACKEND";
  118. static struct {
  119. struct mic_device_desc dd;
  120. struct mic_vqconfig vqconfig[1];
  121. __u32 host_features, guest_acknowledgements;
  122. struct virtio_blk_config blk_config;
  123. } virtblk_dev_page = {
  124. .dd = {
  125. .type = VIRTIO_ID_BLOCK,
  126. .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
  127. .feature_len = sizeof(virtblk_dev_page.host_features),
  128. .config_len = sizeof(virtblk_dev_page.blk_config),
  129. },
  130. .vqconfig[0] = {
  131. .num = htole16(MIC_VRING_ENTRIES),
  132. },
  133. .host_features =
  134. htole32(1<<VIRTIO_BLK_F_SEG_MAX),
  135. .blk_config = {
  136. .seg_max = htole32(MIC_VRING_ENTRIES - 2),
  137. .capacity = htole64(0),
  138. }
  139. };
  140. static char *myname;
  141. static int
  142. tap_configure(struct mic_info *mic, char *dev)
  143. {
  144. pid_t pid;
  145. char *ifargv[7];
  146. char ipaddr[IFNAMSIZ];
  147. int ret = 0;
  148. pid = fork();
  149. if (pid == 0) {
  150. ifargv[0] = "ip";
  151. ifargv[1] = "link";
  152. ifargv[2] = "set";
  153. ifargv[3] = dev;
  154. ifargv[4] = "up";
  155. ifargv[5] = NULL;
  156. mpsslog("Configuring %s\n", dev);
  157. ret = execvp("ip", ifargv);
  158. if (ret < 0) {
  159. mpsslog("%s execvp failed errno %s\n",
  160. mic->name, strerror(errno));
  161. return ret;
  162. }
  163. }
  164. if (pid < 0) {
  165. mpsslog("%s fork failed errno %s\n",
  166. mic->name, strerror(errno));
  167. return ret;
  168. }
  169. ret = waitpid(pid, NULL, 0);
  170. if (ret < 0) {
  171. mpsslog("%s waitpid failed errno %s\n",
  172. mic->name, strerror(errno));
  173. return ret;
  174. }
  175. snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
  176. pid = fork();
  177. if (pid == 0) {
  178. ifargv[0] = "ip";
  179. ifargv[1] = "addr";
  180. ifargv[2] = "add";
  181. ifargv[3] = ipaddr;
  182. ifargv[4] = "dev";
  183. ifargv[5] = dev;
  184. ifargv[6] = NULL;
  185. mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
  186. ret = execvp("ip", ifargv);
  187. if (ret < 0) {
  188. mpsslog("%s execvp failed errno %s\n",
  189. mic->name, strerror(errno));
  190. return ret;
  191. }
  192. }
  193. if (pid < 0) {
  194. mpsslog("%s fork failed errno %s\n",
  195. mic->name, strerror(errno));
  196. return ret;
  197. }
  198. ret = waitpid(pid, NULL, 0);
  199. if (ret < 0) {
  200. mpsslog("%s waitpid failed errno %s\n",
  201. mic->name, strerror(errno));
  202. return ret;
  203. }
  204. mpsslog("MIC name %s %s %d DONE!\n",
  205. mic->name, __func__, __LINE__);
  206. return 0;
  207. }
  208. static int tun_alloc(struct mic_info *mic, char *dev)
  209. {
  210. struct ifreq ifr;
  211. int fd, err;
  212. #if GSO_ENABLED
  213. unsigned offload;
  214. #endif
  215. fd = open("/dev/net/tun", O_RDWR);
  216. if (fd < 0) {
  217. mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
  218. goto done;
  219. }
  220. memset(&ifr, 0, sizeof(ifr));
  221. ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
  222. if (*dev)
  223. strncpy(ifr.ifr_name, dev, IFNAMSIZ);
  224. err = ioctl(fd, TUNSETIFF, (void *)&ifr);
  225. if (err < 0) {
  226. mpsslog("%s %s %d TUNSETIFF failed %s\n",
  227. mic->name, __func__, __LINE__, strerror(errno));
  228. close(fd);
  229. return err;
  230. }
  231. #if GSO_ENABLED
  232. offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
  233. TUN_F_TSO_ECN | TUN_F_UFO;
  234. err = ioctl(fd, TUNSETOFFLOAD, offload);
  235. if (err < 0) {
  236. mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
  237. mic->name, __func__, __LINE__, strerror(errno));
  238. close(fd);
  239. return err;
  240. }
  241. #endif
  242. strcpy(dev, ifr.ifr_name);
  243. mpsslog("Created TAP %s\n", dev);
  244. done:
  245. return fd;
  246. }
  /* Slots of the pollfd array used by virtio_net(). */
  #define NET_FD_VIRTIO_NET 0
  #define NET_FD_TUN 1
  #define MAX_NET_FD 2 /* number of slots */
  250. static void set_dp(struct mic_info *mic, int type, void *dp)
  251. {
  252. switch (type) {
  253. case VIRTIO_ID_CONSOLE:
  254. mic->mic_console.console_dp = dp;
  255. return;
  256. case VIRTIO_ID_NET:
  257. mic->mic_net.net_dp = dp;
  258. return;
  259. case VIRTIO_ID_BLOCK:
  260. mic->mic_virtblk.block_dp = dp;
  261. return;
  262. }
  263. mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  264. assert(0);
  265. }
  266. static void *get_dp(struct mic_info *mic, int type)
  267. {
  268. switch (type) {
  269. case VIRTIO_ID_CONSOLE:
  270. return mic->mic_console.console_dp;
  271. case VIRTIO_ID_NET:
  272. return mic->mic_net.net_dp;
  273. case VIRTIO_ID_BLOCK:
  274. return mic->mic_virtblk.block_dp;
  275. }
  276. mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  277. assert(0);
  278. return NULL;
  279. }
  280. static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
  281. {
  282. struct mic_device_desc *d;
  283. int i;
  284. void *dp = get_dp(mic, type);
  285. for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
  286. i += mic_total_desc_size(d)) {
  287. d = dp + i;
  288. /* End of list */
  289. if (d->type == 0)
  290. break;
  291. if (d->type == -1)
  292. continue;
  293. mpsslog("%s %s d-> type %d d %p\n",
  294. mic->name, __func__, d->type, d);
  295. if (d->type == (__u8)type)
  296. return d;
  297. }
  298. mpsslog("%s %s %d not found\n", mic->name, __func__, type);
  299. assert(0);
  300. return NULL;
  301. }
  302. /* See comments in vhost.c for explanation of next_desc() */
  303. static unsigned next_desc(struct vring_desc *desc)
  304. {
  305. unsigned int next;
  306. if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
  307. return -1U;
  308. next = le16toh(desc->next);
  309. return next;
  310. }
  311. /* Sum up all the IOVEC length */
  312. static ssize_t
  313. sum_iovec_len(struct mic_copy_desc *copy)
  314. {
  315. ssize_t sum = 0;
  316. int i;
  317. for (i = 0; i < copy->iovcnt; i++)
  318. sum += copy->iov[i].iov_len;
  319. return sum;
  320. }
  321. static inline void verify_out_len(struct mic_info *mic,
  322. struct mic_copy_desc *copy)
  323. {
  324. if (copy->out_len != sum_iovec_len(copy)) {
  325. mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
  326. mic->name, __func__, __LINE__,
  327. copy->out_len, sum_iovec_len(copy));
  328. assert(copy->out_len == sum_iovec_len(copy));
  329. }
  330. }
  331. /* Display an iovec */
  332. static void
  333. disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
  334. const char *s, int line)
  335. {
  336. int i;
  337. for (i = 0; i < copy->iovcnt; i++)
  338. mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
  339. mic->name, s, line, i,
  340. copy->iov[i].iov_base, copy->iov[i].iov_len);
  341. }
  342. static inline __u16 read_avail_idx(struct mic_vring *vr)
  343. {
  344. return ACCESS_ONCE(vr->info->avail_idx);
  345. }
  346. static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
  347. struct mic_copy_desc *copy, ssize_t len)
  348. {
  349. copy->vr_idx = tx ? 0 : 1;
  350. copy->update_used = true;
  351. if (type == VIRTIO_ID_NET)
  352. copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
  353. else
  354. copy->iov[0].iov_len = len;
  355. }
  356. /* Central API which triggers the copies */
  357. static int
  358. mic_virtio_copy(struct mic_info *mic, int fd,
  359. struct mic_vring *vr, struct mic_copy_desc *copy)
  360. {
  361. int ret;
  362. ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
  363. if (ret) {
  364. mpsslog("%s %s %d errno %s ret %d\n",
  365. mic->name, __func__, __LINE__,
  366. strerror(errno), ret);
  367. }
  368. return ret;
  369. }
  370. /*
  371. * This initialization routine requires at least one
  372. * vring i.e. vr0. vr1 is optional.
  373. */
  374. static void *
  375. init_vr(struct mic_info *mic, int fd, int type,
  376. struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
  377. {
  378. int vr_size;
  379. char *va;
  380. vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
  381. MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
  382. va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
  383. PROT_READ, MAP_SHARED, fd, 0);
  384. if (MAP_FAILED == va) {
  385. mpsslog("%s %s %d mmap failed errno %s\n",
  386. mic->name, __func__, __LINE__,
  387. strerror(errno));
  388. goto done;
  389. }
  390. set_dp(mic, type, va);
  391. vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
  392. vr0->info = vr0->va +
  393. vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
  394. vring_init(&vr0->vr,
  395. MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
  396. mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
  397. __func__, mic->name, vr0->va, vr0->info, vr_size,
  398. vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
  399. mpsslog("magic 0x%x expected 0x%x\n",
  400. le32toh(vr0->info->magic), MIC_MAGIC + type);
  401. assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
  402. if (vr1) {
  403. vr1->va = (struct mic_vring *)
  404. &va[MIC_DEVICE_PAGE_END + vr_size];
  405. vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
  406. MIC_VIRTIO_RING_ALIGN);
  407. vring_init(&vr1->vr,
  408. MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
  409. mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
  410. __func__, mic->name, vr1->va, vr1->info, vr_size,
  411. vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
  412. mpsslog("magic 0x%x expected 0x%x\n",
  413. le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
  414. assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
  415. }
  416. done:
  417. return va;
  418. }
  419. static void
  420. wait_for_card_driver(struct mic_info *mic, int fd, int type)
  421. {
  422. struct pollfd pollfd;
  423. int err;
  424. struct mic_device_desc *desc = get_device_desc(mic, type);
  425. pollfd.fd = fd;
  426. mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
  427. mic->name, __func__, type, desc->status);
  428. while (1) {
  429. pollfd.events = POLLIN;
  430. pollfd.revents = 0;
  431. err = poll(&pollfd, 1, -1);
  432. if (err < 0) {
  433. mpsslog("%s %s poll failed %s\n",
  434. mic->name, __func__, strerror(errno));
  435. continue;
  436. }
  437. if (pollfd.revents) {
  438. mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
  439. mic->name, __func__, type, desc->status);
  440. if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
  441. mpsslog("%s %s poll.revents %d\n",
  442. mic->name, __func__, pollfd.revents);
  443. mpsslog("%s %s desc-> type %d status 0x%x\n",
  444. mic->name, __func__, type,
  445. desc->status);
  446. break;
  447. }
  448. }
  449. }
  450. }
  451. /* Spin till we have some descriptors */
  452. static void
  453. spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
  454. {
  455. __u16 avail_idx = read_avail_idx(vr);
  456. while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
  457. #ifdef DEBUG
  458. mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
  459. mic->name, __func__,
  460. le16toh(vr->vr.avail->idx), vr->info->avail_idx);
  461. #endif
  462. sched_yield();
  463. }
  464. }
  465. static void *
  466. virtio_net(void *arg)
  467. {
  468. static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
  469. static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
  470. struct iovec vnet_iov[2][2] = {
  471. { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
  472. { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
  473. { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
  474. { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
  475. };
  476. struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
  477. struct mic_info *mic = (struct mic_info *)arg;
  478. char if_name[IFNAMSIZ];
  479. struct pollfd net_poll[MAX_NET_FD];
  480. struct mic_vring tx_vr, rx_vr;
  481. struct mic_copy_desc copy;
  482. struct mic_device_desc *desc;
  483. int err;
  484. snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
  485. mic->mic_net.tap_fd = tun_alloc(mic, if_name);
  486. if (mic->mic_net.tap_fd < 0)
  487. goto done;
  488. if (tap_configure(mic, if_name))
  489. goto done;
  490. mpsslog("MIC name %s id %d\n", mic->name, mic->id);
  491. net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
  492. net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
  493. net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
  494. net_poll[NET_FD_TUN].events = POLLIN;
  495. if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
  496. VIRTIO_ID_NET, &tx_vr, &rx_vr,
  497. virtnet_dev_page.dd.num_vq)) {
  498. mpsslog("%s init_vr failed %s\n",
  499. mic->name, strerror(errno));
  500. goto done;
  501. }
  502. copy.iovcnt = 2;
  503. desc = get_device_desc(mic, VIRTIO_ID_NET);
  504. while (1) {
  505. ssize_t len;
  506. net_poll[NET_FD_VIRTIO_NET].revents = 0;
  507. net_poll[NET_FD_TUN].revents = 0;
  508. /* Start polling for data from tap and virtio net */
  509. err = poll(net_poll, 2, -1);
  510. if (err < 0) {
  511. mpsslog("%s poll failed %s\n",
  512. __func__, strerror(errno));
  513. continue;
  514. }
  515. if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
  516. wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
  517. VIRTIO_ID_NET);
  518. /*
  519. * Check if there is data to be read from TUN and write to
  520. * virtio net fd if there is.
  521. */
  522. if (net_poll[NET_FD_TUN].revents & POLLIN) {
  523. copy.iov = iov0;
  524. len = readv(net_poll[NET_FD_TUN].fd,
  525. copy.iov, copy.iovcnt);
  526. if (len > 0) {
  527. struct virtio_net_hdr *hdr
  528. = (struct virtio_net_hdr *)vnet_hdr[0];
  529. /* Disable checksums on the card since we are on
  530. a reliable PCIe link */
  531. hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
  532. #ifdef DEBUG
  533. mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
  534. __func__, __LINE__, hdr->flags);
  535. mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
  536. copy.out_len, hdr->gso_type);
  537. #endif
  538. #ifdef DEBUG
  539. disp_iovec(mic, copy, __func__, __LINE__);
  540. mpsslog("%s %s %d read from tap 0x%lx\n",
  541. mic->name, __func__, __LINE__,
  542. len);
  543. #endif
  544. spin_for_descriptors(mic, &tx_vr);
  545. txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
  546. len);
  547. err = mic_virtio_copy(mic,
  548. mic->mic_net.virtio_net_fd, &tx_vr,
  549. &copy);
  550. if (err < 0) {
  551. mpsslog("%s %s %d mic_virtio_copy %s\n",
  552. mic->name, __func__, __LINE__,
  553. strerror(errno));
  554. }
  555. if (!err)
  556. verify_out_len(mic, &copy);
  557. #ifdef DEBUG
  558. disp_iovec(mic, copy, __func__, __LINE__);
  559. mpsslog("%s %s %d wrote to net 0x%lx\n",
  560. mic->name, __func__, __LINE__,
  561. sum_iovec_len(&copy));
  562. #endif
  563. /* Reinitialize IOV for next run */
  564. iov0[1].iov_len = MAX_NET_PKT_SIZE;
  565. } else if (len < 0) {
  566. disp_iovec(mic, &copy, __func__, __LINE__);
  567. mpsslog("%s %s %d read failed %s ", mic->name,
  568. __func__, __LINE__, strerror(errno));
  569. mpsslog("cnt %d sum %zd\n",
  570. copy.iovcnt, sum_iovec_len(&copy));
  571. }
  572. }
  573. /*
  574. * Check if there is data to be read from virtio net and
  575. * write to TUN if there is.
  576. */
  577. if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
  578. while (rx_vr.info->avail_idx !=
  579. le16toh(rx_vr.vr.avail->idx)) {
  580. copy.iov = iov1;
  581. txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
  582. MAX_NET_PKT_SIZE
  583. + sizeof(struct virtio_net_hdr));
  584. err = mic_virtio_copy(mic,
  585. mic->mic_net.virtio_net_fd, &rx_vr,
  586. &copy);
  587. if (!err) {
  588. #ifdef DEBUG
  589. struct virtio_net_hdr *hdr
  590. = (struct virtio_net_hdr *)
  591. vnet_hdr[1];
  592. mpsslog("%s %s %d hdr->flags 0x%x, ",
  593. mic->name, __func__, __LINE__,
  594. hdr->flags);
  595. mpsslog("out_len %d gso_type 0x%x\n",
  596. copy.out_len,
  597. hdr->gso_type);
  598. #endif
  599. /* Set the correct output iov_len */
  600. iov1[1].iov_len = copy.out_len -
  601. sizeof(struct virtio_net_hdr);
  602. verify_out_len(mic, &copy);
  603. #ifdef DEBUG
  604. disp_iovec(mic, copy, __func__,
  605. __LINE__);
  606. mpsslog("%s %s %d ",
  607. mic->name, __func__, __LINE__);
  608. mpsslog("read from net 0x%lx\n",
  609. sum_iovec_len(copy));
  610. #endif
  611. len = writev(net_poll[NET_FD_TUN].fd,
  612. copy.iov, copy.iovcnt);
  613. if (len != sum_iovec_len(&copy)) {
  614. mpsslog("Tun write failed %s ",
  615. strerror(errno));
  616. mpsslog("len 0x%zx ", len);
  617. mpsslog("read_len 0x%zx\n",
  618. sum_iovec_len(&copy));
  619. } else {
  620. #ifdef DEBUG
  621. disp_iovec(mic, &copy, __func__,
  622. __LINE__);
  623. mpsslog("%s %s %d ",
  624. mic->name, __func__,
  625. __LINE__);
  626. mpsslog("wrote to tap 0x%lx\n",
  627. len);
  628. #endif
  629. }
  630. } else {
  631. mpsslog("%s %s %d mic_virtio_copy %s\n",
  632. mic->name, __func__, __LINE__,
  633. strerror(errno));
  634. break;
  635. }
  636. }
  637. }
  638. if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
  639. mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
  640. }
  641. done:
  642. pthread_exit(NULL);
  643. }
  644. /* virtio_console */
  645. #define VIRTIO_CONSOLE_FD 0
  646. #define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
  647. #define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
  648. #define MAX_BUFFER_SIZE PAGE_SIZE
  649. static void *
  650. virtio_console(void *arg)
  651. {
  652. static __u8 vcons_buf[2][PAGE_SIZE];
  653. struct iovec vcons_iov[2] = {
  654. { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
  655. { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
  656. };
  657. struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
  658. struct mic_info *mic = (struct mic_info *)arg;
  659. int err;
  660. struct pollfd console_poll[MAX_CONSOLE_FD];
  661. int pty_fd;
  662. char *pts_name;
  663. ssize_t len;
  664. struct mic_vring tx_vr, rx_vr;
  665. struct mic_copy_desc copy;
  666. struct mic_device_desc *desc;
  667. pty_fd = posix_openpt(O_RDWR);
  668. if (pty_fd < 0) {
  669. mpsslog("can't open a pseudoterminal master device: %s\n",
  670. strerror(errno));
  671. goto _return;
  672. }
  673. pts_name = ptsname(pty_fd);
  674. if (pts_name == NULL) {
  675. mpsslog("can't get pts name\n");
  676. goto _close_pty;
  677. }
  678. printf("%s console message goes to %s\n", mic->name, pts_name);
  679. mpsslog("%s console message goes to %s\n", mic->name, pts_name);
  680. err = grantpt(pty_fd);
  681. if (err < 0) {
  682. mpsslog("can't grant access: %s %s\n",
  683. pts_name, strerror(errno));
  684. goto _close_pty;
  685. }
  686. err = unlockpt(pty_fd);
  687. if (err < 0) {
  688. mpsslog("can't unlock a pseudoterminal: %s %s\n",
  689. pts_name, strerror(errno));
  690. goto _close_pty;
  691. }
  692. console_poll[MONITOR_FD].fd = pty_fd;
  693. console_poll[MONITOR_FD].events = POLLIN;
  694. console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
  695. console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
  696. if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
  697. VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
  698. virtcons_dev_page.dd.num_vq)) {
  699. mpsslog("%s init_vr failed %s\n",
  700. mic->name, strerror(errno));
  701. goto _close_pty;
  702. }
  703. copy.iovcnt = 1;
  704. desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
  705. for (;;) {
  706. console_poll[MONITOR_FD].revents = 0;
  707. console_poll[VIRTIO_CONSOLE_FD].revents = 0;
  708. err = poll(console_poll, MAX_CONSOLE_FD, -1);
  709. if (err < 0) {
  710. mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
  711. strerror(errno));
  712. continue;
  713. }
  714. if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
  715. wait_for_card_driver(mic,
  716. mic->mic_console.virtio_console_fd,
  717. VIRTIO_ID_CONSOLE);
  718. if (console_poll[MONITOR_FD].revents & POLLIN) {
  719. copy.iov = iov0;
  720. len = readv(pty_fd, copy.iov, copy.iovcnt);
  721. if (len > 0) {
  722. #ifdef DEBUG
  723. disp_iovec(mic, copy, __func__, __LINE__);
  724. mpsslog("%s %s %d read from tap 0x%lx\n",
  725. mic->name, __func__, __LINE__,
  726. len);
  727. #endif
  728. spin_for_descriptors(mic, &tx_vr);
  729. txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
  730. &copy, len);
  731. err = mic_virtio_copy(mic,
  732. mic->mic_console.virtio_console_fd,
  733. &tx_vr, &copy);
  734. if (err < 0) {
  735. mpsslog("%s %s %d mic_virtio_copy %s\n",
  736. mic->name, __func__, __LINE__,
  737. strerror(errno));
  738. }
  739. if (!err)
  740. verify_out_len(mic, &copy);
  741. #ifdef DEBUG
  742. disp_iovec(mic, copy, __func__, __LINE__);
  743. mpsslog("%s %s %d wrote to net 0x%lx\n",
  744. mic->name, __func__, __LINE__,
  745. sum_iovec_len(copy));
  746. #endif
  747. /* Reinitialize IOV for next run */
  748. iov0->iov_len = PAGE_SIZE;
  749. } else if (len < 0) {
  750. disp_iovec(mic, &copy, __func__, __LINE__);
  751. mpsslog("%s %s %d read failed %s ",
  752. mic->name, __func__, __LINE__,
  753. strerror(errno));
  754. mpsslog("cnt %d sum %zd\n",
  755. copy.iovcnt, sum_iovec_len(&copy));
  756. }
  757. }
  758. if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
  759. while (rx_vr.info->avail_idx !=
  760. le16toh(rx_vr.vr.avail->idx)) {
  761. copy.iov = iov1;
  762. txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
  763. &copy, PAGE_SIZE);
  764. err = mic_virtio_copy(mic,
  765. mic->mic_console.virtio_console_fd,
  766. &rx_vr, &copy);
  767. if (!err) {
  768. /* Set the correct output iov_len */
  769. iov1->iov_len = copy.out_len;
  770. verify_out_len(mic, &copy);
  771. #ifdef DEBUG
  772. disp_iovec(mic, copy, __func__,
  773. __LINE__);
  774. mpsslog("%s %s %d ",
  775. mic->name, __func__, __LINE__);
  776. mpsslog("read from net 0x%lx\n",
  777. sum_iovec_len(copy));
  778. #endif
  779. len = writev(pty_fd,
  780. copy.iov, copy.iovcnt);
  781. if (len != sum_iovec_len(&copy)) {
  782. mpsslog("Tun write failed %s ",
  783. strerror(errno));
  784. mpsslog("len 0x%zx ", len);
  785. mpsslog("read_len 0x%zx\n",
  786. sum_iovec_len(&copy));
  787. } else {
  788. #ifdef DEBUG
  789. disp_iovec(mic, copy, __func__,
  790. __LINE__);
  791. mpsslog("%s %s %d ",
  792. mic->name, __func__,
  793. __LINE__);
  794. mpsslog("wrote to tap 0x%lx\n",
  795. len);
  796. #endif
  797. }
  798. } else {
  799. mpsslog("%s %s %d mic_virtio_copy %s\n",
  800. mic->name, __func__, __LINE__,
  801. strerror(errno));
  802. break;
  803. }
  804. }
  805. }
  806. if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
  807. mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
  808. }
  809. _close_pty:
  810. close(pty_fd);
  811. _return:
  812. pthread_exit(NULL);
  813. }
  814. static void
  815. add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
  816. {
  817. char path[PATH_MAX];
  818. int fd, err;
  819. snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
  820. fd = open(path, O_RDWR);
  821. if (fd < 0) {
  822. mpsslog("Could not open %s %s\n", path, strerror(errno));
  823. return;
  824. }
  825. err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
  826. if (err < 0) {
  827. mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
  828. close(fd);
  829. return;
  830. }
  831. switch (dd->type) {
  832. case VIRTIO_ID_NET:
  833. mic->mic_net.virtio_net_fd = fd;
  834. mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
  835. break;
  836. case VIRTIO_ID_CONSOLE:
  837. mic->mic_console.virtio_console_fd = fd;
  838. mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
  839. break;
  840. case VIRTIO_ID_BLOCK:
  841. mic->mic_virtblk.virtio_block_fd = fd;
  842. mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
  843. break;
  844. }
  845. }
  846. static bool
  847. set_backend_file(struct mic_info *mic)
  848. {
  849. FILE *config;
  850. char buff[PATH_MAX], *line, *evv, *p;
  851. snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
  852. config = fopen(buff, "r");
  853. if (config == NULL)
  854. return false;
  855. do { /* look for "virtblk_backend=XXXX" */
  856. line = fgets(buff, PATH_MAX, config);
  857. if (line == NULL)
  858. break;
  859. if (*line == '#')
  860. continue;
  861. p = strchr(line, '\n');
  862. if (p)
  863. *p = '\0';
  864. } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
  865. fclose(config);
  866. if (line == NULL)
  867. return false;
  868. evv = strchr(line, '=');
  869. if (evv == NULL)
  870. return false;
  871. mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
  872. if (mic->mic_virtblk.backend_file == NULL) {
  873. mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
  874. return false;
  875. }
  876. strcpy(mic->mic_virtblk.backend_file, evv + 1);
  877. return true;
  878. }
  879. #define SECTOR_SIZE 512
  880. static bool
  881. set_backend_size(struct mic_info *mic)
  882. {
  883. mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
  884. SEEK_END);
  885. if (mic->mic_virtblk.backend_size < 0) {
  886. mpsslog("%s: can't seek: %s\n",
  887. mic->name, mic->mic_virtblk.backend_file);
  888. return false;
  889. }
  890. virtblk_dev_page.blk_config.capacity =
  891. mic->mic_virtblk.backend_size / SECTOR_SIZE;
  892. if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
  893. virtblk_dev_page.blk_config.capacity++;
  894. virtblk_dev_page.blk_config.capacity =
  895. htole64(virtblk_dev_page.blk_config.capacity);
  896. return true;
  897. }
  898. static bool
  899. open_backend(struct mic_info *mic)
  900. {
  901. if (!set_backend_file(mic))
  902. goto _error_exit;
  903. mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
  904. if (mic->mic_virtblk.backend < 0) {
  905. mpsslog("%s: can't open: %s\n", mic->name,
  906. mic->mic_virtblk.backend_file);
  907. goto _error_free;
  908. }
  909. if (!set_backend_size(mic))
  910. goto _error_close;
  911. mic->mic_virtblk.backend_addr = mmap(NULL,
  912. mic->mic_virtblk.backend_size,
  913. PROT_READ|PROT_WRITE, MAP_SHARED,
  914. mic->mic_virtblk.backend, 0L);
  915. if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
  916. mpsslog("%s: can't map: %s %s\n",
  917. mic->name, mic->mic_virtblk.backend_file,
  918. strerror(errno));
  919. goto _error_close;
  920. }
  921. return true;
  922. _error_close:
  923. close(mic->mic_virtblk.backend);
  924. _error_free:
  925. free(mic->mic_virtblk.backend_file);
  926. _error_exit:
  927. return false;
  928. }
  929. static void
  930. close_backend(struct mic_info *mic)
  931. {
  932. munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
  933. close(mic->mic_virtblk.backend);
  934. free(mic->mic_virtblk.backend_file);
  935. }
  936. static bool
  937. start_virtblk(struct mic_info *mic, struct mic_vring *vring)
  938. {
  939. if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
  940. mpsslog("%s: blk_config is not 8 byte aligned.\n",
  941. mic->name);
  942. return false;
  943. }
  944. add_virtio_device(mic, &virtblk_dev_page.dd);
  945. if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
  946. VIRTIO_ID_BLOCK, vring, NULL,
  947. virtblk_dev_page.dd.num_vq)) {
  948. mpsslog("%s init_vr failed %s\n",
  949. mic->name, strerror(errno));
  950. return false;
  951. }
  952. return true;
  953. }
  954. static void
  955. stop_virtblk(struct mic_info *mic)
  956. {
  957. int vr_size, ret;
  958. vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
  959. MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
  960. ret = munmap(mic->mic_virtblk.block_dp,
  961. MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
  962. if (ret < 0)
  963. mpsslog("%s munmap errno %d\n", mic->name, errno);
  964. close(mic->mic_virtblk.virtio_block_fd);
  965. }
  966. static __u8
  967. header_error_check(struct vring_desc *desc)
  968. {
  969. if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
  970. mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
  971. __func__, __LINE__);
  972. return -EIO;
  973. }
  974. if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
  975. mpsslog("%s() %d: alone\n",
  976. __func__, __LINE__);
  977. return -EIO;
  978. }
  979. if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
  980. mpsslog("%s() %d: not read\n",
  981. __func__, __LINE__);
  982. return -EIO;
  983. }
  984. return 0;
  985. }
  986. static int
  987. read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
  988. {
  989. struct iovec iovec;
  990. struct mic_copy_desc copy;
  991. iovec.iov_len = sizeof(*hdr);
  992. iovec.iov_base = hdr;
  993. copy.iov = &iovec;
  994. copy.iovcnt = 1;
  995. copy.vr_idx = 0; /* only one vring on virtio_block */
  996. copy.update_used = false; /* do not update used index */
  997. return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  998. }
  999. static int
  1000. transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
  1001. {
  1002. struct mic_copy_desc copy;
  1003. copy.iov = iovec;
  1004. copy.iovcnt = iovcnt;
  1005. copy.vr_idx = 0; /* only one vring on virtio_block */
  1006. copy.update_used = false; /* do not update used index */
  1007. return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  1008. }
  1009. static __u8
  1010. status_error_check(struct vring_desc *desc)
  1011. {
  1012. if (le32toh(desc->len) != sizeof(__u8)) {
  1013. mpsslog("%s() %d: length is not sizeof(status)\n",
  1014. __func__, __LINE__);
  1015. return -EIO;
  1016. }
  1017. return 0;
  1018. }
  1019. static int
  1020. write_status(int fd, __u8 *status)
  1021. {
  1022. struct iovec iovec;
  1023. struct mic_copy_desc copy;
  1024. iovec.iov_base = status;
  1025. iovec.iov_len = sizeof(*status);
  1026. copy.iov = &iovec;
  1027. copy.iovcnt = 1;
  1028. copy.vr_idx = 0; /* only one vring on virtio_block */
  1029. copy.update_used = true; /* Update used index */
  1030. return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
  1031. }
  1032. static void *
  1033. virtio_block(void *arg)
  1034. {
  1035. struct mic_info *mic = (struct mic_info *)arg;
  1036. int ret;
  1037. struct pollfd block_poll;
  1038. struct mic_vring vring;
  1039. __u16 avail_idx;
  1040. __u32 desc_idx;
  1041. struct vring_desc *desc;
  1042. struct iovec *iovec, *piov;
  1043. __u8 status;
  1044. __u32 buffer_desc_idx;
  1045. struct virtio_blk_outhdr hdr;
  1046. void *fos;
  1047. for (;;) { /* forever */
  1048. if (!open_backend(mic)) { /* No virtblk */
  1049. for (mic->mic_virtblk.signaled = 0;
  1050. !mic->mic_virtblk.signaled;)
  1051. sleep(1);
  1052. continue;
  1053. }
  1054. /* backend file is specified. */
  1055. if (!start_virtblk(mic, &vring))
  1056. goto _close_backend;
  1057. iovec = malloc(sizeof(*iovec) *
  1058. le32toh(virtblk_dev_page.blk_config.seg_max));
  1059. if (!iovec) {
  1060. mpsslog("%s: can't alloc iovec: %s\n",
  1061. mic->name, strerror(ENOMEM));
  1062. goto _stop_virtblk;
  1063. }
  1064. block_poll.fd = mic->mic_virtblk.virtio_block_fd;
  1065. block_poll.events = POLLIN;
  1066. for (mic->mic_virtblk.signaled = 0;
  1067. !mic->mic_virtblk.signaled;) {
  1068. block_poll.revents = 0;
  1069. /* timeout in 1 sec to see signaled */
  1070. ret = poll(&block_poll, 1, 1000);
  1071. if (ret < 0) {
  1072. mpsslog("%s %d: poll failed: %s\n",
  1073. __func__, __LINE__,
  1074. strerror(errno));
  1075. continue;
  1076. }
  1077. if (!(block_poll.revents & POLLIN)) {
  1078. #ifdef DEBUG
  1079. mpsslog("%s %d: block_poll.revents=0x%x\n",
  1080. __func__, __LINE__, block_poll.revents);
  1081. #endif
  1082. continue;
  1083. }
  1084. /* POLLIN */
  1085. while (vring.info->avail_idx !=
  1086. le16toh(vring.vr.avail->idx)) {
  1087. /* read header element */
  1088. avail_idx =
  1089. vring.info->avail_idx &
  1090. (vring.vr.num - 1);
  1091. desc_idx = le16toh(
  1092. vring.vr.avail->ring[avail_idx]);
  1093. desc = &vring.vr.desc[desc_idx];
  1094. #ifdef DEBUG
  1095. mpsslog("%s() %d: avail_idx=%d ",
  1096. __func__, __LINE__,
  1097. vring.info->avail_idx);
  1098. mpsslog("vring.vr.num=%d desc=%p\n",
  1099. vring.vr.num, desc);
  1100. #endif
  1101. status = header_error_check(desc);
  1102. ret = read_header(
  1103. mic->mic_virtblk.virtio_block_fd,
  1104. &hdr, desc_idx);
  1105. if (ret < 0) {
  1106. mpsslog("%s() %d %s: ret=%d %s\n",
  1107. __func__, __LINE__,
  1108. mic->name, ret,
  1109. strerror(errno));
  1110. break;
  1111. }
  1112. /* buffer element */
  1113. piov = iovec;
  1114. status = 0;
  1115. fos = mic->mic_virtblk.backend_addr +
  1116. (hdr.sector * SECTOR_SIZE);
  1117. buffer_desc_idx = next_desc(desc);
  1118. desc_idx = buffer_desc_idx;
  1119. for (desc = &vring.vr.desc[buffer_desc_idx];
  1120. desc->flags & VRING_DESC_F_NEXT;
  1121. desc_idx = next_desc(desc),
  1122. desc = &vring.vr.desc[desc_idx]) {
  1123. piov->iov_len = desc->len;
  1124. piov->iov_base = fos;
  1125. piov++;
  1126. fos += desc->len;
  1127. }
  1128. /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
  1129. if (hdr.type & ~(VIRTIO_BLK_T_OUT |
  1130. VIRTIO_BLK_T_GET_ID)) {
  1131. /*
  1132. VIRTIO_BLK_T_IN - does not do
  1133. anything. Probably for documenting.
  1134. VIRTIO_BLK_T_SCSI_CMD - for
  1135. virtio_scsi.
  1136. VIRTIO_BLK_T_FLUSH - turned off in
  1137. config space.
  1138. VIRTIO_BLK_T_BARRIER - defined but not
  1139. used in anywhere.
  1140. */
  1141. mpsslog("%s() %d: type %x ",
  1142. __func__, __LINE__,
  1143. hdr.type);
  1144. mpsslog("is not supported\n");
  1145. status = -ENOTSUP;
  1146. } else {
  1147. ret = transfer_blocks(
  1148. mic->mic_virtblk.virtio_block_fd,
  1149. iovec,
  1150. piov - iovec);
  1151. if (ret < 0 &&
  1152. status != 0)
  1153. status = ret;
  1154. }
  1155. /* write status and update used pointer */
  1156. if (status != 0)
  1157. status = status_error_check(desc);
  1158. ret = write_status(
  1159. mic->mic_virtblk.virtio_block_fd,
  1160. &status);
  1161. #ifdef DEBUG
  1162. mpsslog("%s() %d: write status=%d on desc=%p\n",
  1163. __func__, __LINE__,
  1164. status, desc);
  1165. #endif
  1166. }
  1167. }
  1168. free(iovec);
  1169. _stop_virtblk:
  1170. stop_virtblk(mic);
  1171. _close_backend:
  1172. close_backend(mic);
  1173. } /* forever */
  1174. pthread_exit(NULL);
  1175. }
  1176. static void
  1177. reset(struct mic_info *mic)
  1178. {
  1179. #define RESET_TIMEOUT 120
  1180. int i = RESET_TIMEOUT;
  1181. setsysfs(mic->name, "state", "reset");
  1182. while (i) {
  1183. char *state;
  1184. state = readsysfs(mic->name, "state");
  1185. if (!state)
  1186. goto retry;
  1187. mpsslog("%s: %s %d state %s\n",
  1188. mic->name, __func__, __LINE__, state);
  1189. /*
  1190. * If the shutdown was initiated by OSPM, the state stays
  1191. * in "suspended" which is also a valid condition for reset.
  1192. */
  1193. if ((!strcmp(state, "offline")) ||
  1194. (!strcmp(state, "suspended"))) {
  1195. free(state);
  1196. break;
  1197. }
  1198. free(state);
  1199. retry:
  1200. sleep(1);
  1201. i--;
  1202. }
  1203. }
  1204. static int
  1205. get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
  1206. {
  1207. if (!strcmp(shutdown_status, "nop"))
  1208. return MIC_NOP;
  1209. if (!strcmp(shutdown_status, "crashed"))
  1210. return MIC_CRASHED;
  1211. if (!strcmp(shutdown_status, "halted"))
  1212. return MIC_HALTED;
  1213. if (!strcmp(shutdown_status, "poweroff"))
  1214. return MIC_POWER_OFF;
  1215. if (!strcmp(shutdown_status, "restart"))
  1216. return MIC_RESTART;
  1217. mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
  1218. /* Invalid state */
  1219. assert(0);
  1220. };
  1221. static int get_mic_state(struct mic_info *mic, char *state)
  1222. {
  1223. if (!strcmp(state, "offline"))
  1224. return MIC_OFFLINE;
  1225. if (!strcmp(state, "online"))
  1226. return MIC_ONLINE;
  1227. if (!strcmp(state, "shutting_down"))
  1228. return MIC_SHUTTING_DOWN;
  1229. if (!strcmp(state, "reset_failed"))
  1230. return MIC_RESET_FAILED;
  1231. if (!strcmp(state, "suspending"))
  1232. return MIC_SUSPENDING;
  1233. if (!strcmp(state, "suspended"))
  1234. return MIC_SUSPENDED;
  1235. mpsslog("%s: BUG invalid state %s\n", mic->name, state);
  1236. /* Invalid state */
  1237. assert(0);
  1238. };
  1239. static void mic_handle_shutdown(struct mic_info *mic)
  1240. {
  1241. #define SHUTDOWN_TIMEOUT 60
  1242. int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
  1243. char *shutdown_status;
  1244. while (i) {
  1245. shutdown_status = readsysfs(mic->name, "shutdown_status");
  1246. if (!shutdown_status)
  1247. continue;
  1248. mpsslog("%s: %s %d shutdown_status %s\n",
  1249. mic->name, __func__, __LINE__, shutdown_status);
  1250. switch (get_mic_shutdown_status(mic, shutdown_status)) {
  1251. case MIC_RESTART:
  1252. mic->restart = 1;
  1253. case MIC_HALTED:
  1254. case MIC_POWER_OFF:
  1255. case MIC_CRASHED:
  1256. free(shutdown_status);
  1257. goto reset;
  1258. default:
  1259. break;
  1260. }
  1261. free(shutdown_status);
  1262. sleep(1);
  1263. i--;
  1264. }
  1265. reset:
  1266. ret = kill(mic->pid, SIGTERM);
  1267. mpsslog("%s: %s %d kill pid %d ret %d\n",
  1268. mic->name, __func__, __LINE__,
  1269. mic->pid, ret);
  1270. if (!ret) {
  1271. ret = waitpid(mic->pid, &stat,
  1272. WIFSIGNALED(stat));
  1273. mpsslog("%s: %s %d waitpid ret %d pid %d\n",
  1274. mic->name, __func__, __LINE__,
  1275. ret, mic->pid);
  1276. }
  1277. if (ret == mic->pid)
  1278. reset(mic);
  1279. }
  1280. static void *
  1281. mic_config(void *arg)
  1282. {
  1283. struct mic_info *mic = (struct mic_info *)arg;
  1284. char *state = NULL;
  1285. char pathname[PATH_MAX];
  1286. int fd, ret;
  1287. struct pollfd ufds[1];
  1288. char value[4096];
  1289. snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
  1290. MICSYSFSDIR, mic->name, "state");
  1291. fd = open(pathname, O_RDONLY);
  1292. if (fd < 0) {
  1293. mpsslog("%s: opening file %s failed %s\n",
  1294. mic->name, pathname, strerror(errno));
  1295. goto error;
  1296. }
  1297. do {
  1298. ret = lseek(fd, 0, SEEK_SET);
  1299. if (ret < 0) {
  1300. mpsslog("%s: Failed to seek to file start '%s': %s\n",
  1301. mic->name, pathname, strerror(errno));
  1302. goto close_error1;
  1303. }
  1304. ret = read(fd, value, sizeof(value));
  1305. if (ret < 0) {
  1306. mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
  1307. mic->name, pathname, strerror(errno));
  1308. goto close_error1;
  1309. }
  1310. retry:
  1311. state = readsysfs(mic->name, "state");
  1312. if (!state)
  1313. goto retry;
  1314. mpsslog("%s: %s %d state %s\n",
  1315. mic->name, __func__, __LINE__, state);
  1316. switch (get_mic_state(mic, state)) {
  1317. case MIC_SHUTTING_DOWN:
  1318. mic_handle_shutdown(mic);
  1319. goto close_error;
  1320. case MIC_SUSPENDING:
  1321. mic->boot_on_resume = 1;
  1322. setsysfs(mic->name, "state", "suspend");
  1323. mic_handle_shutdown(mic);
  1324. goto close_error;
  1325. case MIC_OFFLINE:
  1326. if (mic->boot_on_resume) {
  1327. setsysfs(mic->name, "state", "boot");
  1328. mic->boot_on_resume = 0;
  1329. }
  1330. break;
  1331. default:
  1332. break;
  1333. }
  1334. free(state);
  1335. ufds[0].fd = fd;
  1336. ufds[0].events = POLLERR | POLLPRI;
  1337. ret = poll(ufds, 1, -1);
  1338. if (ret < 0) {
  1339. mpsslog("%s: poll failed %s\n",
  1340. mic->name, strerror(errno));
  1341. goto close_error1;
  1342. }
  1343. } while (1);
  1344. close_error:
  1345. free(state);
  1346. close_error1:
  1347. close(fd);
  1348. error:
  1349. init_mic(mic);
  1350. pthread_exit(NULL);
  1351. }
  1352. static void
  1353. set_cmdline(struct mic_info *mic)
  1354. {
  1355. char buffer[PATH_MAX];
  1356. int len;
  1357. len = snprintf(buffer, PATH_MAX,
  1358. "clocksource=tsc highres=off nohz=off ");
  1359. len += snprintf(buffer + len, PATH_MAX - len,
  1360. "cpufreq_on;corec6_off;pc3_off;pc6_off ");
  1361. len += snprintf(buffer + len, PATH_MAX - len,
  1362. "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
  1363. mic->id);
  1364. setsysfs(mic->name, "cmdline", buffer);
  1365. mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
  1366. snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
  1367. mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
  1368. }
  1369. static void
  1370. set_log_buf_info(struct mic_info *mic)
  1371. {
  1372. int fd;
  1373. off_t len;
  1374. char system_map[] = "/lib/firmware/mic/System.map";
  1375. char *map, *temp, log_buf[17] = {'\0'};
  1376. fd = open(system_map, O_RDONLY);
  1377. if (fd < 0) {
  1378. mpsslog("%s: Opening System.map failed: %d\n",
  1379. mic->name, errno);
  1380. return;
  1381. }
  1382. len = lseek(fd, 0, SEEK_END);
  1383. if (len < 0) {
  1384. mpsslog("%s: Reading System.map size failed: %d\n",
  1385. mic->name, errno);
  1386. close(fd);
  1387. return;
  1388. }
  1389. map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
  1390. if (map == MAP_FAILED) {
  1391. mpsslog("%s: mmap of System.map failed: %d\n",
  1392. mic->name, errno);
  1393. close(fd);
  1394. return;
  1395. }
  1396. temp = strstr(map, "__log_buf");
  1397. if (!temp) {
  1398. mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
  1399. munmap(map, len);
  1400. close(fd);
  1401. return;
  1402. }
  1403. strncpy(log_buf, temp - 19, 16);
  1404. setsysfs(mic->name, "log_buf_addr", log_buf);
  1405. mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
  1406. temp = strstr(map, "log_buf_len");
  1407. if (!temp) {
  1408. mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
  1409. munmap(map, len);
  1410. close(fd);
  1411. return;
  1412. }
  1413. strncpy(log_buf, temp - 19, 16);
  1414. setsysfs(mic->name, "log_buf_len", log_buf);
  1415. mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
  1416. munmap(map, len);
  1417. close(fd);
  1418. }
  1419. static void init_mic(struct mic_info *mic);
  1420. static void
  1421. change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
  1422. {
  1423. struct mic_info *mic;
  1424. for (mic = mic_list.next; mic != NULL; mic = mic->next)
  1425. mic->mic_virtblk.signaled = 1/* true */;
  1426. }
  1427. static void
  1428. init_mic(struct mic_info *mic)
  1429. {
  1430. struct sigaction ignore = {
  1431. .sa_flags = 0,
  1432. .sa_handler = SIG_IGN
  1433. };
  1434. struct sigaction act = {
  1435. .sa_flags = SA_SIGINFO,
  1436. .sa_sigaction = change_virtblk_backend,
  1437. };
  1438. char buffer[PATH_MAX];
  1439. int err;
  1440. /*
  1441. * Currently, one virtio block device is supported for each MIC card
  1442. * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
  1443. * The signal informs the virtio block backend about a change in the
  1444. * configuration file which specifies the virtio backend file name on
  1445. * the host. Virtio block backend then re-reads the configuration file
  1446. * and switches to the new block device. This signalling mechanism may
  1447. * not be required once multiple virtio block devices are supported by
  1448. * the MIC daemon.
  1449. */
  1450. sigaction(SIGUSR1, &ignore, NULL);
  1451. mic->pid = fork();
  1452. switch (mic->pid) {
  1453. case 0:
  1454. set_log_buf_info(mic);
  1455. set_cmdline(mic);
  1456. add_virtio_device(mic, &virtcons_dev_page.dd);
  1457. add_virtio_device(mic, &virtnet_dev_page.dd);
  1458. err = pthread_create(&mic->mic_console.console_thread, NULL,
  1459. virtio_console, mic);
  1460. if (err)
  1461. mpsslog("%s virtcons pthread_create failed %s\n",
  1462. mic->name, strerror(err));
  1463. err = pthread_create(&mic->mic_net.net_thread, NULL,
  1464. virtio_net, mic);
  1465. if (err)
  1466. mpsslog("%s virtnet pthread_create failed %s\n",
  1467. mic->name, strerror(err));
  1468. err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
  1469. virtio_block, mic);
  1470. if (err)
  1471. mpsslog("%s virtblk pthread_create failed %s\n",
  1472. mic->name, strerror(err));
  1473. sigemptyset(&act.sa_mask);
  1474. err = sigaction(SIGUSR1, &act, NULL);
  1475. if (err)
  1476. mpsslog("%s sigaction SIGUSR1 failed %s\n",
  1477. mic->name, strerror(errno));
  1478. while (1)
  1479. sleep(60);
  1480. case -1:
  1481. mpsslog("fork failed MIC name %s id %d errno %d\n",
  1482. mic->name, mic->id, errno);
  1483. break;
  1484. default:
  1485. if (mic->restart) {
  1486. snprintf(buffer, PATH_MAX, "boot");
  1487. setsysfs(mic->name, "state", buffer);
  1488. mpsslog("%s restarting mic %d\n",
  1489. mic->name, mic->restart);
  1490. mic->restart = 0;
  1491. }
  1492. pthread_create(&mic->config_thread, NULL, mic_config, mic);
  1493. }
  1494. }
  1495. static void
  1496. start_daemon(void)
  1497. {
  1498. struct mic_info *mic;
  1499. for (mic = mic_list.next; mic != NULL; mic = mic->next)
  1500. init_mic(mic);
  1501. while (1)
  1502. sleep(60);
  1503. }
  1504. static int
  1505. init_mic_list(void)
  1506. {
  1507. struct mic_info *mic = &mic_list;
  1508. struct dirent *file;
  1509. DIR *dp;
  1510. int cnt = 0;
  1511. dp = opendir(MICSYSFSDIR);
  1512. if (!dp)
  1513. return 0;
  1514. while ((file = readdir(dp)) != NULL) {
  1515. if (!strncmp(file->d_name, "mic", 3)) {
  1516. mic->next = calloc(1, sizeof(struct mic_info));
  1517. if (mic->next) {
  1518. mic = mic->next;
  1519. mic->id = atoi(&file->d_name[3]);
  1520. mic->name = malloc(strlen(file->d_name) + 16);
  1521. if (mic->name)
  1522. strcpy(mic->name, file->d_name);
  1523. mpsslog("MIC name %s id %d\n", mic->name,
  1524. mic->id);
  1525. cnt++;
  1526. }
  1527. }
  1528. }
  1529. closedir(dp);
  1530. return cnt;
  1531. }
  1532. void
  1533. mpsslog(char *format, ...)
  1534. {
  1535. va_list args;
  1536. char buffer[4096];
  1537. char ts[52], *ts1;
  1538. time_t t;
  1539. if (logfp == NULL)
  1540. return;
  1541. va_start(args, format);
  1542. vsprintf(buffer, format, args);
  1543. va_end(args);
  1544. time(&t);
  1545. ts1 = ctime_r(&t, ts);
  1546. ts1[strlen(ts1) - 1] = '\0';
  1547. fprintf(logfp, "%s: %s", ts1, buffer);
  1548. fflush(logfp);
  1549. }
  1550. int
  1551. main(int argc, char *argv[])
  1552. {
  1553. int cnt;
  1554. pid_t pid;
  1555. myname = argv[0];
  1556. logfp = fopen(LOGFILE_NAME, "a+");
  1557. if (!logfp) {
  1558. fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
  1559. exit(1);
  1560. }
  1561. pid = fork();
  1562. switch (pid) {
  1563. case 0:
  1564. break;
  1565. case -1:
  1566. exit(2);
  1567. default:
  1568. exit(0);
  1569. }
  1570. mpsslog("MIC Daemon start\n");
  1571. cnt = init_mic_list();
  1572. if (cnt == 0) {
  1573. mpsslog("MIC module not loaded\n");
  1574. exit(3);
  1575. }
  1576. mpsslog("MIC found %d devices\n", cnt);
  1577. start_daemon();
  1578. exit(0);
  1579. }