sunvnet_common.c

/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 * Copyright (C) 2016-2017 Oracle. All rights reserved.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/highmem.h>
#include <linux/if_vlan.h>
#define CREATE_TRACE_POINTS
#include <trace/events/sunvnet.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/icmpv6.h>
#endif

#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>

#include <asm/vio.h>
#include <asm/ldc.h>

#include "sunvnet_common.h"

/* Heuristic for the number of times to exponentially backoff and
 * retry sending an LDC trigger when EAGAIN is encountered
 */
#define VNET_MAX_RETRIES	10

MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
MODULE_DESCRIPTION("Sun LDOM virtual network support library");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.1");

static int __vnet_tx_trigger(struct vnet_port *port, u32 start);

static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
{
	return vio_dring_avail(dr, VNET_TX_RING_SIZE);
}

static int vnet_handle_unknown(struct vnet_port *port, void *arg)
{
	struct vio_msg_tag *pkt = arg;

	pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
	pr_err("Resetting connection\n");

	ldc_disconnect(port->vio.lp);

	return -ECONNRESET;
}

static int vnet_port_alloc_tx_ring(struct vnet_port *port);

int sunvnet_send_attr_common(struct vio_driver_state *vio)
{
	struct vnet_port *port = to_vnet_port(vio);
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_net_attr_info pkt;
	int framelen = ETH_FRAME_LEN;
	int i, err;

	err = vnet_port_alloc_tx_ring(to_vnet_port(vio));
	if (err)
		return err;

	memset(&pkt, 0, sizeof(pkt));
	pkt.tag.type = VIO_TYPE_CTRL;
	pkt.tag.stype = VIO_SUBTYPE_INFO;
	pkt.tag.stype_env = VIO_ATTR_INFO;
	pkt.tag.sid = vio_send_sid(vio);
	if (vio_version_before(vio, 1, 2))
		pkt.xfer_mode = VIO_DRING_MODE;
	else
		pkt.xfer_mode = VIO_NEW_DRING_MODE;
	pkt.addr_type = VNET_ADDR_ETHERMAC;
	pkt.ack_freq = 0;
	for (i = 0; i < 6; i++)
		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
	if (vio_version_after(vio, 1, 3)) {
		if (port->rmtu) {
			port->rmtu = min(VNET_MAXPACKET, port->rmtu);
			pkt.mtu = port->rmtu;
		} else {
			port->rmtu = VNET_MAXPACKET;
			pkt.mtu = port->rmtu;
		}
		if (vio_version_after_eq(vio, 1, 6))
			pkt.options = VIO_TX_DRING;
	} else if (vio_version_before(vio, 1, 3)) {
		pkt.mtu = framelen;
	} else { /* v1.3 */
		pkt.mtu = framelen + VLAN_HLEN;
	}

	pkt.cflags = 0;
	if (vio_version_after_eq(vio, 1, 7) && port->tso) {
		pkt.cflags |= VNET_LSO_IPV4_CAPAB;
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		pkt.ipv4_lso_maxlen = port->tsolen;
	}

	pkt.plnk_updt = PHYSLINK_UPDATE_NONE;

	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       "cflags[0x%04x] lso_max[%u]\n",
	       pkt.xfer_mode, pkt.addr_type,
	       (unsigned long long)pkt.addr,
	       pkt.ack_freq, pkt.plnk_updt, pkt.options,
	       (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);

	return vio_ldc_send(vio, &pkt, sizeof(pkt));
}
EXPORT_SYMBOL_GPL(sunvnet_send_attr_common);

static int handle_attr_info(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	struct vnet_port *port = to_vnet_port(vio);
	u64 localmtu;
	u8 xfer_mode;

	viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
	       " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long)pkt->addr,
	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
	       pkt->ipv4_lso_maxlen);

	pkt->tag.sid = vio_send_sid(vio);

	xfer_mode = pkt->xfer_mode;
	/* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
	if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
		xfer_mode = VIO_NEW_DRING_MODE;

	/* MTU negotiation:
	 *	< v1.3 - ETH_FRAME_LEN exactly
	 *	> v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
	 *		 pkt->mtu for ACK
	 *	= v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
	 */
	if (vio_version_before(vio, 1, 3)) {
		localmtu = ETH_FRAME_LEN;
	} else if (vio_version_after(vio, 1, 3)) {
		localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
		localmtu = min(pkt->mtu, localmtu);
		pkt->mtu = localmtu;
	} else { /* v1.3 */
		localmtu = ETH_FRAME_LEN + VLAN_HLEN;
	}
	port->rmtu = localmtu;

	/* LSO negotiation */
	if (vio_version_after_eq(vio, 1, 7))
		port->tso &= !!(pkt->cflags & VNET_LSO_IPV4_CAPAB);
	else
		port->tso = false;
	if (port->tso) {
		if (!port->tsolen)
			port->tsolen = VNET_MAXTSO;
		port->tsolen = min(port->tsolen, pkt->ipv4_lso_maxlen);
		if (port->tsolen < VNET_MINTSO) {
			port->tso = false;
			port->tsolen = 0;
			pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		}
		pkt->ipv4_lso_maxlen = port->tsolen;
	} else {
		pkt->cflags &= ~VNET_LSO_IPV4_CAPAB;
		pkt->ipv4_lso_maxlen = 0;
		port->tsolen = 0;
	}

	/* for version >= 1.6, ACK packet mode we support */
	if (vio_version_after_eq(vio, 1, 6)) {
		pkt->xfer_mode = VIO_NEW_DRING_MODE;
		pkt->options = VIO_TX_DRING;
	}

	if (!(xfer_mode | VIO_NEW_DRING_MODE) ||
	    pkt->addr_type != VNET_ADDR_ETHERMAC ||
	    pkt->mtu != localmtu) {
		viodbg(HS, "SEND NET ATTR NACK\n");

		pkt->tag.stype = VIO_SUBTYPE_NACK;

		(void)vio_ldc_send(vio, pkt, sizeof(*pkt));

		return -ECONNRESET;
	}

	viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
	       "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
	       "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long)pkt->addr,
	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
	       pkt->ipv4_lso_maxlen);

	pkt->tag.stype = VIO_SUBTYPE_ACK;

	return vio_ldc_send(vio, pkt, sizeof(*pkt));
}

static int handle_attr_ack(struct vio_driver_state *vio,
			   struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR ACK\n");

	return 0;
}

static int handle_attr_nack(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR NACK\n");

	return -ECONNRESET;
}

int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg)
{
	struct vio_net_attr_info *pkt = arg;

	switch (pkt->tag.stype) {
	case VIO_SUBTYPE_INFO:
		return handle_attr_info(vio, pkt);

	case VIO_SUBTYPE_ACK:
		return handle_attr_ack(vio, pkt);

	case VIO_SUBTYPE_NACK:
		return handle_attr_nack(vio, pkt);

	default:
		return -ECONNRESET;
	}
}
EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common);

void sunvnet_handshake_complete_common(struct vio_driver_state *vio)
{
	struct vio_dring_state *dr;

	dr = &vio->drings[VIO_DRIVER_RX_RING];
	dr->rcv_nxt = 1;
	dr->snd_nxt = 1;

	dr = &vio->drings[VIO_DRIVER_TX_RING];
	dr->rcv_nxt = 1;
	dr->snd_nxt = 1;
}
EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common);

/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad-out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the ethernet
 * header are aligned properly.  These 6 bytes are not in the descriptor
 * length, they are simply implied.  This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
					   unsigned int len)
{
	struct sk_buff *skb;
	unsigned long addr, off;

	skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8);
	if (unlikely(!skb))
		return NULL;

	addr = (unsigned long)skb->data;
	off = ((addr + 7UL) & ~7UL) - addr;
	if (off)
		skb_reserve(skb, off);

	return skb;
}
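
/* Recompute the IPv4 TCP/UDP checksum of @skb in software, over the
 * full payload starting at the transport header.  Non-IPv4 and
 * non-TCP/UDP packets are left untouched.
 */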
static inline void vnet_fullcsum(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int offset = skb_transport_offset(skb);

	if (skb->protocol != htons(ETH_P_IP))
		return;
	if (iph->protocol != IPPROTO_TCP &&
	    iph->protocol != IPPROTO_UDP)
		return;
	skb->ip_summed = CHECKSUM_NONE;
	skb->csum_level = 1;
	skb->csum = 0;
	if (iph->protocol == IPPROTO_TCP) {
		struct tcphdr *ptcp = tcp_hdr(skb);

		ptcp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		ptcp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_TCP,
						skb->csum);
	} else if (iph->protocol == IPPROTO_UDP) {
		struct udphdr *pudp = udp_hdr(skb);

		pudp->check = 0;
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
		pudp->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
						skb->len - offset, IPPROTO_UDP,
						skb->csum);
	}
}
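
/* Pull one READY descriptor's payload out of the peer's exported
 * memory: allocate an 8-byte-aligned skb, ldc_copy() the rounded-up
 * length in, honour the v1.8 checksum-offload descriptor flags, and
 * hand the result to GRO via napi_gro_receive().
 */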
static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	unsigned int len = desc->size;
	unsigned int copy_len;
	struct sk_buff *skb;
	int maxlen;
	int err;

	err = -EMSGSIZE;
	if (port->tso && port->tsolen > port->rmtu)
		maxlen = port->tsolen;
	else
		maxlen = port->rmtu;
	if (unlikely(len < ETH_ZLEN || len > maxlen)) {
		dev->stats.rx_length_errors++;
		goto out_dropped;
	}

	skb = alloc_and_align_skb(dev, len);
	err = -ENOMEM;
	if (unlikely(!skb)) {
		dev->stats.rx_missed_errors++;
		goto out_dropped;
	}

	copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
	skb_put(skb, copy_len);
	err = ldc_copy(port->vio.lp, LDC_COPY_IN,
		       skb->data, copy_len, 0,
		       desc->cookies, desc->ncookies);
	if (unlikely(err < 0)) {
		dev->stats.rx_frame_errors++;
		goto out_free_skb;
	}

	skb_pull(skb, VNET_PACKET_SKIP);
	skb_trim(skb, len);
	skb->protocol = eth_type_trans(skb, dev);

	if (vio_version_after_eq(&port->vio, 1, 8)) {
		struct vio_net_dext *dext = vio_net_ext(desc);

		skb_reset_network_header(skb);

		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM) {
			if (skb->protocol == ETH_P_IP) {
				struct iphdr *iph = ip_hdr(skb);

				iph->check = 0;
				ip_send_check(iph);
			}
		}
		if ((dext->flags & VNET_PKT_HCK_FULLCKSUM) &&
		    skb->ip_summed == CHECKSUM_NONE) {
			if (skb->protocol == htons(ETH_P_IP)) {
				struct iphdr *iph = ip_hdr(skb);
				int ihl = iph->ihl * 4;

				skb_reset_transport_header(skb);
				skb_set_transport_header(skb, ihl);
				vnet_fullcsum(skb);
			}
		}
		if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
			skb->ip_summed = CHECKSUM_PARTIAL;
			skb->csum_level = 0;
			if (dext->flags & VNET_PKT_HCK_FULLCKSUM_OK)
				skb->csum_level = 1;
		}
	}

	skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL;

	if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest)))
		dev->stats.multicast++;
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;
	port->stats.rx_packets++;
	port->stats.rx_bytes += len;
	napi_gro_receive(&port->napi, skb);
	return 0;

out_free_skb:
	kfree_skb(skb);

out_dropped:
	dev->stats.rx_dropped++;
	return err;
}
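
/* Send a dring ACK (ACTIVE or STOPPED) for the [start, end] window
 * back to the producer, retrying with exponential backoff while the
 * LDC channel returns -EAGAIN.  A STOPPED ack that could not be sent
 * is remembered in port->stop_rx so __vnet_tx_trigger() can resend it
 * later.
 */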
static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
			 u32 start, u32 end, u8 vio_dring_state)
{
	struct vio_dring_data hdr = {
		.tag = {
			.type = VIO_TYPE_DATA,
			.stype = VIO_SUBTYPE_ACK,
			.stype_env = VIO_DRING_DATA,
			.sid = vio_send_sid(&port->vio),
		},
		.dring_ident = dr->ident,
		.start_idx = start,
		.end_idx = end,
		.state = vio_dring_state,
	};
	int err, delay;
	int retries = 0;

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES) {
			pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
				port->raddr[0], port->raddr[1],
				port->raddr[2], port->raddr[3],
				port->raddr[4], port->raddr[5]);
			break;
		}
	} while (err == -EAGAIN);

	if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
		port->stop_rx_idx = end;
		port->stop_rx = true;
	} else {
		port->stop_rx_idx = 0;
		port->stop_rx = false;
	}

	return err;
}

static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
					struct vio_dring_state *dr,
					u32 index)
{
	struct vio_net_desc *desc = port->vio.desc_buf;
	int err;

	err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return ERR_PTR(err);

	return desc;
}

static int put_rx_desc(struct vnet_port *port,
		       struct vio_dring_state *dr,
		       struct vio_net_desc *desc,
		       u32 index)
{
	int err;

	err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return err;

	return 0;
}

static int vnet_walk_rx_one(struct vnet_port *port,
			    struct vio_dring_state *dr,
			    u32 index, int *needs_ack)
{
	struct vio_net_desc *desc = get_rx_desc(port, dr, index);
	struct vio_driver_state *vio = &port->vio;
	int err;

	BUG_ON(!desc);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	if (desc->hdr.state != VIO_DESC_READY)
		return 1;

	dma_rmb();

	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
	       desc->hdr.state, desc->hdr.ack,
	       desc->size, desc->ncookies,
	       desc->cookies[0].cookie_addr,
	       desc->cookies[0].cookie_size);

	err = vnet_rx_one(port, desc);
	if (err == -ECONNRESET)
		return err;
	trace_vnet_rx_one(port->vio._local_sid, port->vio._peer_sid,
			  index, desc->hdr.ack);
	desc->hdr.state = VIO_DESC_DONE;
	err = put_rx_desc(port, dr, desc, index);
	if (err < 0)
		return err;
	*needs_ack = desc->hdr.ack;
	return 0;
}
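
/* Walk the RX descriptor ring from @start to @end (inclusive; an @end
 * of (u32)-1 means "until the first non-READY entry"), receiving at
 * most @budget packets.  Intermediate ACTIVE acks go out whenever a
 * descriptor requests one; the final STOPPED ack is either sent here
 * or, when the budget runs out, deferred to the next NAPI pass via
 * port->napi_resume / port->napi_stop_idx.
 */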
static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
			u32 start, u32 end, int *npkts, int budget)
{
	struct vio_driver_state *vio = &port->vio;
	int ack_start = -1, ack_end = -1;
	bool send_ack = true;

	end = (end == (u32)-1) ? vio_dring_prev(dr, start)
			       : vio_dring_next(dr, end);

	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);

	while (start != end) {
		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);

		if (err == -ECONNRESET)
			return err;
		if (err != 0)
			break;
		(*npkts)++;
		if (ack_start == -1)
			ack_start = start;
		ack_end = start;
		start = vio_dring_next(dr, start);
		if (ack && start != end) {
			err = vnet_send_ack(port, dr, ack_start, ack_end,
					    VIO_DRING_ACTIVE);
			if (err == -ECONNRESET)
				return err;
			ack_start = -1;
		}
		if ((*npkts) >= budget) {
			send_ack = false;
			break;
		}
	}
	if (unlikely(ack_start == -1)) {
		ack_end = vio_dring_prev(dr, start);
		ack_start = ack_end;
	}
	if (send_ack) {
		port->napi_resume = false;
		trace_vnet_tx_send_stopped_ack(port->vio._local_sid,
					       port->vio._peer_sid,
					       ack_end, *npkts);
		return vnet_send_ack(port, dr, ack_start, ack_end,
				     VIO_DRING_STOPPED);
	} else {
		trace_vnet_tx_defer_stopped_ack(port->vio._local_sid,
						port->vio._peer_sid,
						ack_end, *npkts);
		port->napi_resume = true;
		port->napi_stop_idx = ack_end;
		return 1;
	}
}

static int vnet_rx(struct vnet_port *port, void *msgbuf, int *npkts,
		   int budget)
{
	struct vio_dring_data *pkt = msgbuf;
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
	struct vio_driver_state *vio = &port->vio;

	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;
	if (unlikely(pkt->seq != dr->rcv_nxt)) {
		pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
		       pkt->seq, dr->rcv_nxt);
		return 0;
	}

	if (!port->napi_resume)
		dr->rcv_nxt++;

	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */

	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx,
			    npkts, budget);
}

static int idx_is_pending(struct vio_dring_state *dr, u32 end)
{
	u32 idx = dr->cons;
	int found = 0;

	while (idx != dr->prod) {
		if (idx == end) {
			found = 1;
			break;
		}
		idx = vio_dring_next(dr, idx);
	}
	return found;
}
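
/* Handle a STOPPED ack from the consumer for our TX ring: advance
 * dr->cons past the acked index, resend a missed "start" trigger if
 * the next descriptor is already READY, and return 1 when the stopped
 * TX queue now has enough room to be woken by the caller.
 */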
static int vnet_ack(struct vnet_port *port, void *msgbuf)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data *pkt = msgbuf;
	struct net_device *dev;
	u32 end;
	struct vio_net_desc *desc;
	struct netdev_queue *txq;

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;

	end = pkt->end_idx;
	dev = VNET_PORT_TO_NET_DEVICE(port);
	netif_tx_lock(dev);
	if (unlikely(!idx_is_pending(dr, end))) {
		netif_tx_unlock(dev);
		return 0;
	}

	/* sync for race conditions with vnet_start_xmit() and tell xmit it
	 * is time to send a trigger.
	 */
	trace_vnet_rx_stopped_ack(port->vio._local_sid,
				  port->vio._peer_sid, end);
	dr->cons = vio_dring_next(dr, end);
	desc = vio_dring_entry(dr, dr->cons);
	if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
		/* vnet_start_xmit() just populated this dring but missed
		 * sending the "start" LDC message to the consumer.
		 * Send a "start" trigger on its behalf.
		 */
		if (__vnet_tx_trigger(port, dr->cons) > 0)
			port->start_cons = false;
		else
			port->start_cons = true;
	} else {
		port->start_cons = true;
	}
	netif_tx_unlock(dev);

	txq = netdev_get_tx_queue(dev, port->q_index);
	if (unlikely(netif_tx_queue_stopped(txq) &&
		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
		return 1;

	return 0;
}

static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}

static int handle_mcast(struct vnet_port *port, void *msgbuf)
{
	struct vio_net_mcast_info *pkt = msgbuf;
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);

	if (pkt->tag.stype != VIO_SUBTYPE_ACK)
		pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
		       dev->name,
		       pkt->tag.type,
		       pkt->tag.stype,
		       pkt->tag.stype_env,
		       pkt->tag.sid);

	return 0;
}

/* If the queue is stopped, wake it up so that we'll
 * send out another START message at the next TX.
 */
static void maybe_tx_wakeup(struct vnet_port *port)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port),
				  port->q_index);
	__netif_tx_lock(txq, smp_processor_id());
	if (likely(netif_tx_queue_stopped(txq)))
		netif_tx_wake_queue(txq);
	__netif_tx_unlock(txq);
}

bool sunvnet_port_is_up_common(struct vnet_port *vnet)
{
	struct vio_driver_state *vio = &vnet->vio;

	return !!(vio->hs_state & VIO_HS_COMPLETE);
}
EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);
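
/* Core NAPI worker.  LDC RESET/UP events are handled first and
 * short-circuit the poll; otherwise LDC messages are read and
 * dispatched (DATA INFO -> vnet_rx(), DATA ACK -> vnet_ack(),
 * CTRL -> the generic VIO engine) until the budget or the message
 * stream is exhausted.  When a previous poll deferred its STOPPED
 * ack, port->napi_resume makes us synthesize a DRING_DATA message so
 * the ring walk resumes where it left off.
 */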
static int vnet_event_napi(struct vnet_port *port, int budget)
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_driver_state *vio = &port->vio;
	int tx_wakeup, err;
	int npkts = 0;

	/* we don't expect any other bits */
	BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY |
				  LDC_EVENT_RESET |
				  LDC_EVENT_UP));

	/* RESET takes precedent over any other event */
	if (port->rx_event & LDC_EVENT_RESET) {
		/* a link went down */

		if (port->vsw == 1) {
			netif_tx_stop_all_queues(dev);
			netif_carrier_off(dev);
		}

		vio_link_state_change(vio, LDC_EVENT_RESET);
		vnet_port_reset(port);
		vio_port_up(vio);

		/* If the device is running but its tx queue was
		 * stopped (due to flow control), restart it.
		 * This is necessary since vnet_port_reset()
		 * clears the tx drings and thus we may never get
		 * back a VIO_TYPE_DATA ACK packet - which is
		 * the normal mechanism to restart the tx queue.
		 */
		if (netif_running(dev))
			maybe_tx_wakeup(port);

		port->rx_event = 0;
		port->stats.event_reset++;
		return 0;
	}

	if (port->rx_event & LDC_EVENT_UP) {
		/* a link came up */

		if (port->vsw == 1) {
			netif_carrier_on(port->dev);
			netif_tx_start_all_queues(port->dev);
		}

		vio_link_state_change(vio, LDC_EVENT_UP);
		port->rx_event = 0;
		port->stats.event_up++;
		return 0;
	}

	err = 0;
	tx_wakeup = 0;
	while (1) {
		union {
			struct vio_msg_tag tag;
			u64 raw[8];
		} msgbuf;

		if (port->napi_resume) {
			struct vio_dring_data *pkt =
				(struct vio_dring_data *)&msgbuf;
			struct vio_dring_state *dr =
				&port->vio.drings[VIO_DRIVER_RX_RING];

			pkt->tag.type = VIO_TYPE_DATA;
			pkt->tag.stype = VIO_SUBTYPE_INFO;
			pkt->tag.stype_env = VIO_DRING_DATA;
			pkt->seq = dr->rcv_nxt;
			pkt->start_idx = vio_dring_next(dr,
							port->napi_stop_idx);
			pkt->end_idx = -1;
		} else {
			err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
			if (unlikely(err < 0)) {
				if (err == -ECONNRESET)
					vio_conn_reset(vio);
				break;
			}
			if (err == 0)
				break;
			viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
			       msgbuf.tag.type,
			       msgbuf.tag.stype,
			       msgbuf.tag.stype_env,
			       msgbuf.tag.sid);
			err = vio_validate_sid(vio, &msgbuf.tag);
			if (err < 0)
				break;
		}
		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
				if (!sunvnet_port_is_up_common(port)) {
					/* failures like handshake_failure()
					 * may have cleaned up dring, but
					 * NAPI polling may bring us here.
					 */
					err = -ECONNRESET;
					break;
				}
				err = vnet_rx(port, &msgbuf, &npkts, budget);
				if (npkts >= budget)
					break;
				if (npkts == 0)
					break;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
				err = vnet_ack(port, &msgbuf);
				if (err > 0)
					tx_wakeup |= err;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
				err = vnet_nack(port, &msgbuf);
			}
		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
			if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
				err = handle_mcast(port, &msgbuf);
			else
				err = vio_control_pkt_engine(vio, &msgbuf);
			if (err)
				break;
		} else {
			err = vnet_handle_unknown(port, &msgbuf);
		}
		if (err == -ECONNRESET)
			break;
	}
	if (unlikely(tx_wakeup && err != -ECONNRESET))
		maybe_tx_wakeup(port);
	return npkts;
}
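
/* The two entry points below implement the interrupt-mitigation split:
 * sunvnet_event_common() runs in LDC event context, masks the RX
 * interrupt and schedules NAPI; sunvnet_poll_common() then does the
 * real work and re-enables the interrupt once it polls under budget.
 *
 * A minimal sketch of how a front end (e.g. sunvnet.c or ldmvsw.c)
 * might wire these up - the callback names here are illustrative,
 * not part of this library:
 *
 *	static int vnet_poll(struct napi_struct *napi, int budget)
 *	{
 *		return sunvnet_poll_common(napi, budget);
 *	}
 *
 *	static void vnet_event(void *arg, int event)
 *	{
 *		sunvnet_event_common(arg, event);
 *	}
 *
 * with vnet_event registered as the port's LDC event callback and
 * vnet_poll registered through netif_napi_add() for each port.
 */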
int sunvnet_poll_common(struct napi_struct *napi, int budget)
{
	struct vnet_port *port = container_of(napi, struct vnet_port, napi);
	struct vio_driver_state *vio = &port->vio;
	int processed = vnet_event_napi(port, budget);

	if (processed < budget) {
		napi_complete_done(napi, processed);
		port->rx_event &= ~LDC_EVENT_DATA_READY;
		vio_set_intr(vio->vdev->rx_ino, HV_INTR_ENABLED);
	}
	return processed;
}
EXPORT_SYMBOL_GPL(sunvnet_poll_common);

void sunvnet_event_common(void *arg, int event)
{
	struct vnet_port *port = arg;
	struct vio_driver_state *vio = &port->vio;

	port->rx_event |= event;
	vio_set_intr(vio->vdev->rx_ino, HV_INTR_DISABLED);
	napi_schedule(&port->napi);
}
EXPORT_SYMBOL_GPL(sunvnet_event_common);
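
/* Send the "start" trigger (a DRING_DATA INFO message with
 * start_idx = @start and end_idx = -1) that tells the consumer our TX
 * ring has READY descriptors, retrying with exponential backoff on
 * -EAGAIN.  Any STOPPED ack that previously failed to go out
 * (port->stop_rx) is flushed first so the peer's view stays
 * consistent.
 */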
static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data hdr = {
		.tag = {
			.type = VIO_TYPE_DATA,
			.stype = VIO_SUBTYPE_INFO,
			.stype_env = VIO_DRING_DATA,
			.sid = vio_send_sid(&port->vio),
		},
		.dring_ident = dr->ident,
		.start_idx = start,
		.end_idx = (u32)-1,
	};
	int err, delay;
	int retries = 0;

	if (port->stop_rx) {
		trace_vnet_tx_pending_stopped_ack(port->vio._local_sid,
						  port->vio._peer_sid,
						  port->stop_rx_idx, -1);
		err = vnet_send_ack(port,
				    &port->vio.drings[VIO_DRIVER_RX_RING],
				    port->stop_rx_idx, -1,
				    VIO_DRING_STOPPED);
		if (err <= 0)
			return err;
	}

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES)
			break;
	} while (err == -EAGAIN);
	trace_vnet_tx_trigger(port->vio._local_sid,
			      port->vio._peer_sid, start, err);

	return err;
}
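
/* Walk the TX ring backwards from dr->prod, unmapping and collecting
 * the skbs of completed (non-READY) descriptors into a singly linked
 * list that the caller frees outside the tx lock.  *pending is set to
 * the number of descriptors still awaiting completion.
 */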
static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
					  unsigned *pending)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *skb = NULL;
	int i, txi;

	*pending = 0;

	txi = dr->prod;
	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		--txi;
		if (txi < 0)
			txi = VNET_TX_RING_SIZE - 1;

		d = vio_dring_entry(dr, txi);

		if (d->hdr.state == VIO_DESC_READY) {
			(*pending)++;
			continue;
		}
		if (port->tx_bufs[txi].skb) {
			if (d->hdr.state != VIO_DESC_DONE)
				pr_notice("invalid ring buffer state %d\n",
					  d->hdr.state);
			BUG_ON(port->tx_bufs[txi].skb->next);

			port->tx_bufs[txi].skb->next = skb;
			skb = port->tx_bufs[txi].skb;
			port->tx_bufs[txi].skb = NULL;

			ldc_unmap(port->vio.lp,
				  port->tx_bufs[txi].cookies,
				  port->tx_bufs[txi].ncookies);
		} else if (d->hdr.state == VIO_DESC_FREE) {
			break;
		}
		d->hdr.state = VIO_DESC_FREE;
	}
	return skb;
}

static inline void vnet_free_skbs(struct sk_buff *skb)
{
	struct sk_buff *next;

	while (skb) {
		next = skb->next;
		skb->next = NULL;
		dev_kfree_skb(skb);
		skb = next;
	}
}

void sunvnet_clean_timer_expire_common(unsigned long port0)
{
	struct vnet_port *port = (struct vnet_port *)port0;
	struct sk_buff *freeskbs;
	unsigned pending;

	netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port));
	freeskbs = vnet_clean_tx_ring(port, &pending);
	netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port));

	vnet_free_skbs(freeskbs);

	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else
		del_timer(&port->clean_timer);
}
EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common);
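
/* Map the skb head (padded to at least ETH_ZLEN, plus VNET_PACKET_SKIP,
 * rounded up to an 8-byte length) and each page fragment into LDC
 * transfer cookies.  Returns the number of cookies used, or a negative
 * errno after unmapping everything on failure.
 */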
static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
			       struct ldc_trans_cookie *cookies, int ncookies,
			       unsigned int map_perm)
{
	int i, nc, err, blen;

	/* header */
	blen = skb_headlen(skb);
	if (blen < ETH_ZLEN)
		blen = ETH_ZLEN;
	blen += VNET_PACKET_SKIP;
	blen += 8 - (blen & 7);

	err = ldc_map_single(lp, skb->data - VNET_PACKET_SKIP, blen, cookies,
			     ncookies, map_perm);
	if (err < 0)
		return err;
	nc = err;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
		u8 *vaddr;

		if (nc < ncookies) {
			vaddr = kmap_atomic(skb_frag_page(f));
			blen = skb_frag_size(f);
			blen += 8 - (blen & 7);
			err = ldc_map_single(lp, vaddr + f->page_offset,
					     blen, cookies + nc, ncookies - nc,
					     map_perm);
			kunmap_atomic(vaddr);
		} else {
			err = -EMSGSIZE;
		}

		if (err < 0) {
			ldc_unmap(lp, cookies, nc);
			return err;
		}
		nc += err;
	}
	return nc;
}
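
/* Make the skb fit the LDC mapping rules: skb->data must sit
 * VNET_PACKET_SKIP bytes past an 8-byte boundary, every fragment must
 * be 8-byte aligned, and the whole packet must fit in @ncookies
 * cookies.  If not, the packet is copied into a freshly aligned skb
 * and any pending CHECKSUM_PARTIAL checksum is resolved during the
 * copy.
 */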
static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
{
	struct sk_buff *nskb;
	int i, len, pad, docopy;

	len = skb->len;
	pad = 0;
	if (len < ETH_ZLEN) {
		pad += ETH_ZLEN - skb->len;
		len += pad;
	}
	len += VNET_PACKET_SKIP;
	pad += 8 - (len & 7);

	/* make sure we have enough cookies and alignment in every frag */
	docopy = skb_shinfo(skb)->nr_frags >= ncookies;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		docopy |= f->page_offset & 7;
	}
	if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
	    skb_tailroom(skb) < pad ||
	    skb_headroom(skb) < VNET_PACKET_SKIP || docopy) {
		int start = 0, offset;
		__wsum csum;

		len = skb->len > ETH_ZLEN ? skb->len : ETH_ZLEN;
		nskb = alloc_and_align_skb(skb->dev, len);
		if (!nskb) {
			dev_kfree_skb(skb);
			return NULL;
		}
		skb_reserve(nskb, VNET_PACKET_SKIP);

		nskb->protocol = skb->protocol;
		offset = skb_mac_header(skb) - skb->data;
		skb_set_mac_header(nskb, offset);
		offset = skb_network_header(skb) - skb->data;
		skb_set_network_header(nskb, offset);
		offset = skb_transport_header(skb) - skb->data;
		skb_set_transport_header(nskb, offset);
		offset = 0;
		nskb->csum_offset = skb->csum_offset;
		nskb->ip_summed = skb->ip_summed;

		if (skb->ip_summed == CHECKSUM_PARTIAL)
			start = skb_checksum_start_offset(skb);
		if (start) {
			struct iphdr *iph = ip_hdr(nskb);
			int offset = start + nskb->csum_offset;

			if (skb_copy_bits(skb, 0, nskb->data, start)) {
				dev_kfree_skb(nskb);
				dev_kfree_skb(skb);
				return NULL;
			}
			*(__sum16 *)(skb->data + offset) = 0;
			csum = skb_copy_and_csum_bits(skb, start,
						      nskb->data + start,
						      skb->len - start, 0);
			if (iph->protocol == IPPROTO_TCP ||
			    iph->protocol == IPPROTO_UDP) {
				csum = csum_tcpudp_magic(iph->saddr, iph->daddr,
							 skb->len - start,
							 iph->protocol, csum);
			}
			*(__sum16 *)(nskb->data + offset) = csum;

			nskb->ip_summed = CHECKSUM_NONE;
		} else if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
			dev_kfree_skb(nskb);
			dev_kfree_skb(skb);
			return NULL;
		}
		(void)skb_put(nskb, skb->len);
		if (skb_is_gso(skb)) {
			skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size;
			skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
		}
		nskb->queue_mapping = skb->queue_mapping;
		dev_kfree_skb(skb);
		skb = nskb;
	}
	return skb;
}
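
/* TSO/GSO path for packets larger than the negotiated tsolen: shrink
 * gso_size so each segment fits the peer's LSO limit, software-segment
 * the skb, fix up the per-segment checksum offsets, and feed every
 * segment back through sunvnet_start_xmit_common().  Returns
 * NETDEV_TX_BUSY when the TX ring cannot hold all the segments.
 */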
static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
				struct vnet_port *(*vnet_tx_port)
				(struct sk_buff *, struct net_device *))
{
	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *segs;
	int maclen, datalen;
	int status;
	int gso_size, gso_type, gso_segs;
	int hlen = skb_transport_header(skb) - skb_mac_header(skb);
	int proto = IPPROTO_IP;

	if (skb->protocol == htons(ETH_P_IP))
		proto = ip_hdr(skb)->protocol;
	else if (skb->protocol == htons(ETH_P_IPV6))
		proto = ipv6_hdr(skb)->nexthdr;

	if (proto == IPPROTO_TCP) {
		hlen += tcp_hdr(skb)->doff * 4;
	} else if (proto == IPPROTO_UDP) {
		hlen += sizeof(struct udphdr);
	} else {
		pr_err("vnet_handle_offloads GSO with unknown transport "
		       "protocol %d tproto %d\n", skb->protocol, proto);
		hlen = 128; /* XXX */
	}
	datalen = port->tsolen - hlen;

	gso_size = skb_shinfo(skb)->gso_size;
	gso_type = skb_shinfo(skb)->gso_type;
	gso_segs = skb_shinfo(skb)->gso_segs;

	if (port->tso && gso_size < datalen)
		gso_segs = DIV_ROUND_UP(skb->len - hlen, datalen);

	if (unlikely(vnet_tx_dring_avail(dr) < gso_segs)) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(dev, port->q_index);
		netif_tx_stop_queue(txq);
		if (vnet_tx_dring_avail(dr) < skb_shinfo(skb)->gso_segs)
			return NETDEV_TX_BUSY;
		netif_tx_wake_queue(txq);
	}

	maclen = skb_network_header(skb) - skb_mac_header(skb);
	skb_pull(skb, maclen);

	if (port->tso && gso_size < datalen) {
		if (skb_unclone(skb, GFP_ATOMIC))
			goto out_dropped;

		/* segment to TSO size */
		skb_shinfo(skb)->gso_size = datalen;
		skb_shinfo(skb)->gso_segs = gso_segs;
	}
	segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO);
	if (IS_ERR(segs))
		goto out_dropped;

	skb_push(skb, maclen);
	skb_reset_mac_header(skb);

	status = 0;
	while (segs) {
		struct sk_buff *curr = segs;

		segs = segs->next;
		curr->next = NULL;
		if (port->tso && curr->len > dev->mtu) {
			skb_shinfo(curr)->gso_size = gso_size;
			skb_shinfo(curr)->gso_type = gso_type;
			skb_shinfo(curr)->gso_segs =
				DIV_ROUND_UP(curr->len - hlen, gso_size);
		} else {
			skb_shinfo(curr)->gso_size = 0;
		}

		skb_push(curr, maclen);
		skb_reset_mac_header(curr);
		memcpy(skb_mac_header(curr), skb_mac_header(skb),
		       maclen);
		curr->csum_start = skb_transport_header(curr) - curr->head;
		if (ip_hdr(curr)->protocol == IPPROTO_TCP)
			curr->csum_offset = offsetof(struct tcphdr, check);
		else if (ip_hdr(curr)->protocol == IPPROTO_UDP)
			curr->csum_offset = offsetof(struct udphdr, check);

		if (!(status & NETDEV_TX_MASK))
			status = sunvnet_start_xmit_common(curr, dev,
							   vnet_tx_port);
		if (status & NETDEV_TX_MASK)
			dev_kfree_skb_any(curr);
	}

	if (!(status & NETDEV_TX_MASK))
		dev_kfree_skb_any(skb);
	return status;
out_dropped:
	dev->stats.tx_dropped++;
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
			      struct vnet_port *(*vnet_tx_port)
			      (struct sk_buff *, struct net_device *))
{
	struct vnet_port *port = NULL;
	struct vio_dring_state *dr;
	struct vio_net_desc *d;
	unsigned int len;
	struct sk_buff *freeskbs = NULL;
	int i, err, txi;
	unsigned pending = 0;
	struct netdev_queue *txq;

	rcu_read_lock();
	port = vnet_tx_port(skb, dev);
	if (unlikely(!port))
		goto out_dropped;

	if (skb_is_gso(skb) && skb->len > port->tsolen) {
		err = vnet_handle_offloads(port, skb, vnet_tx_port);
		rcu_read_unlock();
		return err;
	}

	if (!skb_is_gso(skb) && skb->len > port->rmtu) {
		unsigned long localmtu = port->rmtu - ETH_HLEN;

		if (vio_version_after_eq(&port->vio, 1, 3))
			localmtu -= VLAN_HLEN;

		if (skb->protocol == htons(ETH_P_IP)) {
			struct flowi4 fl4;
			struct rtable *rt = NULL;

			memset(&fl4, 0, sizeof(fl4));
			fl4.flowi4_oif = dev->ifindex;
			fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
			fl4.daddr = ip_hdr(skb)->daddr;
			fl4.saddr = ip_hdr(skb)->saddr;

			rt = ip_route_output_key(dev_net(dev), &fl4);
			if (!IS_ERR(rt)) {
				skb_dst_set(skb, &rt->dst);
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_FRAG_NEEDED,
					  htonl(localmtu));
			}
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
#endif
		goto out_dropped;
	}

	skb = vnet_skb_shape(skb, 2);

	if (unlikely(!skb))
		goto out_dropped;

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		vnet_fullcsum(skb);

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	i = skb_get_queue_mapping(skb);
	txq = netdev_get_tx_queue(dev, i);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		if (!netif_tx_queue_stopped(txq)) {
			netif_tx_stop_queue(txq);

			/* This is a hard error, log it. */
			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
			dev->stats.tx_errors++;
		}
		rcu_read_unlock();
		return NETDEV_TX_BUSY;
	}

	d = vio_dring_cur(dr);

	txi = dr->prod;

	freeskbs = vnet_clean_tx_ring(port, &pending);

	BUG_ON(port->tx_bufs[txi].skb);

	len = skb->len;
	if (len < ETH_ZLEN)
		len = ETH_ZLEN;

	err = vnet_skb_map(port->vio.lp, skb, port->tx_bufs[txi].cookies, 2,
			   (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
	if (err < 0) {
		netdev_info(dev, "tx buffer map error %d\n", err);
		goto out_dropped;
	}

	port->tx_bufs[txi].skb = skb;
	skb = NULL;
	port->tx_bufs[txi].ncookies = err;

	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
	 * the protocol itself does not require it as long as the peer
	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
	 *
	 * An ACK for every packet in the ring is expensive as the
	 * sending of LDC messages is slow and affects performance.
	 */
	d->hdr.ack = VIO_ACK_DISABLE;
	d->size = len;
	d->ncookies = port->tx_bufs[txi].ncookies;
	for (i = 0; i < d->ncookies; i++)
		d->cookies[i] = port->tx_bufs[txi].cookies[i];
	if (vio_version_after_eq(&port->vio, 1, 7)) {
		struct vio_net_dext *dext = vio_net_ext(d);

		memset(dext, 0, sizeof(*dext));
		if (skb_is_gso(port->tx_bufs[txi].skb)) {
			dext->ipv4_lso_mss = skb_shinfo(port->tx_bufs[txi].skb)
					     ->gso_size;
			dext->flags |= VNET_PKT_IPV4_LSO;
		}
		if (vio_version_after_eq(&port->vio, 1, 8) &&
		    !port->switch_port) {
			dext->flags |= VNET_PKT_HCK_IPV4_HDRCKSUM_OK;
			dext->flags |= VNET_PKT_HCK_FULLCKSUM_OK;
		}
	}

	/* This has to be a non-SMP write barrier because we are writing
	 * to memory which is shared with the peer LDOM.
	 */
	dma_wmb();

	d->hdr.state = VIO_DESC_READY;

	/* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
	 * to notify the consumer that some descriptors are READY.
	 * After that "start" trigger, no additional triggers are needed until
	 * a DRING_STOPPED is received from the consumer.  The dr->cons field
	 * (set up by vnet_ack()) has the value of the next dring index
	 * that has not yet been ack-ed.  We send a "start" trigger here
	 * if, and only if, start_cons is true (reset it afterward).  Conversely,
	 * vnet_ack() should check if the dring corresponding to cons
	 * is marked READY, but start_cons was false.
	 * If so, vnet_ack() should send out the missed "start" trigger.
	 *
	 * Note that the dma_wmb() above makes sure the cookies et al. are
	 * not globally visible before the VIO_DESC_READY, and that the
	 * stores are ordered correctly by the compiler.  The consumer will
	 * not proceed until the VIO_DESC_READY is visible assuring that
	 * the consumer does not observe anything related to descriptors
	 * out of order.  The HV trap from the LDC start trigger is the
	 * producer to consumer announcement that work is available to the
	 * consumer
	 */
	if (!port->start_cons) { /* previous trigger suffices */
		trace_vnet_skip_tx_trigger(port->vio._local_sid,
					   port->vio._peer_sid, dr->cons);
		goto ldc_start_done;
	}

	err = __vnet_tx_trigger(port, dr->cons);
	if (unlikely(err < 0)) {
		netdev_info(dev, "TX trigger error %d\n", err);
		d->hdr.state = VIO_DESC_FREE;
		skb = port->tx_bufs[txi].skb;
		port->tx_bufs[txi].skb = NULL;
		dev->stats.tx_carrier_errors++;
		goto out_dropped;
	}

ldc_start_done:
	port->start_cons = false;

	dev->stats.tx_packets++;
	dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
	port->stats.tx_packets++;
	port->stats.tx_bytes += port->tx_bufs[txi].skb->len;

	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
	if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
		netif_tx_stop_queue(txq);
		smp_rmb();
		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
			netif_tx_wake_queue(txq);
	}

	(void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
	rcu_read_unlock();

	vnet_free_skbs(freeskbs);

	return NETDEV_TX_OK;

out_dropped:
	if (pending)
		(void)mod_timer(&port->clean_timer,
				jiffies + VNET_CLEAN_TIMEOUT);
	else if (port)
		del_timer(&port->clean_timer);
	rcu_read_unlock();
	if (skb)
		dev_kfree_skb(skb);
	vnet_free_skbs(freeskbs);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common);

void sunvnet_tx_timeout_common(struct net_device *dev)
{
	/* XXX Implement me XXX */
}
EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common);

int sunvnet_open_common(struct net_device *dev)
{
	netif_carrier_on(dev);
	netif_tx_start_all_queues(dev);

	return 0;
}
EXPORT_SYMBOL_GPL(sunvnet_open_common);

int sunvnet_close_common(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);

	return 0;
}
EXPORT_SYMBOL_GPL(sunvnet_close_common);

static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
{
	struct vnet_mcast_entry *m;

	for (m = vp->mcast_list; m; m = m->next) {
		if (ether_addr_equal(m->addr, addr))
			return m;
	}
	return NULL;
}

static void __update_mc_list(struct vnet *vp, struct net_device *dev)
{
	struct netdev_hw_addr *ha;

	netdev_for_each_mc_addr(ha, dev) {
		struct vnet_mcast_entry *m;

		m = __vnet_mc_find(vp, ha->addr);
		if (m) {
			m->hit = 1;
			continue;
		}

		if (!m) {
			m = kzalloc(sizeof(*m), GFP_ATOMIC);
			if (!m)
				continue;
			memcpy(m->addr, ha->addr, ETH_ALEN);
			m->hit = 1;

			m->next = vp->mcast_list;
			vp->mcast_list = m;
		}
	}
}

static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
{
	struct vio_net_mcast_info info;
	struct vnet_mcast_entry *m, **pp;
	int n_addrs;

	memset(&info, 0, sizeof(info));

	info.tag.type = VIO_TYPE_CTRL;
	info.tag.stype = VIO_SUBTYPE_INFO;
	info.tag.stype_env = VNET_MCAST_INFO;
	info.tag.sid = vio_send_sid(&port->vio);
	info.set = 1;

	n_addrs = 0;
	for (m = vp->mcast_list; m; m = m->next) {
		if (m->sent)
			continue;
		m->sent = 1;
		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;

			(void)vio_ldc_send(&port->vio, &info,
					   sizeof(info));
			n_addrs = 0;
		}
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void)vio_ldc_send(&port->vio, &info, sizeof(info));
	}

	info.set = 0;

	n_addrs = 0;
	pp = &vp->mcast_list;
	while ((m = *pp) != NULL) {
		if (m->hit) {
			m->hit = 0;
			pp = &m->next;
			continue;
		}

		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;
			(void)vio_ldc_send(&port->vio, &info,
					   sizeof(info));
			n_addrs = 0;
		}

		*pp = m->next;
		kfree(m);
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void)vio_ldc_send(&port->vio, &info, sizeof(info));
	}
}

void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp)
{
	struct vnet_port *port;

	rcu_read_lock();
	list_for_each_entry_rcu(port, &vp->port_list, list) {
		if (port->switch_port) {
			__update_mc_list(vp, dev);
			__send_mc_list(vp, port);
			break;
		}
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common);

int sunvnet_set_mac_addr_common(struct net_device *dev, void *p)
{
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common);

void sunvnet_port_free_tx_bufs_common(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	int i;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	if (!dr->base)
		return;

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		struct vio_net_desc *d;
		void *skb = port->tx_bufs[i].skb;

		if (!skb)
			continue;

		d = vio_dring_entry(dr, i);

		ldc_unmap(port->vio.lp,
			  port->tx_bufs[i].cookies,
			  port->tx_bufs[i].ncookies);
		dev_kfree_skb(skb);
		port->tx_bufs[i].skb = NULL;
		d->hdr.state = VIO_DESC_FREE;
	}
	ldc_free_exp_dring(port->vio.lp, dr->base,
			   (dr->entry_size * dr->num_entries),
			   dr->cookies, dr->ncookies);
	dr->base = NULL;
	dr->entry_size = 0;
	dr->num_entries = 0;
	dr->pending = 0;
	dr->ncookies = 0;
}
EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common);

void vnet_port_reset(struct vnet_port *port)
{
	del_timer(&port->clean_timer);
	sunvnet_port_free_tx_bufs_common(port);
	port->rmtu = 0;
	port->tso = (port->vsw == 0);  /* no tso in vsw, misbehaves in bridge */
	port->tsolen = 0;
}
EXPORT_SYMBOL_GPL(vnet_port_reset);
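
/* Allocate and export the TX descriptor ring over LDC.  Each entry
 * holds a vio_net_desc, room for two transfer cookies and, from
 * protocol v1.7 on, the vio_net_dext extension used for LSO and
 * checksum-offload flags.  All entries start out VIO_DESC_FREE.
 */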
static int vnet_port_alloc_tx_ring(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	unsigned long len, elen;
	int i, err, ncookies;
	void *dring;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	elen = sizeof(struct vio_net_desc) +
	       sizeof(struct ldc_trans_cookie) * 2;
	if (vio_version_after_eq(&port->vio, 1, 7))
		elen += sizeof(struct vio_net_dext);
	len = VNET_TX_RING_SIZE * elen;

	ncookies = VIO_MAX_RING_COOKIES;
	dring = ldc_alloc_exp_dring(port->vio.lp, len,
				    dr->cookies, &ncookies,
				    (LDC_MAP_SHADOW |
				     LDC_MAP_DIRECT |
				     LDC_MAP_RW));
	if (IS_ERR(dring)) {
		err = PTR_ERR(dring);
		goto err_out;
	}

	dr->base = dring;
	dr->entry_size = elen;
	dr->num_entries = VNET_TX_RING_SIZE;
	dr->prod = 0;
	dr->cons = 0;
	port->start_cons = true; /* need an initial trigger */
	dr->pending = VNET_TX_RING_SIZE;
	dr->ncookies = ncookies;

	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		d = vio_dring_entry(dr, i);
		d->hdr.state = VIO_DESC_FREE;
	}
	return 0;

err_out:
	sunvnet_port_free_tx_bufs_common(port);
	return err;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp)
{
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);
		napi_schedule(&port->napi);
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}
EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
#endif

void sunvnet_port_add_txq_common(struct vnet_port *port)
{
	struct vnet *vp = port->vp;
	int smallest = 0;
	int i;

	/* find the first least-used q
	 * When there are more ldoms than q's, we start to
	 * double up on ports per queue.
	 */
	for (i = 0; i < VNET_MAX_TXQS; i++) {
		if (vp->q_used[i] == 0) {
			smallest = i;
			break;
		}
		if (vp->q_used[i] < vp->q_used[smallest])
			smallest = i;
	}

	vp->nports++;
	vp->q_used[smallest]++;
	port->q_index = smallest;
}
EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common);

void sunvnet_port_rm_txq_common(struct vnet_port *port)
{
	port->vp->nports--;
	port->vp->q_used[port->q_index]--;
	port->q_index = 0;
}
EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common);