/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>
#include <linux/highmem.h>

#include <net/tcp.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/memory.h>
#include <xen/page.h>

#include <asm/xen/hypercall.h>
/* Provide an option to disable split event channels at load time as
 * event channels are a limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = true;
module_param(separate_tx_rx_irq, bool, 0644);

/* The time that packets can stay on the guest Rx internal queue
 * before they are dropped.
 */
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);

/* The length of time before the frontend is considered unresponsive
 * because it isn't providing Rx slots.
 */
unsigned int rx_stall_timeout_msecs = 60000;
module_param(rx_stall_timeout_msecs, uint, 0444);

unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
		 "Maximum number of queues per virtual interface");

/*
 * This is the maximum number of slots a skb can have. If a guest sends a
 * skb which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);

/* The amount to copy out of the first guest Tx slot into the skb's
 * linear area. If the first slot has more data, it will be mapped
 * and put into the first frag.
 *
 * This is sized to avoid pulling headers from the frags for most
 * TCP/IP packets.
 */
#define XEN_NETBACK_TX_COPY_LEN 128
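
/*
 * Illustrative note (added, not in the original source): with the default
 * XEN_NETBACK_TX_COPY_LEN of 128, a 1514-byte first slot is handled in two
 * steps by xenvif_tx_build_gops()/xenvif_tx_submit() below:
 *
 *	data_len = 128;		// min(txreq.size, XEN_NETBACK_TX_COPY_LEN)
 *
 *   - a grant copy places those 128 bytes in skb->data, so the protocol
 *     headers end up in the linear area, and
 *   - a grant map exposes the same slot, so the remaining
 *     1514 - 128 = 1386 bytes are appended as frag 0 once the copy and
 *     map operations have been checked.
 */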
/* This is the maximum number of flows in the hash cache. */
#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644);
MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");

static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
			       u8 status);

static void make_tx_response(struct xenvif_queue *queue,
			     struct xen_netif_tx_request *txp,
			     unsigned int extra_count,
			     s8 st);
static void push_tx_responses(struct xenvif_queue *queue);

static inline int tx_work_todo(struct xenvif_queue *queue);

static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
				       u16 idx)
{
	return page_to_pfn(queue->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
					 u16 idx)
{
	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
}

#define callback_param(vif, pending_idx) \
	(vif->pending_tx_info[pending_idx].callback_struct)

/* Find the containing VIF's structure from a pointer in pending_tx_info array
 */
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
{
	u16 pending_idx = ubuf->desc;
	struct pending_tx_info *temp =
		container_of(ubuf, struct pending_tx_info, callback_struct);

	return container_of(temp - pending_idx,
			    struct xenvif_queue,
			    pending_tx_info[0]);
}
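
/*
 * Illustrative walk-through (added comment, not in the original source):
 * ubuf points at pending_tx_info[pending_idx].callback_struct, so the first
 * container_of() recovers that pending_tx_info element. Because
 * queue->pending_tx_info[] is a plain array, stepping back pending_idx
 * elements yields &pending_tx_info[0], and a second container_of() on that
 * address recovers the owning xenvif_queue. For example, if ubuf->desc == 5
 * then temp == &queue->pending_tx_info[5] and
 * temp - 5 == &queue->pending_tx_info[0].
 */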
static u16 frag_get_pending_idx(skb_frag_t *frag)
{
	return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
	frag->page_offset = pending_idx;
}

static inline pending_ring_idx_t pending_index(unsigned i)
{
	return i & (MAX_PENDING_REQS-1);
}
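
/*
 * Added note: the mask above is only correct because MAX_PENDING_REQS is a
 * power of two, so "i & (MAX_PENDING_REQS - 1)" equals "i % MAX_PENDING_REQS"
 * while costing a single AND. For instance, if MAX_PENDING_REQS is 256, a
 * free-running index of 260 maps to ring slot 260 & 255 == 4.
 */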
void xenvif_kick_thread(struct xenvif_queue *queue)
{
	wake_up(&queue->wq);
}

void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
{
	int more_to_do;

	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);

	if (more_to_do)
		napi_schedule(&queue->napi);
}

static void tx_add_credit(struct xenvif_queue *queue)
{
	unsigned long max_burst, max_credit;

	/*
	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
	 * Otherwise the interface can seize up due to insufficient credit.
	 */
	max_burst = max(131072UL, queue->credit_bytes);

	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
	max_credit = queue->remaining_credit + queue->credit_bytes;
	if (max_credit < queue->remaining_credit)
		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

	queue->remaining_credit = min(max_credit, max_burst);
}
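
/*
 * Worked example (added comment): if credit_bytes is 1000000 and
 * remaining_credit is 250000, then max_burst = max(131072, 1000000) = 1000000
 * and max_credit = 1250000, so remaining_credit becomes
 * min(1250000, 1000000) = 1000000, i.e. unused credit never accumulates
 * beyond one full replenish (or the 128kB jumbo burst, whichever is larger).
 */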
void xenvif_tx_credit_callback(unsigned long data)
{
	struct xenvif_queue *queue = (struct xenvif_queue *)data;
	tx_add_credit(queue);
	xenvif_napi_schedule_or_enable_events(queue);
}

static void xenvif_tx_err(struct xenvif_queue *queue,
			  struct xen_netif_tx_request *txp,
			  unsigned int extra_count, RING_IDX end)
{
	RING_IDX cons = queue->tx.req_cons;
	unsigned long flags;

	do {
		spin_lock_irqsave(&queue->response_lock, flags);
		make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
		push_tx_responses(queue);
		spin_unlock_irqrestore(&queue->response_lock, flags);
		if (cons == end)
			break;
		RING_COPY_REQUEST(&queue->tx, cons++, txp);
		extra_count = 0; /* only the first frag can have extras */
	} while (1);
	queue->tx.req_cons = cons;
}

static void xenvif_fatal_tx_err(struct xenvif *vif)
{
	netdev_err(vif->dev, "fatal error; disabling device\n");
	vif->disabled = true;
	/* Disable the vif from queue 0's kthread */
	if (vif->queues)
		xenvif_kick_thread(&vif->queues[0]);
}

static int xenvif_count_requests(struct xenvif_queue *queue,
				 struct xen_netif_tx_request *first,
				 unsigned int extra_count,
				 struct xen_netif_tx_request *txp,
				 int work_to_do)
{
	RING_IDX cons = queue->tx.req_cons;
	int slots = 0;
	int drop_err = 0;
	int more_data;

	if (!(first->flags & XEN_NETTXF_more_data))
		return 0;

	do {
		struct xen_netif_tx_request dropped_tx = { 0 };

		if (slots >= work_to_do) {
			netdev_err(queue->vif->dev,
				   "Asked for %d slots but exceeds this limit\n",
				   work_to_do);
			xenvif_fatal_tx_err(queue->vif);
			return -ENODATA;
		}
		/* This guest is really using too many slots and is
		 * considered malicious.
		 */
		if (unlikely(slots >= fatal_skb_slots)) {
			netdev_err(queue->vif->dev,
				   "Malicious frontend using %d slots, threshold %u\n",
				   slots, fatal_skb_slots);
			xenvif_fatal_tx_err(queue->vif);
			return -E2BIG;
		}

		/* The Xen network protocol had an implicit dependency on
		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
		 * the historical MAX_SKB_FRAGS value 18 to honor the
		 * same behavior as before. Any packet using more than
		 * 18 slots but less than fatal_skb_slots slots is
		 * dropped.
		 */
		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
			if (net_ratelimit())
				netdev_dbg(queue->vif->dev,
					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
			drop_err = -E2BIG;
		}

		if (drop_err)
			txp = &dropped_tx;

		RING_COPY_REQUEST(&queue->tx, cons + slots, txp);

		/* If the guest submitted a frame >= 64 KiB then
		 * first->size overflowed and following slots will
		 * appear to be larger than the frame.
		 *
		 * This cannot be a fatal error as there are buggy
		 * frontends that do this.
		 *
		 * Consume all slots and drop the packet.
		 */
		if (!drop_err && txp->size > first->size) {
			if (net_ratelimit())
				netdev_dbg(queue->vif->dev,
					   "Invalid tx request, slot size %u > remaining size %u\n",
					   txp->size, first->size);
			drop_err = -EIO;
		}

		first->size -= txp->size;
		slots++;

		if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) {
			netdev_err(queue->vif->dev, "Cross page boundary, txp->offset: %u, size: %u\n",
				   txp->offset, txp->size);
			xenvif_fatal_tx_err(queue->vif);
			return -EINVAL;
		}

		more_data = txp->flags & XEN_NETTXF_more_data;

		if (!drop_err)
			txp++;

	} while (more_data);

	if (drop_err) {
		xenvif_tx_err(queue, first, extra_count, cons + slots);
		return drop_err;
	}

	return slots;
}
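
/*
 * Added summary of the slot-counting policy above:
 *   - asking for more slots than work_to_do, or a slot crossing a page
 *     boundary, is treated as a protocol violation and kills the vif
 *     (xenvif_fatal_tx_err());
 *   - using fatal_skb_slots (default 20) or more slots is treated as a
 *     malicious frontend and is also fatal;
 *   - between XEN_NETBK_LEGACY_SLOTS_MAX (18) and fatal_skb_slots the
 *     slots are consumed but the packet is dropped, to stay compatible
 *     with buggy but non-hostile frontends.
 */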
struct xenvif_tx_cb {
	u16 pending_idx;
};

#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)

static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
					   u16 pending_idx,
					   struct xen_netif_tx_request *txp,
					   unsigned int extra_count,
					   struct gnttab_map_grant_ref *mop)
{
	queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
	gnttab_set_map_op(mop, idx_to_kaddr(queue, pending_idx),
			  GNTMAP_host_map | GNTMAP_readonly,
			  txp->gref, queue->vif->domid);

	memcpy(&queue->pending_tx_info[pending_idx].req, txp,
	       sizeof(*txp));
	queue->pending_tx_info[pending_idx].extra_count = extra_count;
}

static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
{
	struct sk_buff *skb =
		alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
			  GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(skb == NULL))
		return NULL;

	/* Packets passed to netif_rx() must have some headroom. */
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

	/* Initialize it here to avoid later surprises */
	skb_shinfo(skb)->destructor_arg = NULL;

	return skb;
}
static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
							struct sk_buff *skb,
							struct xen_netif_tx_request *txp,
							struct gnttab_map_grant_ref *gop,
							unsigned int frag_overflow,
							struct sk_buff *nskb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	int start;
	pending_ring_idx_t index;
	unsigned int nr_slots;

	nr_slots = shinfo->nr_frags;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
	     shinfo->nr_frags++, txp++, gop++) {
		index = pending_index(queue->pending_cons++);
		pending_idx = queue->pending_ring[index];
		xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
	}

	if (frag_overflow) {
		shinfo = skb_shinfo(nskb);
		frags = shinfo->frags;

		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
		     shinfo->nr_frags++, txp++, gop++) {
			index = pending_index(queue->pending_cons++);
			pending_idx = queue->pending_ring[index];
			xenvif_tx_create_map_op(queue, pending_idx, txp, 0,
						gop);
			frag_set_pending_idx(&frags[shinfo->nr_frags],
					     pending_idx);
		}

		skb_shinfo(skb)->frag_list = nskb;
	}

	return gop;
}
static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
					   u16 pending_idx,
					   grant_handle_t handle)
{
	if (unlikely(queue->grant_tx_handle[pending_idx] !=
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(queue->vif->dev,
			   "Trying to overwrite active handle! pending_idx: 0x%x\n",
			   pending_idx);
		BUG();
	}
	queue->grant_tx_handle[pending_idx] = handle;
}

static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
					     u16 pending_idx)
{
	if (unlikely(queue->grant_tx_handle[pending_idx] ==
		     NETBACK_INVALID_HANDLE)) {
		netdev_err(queue->vif->dev,
			   "Trying to unmap invalid handle! pending_idx: 0x%x\n",
			   pending_idx);
		BUG();
	}
	queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
}

static int xenvif_tx_check_gop(struct xenvif_queue *queue,
			       struct sk_buff *skb,
			       struct gnttab_map_grant_ref **gopp_map,
			       struct gnttab_copy **gopp_copy)
{
	struct gnttab_map_grant_ref *gop_map = *gopp_map;
	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
	/* This always points to the shinfo of the skb being checked, which
	 * could be either the first or the one on the frag_list
	 */
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	/* If this is non-NULL, we are currently checking the frag_list skb, and
	 * this points to the shinfo of the first one
	 */
	struct skb_shared_info *first_shinfo = NULL;
	int nr_frags = shinfo->nr_frags;
	const bool sharedslot = nr_frags &&
				frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
	int i, err;

	/* Check status of header. */
	err = (*gopp_copy)->status;
	if (unlikely(err)) {
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
				   (*gopp_copy)->status,
				   pending_idx,
				   (*gopp_copy)->source.u.ref);
		/* The first frag might still have this slot mapped */
		if (!sharedslot)
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_ERROR);
	}
	(*gopp_copy)++;

check_frags:
	for (i = 0; i < nr_frags; i++, gop_map++) {
		int j, newerr;

		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);

		/* Check error status: if okay then remember grant handle. */
		newerr = gop_map->status;

		if (likely(!newerr)) {
			xenvif_grant_handle_set(queue,
						pending_idx,
						gop_map->handle);
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err)) {
				xenvif_idx_unmap(queue, pending_idx);
				/* If the mapping of the first frag was OK, but
				 * the header's copy failed, and they are
				 * sharing a slot, send an error
				 */
				if (i == 0 && sharedslot)
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_ERROR);
				else
					xenvif_idx_release(queue, pending_idx,
							   XEN_NETIF_RSP_OKAY);
			}
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		if (net_ratelimit())
			netdev_dbg(queue->vif->dev,
				   "Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
				   i,
				   gop_map->status,
				   pending_idx,
				   gop_map->ref);
		xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;
		/* First error: if the header hasn't shared a slot with the
		 * first frag, release it as well.
		 */
		if (!sharedslot)
			xenvif_idx_release(queue,
					   XENVIF_TX_CB(skb)->pending_idx,
					   XEN_NETIF_RSP_OKAY);

		/* Invalidate preceding fragments of this skb. */
		for (j = 0; j < i; j++) {
			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
			xenvif_idx_unmap(queue, pending_idx);
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		/* And if we found the error while checking the frag_list, unmap
		 * the first skb's frags
		 */
		if (first_shinfo) {
			for (j = 0; j < first_shinfo->nr_frags; j++) {
				pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]);
				xenvif_idx_unmap(queue, pending_idx);
				xenvif_idx_release(queue, pending_idx,
						   XEN_NETIF_RSP_OKAY);
			}
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	if (skb_has_frag_list(skb) && !first_shinfo) {
		first_shinfo = skb_shinfo(skb);
		shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
		nr_frags = shinfo->nr_frags;

		goto check_frags;
	}

	*gopp_map = gop_map;
	return err;
}
static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;
	u16 prev_pending_idx = INVALID_PENDING_IDX;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		struct xen_netif_tx_request *txp;
		struct page *page;
		u16 pending_idx;

		pending_idx = frag_get_pending_idx(frag);

		/* If this is not the first frag, chain it to the previous */
		if (prev_pending_idx == INVALID_PENDING_IDX)
			skb_shinfo(skb)->destructor_arg =
				&callback_param(queue, pending_idx);
		else
			callback_param(queue, prev_pending_idx).ctx =
				&callback_param(queue, pending_idx);
		callback_param(queue, pending_idx).ctx = NULL;
		prev_pending_idx = pending_idx;

		txp = &queue->pending_tx_info[pending_idx].req;
		page = virt_to_page(idx_to_kaddr(queue, pending_idx));
		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;

		/* Take an extra reference to offset network stack's put_page */
		get_page(queue->mmap_pages[pending_idx]);
	}
}
static int xenvif_get_extras(struct xenvif_queue *queue,
			     struct xen_netif_extra_info *extras,
			     unsigned int *extra_count,
			     int work_to_do)
{
	struct xen_netif_extra_info extra;
	RING_IDX cons = queue->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			netdev_err(queue->vif->dev, "Missing extra info\n");
			xenvif_fatal_tx_err(queue->vif);
			return -EBADR;
		}

		RING_COPY_REQUEST(&queue->tx, cons, &extra);

		queue->tx.req_cons = ++cons;
		(*extra_count)++;

		if (unlikely(!extra.type ||
			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			netdev_err(queue->vif->dev,
				   "Invalid extra type: %d\n", extra.type);
			xenvif_fatal_tx_err(queue->vif);
			return -EINVAL;
		}

		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

static int xenvif_set_skb_gso(struct xenvif *vif,
			      struct sk_buff *skb,
			      struct xen_netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		netdev_err(vif->dev, "GSO size must not be zero.\n");
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	switch (gso->u.gso.type) {
	case XEN_NETIF_GSO_TYPE_TCPV4:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		break;
	case XEN_NETIF_GSO_TYPE_TCPV6:
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
		break;
	default:
		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
		xenvif_fatal_tx_err(vif);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	/* gso_segs will be calculated later */

	return 0;
}
static int checksum_setup(struct xenvif_queue *queue, struct sk_buff *skb)
{
	bool recalculate_partial_csum = false;

	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
	 * peers can fail to set NETRXF_csum_blank when sending a GSO
	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
	 * recalculate the partial checksum.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
		queue->stats.rx_gso_checksum_fixup++;
		skb->ip_summed = CHECKSUM_PARTIAL;
		recalculate_partial_csum = true;
	}

	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	return skb_checksum_setup(skb, recalculate_partial_csum);
}

static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
{
	u64 now = get_jiffies_64();
	u64 next_credit = queue->credit_window_start +
		msecs_to_jiffies(queue->credit_usec / 1000);

	/* Timer could already be pending in rare cases. */
	if (timer_pending(&queue->credit_timeout))
		return true;

	/* Passed the point where we can replenish credit? */
	if (time_after_eq64(now, next_credit)) {
		queue->credit_window_start = now;
		tx_add_credit(queue);
	}

	/* Still too big to send right now? Set a callback. */
	if (size > queue->remaining_credit) {
		queue->credit_timeout.data =
			(unsigned long)queue;
		mod_timer(&queue->credit_timeout,
			  next_credit);
		queue->credit_window_start = next_credit;

		return true;
	}

	return false;
}
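
/*
 * Added worked example of the credit window above: with credit_bytes =
 * 1000000 and credit_usec = 100000, the guest may send up to 1 MB per
 * 100 ms window. A 1500-byte packet arriving after the window has elapsed
 * triggers tx_add_credit() and is sent immediately; one that arrives when
 * remaining_credit is exhausted re-arms credit_timeout for the end of the
 * current window, and xenvif_tx_credit_callback() then tops the credit up
 * and reschedules NAPI so transmission resumes.
 */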
/* No locking is required in xenvif_mcast_add/del() as they are
 * only ever invoked from NAPI poll. An RCU list is used because
 * xenvif_mcast_match() is called asynchronously, during start_xmit.
 */

static int xenvif_mcast_add(struct xenvif *vif, const u8 *addr)
{
	struct xenvif_mcast_addr *mcast;

	if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) {
		if (net_ratelimit())
			netdev_err(vif->dev,
				   "Too many multicast addresses\n");
		return -ENOSPC;
	}

	mcast = kzalloc(sizeof(*mcast), GFP_ATOMIC);
	if (!mcast)
		return -ENOMEM;

	ether_addr_copy(mcast->addr, addr);
	list_add_tail_rcu(&mcast->entry, &vif->fe_mcast_addr);
	vif->fe_mcast_count++;

	return 0;
}

static void xenvif_mcast_del(struct xenvif *vif, const u8 *addr)
{
	struct xenvif_mcast_addr *mcast;

	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
		if (ether_addr_equal(addr, mcast->addr)) {
			--vif->fe_mcast_count;
			list_del_rcu(&mcast->entry);
			kfree_rcu(mcast, rcu);
			break;
		}
	}
}

bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr)
{
	struct xenvif_mcast_addr *mcast;

	rcu_read_lock();
	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
		if (ether_addr_equal(addr, mcast->addr)) {
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}

void xenvif_mcast_addr_list_free(struct xenvif *vif)
{
	/* No need for locking or RCU here. NAPI poll and TX queue
	 * are stopped.
	 */
	while (!list_empty(&vif->fe_mcast_addr)) {
		struct xenvif_mcast_addr *mcast;

		mcast = list_first_entry(&vif->fe_mcast_addr,
					 struct xenvif_mcast_addr,
					 entry);
		--vif->fe_mcast_count;
		list_del(&mcast->entry);
		kfree(mcast);
	}
}
static void xenvif_tx_build_gops(struct xenvif_queue *queue,
				 int budget,
				 unsigned *copy_ops,
				 unsigned *map_ops)
{
	struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
	struct sk_buff *skb, *nskb;
	int ret;
	unsigned int frag_overflow;

	while (skb_queue_len(&queue->tx_queue) < budget) {
		struct xen_netif_tx_request txreq;
		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
		unsigned int extra_count;
		u16 pending_idx;
		RING_IDX idx;
		int work_to_do;
		unsigned int data_len;
		pending_ring_idx_t index;

		if (queue->tx.sring->req_prod - queue->tx.req_cons >
		    XEN_NETIF_TX_RING_SIZE) {
			netdev_err(queue->vif->dev,
				   "Impossible number of requests. "
				   "req_prod %d, req_cons %d, size %ld\n",
				   queue->tx.sring->req_prod, queue->tx.req_cons,
				   XEN_NETIF_TX_RING_SIZE);
			xenvif_fatal_tx_err(queue->vif);
			break;
		}

		work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
		if (!work_to_do)
			break;

		idx = queue->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		RING_COPY_REQUEST(&queue->tx, idx, &txreq);

		/* Credit-based scheduling. */
		if (txreq.size > queue->remaining_credit &&
		    tx_credit_exceeded(queue, txreq.size))
			break;

		queue->remaining_credit -= txreq.size;

		work_to_do--;
		queue->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		extra_count = 0;
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(queue, extras,
						       &extra_count,
						       work_to_do);
			idx = queue->tx.req_cons;
			if (unlikely(work_to_do < 0))
				break;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1].type) {
			struct xen_netif_extra_info *extra;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - 1];
			ret = xenvif_mcast_add(queue->vif, extra->u.mcast.addr);

			make_tx_response(queue, &txreq, extra_count,
					 (ret == 0) ?
					 XEN_NETIF_RSP_OKAY :
					 XEN_NETIF_RSP_ERROR);
			push_tx_responses(queue);
			continue;
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1].type) {
			struct xen_netif_extra_info *extra;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - 1];
			xenvif_mcast_del(queue->vif, extra->u.mcast.addr);

			make_tx_response(queue, &txreq, extra_count,
					 XEN_NETIF_RSP_OKAY);
			push_tx_responses(queue);
			continue;
		}

		ret = xenvif_count_requests(queue, &txreq, extra_count,
					    txfrags, work_to_do);
		if (unlikely(ret < 0))
			break;

		idx += ret;

		if (unlikely(txreq.size < ETH_HLEN)) {
			netdev_dbg(queue->vif->dev,
				   "Bad packet size: %d\n", txreq.size);
			xenvif_tx_err(queue, &txreq, extra_count, idx);
			break;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
			netdev_err(queue->vif->dev,
				   "txreq.offset: %u, size: %u, end: %lu\n",
				   txreq.offset, txreq.size,
				   (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size);
			xenvif_fatal_tx_err(queue->vif);
			break;
		}

		index = pending_index(queue->pending_cons);
		pending_idx = queue->pending_ring[index];

		data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
			XEN_NETBACK_TX_COPY_LEN : txreq.size;

		skb = xenvif_alloc_skb(data_len);
		if (unlikely(skb == NULL)) {
			netdev_dbg(queue->vif->dev,
				   "Can't allocate a skb in start_xmit.\n");
			xenvif_tx_err(queue, &txreq, extra_count, idx);
			break;
		}

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size)
			skb_shinfo(skb)->nr_frags++;
		/* At this point shinfo->nr_frags is in fact the number of
		 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
		 */
		frag_overflow = 0;
		nskb = NULL;
		if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
			frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
			BUG_ON(frag_overflow > MAX_SKB_FRAGS);
			skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
			nskb = xenvif_alloc_skb(0);
			if (unlikely(nskb == NULL)) {
				kfree_skb(skb);
				xenvif_tx_err(queue, &txreq, extra_count, idx);
				if (net_ratelimit())
					netdev_err(queue->vif->dev,
						   "Can't allocate the frag_list skb.\n");
				break;
			}
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct xen_netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (xenvif_set_skb_gso(queue->vif, skb, gso)) {
				/* Failure in xenvif_set_skb_gso is fatal. */
				kfree_skb(skb);
				kfree_skb(nskb);
				break;
			}
		}

		if (extras[XEN_NETIF_EXTRA_TYPE_HASH - 1].type) {
			struct xen_netif_extra_info *extra;
			enum pkt_hash_types type = PKT_HASH_TYPE_NONE;

			extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];

			switch (extra->u.hash.type) {
			case _XEN_NETIF_CTRL_HASH_TYPE_IPV4:
			case _XEN_NETIF_CTRL_HASH_TYPE_IPV6:
				type = PKT_HASH_TYPE_L3;
				break;

			case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP:
			case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP:
				type = PKT_HASH_TYPE_L4;
				break;

			default:
				break;
			}

			if (type != PKT_HASH_TYPE_NONE)
				skb_set_hash(skb,
					     *(u32 *)extra->u.hash.value,
					     type);
		}

		XENVIF_TX_CB(skb)->pending_idx = pending_idx;

		__skb_put(skb, data_len);
		queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
		queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
		queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;

		queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
			virt_to_gfn(skb->data);
		queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
		queue->tx_copy_ops[*copy_ops].dest.offset =
			offset_in_page(skb->data) & ~XEN_PAGE_MASK;

		queue->tx_copy_ops[*copy_ops].len = data_len;
		queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;

		(*copy_ops)++;

		if (data_len < txreq.size) {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     pending_idx);
			xenvif_tx_create_map_op(queue, pending_idx, &txreq,
						extra_count, gop);
			gop++;
		} else {
			frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
					     INVALID_PENDING_IDX);
			memcpy(&queue->pending_tx_info[pending_idx].req,
			       &txreq, sizeof(txreq));
			queue->pending_tx_info[pending_idx].extra_count =
				extra_count;
		}

		queue->pending_cons++;

		gop = xenvif_get_requests(queue, skb, txfrags, gop,
					  frag_overflow, nskb);

		__skb_queue_tail(&queue->tx_queue, skb);

		queue->tx.req_cons = idx;

		if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
		    (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
			break;
	}

	(*map_ops) = gop - queue->tx_map_ops;
	return;
}
/* Consolidate skb with a frag_list into a brand new one with local pages on
 * frags. Returns 0 on success, or -ENOMEM if it can't allocate new pages.
 */
static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *skb)
{
	unsigned int offset = skb_headlen(skb);
	skb_frag_t frags[MAX_SKB_FRAGS];
	int i, f;
	struct ubuf_info *uarg;
	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;

	queue->stats.tx_zerocopy_sent += 2;
	queue->stats.tx_frag_overflow++;

	xenvif_fill_frags(queue, nskb);
	/* Subtract frags size, we will correct it later */
	skb->truesize -= skb->data_len;
	skb->len += nskb->len;
	skb->data_len += nskb->len;

	/* create a brand new frags array and coalesce there */
	for (i = 0; offset < skb->len; i++) {
		struct page *page;
		unsigned int len;

		BUG_ON(i >= MAX_SKB_FRAGS);
		page = alloc_page(GFP_ATOMIC);
		if (!page) {
			int j;
			skb->truesize += skb->data_len;
			for (j = 0; j < i; j++)
				put_page(frags[j].page.p);
			return -ENOMEM;
		}

		if (offset + PAGE_SIZE < skb->len)
			len = PAGE_SIZE;
		else
			len = skb->len - offset;
		if (skb_copy_bits(skb, offset, page_address(page), len))
			BUG();

		offset += len;
		frags[i].page.p = page;
		frags[i].page_offset = 0;
		skb_frag_size_set(&frags[i], len);
	}

	/* Copied all the bits from the frag list -- free it. */
	skb_frag_list_init(skb);
	xenvif_skb_zerocopy_prepare(queue, nskb);
	kfree_skb(nskb);

	/* Release all the original (foreign) frags. */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		skb_frag_unref(skb, f);
	uarg = skb_shinfo(skb)->destructor_arg;
	/* increase inflight counter to offset decrement in callback */
	atomic_inc(&queue->inflight_packets);
	uarg->callback(uarg, true);
	skb_shinfo(skb)->destructor_arg = NULL;

	/* Fill the skb with the new (local) frags. */
	memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
	skb_shinfo(skb)->nr_frags = i;
	skb->truesize += i * PAGE_SIZE;

	return 0;
}
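
/*
 * Added note: the consolidation above covers the rare case of a packet
 * using more than MAX_SKB_FRAGS slots, where a frag_list skb backed by
 * foreign (guest-granted) pages was built. Copying the data into freshly
 * allocated local pages lets the zerocopy callback be invoked right away
 * so the grant mappings can be torn down, at the cost of one bounce copy
 * for these oversized packets.
 */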
static int xenvif_tx_submit(struct xenvif_queue *queue)
{
	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
	struct sk_buff *skb;
	int work_done = 0;

	while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
		txp = &queue->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
			/* If there was an error, xenvif_tx_check_gop is
			 * expected to release all the frags which were mapped,
			 * so kfree_skb shouldn't do it again
			 */
			skb_shinfo(skb)->nr_frags = 0;
			if (skb_has_frag_list(skb)) {
				struct sk_buff *nskb =
						skb_shinfo(skb)->frag_list;
				skb_shinfo(nskb)->nr_frags = 0;
			}
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		callback_param(queue, pending_idx).ctx = NULL;
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xenvif_idx_release(queue, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(queue, skb);

		if (unlikely(skb_has_frag_list(skb))) {
			if (xenvif_handle_frag_list(queue, skb)) {
				if (net_ratelimit())
					netdev_err(queue->vif->dev,
						   "Not enough memory to consolidate frag_list!\n");
				xenvif_skb_zerocopy_prepare(queue, skb);
				kfree_skb(skb);
				continue;
			}
		}

		skb->dev = queue->vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(queue, skb)) {
			netdev_dbg(queue->vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			/* We have to set this flag to trigger the callback */
			if (skb_shinfo(skb)->destructor_arg)
				xenvif_skb_zerocopy_prepare(queue, skb);
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		/* If the packet is GSO then we will have just set up the
		 * transport header offset in checksum_setup so it's now
		 * straightforward to calculate gso_segs.
		 */
		if (skb_is_gso(skb)) {
			int mss = skb_shinfo(skb)->gso_size;
			int hdrlen = skb_transport_header(skb) -
				skb_mac_header(skb) +
				tcp_hdrlen(skb);

			skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdrlen, mss);
		}

		queue->stats.rx_bytes += skb->len;
		queue->stats.rx_packets++;

		work_done++;

		/* Set this flag right before netif_receive_skb, otherwise
		 * someone might think this packet already left netback, and
		 * do a skb_copy_ubufs while we are still in control of the
		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
		 */
		if (skb_shinfo(skb)->destructor_arg) {
			xenvif_skb_zerocopy_prepare(queue, skb);
			queue->stats.tx_zerocopy_sent++;
		}

		netif_receive_skb(skb);
	}

	return work_done;
}
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
{
	unsigned long flags;
	pending_ring_idx_t index;
	struct xenvif_queue *queue = ubuf_to_queue(ubuf);

	/* This is the only place where we grab this lock, to protect callbacks
	 * from each other.
	 */
	spin_lock_irqsave(&queue->callback_lock, flags);
	do {
		u16 pending_idx = ubuf->desc;
		ubuf = (struct ubuf_info *) ubuf->ctx;
		BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
			MAX_PENDING_REQS);
		index = pending_index(queue->dealloc_prod);
		queue->dealloc_ring[index] = pending_idx;
		/* Sync with xenvif_tx_dealloc_action:
		 * insert idx then incr producer.
		 */
		smp_wmb();
		queue->dealloc_prod++;
	} while (ubuf);
	spin_unlock_irqrestore(&queue->callback_lock, flags);

	if (likely(zerocopy_success))
		queue->stats.tx_zerocopy_success++;
	else
		queue->stats.tx_zerocopy_fail++;
	xenvif_skb_zerocopy_complete(queue);
}
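
/*
 * Added note on the dealloc ring protocol: xenvif_zerocopy_callback() is
 * the producer (serialised by callback_lock) and the dealloc kthread is
 * the consumer. The smp_wmb() above pairs with the smp_rmb() in
 * xenvif_tx_dealloc_action() below, guaranteeing that a consumer which
 * observes the incremented dealloc_prod also sees the pending_idx that
 * was just written into dealloc_ring[].
 */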
static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
{
	struct gnttab_unmap_grant_ref *gop;
	pending_ring_idx_t dc, dp;
	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
	unsigned int i = 0;

	dc = queue->dealloc_cons;
	gop = queue->tx_unmap_ops;

	/* Free up any grants we have finished using */
	do {
		dp = queue->dealloc_prod;

		/* Ensure we see all indices enqueued by all
		 * xenvif_zerocopy_callback().
		 */
		smp_rmb();

		while (dc != dp) {
			BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
			pending_idx =
				queue->dealloc_ring[pending_index(dc++)];

			pending_idx_release[gop - queue->tx_unmap_ops] =
				pending_idx;
			queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
				queue->mmap_pages[pending_idx];
			gnttab_set_unmap_op(gop,
					    idx_to_kaddr(queue, pending_idx),
					    GNTMAP_host_map,
					    queue->grant_tx_handle[pending_idx]);
			xenvif_grant_handle_reset(queue, pending_idx);
			++gop;
		}

	} while (dp != queue->dealloc_prod);

	queue->dealloc_cons = dc;

	if (gop - queue->tx_unmap_ops > 0) {
		int ret;
		ret = gnttab_unmap_refs(queue->tx_unmap_ops,
					NULL,
					queue->pages_to_unmap,
					gop - queue->tx_unmap_ops);
		if (ret) {
			netdev_err(queue->vif->dev, "Unmap fail: nr_ops %tu ret %d\n",
				   gop - queue->tx_unmap_ops, ret);
			for (i = 0; i < gop - queue->tx_unmap_ops; ++i) {
				if (gop[i].status != GNTST_okay)
					netdev_err(queue->vif->dev,
						   " host_addr: 0x%llx handle: 0x%x status: %d\n",
						   gop[i].host_addr,
						   gop[i].handle,
						   gop[i].status);
			}
			BUG();
		}
	}

	for (i = 0; i < gop - queue->tx_unmap_ops; ++i)
		xenvif_idx_release(queue, pending_idx_release[i],
				   XEN_NETIF_RSP_OKAY);
}
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif_queue *queue, int budget)
{
	unsigned nr_mops, nr_cops = 0;
	int work_done, ret;

	if (unlikely(!tx_work_todo(queue)))
		return 0;

	xenvif_tx_build_gops(queue, budget, &nr_cops, &nr_mops);

	if (nr_cops == 0)
		return 0;

	gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
	if (nr_mops != 0) {
		ret = gnttab_map_refs(queue->tx_map_ops,
				      NULL,
				      queue->pages_to_map,
				      nr_mops);
		BUG_ON(ret);
	}

	work_done = xenvif_tx_submit(queue);

	return work_done;
}
static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
			       u8 status)
{
	struct pending_tx_info *pending_tx_info;
	pending_ring_idx_t index;
	unsigned long flags;

	pending_tx_info = &queue->pending_tx_info[pending_idx];

	spin_lock_irqsave(&queue->response_lock, flags);

	make_tx_response(queue, &pending_tx_info->req,
			 pending_tx_info->extra_count, status);

	/* Release the pending index before pushing the Tx response so
	 * it's available before a new Tx request is pushed by the
	 * frontend.
	 */
	index = pending_index(queue->pending_prod++);
	queue->pending_ring[index] = pending_idx;

	push_tx_responses(queue);

	spin_unlock_irqrestore(&queue->response_lock, flags);
}
static void make_tx_response(struct xenvif_queue *queue,
			     struct xen_netif_tx_request *txp,
			     unsigned int extra_count,
			     s8 st)
{
	RING_IDX i = queue->tx.rsp_prod_pvt;
	struct xen_netif_tx_response *resp;

	resp = RING_GET_RESPONSE(&queue->tx, i);
	resp->id = txp->id;
	resp->status = st;

	while (extra_count-- != 0)
		RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;

	queue->tx.rsp_prod_pvt = ++i;
}

static void push_tx_responses(struct xenvif_queue *queue)
{
	int notify;

	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
	if (notify)
		notify_remote_via_irq(queue->tx_irq);
}
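
/*
 * Added note: every consumed request slot gets exactly one response (plus
 * one XEN_NETIF_RSP_NULL per extra-info slot), so rsp_prod_pvt advances in
 * step with req_cons. RING_PUSH_RESPONSES_AND_CHECK_NOTIFY() only asks for
 * an event-channel notification when the frontend has indicated, via
 * rsp_event, that it is waiting for new responses, which keeps interrupts
 * to the guest to a minimum.
 */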
void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
{
	int ret;
	struct gnttab_unmap_grant_ref tx_unmap_op;

	gnttab_set_unmap_op(&tx_unmap_op,
			    idx_to_kaddr(queue, pending_idx),
			    GNTMAP_host_map,
			    queue->grant_tx_handle[pending_idx]);
	xenvif_grant_handle_reset(queue, pending_idx);

	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
				&queue->mmap_pages[pending_idx], 1);
	if (ret) {
		netdev_err(queue->vif->dev,
			   "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n",
			   ret,
			   pending_idx,
			   tx_unmap_op.host_addr,
			   tx_unmap_op.handle,
			   tx_unmap_op.status);
		BUG();
	}
}

static inline int tx_work_todo(struct xenvif_queue *queue)
{
	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
		return 1;

	return 0;
}

static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
{
	return queue->dealloc_cons != queue->dealloc_prod;
}

void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue)
{
	if (queue->tx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
					queue->tx.sring);
	if (queue->rx.sring)
		xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
					queue->rx.sring);
}

int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
				   grant_ref_t tx_ring_ref,
				   grant_ref_t rx_ring_ref)
{
	void *addr;
	struct xen_netif_tx_sring *txs;
	struct xen_netif_rx_sring *rxs;

	int err = -ENOMEM;

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				     &tx_ring_ref, 1, &addr);
	if (err)
		goto err;

	txs = (struct xen_netif_tx_sring *)addr;
	BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);

	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
				     &rx_ring_ref, 1, &addr);
	if (err)
		goto err;

	rxs = (struct xen_netif_rx_sring *)addr;
	BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);

	return 0;

err:
	xenvif_unmap_frontend_data_rings(queue);
	return err;
}

static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
{
	/* Dealloc thread must remain running until all inflight
	 * packets complete.
	 */
	return kthread_should_stop() &&
		!atomic_read(&queue->inflight_packets);
}

int xenvif_dealloc_kthread(void *data)
{
	struct xenvif_queue *queue = data;

	for (;;) {
		wait_event_interruptible(queue->dealloc_wq,
					 tx_dealloc_work_todo(queue) ||
					 xenvif_dealloc_kthread_should_stop(queue));
		if (xenvif_dealloc_kthread_should_stop(queue))
			break;

		xenvif_tx_dealloc_action(queue);
		cond_resched();
	}

	/* Unmap anything remaining */
	if (tx_dealloc_work_todo(queue))
		xenvif_tx_dealloc_action(queue);

	return 0;
}
static void make_ctrl_response(struct xenvif *vif,
			       const struct xen_netif_ctrl_request *req,
			       u32 status, u32 data)
{
	RING_IDX idx = vif->ctrl.rsp_prod_pvt;
	struct xen_netif_ctrl_response rsp = {
		.id = req->id,
		.type = req->type,
		.status = status,
		.data = data,
	};

	*RING_GET_RESPONSE(&vif->ctrl, idx) = rsp;
	vif->ctrl.rsp_prod_pvt = ++idx;
}

static void push_ctrl_response(struct xenvif *vif)
{
	int notify;

	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify);
	if (notify)
		notify_remote_via_irq(vif->ctrl_irq);
}

static void process_ctrl_request(struct xenvif *vif,
				 const struct xen_netif_ctrl_request *req)
{
	u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
	u32 data = 0;

	switch (req->type) {
	case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM:
		status = xenvif_set_hash_alg(vif, req->data[0]);
		break;

	case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS:
		status = xenvif_get_hash_flags(vif, &data);
		break;

	case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS:
		status = xenvif_set_hash_flags(vif, req->data[0]);
		break;

	case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY:
		status = xenvif_set_hash_key(vif, req->data[0],
					     req->data[1]);
		break;

	case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE:
		status = XEN_NETIF_CTRL_STATUS_SUCCESS;
		data = XEN_NETBK_MAX_HASH_MAPPING_SIZE;
		break;

	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE:
		status = xenvif_set_hash_mapping_size(vif,
						      req->data[0]);
		break;

	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING:
		status = xenvif_set_hash_mapping(vif, req->data[0],
						 req->data[1],
						 req->data[2]);
		break;

	default:
		break;
	}

	make_ctrl_response(vif, req, status, data);
	push_ctrl_response(vif);
}

static void xenvif_ctrl_action(struct xenvif *vif)
{
	for (;;) {
		RING_IDX req_prod, req_cons;

		req_prod = vif->ctrl.sring->req_prod;
		req_cons = vif->ctrl.req_cons;

		/* Make sure we can see requests before we process them. */
		rmb();

		if (req_cons == req_prod)
			break;

		while (req_cons != req_prod) {
			struct xen_netif_ctrl_request req;

			RING_COPY_REQUEST(&vif->ctrl, req_cons, &req);
			req_cons++;

			process_ctrl_request(vif, &req);
		}

		vif->ctrl.req_cons = req_cons;
		vif->ctrl.sring->req_event = req_cons + 1;
	}
}

static bool xenvif_ctrl_work_todo(struct xenvif *vif)
{
	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
		return 1;

	return 0;
}

irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
{
	struct xenvif *vif = data;

	while (xenvif_ctrl_work_todo(vif))
		xenvif_ctrl_action(vif);

	return IRQ_HANDLED;
}
static int __init netback_init(void)
{
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	/* Allow as many queues as there are CPUs if user has not
	 * specified a value.
	 */
	if (xenvif_max_queues == 0)
		xenvif_max_queues = num_online_cpus();

	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
		pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
			fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
	}

	rc = xenvif_xenbus_init();
	if (rc)
		goto failed_init;

#ifdef CONFIG_DEBUG_FS
	xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
	if (IS_ERR_OR_NULL(xen_netback_dbg_root))
		pr_warn("Init of debugfs returned %ld!\n",
			PTR_ERR(xen_netback_dbg_root));
#endif /* CONFIG_DEBUG_FS */

	return 0;

failed_init:
	return rc;
}

module_init(netback_init);

static void __exit netback_fini(void)
{
#ifdef CONFIG_DEBUG_FS
	if (!IS_ERR_OR_NULL(xen_netback_dbg_root))
		debugfs_remove_recursive(xen_netback_dbg_root);
#endif /* CONFIG_DEBUG_FS */
	xenvif_xenbus_fini();
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");