intel_irq_remapping.c

#include <linux/interrupt.h>
#include <linux/dmar.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/hpet.h>
#include <linux/pci.h>
#include <linux/irq.h>
#include <linux/intel-iommu.h>
#include <linux/acpi.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/irq_remapping.h>
#include <asm/pci-direct.h>
#include <asm/msidef.h>

#include "irq_remapping.h"
struct ioapic_scope {
	struct intel_iommu *iommu;
	unsigned int id;
	unsigned int bus;	/* PCI bus number */
	unsigned int devfn;	/* PCI devfn number */
};

struct hpet_scope {
	struct intel_iommu *iommu;
	u8 id;
	unsigned int bus;
	unsigned int devfn;
};
#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)

static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
static struct hpet_scope ir_hpet[MAX_HPET_TBS];
static int ir_ioapic_num, ir_hpet_num;

static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);

static int __init parse_ioapics_under_ir(void);
static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
{
	struct irq_cfg *cfg = irq_get_chip_data(irq);

	return cfg ? &cfg->irq_2_iommu : NULL;
}
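/*
 * Copy the live IRTE for @irq out of the hardware remapping table, under
 * irq_2_ir_lock so the read is consistent with concurrent updates.
 */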
static int get_irte(int irq, struct irte *entry)
{
	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
	unsigned long flags;
	int index;

	if (!entry || !irq_iommu)
		return -1;

	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);

	index = irq_iommu->irte_index + irq_iommu->sub_handle;
	*entry = *(irq_iommu->iommu->ir_table->base + index);

	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
	return 0;
}
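/*
 * Allocate @count contiguous IRTEs for @irq in @iommu's remapping table.
 * Multi-entry requests are rounded up to a power of two and the block size
 * is recorded as irte_mask, matching the invalidation handle mask the
 * hardware expects.  Returns the base index, or a negative value on failure.
 */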
static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
{
	struct ir_table *table = iommu->ir_table;
	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
	struct irq_cfg *cfg = irq_get_chip_data(irq);
	unsigned int mask = 0;
	unsigned long flags;
	int index;

	if (!count || !irq_iommu)
		return -1;

	if (count > 1) {
		count = __roundup_pow_of_two(count);
		mask = ilog2(count);
	}

	if (mask > ecap_max_handle_mask(iommu->ecap)) {
		printk(KERN_ERR
		       "Requested mask %x exceeds the max invalidation handle"
		       " mask value %Lx\n", mask,
		       ecap_max_handle_mask(iommu->ecap));
		return -1;
	}

	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
	index = bitmap_find_free_region(table->bitmap,
					INTR_REMAP_TABLE_ENTRIES, mask);
	if (index < 0) {
		pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id);
	} else {
		cfg->remapped = 1;
		irq_iommu->iommu = iommu;
		irq_iommu->irte_index = index;
		irq_iommu->sub_handle = 0;
		irq_iommu->irte_mask = mask;
	}
	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);

	return index;
}
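/*
 * Queue a selective Interrupt Entry Cache invalidation for the IRTE block
 * starting at @index (block size encoded by @mask) and wait for completion.
 */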
static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
{
	struct qi_desc desc;

	desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
		   | QI_IEC_SELECTIVE;
	desc.high = 0;

	return qi_submit_sync(&desc, iommu);
}
static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
{
	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
	unsigned long flags;
	int index;

	if (!irq_iommu)
		return -1;

	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
	*sub_handle = irq_iommu->sub_handle;
	index = irq_iommu->irte_index;
	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
	return index;
}
static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
{
	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
	struct irq_cfg *cfg = irq_get_chip_data(irq);
	unsigned long flags;

	if (!irq_iommu)
		return -1;

	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);

	cfg->remapped = 1;
	irq_iommu->iommu = iommu;
	irq_iommu->irte_index = index;
	irq_iommu->sub_handle = subhandle;
	irq_iommu->irte_mask = 0;

	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);

	return 0;
}
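/*
 * Write @irte_modified into the hardware table entry backing @irq, flush
 * the cacheline, and invalidate the interrupt entry cache so the IOMMU
 * picks up the new contents.
 */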
static int modify_irte(int irq, struct irte *irte_modified)
{
	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
	struct intel_iommu *iommu;
	unsigned long flags;
	struct irte *irte;
	int rc, index;

	if (!irq_iommu)
		return -1;

	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);

	iommu = irq_iommu->iommu;

	index = irq_iommu->irte_index + irq_iommu->sub_handle;
	irte = &iommu->ir_table->base[index];

	set_64bit(&irte->low, irte_modified->low);
	set_64bit(&irte->high, irte_modified->high);
	__iommu_flush_cache(iommu, irte, sizeof(*irte));

	rc = qi_flush_iec(iommu, index, 0);
	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);

	return rc;
}
static struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
{
	int i;

	for (i = 0; i < MAX_HPET_TBS; i++)
		if (ir_hpet[i].id == hpet_id)
			return ir_hpet[i].iommu;
	return NULL;
}

static struct intel_iommu *map_ioapic_to_ir(int apic)
{
	int i;

	for (i = 0; i < MAX_IO_APICS; i++)
		if (ir_ioapic[i].id == apic)
			return ir_ioapic[i].iommu;
	return NULL;
}

static struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
{
	struct dmar_drhd_unit *drhd;

	drhd = dmar_find_matched_drhd_unit(dev);
	if (!drhd)
		return NULL;

	return drhd->iommu;
}
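/*
 * Clear the whole IRTE block owned by @irq_iommu and return it to the
 * allocation bitmap.  Only the owner of the block (sub_handle == 0) does
 * the work; users of shared subhandles simply drop their reference.
 */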
static int clear_entries(struct irq_2_iommu *irq_iommu)
{
	struct irte *start, *entry, *end;
	struct intel_iommu *iommu;
	int index;

	if (irq_iommu->sub_handle)
		return 0;

	iommu = irq_iommu->iommu;
	index = irq_iommu->irte_index + irq_iommu->sub_handle;

	start = iommu->ir_table->base + index;
	end = start + (1 << irq_iommu->irte_mask);

	for (entry = start; entry < end; entry++) {
		set_64bit(&entry->low, 0);
		set_64bit(&entry->high, 0);
	}
	bitmap_release_region(iommu->ir_table->bitmap, index,
			      irq_iommu->irte_mask);

	return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
}

static int free_irte(int irq)
{
	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
	unsigned long flags;
	int rc;

	if (!irq_iommu)
		return -1;

	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);

	rc = clear_entries(irq_iommu);

	irq_iommu->iommu = NULL;
	irq_iommu->irte_index = 0;
	irq_iommu->sub_handle = 0;
	irq_iommu->irte_mask = 0;

	raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);

	return rc;
}
/*
 * source validation type
 */
#define SVT_NO_VERIFY		0x0  /* no verification is required */
#define SVT_VERIFY_SID_SQ	0x1  /* verify using SID and SQ fields */
#define SVT_VERIFY_BUS		0x2  /* verify bus of request-id */

/*
 * source-id qualifier
 */
#define SQ_ALL_16	0x0  /* verify all 16 bits of request-id */
#define SQ_13_IGNORE_1	0x1  /* verify most significant 13 bits, ignore
			      * the third least significant bit
			      */
#define SQ_13_IGNORE_2	0x2  /* verify most significant 13 bits, ignore
			      * the second and third least significant bits
			      */
#define SQ_13_IGNORE_3	0x3  /* verify most significant 13 bits, ignore
			      * the three least significant bits
			      */

/*
 * set SVT, SQ and SID fields of irte to verify
 * source ids of interrupt requests
 */
static void set_irte_sid(struct irte *irte, unsigned int svt,
			 unsigned int sq, unsigned int sid)
{
	if (disable_sourceid_checking)
		svt = SVT_NO_VERIFY;
	irte->svt = svt;
	irte->sq = sq;
	irte->sid = sid;
}
static int set_ioapic_sid(struct irte *irte, int apic)
{
	int i;
	u16 sid = 0;

	if (!irte)
		return -1;

	for (i = 0; i < MAX_IO_APICS; i++) {
		if (ir_ioapic[i].id == apic) {
			sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
			break;
		}
	}

	if (sid == 0) {
		pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic);
		return -1;
	}

	set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid);

	return 0;
}
static int set_hpet_sid(struct irte *irte, u8 id)
{
	int i;
	u16 sid = 0;

	if (!irte)
		return -1;

	for (i = 0; i < MAX_HPET_TBS; i++) {
		if (ir_hpet[i].id == id) {
			sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
			break;
		}
	}

	if (sid == 0) {
		pr_warning("Failed to set source-id of HPET block (%d)\n", id);
		return -1;
	}

	/*
	 * Should really use SQ_ALL_16. Some platforms are broken.
	 * While we figure out the right quirks for these broken platforms, use
	 * SQ_13_IGNORE_3 for now.
	 */
	set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid);

	return 0;
}
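/*
 * Pick the source-id that an MSI from @dev must present:
 *  - a PCIe device (or a Root Complex integrated device) is verified by
 *    its own full bus/devfn,
 *  - a device behind a PCIe-to-PCI/PCI-X bridge is verified by bus number
 *    only, since the bridge takes ownership of the requester-id,
 *  - a device behind a legacy PCI bridge is verified by the bridge's
 *    bus/devfn, which is what appears as the requester.
 */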
static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
{
	struct pci_dev *bridge;

	if (!irte || !dev)
		return -1;

	/* PCIe device or Root Complex integrated PCI device */
	if (pci_is_pcie(dev) || !dev->bus->parent) {
		set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
			     (dev->bus->number << 8) | dev->devfn);
		return 0;
	}

	bridge = pci_find_upstream_pcie_bridge(dev);
	if (bridge) {
		if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */
			set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
				(bridge->bus->number << 8) | dev->bus->number);
		else /* this is a legacy PCI bridge */
			set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
				(bridge->bus->number << 8) | bridge->devfn);
	}

	return 0;
}
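/*
 * Program the remapping table address into DMAR_IRTA_REG and enable
 * remapping: set SIRTP to latch the table pointer, globally invalidate the
 * interrupt entry cache, then set IRE and clear CFI so that
 * compatibility-format (unremapped) interrupts are blocked.
 */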
static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
{
	u64 addr;
	u32 sts;
	unsigned long flags;

	addr = virt_to_phys((void *)iommu->ir_table->base);

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	dmar_writeq(iommu->reg + DMAR_IRTA_REG,
		    (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);

	/* Set interrupt-remapping table pointer */
	iommu->gcmd |= DMA_GCMD_SIRTP;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_IRTPS), sts);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	/*
	 * global invalidation of interrupt entry cache before enabling
	 * interrupt-remapping.
	 */
	qi_global_iec(iommu);

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	/* Enable interrupt-remapping */
	iommu->gcmd |= DMA_GCMD_IRE;
	iommu->gcmd &= ~DMA_GCMD_CFI;  /* Block compatibility-format MSIs */
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_IRES), sts);

	/*
	 * With CFI clear in the Global Command register, we should be
	 * protected from dangerous (i.e. compatibility) interrupts
	 * regardless of x2apic status. Check just to be sure.
	 */
	if (sts & DMA_GSTS_CFIS)
		WARN(1, KERN_WARNING
			"Compatibility-format IRQs enabled despite intr remapping;\n"
			"you are vulnerable to IRQ injection.\n");

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
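/*
 * Allocate the per-IOMMU remapping table (a page-order block of IRTEs plus
 * an allocation bitmap) and hand it to the hardware via
 * iommu_set_irq_remapping().
 */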
static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode)
{
	struct ir_table *ir_table;
	struct page *pages;
	unsigned long *bitmap;

	ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
					     GFP_ATOMIC);
	if (!iommu->ir_table)
		return -ENOMEM;

	pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
				 INTR_REMAP_PAGE_ORDER);
	if (!pages) {
		pr_err("IR%d: failed to allocate pages of order %d\n",
		       iommu->seq_id, INTR_REMAP_PAGE_ORDER);
		kfree(iommu->ir_table);
		return -ENOMEM;
	}

	bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES),
			 sizeof(long), GFP_ATOMIC);
	if (bitmap == NULL) {
		pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
		__free_pages(pages, INTR_REMAP_PAGE_ORDER);
		kfree(ir_table);
		return -ENOMEM;
	}

	ir_table->base = page_address(pages);
	ir_table->bitmap = bitmap;

	iommu_set_irq_remapping(iommu, mode);
	return 0;
}
/*
 * Disable Interrupt Remapping.
 */
static void iommu_disable_irq_remapping(struct intel_iommu *iommu)
{
	unsigned long flags;
	u32 sts;

	if (!ecap_ir_support(iommu->ecap))
		return;

	/*
	 * global invalidation of interrupt entry cache before disabling
	 * interrupt-remapping.
	 */
	qi_global_iec(iommu);

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
	if (!(sts & DMA_GSTS_IRES))
		goto end;

	iommu->gcmd &= ~DMA_GCMD_IRE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, !(sts & DMA_GSTS_IRES), sts);

end:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
static int __init dmar_x2apic_optout(void)
{
	struct acpi_table_dmar *dmar;

	dmar = (struct acpi_table_dmar *)dmar_tbl;
	if (!dmar || no_x2apic_optout)
		return 0;
	return dmar->flags & DMAR_X2APIC_OPT_OUT;
}
static int __init intel_irq_remapping_supported(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	if (disable_irq_remap)
		return 0;
	if (irq_remap_broken) {
		printk(KERN_WARNING
			"This system BIOS has enabled interrupt remapping\n"
			"on a chipset that contains an erratum making that\n"
			"feature unstable. To maintain system stability\n"
			"interrupt remapping is being disabled. Please\n"
			"contact your BIOS vendor for an update\n");
		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
		disable_irq_remap = 1;
		return 0;
	}

	if (!dmar_ir_support())
		return 0;

	for_each_iommu(iommu, drhd)
		if (!ecap_ir_support(iommu->ecap))
			return 0;

	return 1;
}
static int __init intel_enable_irq_remapping(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool x2apic_present;
	int setup = 0;
	int eim = 0;

	x2apic_present = x2apic_supported();

	if (parse_ioapics_under_ir() != 1) {
		printk(KERN_INFO "Not enabling interrupt remapping\n");
		goto error;
	}

	if (x2apic_present) {
		pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");

		eim = !dmar_x2apic_optout();
		if (!eim)
			printk(KERN_WARNING
				"Your BIOS is broken and requested that x2apic be disabled.\n"
				"This will slightly decrease performance.\n"
				"Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
	}

	for_each_iommu(iommu, drhd) {
		/*
		 * If the queued invalidation is already initialized,
		 * shouldn't disable it.
		 */
		if (iommu->qi)
			continue;

		/*
		 * Clear previous faults.
		 */
		dmar_fault(-1, iommu);

		/*
		 * Disable intr remapping and queued invalidation, if already
		 * enabled prior to OS handover.
		 */
		iommu_disable_irq_remapping(iommu);

		dmar_disable_qi(iommu);
	}

	/*
	 * check for the Interrupt-remapping support
	 */
	for_each_iommu(iommu, drhd) {
		if (!ecap_ir_support(iommu->ecap))
			continue;

		if (eim && !ecap_eim_support(iommu->ecap)) {
			printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
			       "ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
			goto error;
		}
	}

	/*
	 * Enable queued invalidation for all the DRHD's.
	 */
	for_each_iommu(iommu, drhd) {
		int ret = dmar_enable_qi(iommu);

		if (ret) {
			printk(KERN_ERR "DRHD %Lx: failed to enable queued "
			       "invalidation, ecap %Lx, ret %d\n",
			       drhd->reg_base_addr, iommu->ecap, ret);
			goto error;
		}
	}

	/*
	 * Setup Interrupt-remapping for all the DRHD's now.
	 */
	for_each_iommu(iommu, drhd) {
		if (!ecap_ir_support(iommu->ecap))
			continue;

		if (intel_setup_irq_remapping(iommu, eim))
			goto error;

		setup = 1;
	}

	if (!setup)
		goto error;

	irq_remapping_enabled = 1;

	/*
	 * VT-d has a different layout for IO-APIC entries when
	 * interrupt remapping is enabled. So it needs a special routine
	 * to print IO-APIC entries for debugging purposes too.
	 */
	x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;

	pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");

	return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;

error:
	/*
	 * handle error condition gracefully here!
	 */
	if (x2apic_present)
		pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n");

	return -1;
}
static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
				    struct intel_iommu *iommu)
{
	struct acpi_dmar_pci_path *path;
	u8 bus;
	int count;

	bus = scope->bus;
	path = (struct acpi_dmar_pci_path *)(scope + 1);
	count = (scope->length - sizeof(struct acpi_dmar_device_scope))
		/ sizeof(struct acpi_dmar_pci_path);

	while (--count > 0) {
		/*
		 * Access PCI directly because the PCI
		 * subsystem isn't initialized yet.
		 */
		bus = read_pci_config_byte(bus, path->device, path->function,
					   PCI_SECONDARY_BUS);
		path++;
	}
	ir_hpet[ir_hpet_num].bus = bus;
	ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->device, path->function);
	ir_hpet[ir_hpet_num].iommu = iommu;
	ir_hpet[ir_hpet_num].id = scope->enumeration_id;
	ir_hpet_num++;
}
static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
				      struct intel_iommu *iommu)
{
	struct acpi_dmar_pci_path *path;
	u8 bus;
	int count;

	bus = scope->bus;
	path = (struct acpi_dmar_pci_path *)(scope + 1);
	count = (scope->length - sizeof(struct acpi_dmar_device_scope))
		/ sizeof(struct acpi_dmar_pci_path);

	while (--count > 0) {
		/*
		 * Access PCI directly because the PCI
		 * subsystem isn't initialized yet.
		 */
		bus = read_pci_config_byte(bus, path->device, path->function,
					   PCI_SECONDARY_BUS);
		path++;
	}

	ir_ioapic[ir_ioapic_num].bus = bus;
	ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->device, path->function);
	ir_ioapic[ir_ioapic_num].iommu = iommu;
	ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
	ir_ioapic_num++;
}
static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
				      struct intel_iommu *iommu)
{
	struct acpi_dmar_hardware_unit *drhd;
	struct acpi_dmar_device_scope *scope;
	void *start, *end;

	drhd = (struct acpi_dmar_hardware_unit *)header;

	start = (void *)(drhd + 1);
	end = ((void *)drhd) + header->length;

	while (start < end) {
		scope = start;
		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
			if (ir_ioapic_num == MAX_IO_APICS) {
				printk(KERN_WARNING "Exceeded Max IO APICS\n");
				return -1;
			}

			printk(KERN_INFO "IOAPIC id %d under DRHD base "
			       "0x%Lx IOMMU %d\n", scope->enumeration_id,
			       drhd->address, iommu->seq_id);

			ir_parse_one_ioapic_scope(scope, iommu);
		} else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) {
			if (ir_hpet_num == MAX_HPET_TBS) {
				printk(KERN_WARNING "Exceeded Max HPET blocks\n");
				return -1;
			}

			printk(KERN_INFO "HPET id %d under DRHD base "
			       "0x%Lx\n", scope->enumeration_id,
			       drhd->address);

			ir_parse_one_hpet_scope(scope, iommu);
		}
		start += scope->length;
	}

	return 0;
}
/*
 * Finds the association between IOAPICs and their Interrupt-remapping
 * hardware units.
 */
static int __init parse_ioapics_under_ir(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ir_supported = 0;
	int ioapic_idx;

	for_each_iommu(iommu, drhd)
		if (ecap_ir_support(iommu->ecap)) {
			if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
				return -1;

			ir_supported = 1;
		}

	if (!ir_supported)
		return 0;

	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
		int ioapic_id = mpc_ioapic_id(ioapic_idx);
		if (!map_ioapic_to_ir(ioapic_id)) {
			pr_err(FW_BUG "ioapic %d has no mapping iommu, "
			       "interrupt remapping will be disabled\n",
			       ioapic_id);
			return -1;
		}
	}

	return 1;
}
static int __init ir_dev_scope_init(void)
{
	if (!irq_remapping_enabled)
		return 0;

	return dmar_dev_scope_init();
}
rootfs_initcall(ir_dev_scope_init);
static void disable_irq_remapping(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;

	/*
	 * Disable Interrupt-remapping for all the DRHD's now.
	 */
	for_each_iommu(iommu, drhd) {
		if (!ecap_ir_support(iommu->ecap))
			continue;

		iommu_disable_irq_remapping(iommu);
	}
}
static int reenable_irq_remapping(int eim)
{
	struct dmar_drhd_unit *drhd;
	int setup = 0;
	struct intel_iommu *iommu = NULL;

	for_each_iommu(iommu, drhd)
		if (iommu->qi)
			dmar_reenable_qi(iommu);

	/*
	 * Setup Interrupt-remapping for all the DRHD's now.
	 */
	for_each_iommu(iommu, drhd) {
		if (!ecap_ir_support(iommu->ecap))
			continue;

		/* Set up interrupt remapping for iommu.*/
		iommu_set_irq_remapping(iommu, eim);
		setup = 1;
	}

	if (!setup)
		goto error;

	return 0;

error:
	/*
	 * handle error condition gracefully here!
	 */
	return -1;
}
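/*
 * Fill in a default remapped-format IRTE: present, destination/delivery
 * mode taken from the APIC driver, edge trigger (see the comment below),
 * with the caller-supplied vector and destination APIC id.
 */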
static void prepare_irte(struct irte *irte, int vector,
			 unsigned int dest)
{
	memset(irte, 0, sizeof(*irte));

	irte->present = 1;
	irte->dst_mode = apic->irq_dest_mode;
	/*
	 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
	 * actual level or edge trigger will be set up in the IO-APIC
	 * RTE. This will help simplify level triggered irq migration.
	 * For more details, see the comments (in io_apic.c) explaining IO-APIC
	 * irq migration in the presence of interrupt-remapping.
	 */
	irte->trigger_mode = 0;
	irte->dlvry_mode = apic->irq_delivery_mode;
	irte->vector = vector;
	irte->dest_id = IRTE_DEST(dest);
	irte->redir_hint = 1;
}
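/*
 * Build a remapped-format IO-APIC RTE for @irq: allocate a single IRTE,
 * program it with vector/destination/source-id, then point the RTE at it
 * (format = 1, 16-bit handle split across the index and index2 fields).
 * The RTE's vector field carries the IO-APIC pin number, not the CPU
 * vector; the real vector comes from the IRTE.
 */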
static int intel_setup_ioapic_entry(int irq,
				    struct IO_APIC_route_entry *route_entry,
				    unsigned int destination, int vector,
				    struct io_apic_irq_attr *attr)
{
	int ioapic_id = mpc_ioapic_id(attr->ioapic);
	struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
	struct IR_IO_APIC_route_entry *entry;
	struct irte irte;
	int index;

	if (!iommu) {
		pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
		return -ENODEV;
	}

	entry = (struct IR_IO_APIC_route_entry *)route_entry;

	index = alloc_irte(iommu, irq, 1);
	if (index < 0) {
		pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
		return -ENOMEM;
	}

	prepare_irte(&irte, vector, destination);

	/* Set source-id of interrupt request */
	set_ioapic_sid(&irte, ioapic_id);

	modify_irte(irq, &irte);

	apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
		"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
		"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
		"Avail:%X Vector:%02X Dest:%08X "
		"SID:%04X SQ:%X SVT:%X)\n",
		attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
		irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
		irte.avail, irte.vector, irte.dest_id,
		irte.sid, irte.sq, irte.svt);

	memset(entry, 0, sizeof(*entry));

	entry->index2	= (index >> 15) & 0x1;
	entry->zero	= 0;
	entry->format	= 1;
	entry->index	= (index & 0x7fff);
	/*
	 * IO-APIC RTE will be configured with virtual vector.
	 * irq handler will do the explicit EOI to the io-apic.
	 */
	entry->vector	= attr->ioapic_pin;
	entry->mask	= 0;			/* enable IRQ */
	entry->trigger	= attr->trigger;
	entry->polarity	= attr->polarity;

	/* Mask level triggered irqs.
	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
	 */
	if (attr->trigger)
		entry->mask = 1;

	return 0;
}
/*
 * Migrate the IO-APIC irq in the presence of intr-remapping.
 *
 * For both level and edge triggered, irq migration is a simple atomic
 * update (of vector and cpu destination) of the IRTE followed by a flush
 * of the hardware cache.
 *
 * For level triggered, we eliminate the io-apic RTE modification (with the
 * updated vector information), by using a virtual vector (io-apic pin number).
 * The real vector that is used for interrupting the cpu will come from
 * the interrupt-remapping table entry.
 *
 * As the migration is a simple atomic update of the IRTE, the same mechanism
 * is used to migrate MSI irq's in the presence of interrupt-remapping.
 */
static int
intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
			  bool force)
{
	struct irq_cfg *cfg = data->chip_data;
	unsigned int dest, irq = data->irq;
	struct irte irte;
	int err;

	if (!config_enabled(CONFIG_SMP))
		return -EINVAL;

	if (!cpumask_intersects(mask, cpu_online_mask))
		return -EINVAL;

	if (get_irte(irq, &irte))
		return -EBUSY;

	err = assign_irq_vector(irq, cfg, mask);
	if (err)
		return err;

	err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest);
	if (err) {
		if (assign_irq_vector(irq, cfg, data->affinity))
			pr_err("Failed to recover vector for irq %d\n", irq);
		return err;
	}

	irte.vector = cfg->vector;
	irte.dest_id = IRTE_DEST(dest);

	/*
	 * Atomically updates the IRTE with the new destination, vector
	 * and flushes the interrupt entry cache.
	 */
	modify_irte(irq, &irte);

	/*
	 * After this point, all the interrupts will start arriving
	 * at the new destination. So, time to cleanup the previous
	 * vector allocation.
	 */
	if (cfg->move_in_progress)
		send_cleanup_vector(cfg);

	cpumask_copy(data->affinity, mask);
	return 0;
}
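/*
 * Compose a remappable MSI message for an already-allocated IRTE: the
 * handle is encoded in the address via MSI_ADDR_IR_INDEX1/2, SHV is set,
 * and the subhandle goes in the data register, so the interrupt is routed
 * through the IRTE rather than being interpreted directly.
 */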
static void intel_compose_msi_msg(struct pci_dev *pdev,
				  unsigned int irq, unsigned int dest,
				  struct msi_msg *msg, u8 hpet_id)
{
	struct irq_cfg *cfg;
	struct irte irte;
	u16 sub_handle = 0;
	int ir_index;

	cfg = irq_get_chip_data(irq);

	ir_index = map_irq_to_irte_handle(irq, &sub_handle);
	BUG_ON(ir_index == -1);

	prepare_irte(&irte, cfg->vector, dest);

	/* Set source-id of interrupt request */
	if (pdev)
		set_msi_sid(&irte, pdev);
	else
		set_hpet_sid(&irte, hpet_id);

	modify_irte(irq, &irte);

	msg->address_hi = MSI_ADDR_BASE_HI;
	msg->data = sub_handle;
	msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
			  MSI_ADDR_IR_SHV |
			  MSI_ADDR_IR_INDEX1(ir_index) |
			  MSI_ADDR_IR_INDEX2(ir_index);
}
/*
 * Map the PCI dev to the corresponding remapping hardware unit
 * and allocate 'nvec' consecutive interrupt-remapping table entries
 * in it.
 */
static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec)
{
	struct intel_iommu *iommu;
	int index;

	iommu = map_dev_to_ir(dev);
	if (!iommu) {
		printk(KERN_ERR
		       "Unable to map PCI %s to iommu\n", pci_name(dev));
		return -ENOENT;
	}

	index = alloc_irte(iommu, irq, nvec);
	if (index < 0) {
		printk(KERN_ERR
		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
		       pci_name(dev));
		return -ENOSPC;
	}
	return index;
}
static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
			       int index, int sub_handle)
{
	struct intel_iommu *iommu;

	iommu = map_dev_to_ir(pdev);
	if (!iommu)
		return -ENOENT;
	/*
	 * Record the mapping between this irq and the IRTE base index,
	 * with sub_handle pointing to the appropriate entry within the
	 * interrupt remapping table block.
	 */
	set_irte_irq(irq, iommu, index, sub_handle);
	return 0;
}
static int intel_setup_hpet_msi(unsigned int irq, unsigned int id)
{
	struct intel_iommu *iommu = map_hpet_to_ir(id);
	int index;

	if (!iommu)
		return -1;

	index = alloc_irte(iommu, irq, 1);
	if (index < 0)
		return -1;

	return 0;
}
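/*
 * Dispatch table plugged into the generic x86 irq_remapping layer (see
 * irq_remapping.h): the core code calls these hooks to probe, enable,
 * program and tear down VT-d interrupt remapping.
 */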
struct irq_remap_ops intel_irq_remap_ops = {
	.supported		= intel_irq_remapping_supported,
	.prepare		= dmar_table_init,
	.enable			= intel_enable_irq_remapping,
	.disable		= disable_irq_remapping,
	.reenable		= reenable_irq_remapping,
	.enable_faulting	= enable_drhd_fault_handling,
	.setup_ioapic_entry	= intel_setup_ioapic_entry,
	.set_affinity		= intel_ioapic_set_affinity,
	.free_irq		= free_irte,
	.compose_msi_msg	= intel_compose_msi_msg,
	.msi_alloc_irq		= intel_msi_alloc_irq,
	.msi_setup_irq		= intel_msi_setup_irq,
	.setup_hpet_msi		= intel_setup_hpet_msi,
};