/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/memblock.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>
#include <asm/debug.h>

#include "powernv.h"
#include "pci.h"

#define define_pe_printk_level(func, kern_level) \
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
{ \
        struct va_format vaf; \
        va_list args; \
        char pfix[32]; \
        int r; \
 \
        va_start(args, fmt); \
 \
        vaf.fmt = fmt; \
        vaf.va = &args; \
 \
        if (pe->pdev) \
                strlcpy(pfix, dev_name(&pe->pdev->dev), \
                        sizeof(pfix)); \
        else \
                sprintf(pfix, "%04x:%02x ", \
                        pci_domain_nr(pe->pbus), \
                        pe->pbus->number); \
        r = printk(kern_level "pci %s: [PE# %.3d] %pV", \
                   pfix, pe->pe_number, &vaf); \
 \
        va_end(args); \
 \
        return r; \
} \

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);

/*
 * stdcix is only supposed to be used in hypervisor real mode as per
 * the architecture spec
 */
static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
{
        __asm__ __volatile__("stdcix %0,0,%1"
                : : "r" (val), "r" (paddr) : "memory");
}

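/*
 * Grab a free PE number from the PHB's allocation bitmap and
 * initialize the matching pe_array slot. find_next_zero_bit() plus
 * test_and_set_bit() makes the claim safe against racing callers;
 * IODA_INVALID_PE is returned once the bitmap is exhausted.
 */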
static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
        unsigned long pe;

        do {
                pe = find_next_zero_bit(phb->ioda.pe_alloc,
                                        phb->ioda.total_pe, 0);
                if (pe >= phb->ioda.total_pe)
                        return IODA_INVALID_PE;
        } while (test_and_set_bit(pe, phb->ioda.pe_alloc));

        phb->ioda.pe_array[pe].phb = phb;
        phb->ioda.pe_array[pe].pe_number = pe;
        return pe;
}

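/*
 * Return a PE number to the allocation bitmap. The WARN_ON catches
 * callers that free a PE while a device is still attached to it.
 */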
static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
        WARN_ON(phb->ioda.pe_array[pe].pdev);

        memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
        clear_bit(pe, phb->ioda.pe_alloc);
}

/* Currently this is only used when MSIs are enabled; this will change,
 * but in the meantime we need to protect it to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pci_get_pdn(dev);

        if (!pdn)
                return NULL;
        if (pdn->pe_number == IODA_INVALID_PE)
                return NULL;
        return &phb->ioda.pe_array[pdn->pe_number];
}
#endif /* CONFIG_PCI_MSI */

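/*
 * Tell OPAL about a PE: program the RID compare masks (bus/device/
 * function), map the PE in the PELT, add it to its own and all of its
 * parents' PELT-V so errors propagate correctly, build the RID -> PE
 * reverse map, and set up an MVE on IODA1 PHBs.
 */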
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
        struct pci_dev *parent;
        uint8_t bcomp, dcomp, fcomp;
        long rc, rid_end, rid;

        /* Bus validation ? */
        if (pe->pbus) {
                int count;

                dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
                fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
                parent = pe->pbus->self;
                if (pe->flags & PNV_IODA_PE_BUS_ALL)
                        count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
                else
                        count = 1;

                switch (count) {
                case  1: bcomp = OpalPciBusAll;         break;
                case  2: bcomp = OpalPciBus7Bits;       break;
                case  4: bcomp = OpalPciBus6Bits;       break;
                case  8: bcomp = OpalPciBus5Bits;       break;
                case 16: bcomp = OpalPciBus4Bits;       break;
                case 32: bcomp = OpalPciBus3Bits;       break;
                default:
                        pr_err("%s: Number of subordinate busses %d"
                               " unsupported\n",
                               pci_name(pe->pbus->self), count);
                        /* Do an exact match only */
                        bcomp = OpalPciBusAll;
                }
                rid_end = pe->rid + (count << 8);
        } else {
                parent = pe->pdev->bus->self;
                bcomp = OpalPciBusAll;
                dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
                fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
                rid_end = pe->rid + 1;
        }

        /*
         * Associate the PE in the PELT. We need to add the PE to the
         * corresponding PELT-V as well; otherwise, errors originating
         * from the PE might be propagated to other PEs.
         */
        rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
                             bcomp, dcomp, fcomp, OPAL_MAP_PE);
        if (rc) {
                pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
                return -ENXIO;
        }
        rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
                                pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
        if (rc)
                pe_warn(pe, "OPAL error %ld adding self to PELTV\n", rc);
        opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
                                  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

        /* Add to all parents' PELT-V */
        while (parent) {
                struct pci_dn *pdn = pci_get_pdn(parent);
                if (pdn && pdn->pe_number != IODA_INVALID_PE) {
                        rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
                                                pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
                        /* XXX What to do in case of error ? */
                }
                parent = parent->bus->self;
        }

        /* Setup reverse map */
        for (rid = pe->rid; rid < rid_end; rid++)
                phb->ioda.pe_rmap[rid] = pe->pe_number;

        /* Setup one MVE on IODA1 */
        if (phb->type == PNV_PHB_IODA1) {
                pe->mve_number = pe->pe_number;
                rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
                                      pe->pe_number);
                if (rc) {
                        pe_err(pe, "OPAL error %ld setting up MVE %d\n",
                               rc, pe->mve_number);
                        pe->mve_number = -1;
                } else {
                        rc = opal_pci_set_mve_enable(phb->opal_id,
                                                     pe->mve_number, OPAL_ENABLE_MVE);
                        if (rc) {
                                pe_err(pe, "OPAL error %ld enabling MVE %d\n",
                                       rc, pe->mve_number);
                                pe->mve_number = -1;
                        }
                }
        } else if (phb->type == PNV_PHB_IODA2)
                pe->mve_number = 0;

        return 0;
}

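/*
 * Insert a PE into the PHB's DMA list, keeping the list sorted by
 * descending DMA weight so that heavier PEs get their TCE segments
 * handed out first.
 */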
static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
                                       struct pnv_ioda_pe *pe)
{
        struct pnv_ioda_pe *lpe;

        list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
                if (lpe->dma_weight < pe->dma_weight) {
                        list_add_tail(&pe->dma_link, &lpe->dma_link);
                        return;
                }
        }
        list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}

static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
        /* This is quite simplistic. The "base" weight of a device
         * is 10; a weight of 0 means no DMA is to be accounted for it.
         */

        /* If it's a bridge, no DMA */
        if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
                return 0;

        /* Reduce the weight of slow USB controllers */
        if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
            dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
            dev->class == PCI_CLASS_SERIAL_USB_EHCI)
                return 3;

        /* Increase the weight of RAID (includes Obsidian) */
        if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
                return 15;

        /* Default */
        return 10;
}

#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pci_get_pdn(dev);
        struct pnv_ioda_pe *pe;
        int pe_num;

        if (!pdn) {
                pr_err("%s: Device tree node not associated properly\n",
                       pci_name(dev));
                return NULL;
        }
        if (pdn->pe_number != IODA_INVALID_PE)
                return NULL;

        /* PE#0 has been pre-set */
        if (dev->bus->number == 0)
                pe_num = 0;
        else
                pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available, disabling device\n",
                           pci_name(dev));
                return NULL;
        }

        /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
         * pointer in the PE data structure, both should be destroyed at the
         * same time. However, this needs to be looked at more closely again
         * once we actually start removing things (Hotplug, SR-IOV, ...)
         *
         * At some point we want to remove the PDN completely anyways
         */
        pe = &phb->ioda.pe_array[pe_num];
        pci_dev_get(dev);
        pdn->pcidev = dev;
        pdn->pe_number = pe_num;
        pe->pdev = dev;
        pe->pbus = NULL;
        pe->tce32_seg = -1;
        pe->mve_number = -1;
        pe->rid = dev->bus->number << 8 | pdn->devfn;

        pe_info(pe, "Associated device to PE\n");

        if (pnv_ioda_configure_pe(phb, pe)) {
                /* XXX What do we do here ? */
                if (pe_num)
                        pnv_ioda_free_pe(phb, pe_num);
                pdn->pe_number = IODA_INVALID_PE;
                pe->pdev = NULL;
                pci_dev_put(dev);
                return NULL;
        }

        /* Assign a DMA weight to the device */
        pe->dma_weight = pnv_ioda_dma_weight(dev);
        if (pe->dma_weight != 0) {
                phb->ioda.dma_weight += pe->dma_weight;
                phb->ioda.dma_pe_count++;
        }

        /* Link the PE */
        pnv_ioda_link_pe_by_weight(phb, pe);

        return pe;
}
#endif /* Useful for SRIOV case */

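/*
 * Associate every device on @bus (and, for PNV_IODA_PE_BUS_ALL PEs,
 * on all subordinate buses) with the given PE, accumulating each
 * device's DMA weight into the PE's total.
 */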
static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
        struct pci_dev *dev;

        list_for_each_entry(dev, &bus->devices, bus_list) {
                struct pci_dn *pdn = pci_get_pdn(dev);

                if (pdn == NULL) {
                        pr_warn("%s: No device node associated with device !\n",
                                pci_name(dev));
                        continue;
                }
                pci_dev_get(dev);
                pdn->pcidev = dev;
                pdn->pe_number = pe->pe_number;
                pe->dma_weight += pnv_ioda_dma_weight(dev);
                if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
                        pnv_ioda_setup_same_PE(dev->subordinate, pe);
        }
}

/*
 * There are two types of PCI-bus-sensitive PEs: one comprises a single
 * PCI bus; the other contains the primary PCI bus plus its subordinate
 * PCI devices and buses. The second type of PE is normally created for
 * a PCIe-to-PCI bridge or the downstream ports of a PLX switch.
 */
static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        struct pnv_ioda_pe *pe;
        int pe_num;

        pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
                           __func__, pci_domain_nr(bus), bus->number);
                return;
        }

        pe = &phb->ioda.pe_array[pe_num];
        pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
        pe->pbus = bus;
        pe->pdev = NULL;
        pe->tce32_seg = -1;
        pe->mve_number = -1;
        pe->rid = bus->busn_res.start << 8;
        pe->dma_weight = 0;

        if (all)
                pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
                        bus->busn_res.start, bus->busn_res.end, pe_num);
        else
                pe_info(pe, "Secondary bus %d associated with PE#%d\n",
                        bus->busn_res.start, pe_num);

        if (pnv_ioda_configure_pe(phb, pe)) {
                /* XXX What do we do here ? */
                if (pe_num)
                        pnv_ioda_free_pe(phb, pe_num);
                pe->pbus = NULL;
                return;
        }

        /* Associate it with all child devices */
        pnv_ioda_setup_same_PE(bus, pe);

        /* Put PE to the list */
        list_add_tail(&pe->list, &phb->ioda.pe_list);

        /* Account for one DMA PE if at least one DMA-capable device exists
         * below the bridge
         */
        if (pe->dma_weight != 0) {
                phb->ioda.dma_weight += pe->dma_weight;
                phb->ioda.dma_pe_count++;
        }

        /* Link the PE */
        pnv_ioda_link_pe_by_weight(phb, pe);
}

static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
        struct pci_dev *dev;

        pnv_ioda_setup_bus_PE(bus, 0);

        list_for_each_entry(dev, &bus->devices, bus_list) {
                if (dev->subordinate) {
                        if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
                                pnv_ioda_setup_bus_PE(dev->subordinate, 1);
                        else
                                pnv_ioda_setup_PEs(dev->subordinate);
                }
        }
}

/*
 * Configure PEs so that the downstream PCI buses and devices
 * have their associated PE#. Unfortunately, we haven't figured
 * out a way to identify PLX bridges yet, so we simply assign
 * the PCI bus and the subordinates behind the root port to a
 * PE# here. This scheme is expected to change as soon as we
 * can detect PLX bridges correctly.
 */
static void pnv_pci_ioda_setup_PEs(void)
{
        struct pci_controller *hose, *tmp;

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                pnv_ioda_setup_PEs(hose->bus);
        }
}

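/*
 * Point a device at its PE's 32-bit TCE table (and IOMMU group) once
 * the PE# has been assigned; called from the PHB's dma_dev_setup hook.
 */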
static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
{
        struct pci_dn *pdn = pci_get_pdn(pdev);
        struct pnv_ioda_pe *pe;

        /*
         * This function can be called before the PE#
         * has been assigned. Do nothing in that case.
         */
        if (!pdn || pdn->pe_number == IODA_INVALID_PE)
                return;

        pe = &phb->ioda.pe_array[pdn->pe_number];
        WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
        set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
}

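/*
 * Choose between the 32-bit TCE-mapped DMA path and the 64-bit bypass
 * window for a device, based on whether the requested DMA mask covers
 * the bypass base plus all of RAM.
 */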
static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
                                     struct pci_dev *pdev, u64 dma_mask)
{
        struct pci_dn *pdn = pci_get_pdn(pdev);
        struct pnv_ioda_pe *pe;
        uint64_t top;
        bool bypass = false;

        if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
                return -ENODEV;

        pe = &phb->ioda.pe_array[pdn->pe_number];
        if (pe->tce_bypass_enabled) {
                top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
                bypass = (dma_mask >= top);
        }

        if (bypass) {
                dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
                set_dma_ops(&pdev->dev, &dma_direct_ops);
                set_dma_offset(&pdev->dev, pe->tce_bypass_base);
        } else {
                dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
                set_dma_ops(&pdev->dev, &dma_iommu_ops);
                set_iommu_table_base(&pdev->dev, &pe->tce32_table);
        }
        return 0;
}

static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
        struct pci_dev *dev;

        list_for_each_entry(dev, &bus->devices, bus_list) {
                set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
                if (dev->subordinate)
                        pnv_ioda_setup_bus_dma(pe, dev->subordinate);
        }
}

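/*
 * Flush the IODA1 TCE cache for the range of entries [startp, endp]
 * by writing their (real) addresses to the PHB's TCE kill register,
 * either through the cacheable MMIO mapping or, in real mode, via
 * stdcix.
 */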
static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
                                         struct iommu_table *tbl,
                                         __be64 *startp, __be64 *endp, bool rm)
{
        __be64 __iomem *invalidate = rm ?
                (__be64 __iomem *)pe->tce_inval_reg_phys :
                (__be64 __iomem *)tbl->it_index;
        unsigned long start, end, inc;

        start = __pa(startp);
        end = __pa(endp);

        /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
        if (tbl->it_busno) {
                start <<= 12;
                end <<= 12;
                inc = 128 << 12;
                start |= tbl->it_busno;
                end |= tbl->it_busno;
        } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
                /* p7ioc-style invalidation, 2 TCEs per write */
                start |= (1ull << 63);
                end |= (1ull << 63);
                inc = 16;
        } else {
                /* Default (older HW) */
                inc = 128;
        }

        end |= inc - 1; /* round up end to be different than start */

        mb(); /* Ensure above stores are visible */
        while (start <= end) {
                if (rm)
                        __raw_rm_writeq(cpu_to_be64(start), invalidate);
                else
                        __raw_writeq(cpu_to_be64(start), invalidate);
                start += inc;
        }

        /*
         * The iommu layer will do another mb() for us on build()
         * and we don't care on free()
         */
}

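/*
 * IODA2 (PHB3) variant: the kill register takes PE-scoped DMA
 * addresses rather than TCE entry addresses, so convert the entry
 * range [startp, endp] into the DMA range it maps and invalidate it
 * one 4K page at a time.
 */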
static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
                                         struct iommu_table *tbl,
                                         __be64 *startp, __be64 *endp, bool rm)
{
        unsigned long start, end, inc;
        __be64 __iomem *invalidate = rm ?
                (__be64 __iomem *)pe->tce_inval_reg_phys :
                (__be64 __iomem *)tbl->it_index;

        /* We'll invalidate DMA address in PE scope */
        start = 0x2ul << 60;
        start |= (pe->pe_number & 0xFF);
        end = start;

        /* Figure out the start, end and step */
        inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
        start |= (inc << 12);
        inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
        end |= (inc << 12);
        inc = (0x1ul << 12);
        mb();

        while (start <= end) {
                if (rm)
                        __raw_rm_writeq(cpu_to_be64(start), invalidate);
                else
                        __raw_writeq(cpu_to_be64(start), invalidate);
                start += inc;
        }
}

void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
                                 __be64 *startp, __be64 *endp, bool rm)
{
        struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
                                              tce32_table);
        struct pnv_phb *phb = pe->phb;

        if (phb->type == PNV_PHB_IODA1)
                pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
        else
                pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
}

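/*
 * Build a PE's 32-bit DMA window on IODA1: allocate one contiguous
 * TCE table covering @segs 256MB segments starting at segment @base,
 * map each segment through OPAL, then hook the resulting iommu_table
 * up to the PE's device(s).
 */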
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                                      struct pnv_ioda_pe *pe, unsigned int base,
                                      unsigned int segs)
{
        struct page *tce_mem = NULL;
        const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int i;
        int64_t rc;
        void *addr;

        /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)

        /* XXX FIXME: Handle 64-bit only DMA devices */
        /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
        /* XXX FIXME: Allocate multi-level tables on PHB3 */

        /* We shouldn't already have a 32-bit DMA associated */
        if (WARN_ON(pe->tce32_seg >= 0))
                return;

        /* Grab a 32-bit TCE table */
        pe->tce32_seg = base;
        pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
                (base << 28), ((base + segs) << 28) - 1);

        /* XXX Currently, we allocate one big contiguous table for the
         * TCEs. We only really need one chunk per 256M of TCE space
         * (ie per segment) but that's an optimization for later, it
         * requires some added smarts with our get/put_tce implementation
         */
        tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
                                   get_order(TCE32_TABLE_SIZE * segs));
        if (!tce_mem) {
                pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
                goto fail;
        }
        addr = page_address(tce_mem);
        memset(addr, 0, TCE32_TABLE_SIZE * segs);

        /* Configure HW */
        for (i = 0; i < segs; i++) {
                rc = opal_pci_map_pe_dma_window(phb->opal_id,
                                                pe->pe_number,
                                                base + i, 1,
                                                __pa(addr) + TCE32_TABLE_SIZE * i,
                                                TCE32_TABLE_SIZE, 0x1000);
                if (rc) {
                        pe_err(pe, " Failed to configure 32-bit TCE table,"
                               " err %ld\n", rc);
                        goto fail;
                }
        }

        /* Setup linux iommu table */
        tbl = &pe->tce32_table;
        pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
                                  base << 28);

        /* OPAL variant of P7IOC SW invalidated TCEs */
        swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
        if (swinvp) {
                /* We need a couple more fields -- an address and data
                 * to OR in. Since the bus is only printed out on table
                 * free errors, and on the first pass the data will be
                 * a relative bus number, print that out instead.
                 */
                tbl->it_busno = 0;
                pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
                tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
                                                       8);
                tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
                               TCE_PCI_SWINV_PAIR;
        }
        iommu_init_table(tbl, phb->hose->node);
        iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);

        if (pe->pdev)
                set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
        else
                pnv_ioda_setup_bus_dma(pe, pe->pbus);

        return;
fail:
        /* XXX Failure: Try to fallback to 64-bit only ? */
        if (pe->tce32_seg >= 0)
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}

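/*
 * Enable or disable the PE's 64-bit bypass window (TVE #1): when
 * enabling, map it over all of RAM rounded up to a power of two;
 * when disabling, shrink it to zero. Used as the iommu_table
 * set_bypass callback for VFIO.
 */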
static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
{
        struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
                                              tce32_table);
        uint16_t window_id = (pe->pe_number << 1) + 1;
        int64_t rc;

        pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
        if (enable) {
                phys_addr_t top = memblock_end_of_DRAM();

                top = roundup_pow_of_two(top);
                rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
                                                     pe->pe_number,
                                                     window_id,
                                                     pe->tce_bypass_base,
                                                     top);
        } else {
                rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
                                                     pe->pe_number,
                                                     window_id,
                                                     pe->tce_bypass_base,
                                                     0);

                /*
                 * We might want to reset the DMA ops of all devices on
                 * this PE. However in theory, that shouldn't be necessary
                 * as this is used for VFIO/KVM pass-through and the device
                 * hasn't yet been returned to its kernel driver
                 */
        }
        if (rc)
                pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
        else
                pe->tce_bypass_enabled = enable;
}

static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
                                          struct pnv_ioda_pe *pe)
{
        /* TVE #1 is selected by PCI address bit 59 */
        pe->tce_bypass_base = 1ull << 59;

        /* Install set_bypass callback for VFIO */
        pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;

        /* Enable bypass by default */
        pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
}

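/*
 * IODA2 counterpart of pnv_pci_ioda_setup_dma_pe(): give the PE one
 * TCE table covering the whole 32-bit (M32-sized) DMA space through
 * a single TVE, then add the 64-bit bypass window on top.
 */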
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
                                       struct pnv_ioda_pe *pe)
{
        struct page *tce_mem = NULL;
        void *addr;
        const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int tce_table_size, end;
        int64_t rc;

        /* We shouldn't already have a 32-bit DMA associated */
        if (WARN_ON(pe->tce32_seg >= 0))
                return;

        /* The PE will reserve all possible 32-bit space */
        pe->tce32_seg = 0;
        end = (1 << ilog2(phb->ioda.m32_pci_base));
        tce_table_size = (end / 0x1000) * 8;
        pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
                end);

        /* Allocate TCE table */
        tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
                                   get_order(tce_table_size));
        if (!tce_mem) {
                pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
                goto fail;
        }
        addr = page_address(tce_mem);
        memset(addr, 0, tce_table_size);

        /*
         * Map TCE table through TVT. The TVE index is the PE number
         * shifted by 1 bit for 32-bit DMA space.
         */
        rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
                                        pe->pe_number << 1, 1, __pa(addr),
                                        tce_table_size, 0x1000);
        if (rc) {
                pe_err(pe, "Failed to configure 32-bit TCE table,"
                       " err %ld\n", rc);
                goto fail;
        }

        /* Setup linux iommu table */
        tbl = &pe->tce32_table;
        pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);

        /* OPAL variant of PHB3 invalidated TCEs */
        swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
        if (swinvp) {
                /* We need a couple more fields -- an address and data
                 * to OR in. Since the bus is only printed out on table
                 * free errors, and on the first pass the data will be
                 * a relative bus number, print that out instead.
                 */
                tbl->it_busno = 0;
                pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
                tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
                                                       8);
                tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
        }
        iommu_init_table(tbl, phb->hose->node);
        iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);

        if (pe->pdev)
                set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
        else
                pnv_ioda_setup_bus_dma(pe, pe->pbus);

        /* Also create a bypass window */
        pnv_pci_ioda2_setup_bypass_pe(phb, pe);

        return;
fail:
        if (pe->tce32_seg >= 0)
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(tce_table_size));
}

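/*
 * Distribute the PHB's 32-bit TCE segments across its DMA-capable
 * PEs: on IODA1 each PE gets one base segment plus a share of the
 * residual segments proportional to its DMA weight, while IODA2 PEs
 * each get the whole 32-bit space.
 */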
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{
        struct pci_controller *hose = phb->hose;
        unsigned int residual, remaining, segs, tw, base;
        struct pnv_ioda_pe *pe;

        /* If we have more PE# than segments available, hand out one
         * per PE until we run out and let the rest fail. If not,
         * then we assign at least one segment per PE, plus more based
         * on the number of devices under that PE
         */
        if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
                residual = 0;
        else
                residual = phb->ioda.tce32_count -
                        phb->ioda.dma_pe_count;

        pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
                hose->global_number, phb->ioda.tce32_count);
        pr_info("PCI: %d PE# for a total weight of %d\n",
                phb->ioda.dma_pe_count, phb->ioda.dma_weight);

        /* Walk our PE list and configure their DMA segments, hand them
         * out one base segment plus any residual segments based on
         * weight
         */
        remaining = phb->ioda.tce32_count;
        tw = phb->ioda.dma_weight;
        base = 0;
        list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
                if (!pe->dma_weight)
                        continue;
                if (!remaining) {
                        pe_warn(pe, "No DMA32 resources available\n");
                        continue;
                }
                segs = 1;
                if (residual) {
                        segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
                        if (segs > remaining)
                                segs = remaining;
                }

                /*
                 * For the IODA2-compliant PHB3, we needn't care about
                 * the weight: all available 32-bit DMA space will be
                 * assigned to the PE.
                 */
                if (phb->type == PNV_PHB_IODA1) {
                        pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
                                pe->dma_weight, segs);
                        pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
                } else {
                        pe_info(pe, "Assign DMA32 space\n");
                        segs = 0;
                        pnv_pci_ioda2_setup_dma_pe(phb, pe);
                }

                remaining -= segs;
                base += segs;
        }
}

#ifdef CONFIG_PCI_MSI
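/*
 * EOI handler used for MSIs on IODA2/PHB3: let OPAL handle its side
 * of the EOI before performing the normal XICS EOI.
 */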
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
        unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
        struct irq_chip *chip = irq_data_get_irq_chip(d);
        struct pnv_phb *phb = container_of(chip, struct pnv_phb,
                                           ioda.irq_chip);
        int64_t rc;

        rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
        WARN_ON_ONCE(rc);

        icp_native_eoi(d);
}

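/*
 * Set up one MSI for a device: bind the XIVE to the device's PE,
 * query OPAL for the MSI address/data pair, and on PHB3 swap in an
 * irq_chip whose EOI goes through pnv_ioda2_msi_eoi().
 */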
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
                                  unsigned int hwirq, unsigned int virq,
                                  unsigned int is_64, struct msi_msg *msg)
{
        struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
        struct pci_dn *pdn = pci_get_pdn(dev);
        struct irq_data *idata;
        struct irq_chip *ichip;
        unsigned int xive_num = hwirq - phb->msi_base;
        __be32 data;
        int rc;

        /* No PE assigned ? bail out ... no MSI for you ! */
        if (pe == NULL)
                return -ENXIO;

        /* Check if we have an MVE */
        if (pe->mve_number < 0)
                return -ENXIO;

        /* Force 32-bit MSI on some broken devices */
        if (pdn && pdn->force_32bit_msi)
                is_64 = 0;

        /* Assign XIVE to PE */
        rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
        if (rc) {
                pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
                        pci_name(dev), rc, xive_num);
                return -EIO;
        }

        if (is_64) {
                __be64 addr64;

                rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr64, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                msg->address_hi = be64_to_cpu(addr64) >> 32;
                msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
        } else {
                __be32 addr32;

                rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr32, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                msg->address_hi = 0;
                msg->address_lo = be32_to_cpu(addr32);
        }
        msg->data = be32_to_cpu(data);

        /*
         * Change the IRQ chip for the MSI interrupts on PHB3.
         * The corresponding IRQ chip is populated the first
         * time through.
         */
        if (phb->type == PNV_PHB_IODA2) {
                if (!phb->ioda.irq_chip_init) {
                        idata = irq_get_irq_data(virq);
                        ichip = irq_data_get_irq_chip(idata);
                        phb->ioda.irq_chip_init = 1;
                        phb->ioda.irq_chip = *ichip;
                        phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
                }
                irq_set_chip(virq, &phb->ioda.irq_chip);
        }

        pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
                 " address=%x_%08x data=%x PE# %d\n",
                 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
                 msg->address_hi, msg->address_lo, data, pe->pe_number);

        return 0;
}

static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
        unsigned int count;
        const __be32 *prop = of_get_property(phb->hose->dn,
                                             "ibm,opal-msi-ranges", NULL);
        if (!prop) {
                /* BML Fallback */
                prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
        }
        if (!prop)
                return;

        phb->msi_base = be32_to_cpup(prop);
        count = be32_to_cpup(prop + 1);
        if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
                pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
                       phb->hose->global_number);
                return;
        }

        phb->msi_setup = pnv_pci_ioda_msi_setup;
        phb->msi32_support = 1;
        pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
                count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */

/*
 * This function is supposed to be called on PEs from top to
 * bottom, so the I/O or MMIO segments assigned to a parent PE
 * can be overridden by its child PEs if necessary.
 */
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
                                  struct pnv_ioda_pe *pe)
{
        struct pnv_phb *phb = hose->private_data;
        struct pci_bus_region region;
        struct resource *res;
        int i, index;
        int rc;

        /*
         * NOTE: We only care about PCI-bus-based PEs for now.
         * PCI-device-based PEs, e.g. SR-IOV-sensitive VFs, will
         * be figured out later.
         */
        BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

        pci_bus_for_each_resource(pe->pbus, res, i) {
                if (!res || !res->flags ||
                    res->start > res->end)
                        continue;

                if (res->flags & IORESOURCE_IO) {
                        region.start = res->start - phb->ioda.io_pci_base;
                        region.end = res->end - phb->ioda.io_pci_base;
                        index = region.start / phb->ioda.io_segsize;

                        while (index < phb->ioda.total_pe &&
                               region.start <= region.end) {
                                phb->ioda.io_segmap[index] = pe->pe_number;
                                rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                                        pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
                                if (rc != OPAL_SUCCESS) {
                                        pr_err("%s: OPAL error %d when mapping IO "
                                               "segment #%d to PE#%d\n",
                                               __func__, rc, index, pe->pe_number);
                                        break;
                                }

                                region.start += phb->ioda.io_segsize;
                                index++;
                        }
                } else if (res->flags & IORESOURCE_MEM) {
                        /* WARNING: Assumes M32 is mem region 0 in PHB. We need to
                         * harden that algorithm when we start supporting M64
                         */
                        region.start = res->start -
                                       hose->mem_offset[0] -
                                       phb->ioda.m32_pci_base;
                        region.end = res->end -
                                     hose->mem_offset[0] -
                                     phb->ioda.m32_pci_base;
                        index = region.start / phb->ioda.m32_segsize;

                        while (index < phb->ioda.total_pe &&
                               region.start <= region.end) {
                                phb->ioda.m32_segmap[index] = pe->pe_number;
                                rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                                        pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
                                if (rc != OPAL_SUCCESS) {
                                        pr_err("%s: OPAL error %d when mapping M32 "
                                               "segment #%d to PE#%d\n",
                                               __func__, rc, index, pe->pe_number);
                                        break;
                                }

                                region.start += phb->ioda.m32_segsize;
                                index++;
                        }
                }
        }
}

static void pnv_pci_ioda_setup_seg(void)
{
        struct pci_controller *tmp, *hose;
        struct pnv_phb *phb;
        struct pnv_ioda_pe *pe;

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;
                list_for_each_entry(pe, &phb->ioda.pe_list, list) {
                        pnv_ioda_setup_pe_seg(hose, pe);
                }
        }
}

static void pnv_pci_ioda_setup_DMA(void)
{
        struct pci_controller *hose, *tmp;
        struct pnv_phb *phb;

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                pnv_ioda_setup_dma(hose->private_data);

                /* Mark the PHB initialization done */
                phb = hose->private_data;
                phb->initialized = 1;
        }
}

static void pnv_pci_ioda_create_dbgfs(void)
{
#ifdef CONFIG_DEBUG_FS
        struct pci_controller *hose, *tmp;
        struct pnv_phb *phb;
        char name[16];

        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
                phb = hose->private_data;

                sprintf(name, "PCI%04x", hose->global_number);
                phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
                if (!phb->dbgfs)
                        pr_warning("%s: Error on creating debugfs on PHB#%x\n",
                                   __func__, hose->global_number);
        }
#endif /* CONFIG_DEBUG_FS */
}

static void pnv_pci_ioda_fixup(void)
{
        pnv_pci_ioda_setup_PEs();
        pnv_pci_ioda_setup_seg();
        pnv_pci_ioda_setup_DMA();

        pnv_pci_ioda_create_dbgfs();

#ifdef CONFIG_EEH
        eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
        eeh_addr_cache_build();
        eeh_init();
#endif
}

/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return the I/O or M32 segment size for PE-sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. In that case, we
 * needn't enlarge the alignment, which saves some resources.
 */
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
                                                unsigned long type)
{
        struct pci_dev *bridge;
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        int num_pci_bridges = 0;

        bridge = bus->self;
        while (bridge) {
                if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
                        num_pci_bridges++;
                        if (num_pci_bridges >= 2)
                                return 1;
                }

                bridge = bridge->bus->self;
        }

        /* We need to support prefetchable memory windows later */
        if (type & IORESOURCE_MEM)
                return phb->ioda.m32_segsize;

        return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn;

        /* This function is probably called while the PEs have
         * not been created yet, for example during resource
         * reassignment in the PCI probe period. Just skip the
         * check if the PEs aren't ready.
         */
        if (!phb->initialized)
                return 0;

        pdn = pci_get_pdn(dev);
        if (!pdn || pdn->pe_number == IODA_INVALID_PE)
                return -EINVAL;

        return 0;
}

static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
                               u32 devfn)
{
        return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
        opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
                       OPAL_ASSERT_RESET);
}

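/*
 * Main PHB probe path: read the PHB ID and bus range from the device
 * tree, allocate the pci_controller and the IODA bookkeeping arrays
 * (PE bitmap, segment maps, PE array), install the IODA callbacks,
 * set up MSIs, and reset the IODA tables to a clean state.
 */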
void __init pnv_pci_init_ioda_phb(struct device_node *np,
                                  u64 hub_id, int ioda_type)
{
        struct pci_controller *hose;
        struct pnv_phb *phb;
        unsigned long size, m32map_off, pemap_off, iomap_off = 0;
        const __be64 *prop64;
        const __be32 *prop32;
        int len;
        u64 phb_id;
        void *aux;
        long rc;

        pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);

        prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
        if (!prop64) {
                pr_err(" Missing \"ibm,opal-phbid\" property !\n");
                return;
        }
        phb_id = be64_to_cpup(prop64);
        pr_debug(" PHB-ID : 0x%016llx\n", phb_id);

        phb = alloc_bootmem(sizeof(struct pnv_phb));
        if (!phb) {
                pr_err(" Out of memory !\n");
                return;
        }

        /* Allocate PCI controller */
        memset(phb, 0, sizeof(struct pnv_phb));
        phb->hose = hose = pcibios_alloc_controller(np);
        if (!phb->hose) {
                pr_err(" Can't allocate PCI controller for %s\n",
                       np->full_name);
                free_bootmem((unsigned long)phb, sizeof(struct pnv_phb));
                return;
        }

        spin_lock_init(&phb->lock);
        prop32 = of_get_property(np, "bus-range", &len);
        if (prop32 && len == 8) {
                hose->first_busno = be32_to_cpu(prop32[0]);
                hose->last_busno = be32_to_cpu(prop32[1]);
        } else {
                pr_warn(" Broken <bus-range> on %s\n", np->full_name);
                hose->first_busno = 0;
                hose->last_busno = 0xff;
        }
        hose->private_data = phb;
        phb->hub_id = hub_id;
        phb->opal_id = phb_id;
        phb->type = ioda_type;

        /* Detect specific models for error handling */
        if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
                phb->model = PNV_PHB_MODEL_P7IOC;
        else if (of_device_is_compatible(np, "ibm,power8-pciex"))
                phb->model = PNV_PHB_MODEL_PHB3;
        else
                phb->model = PNV_PHB_MODEL_UNKNOWN;

        /* Parse 32-bit and IO ranges (if any) */
        pci_process_bridge_OF_ranges(hose, np, !hose->global_number);

        /* Get registers */
        phb->regs = of_iomap(np, 0);
        if (phb->regs == NULL)
                pr_err(" Failed to map registers !\n");

        /* Initialize more IODA stuff */
        phb->ioda.total_pe = 1;
        prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
        if (prop32)
                phb->ioda.total_pe = be32_to_cpup(prop32);
        prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
        if (prop32)
                phb->ioda.reserved_pe = be32_to_cpup(prop32);
        phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
        /* FW has already taken the top 64K of M32 space off (MSI space) */
        phb->ioda.m32_size += 0x10000;

        phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
        phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
        phb->ioda.io_size = hose->pci_io_size;
        phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
        phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

        /* Allocate aux data & arrays. We don't have IO ports on PHB3 */
        size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
        m32map_off = size;
        size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
        if (phb->type == PNV_PHB_IODA1) {
                iomap_off = size;
                size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
        }
        pemap_off = size;
        size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
        aux = alloc_bootmem(size);
        memset(aux, 0, size);
        phb->ioda.pe_alloc = aux;
        phb->ioda.m32_segmap = aux + m32map_off;
        if (phb->type == PNV_PHB_IODA1)
                phb->ioda.io_segmap = aux + iomap_off;
        phb->ioda.pe_array = aux + pemap_off;
        set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);

        INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
        INIT_LIST_HEAD(&phb->ioda.pe_list);

        /* Calculate how many 32-bit TCE segments we have */
        phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

        /* Clear unusable m64 */
        hose->mem_resources[1].flags = 0;
        hose->mem_resources[1].start = 0;
        hose->mem_resources[1].end = 0;
        hose->mem_resources[2].flags = 0;
        hose->mem_resources[2].start = 0;
        hose->mem_resources[2].end = 0;

#if 0 /* We should really do that ... */
        rc = opal_pci_set_phb_mem_window(opal->phb_id,
                                         window_type,
                                         window_num,
                                         starting_real_address,
                                         starting_pci_address,
                                         segment_size);
#endif

        pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]"
                " IO: 0x%x [segment=0x%x]\n",
                phb->ioda.total_pe,
                phb->ioda.reserved_pe,
                phb->ioda.m32_size, phb->ioda.m32_segsize,
                phb->ioda.io_size, phb->ioda.io_segsize);

        phb->hose->ops = &pnv_pci_ops;
#ifdef CONFIG_EEH
        phb->eeh_ops = &ioda_eeh_ops;
#endif

        /* Setup RID -> PE mapping function */
        phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

        /* Setup TCEs */
        phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
        phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;

        /* Setup shutdown function for kexec */
        phb->shutdown = pnv_pci_ioda_shutdown;

        /* Setup MSI support */
        pnv_pci_init_ioda_msis(phb);

        /*
         * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
         * to let the PCI core do resource assignment. It's supposed
         * that the PCI core will do correct I/O and MMIO alignment
         * for the P2P bridge BARs so that each PCI bus (excluding
         * the child P2P bridges) can form an individual PE.
         */
        ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
        ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
        ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
        pci_add_flags(PCI_REASSIGN_ALL_RSRC);

        /* Reset IODA tables to a clean state */
        rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
        if (rc)
                pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
}

void __init pnv_pci_init_ioda2_phb(struct device_node *np)
{
        pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}

void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
        struct device_node *phbn;
        const __be64 *prop64;
        u64 hub_id;

        pr_info("Probing IODA IO-Hub %s\n", np->full_name);

        prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
        if (!prop64) {
                pr_err(" Missing \"ibm,opal-hubid\" property !\n");
                return;
        }
        hub_id = be64_to_cpup(prop64);
        pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

        /* Count child PHBs */
        for_each_child_of_node(np, phbn) {
                /* Look for IODA1 PHBs */
                if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
                        pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
        }
}