@@ -322,11 +322,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
ret = entry << tbl->it_page_shift; /* Set the return dma address */

/* Put the TCEs in the HW table */
- build_fail = ppc_md.tce_build(tbl, entry, npages,
+ build_fail = tbl->it_ops->set(tbl, entry, npages,
(unsigned long)page &
IOMMU_PAGE_MASK(tbl), direction, attrs);

- /* ppc_md.tce_build() only returns non-zero for transient errors.
+ /* tbl->it_ops->set() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
* DMA_ERROR_CODE. For all other errors the functionality is
* not altered.
@@ -337,8 +337,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
}

/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);

/* Make sure updates are seen by hardware */
mb();
@@ -408,7 +408,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
if (!iommu_free_check(tbl, dma_addr, npages))
return;

- ppc_md.tce_free(tbl, entry, npages);
+ tbl->it_ops->clear(tbl, entry, npages);

spin_lock_irqsave(&(pool->lock), flags);
bitmap_clear(tbl->it_map, free_entry, npages);
@@ -424,8 +424,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
* not do an mb() here on purpose, it is not needed on any of
* the current platforms.
*/
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
}

int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
@@ -495,7 +495,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
npages, entry, dma_addr);

/* Insert into HW table */
- build_fail = ppc_md.tce_build(tbl, entry, npages,
+ build_fail = tbl->it_ops->set(tbl, entry, npages,
vaddr & IOMMU_PAGE_MASK(tbl),
direction, attrs);
if(unlikely(build_fail))
@@ -534,8 +534,8 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
}

/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);

DBG("mapped %d elements:\n", outcount);

@@ -600,8 +600,8 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
* do not do an mb() here, the affected platforms do not need it
* when freeing.
*/
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
}

static void iommu_table_clear(struct iommu_table *tbl)
@@ -613,17 +613,17 @@ static void iommu_table_clear(struct iommu_table *tbl)
*/
if (!is_kdump_kernel() || is_fadump_active()) {
/* Clear the table in case firmware left allocations in it */
- ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+ tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size);
return;
}

#ifdef CONFIG_CRASH_DUMP
- if (ppc_md.tce_get) {
+ if (tbl->it_ops->get) {
unsigned long index, tceval, tcecount = 0;

/* Reserve the existing mappings left by the first kernel. */
for (index = 0; index < tbl->it_size; index++) {
- tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+ tceval = tbl->it_ops->get(tbl, index + tbl->it_offset);
/*
* Freed TCE entry contains 0x7fffffffffffffff on JS20
*/
@@ -657,6 +657,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
unsigned int i;
struct iommu_pool *p;

+ BUG_ON(!tbl->it_ops);
+
/* number of bytes needed for the bitmap */
sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
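The new BUG_ON(!tbl->it_ops) above makes the per-table callback structure mandatory: a platform must attach its TCE callbacks to the table before calling iommu_init_table(). As a reference, here is a minimal sketch of what that ops structure presumably provides, inferred only from the callbacks this patch exercises (set, clear, get, flush and the optional exchange); exact prototypes and any extra members are assumptions, and my_platform_tce_ops is an illustrative name:

/* Sketch only: members inferred from the tbl->it_ops-> accesses in this patch. */
struct iommu_table_ops {
	/* map @npages TCEs starting at @index to the (kernel virtual) @uaddr */
	int (*set)(struct iommu_table *tbl, long index, long npages,
		   unsigned long uaddr, enum dma_data_direction direction,
		   struct dma_attrs *attrs);
	/* atomically swap a TCE, returning the old host address/direction;
	 * optional, but required by iommu_take_ownership() further down */
	int (*exchange)(struct iommu_table *tbl, long index,
			unsigned long *hpa, enum dma_data_direction *direction);
	void (*clear)(struct iommu_table *tbl, long index, long npages);
	unsigned long (*get)(struct iommu_table *tbl, long index);
	void (*flush)(struct iommu_table *tbl);
};

	/* hypothetical platform setup, done before the table is initialised */
	tbl->it_ops = &my_platform_tce_ops;
	tbl = iommu_init_table(tbl, nid);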

@@ -713,9 +715,11 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
unsigned long bitmap_sz;
unsigned int order;

- if (!tbl || !tbl->it_map) {
- printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
- node_name);
+ if (!tbl)
+ return;
+
+ if (!tbl->it_map) {
+ kfree(tbl);
return;
}

@@ -726,13 +730,6 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);

-#ifdef CONFIG_IOMMU_API
- if (tbl->it_group) {
- iommu_group_put(tbl->it_group);
- BUG_ON(tbl->it_group);
- }
-#endif
-
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
@@ -871,17 +868,33 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
}
}

+unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
+{
+ switch (dir) {
+ case DMA_BIDIRECTIONAL:
+ return TCE_PCI_READ | TCE_PCI_WRITE;
+ case DMA_FROM_DEVICE:
+ return TCE_PCI_WRITE;
+ case DMA_TO_DEVICE:
+ return TCE_PCI_READ;
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);
+
#ifdef CONFIG_IOMMU_API
/*
* SPAPR TCE API
*/
static void group_release(void *iommu_data)
{
- struct iommu_table *tbl = iommu_data;
- tbl->it_group = NULL;
+ struct iommu_table_group *table_group = iommu_data;
+
+ table_group->group = NULL;
}

-void iommu_register_group(struct iommu_table *tbl,
+void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num)
{
struct iommu_group *grp;
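The iommu_direction_to_tce_perm() helper added in the hunk above gives platform code a single place to turn a DMA direction into TCE permission bits. A hedged sketch of how a platform's set() callback might use it; the function name example_tce_build and the tce_write() store helper are illustrative only and not part of this patch:

/* Illustrative only: build TCEs for a range, using the new helper for the
 * permission bits. uaddr is a kernel virtual address (see the callers above),
 * so it is converted to a physical address before being written. */
static int example_tce_build(struct iommu_table *tbl, long index, long npages,
			     unsigned long uaddr,
			     enum dma_data_direction direction,
			     struct dma_attrs *attrs)
{
	unsigned long proto_tce = iommu_direction_to_tce_perm(direction);

	while (npages--) {
		tce_write(tbl, index++, __pa(uaddr) | proto_tce);
		uaddr += IOMMU_PAGE_SIZE(tbl);
	}
	return 0;
}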
@@ -893,8 +906,8 @@ void iommu_register_group(struct iommu_table *tbl,
PTR_ERR(grp));
return;
}
- tbl->it_group = grp;
- iommu_group_set_iommudata(grp, tbl, group_release);
+ table_group->group = grp;
+ iommu_group_set_iommudata(grp, table_group, group_release);
name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
pci_domain_number, pe_num);
if (!name)
@@ -919,8 +932,8 @@ EXPORT_SYMBOL_GPL(iommu_tce_direction);
void iommu_flush_tce(struct iommu_table *tbl)
{
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);

/* Make sure updates are seen by hardware */
mb();
@@ -931,7 +944,7 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce_value,
unsigned long npages)
{
- /* ppc_md.tce_free() does not support any value but 0 */
+ /* tbl->it_ops->clear() does not support any value but 0 */
if (tce_value)
return -EINVAL;

@@ -952,10 +965,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce)
{
- if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
- return -EINVAL;
-
- if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
+ if (tce & ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;

if (ioba & ~IOMMU_PAGE_MASK(tbl))
@@ -972,68 +982,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
}
EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);

-unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
-{
- unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
-
- spin_lock(&(pool->lock));
-
- oldtce = ppc_md.tce_get(tbl, entry);
- if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
- ppc_md.tce_free(tbl, entry, 1);
- else
- oldtce = 0;
-
- spin_unlock(&(pool->lock));
-
- return oldtce;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tce);
-
-int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
- unsigned long entry, unsigned long pages)
-{
- unsigned long oldtce;
- struct page *page;
-
- for ( ; pages; --pages, ++entry) {
- oldtce = iommu_clear_tce(tbl, entry);
- if (!oldtce)
- continue;
-
- page = pfn_to_page(oldtce >> PAGE_SHIFT);
- WARN_ON(!page);
- if (page) {
- if (oldtce & TCE_PCI_WRITE)
- SetPageDirty(page);
- put_page(page);
- }
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
-
-/*
- * hwaddr is a kernel virtual address here (0xc... bazillion),
- * tce_build converts it to a physical address.
- */
-int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
- unsigned long hwaddr, enum dma_data_direction direction)
+long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction)
{
- int ret = -EBUSY;
- unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
-
- spin_lock(&(pool->lock));
+ long ret;

- oldtce = ppc_md.tce_get(tbl, entry);
- /* Add new entry if it is not busy */
- if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
- ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
+ ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);

- spin_unlock(&(pool->lock));
+ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+ (*direction == DMA_BIDIRECTIONAL)))
+ SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));

/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
@@ -1042,84 +1000,72 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,

return ret;
}
-EXPORT_SYMBOL_GPL(iommu_tce_build);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg);

-int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
- unsigned long tce)
+int iommu_take_ownership(struct iommu_table *tbl)
{
- int ret;
- struct page *page = NULL;
- unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
- enum dma_data_direction direction = iommu_tce_direction(tce);
-
- ret = get_user_pages_fast(tce & PAGE_MASK, 1,
- direction != DMA_TO_DEVICE, &page);
- if (unlikely(ret != 1)) {
- /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
- tce, entry << tbl->it_page_shift, ret); */
- return -EFAULT;
- }
- hwaddr = (unsigned long) page_address(page) + offset;
-
- ret = iommu_tce_build(tbl, entry, hwaddr, direction);
- if (ret)
- put_page(page);
-
- if (ret < 0)
- pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
- __func__, entry << tbl->it_page_shift, tce, ret);
+ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+ int ret = 0;

- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
+ /*
+ * VFIO does not control TCE entries allocation and the guest
+ * can write new TCEs on top of existing ones so iommu_tce_build()
+ * must be able to release old pages. This functionality
+ * requires exchange() callback defined so if it is not
+ * implemented, we disallow taking ownership over the table.
+ */
+ if (!tbl->it_ops->exchange)
+ return -EINVAL;

-int iommu_take_ownership(struct iommu_table *tbl)
-{
- unsigned long sz = (tbl->it_size + 7) >> 3;
+ spin_lock_irqsave(&tbl->large_pool.lock, flags);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_lock(&tbl->pools[i].lock);

if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);

if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
- return -EBUSY;
+ ret = -EBUSY;
+ /* Restore bit#0 set by iommu_init_table() */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+ } else {
+ memset(tbl->it_map, 0xff, sz);
}

- memset(tbl->it_map, 0xff, sz);
- iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_unlock(&tbl->pools[i].lock);
+ spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

- /*
- * Disable iommu bypass, otherwise the user can DMA to all of
- * our physical memory via the bypass window instead of just
- * the pages that has been explicitly mapped into the iommu
- */
- if (tbl->set_bypass)
- tbl->set_bypass(tbl, false);
-
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(iommu_take_ownership);

void iommu_release_ownership(struct iommu_table *tbl)
{
- unsigned long sz = (tbl->it_size + 7) >> 3;
+ unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+
+ spin_lock_irqsave(&tbl->large_pool.lock, flags);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_lock(&tbl->pools[i].lock);

- iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
memset(tbl->it_map, 0, sz);

/* Restore bit#0 set by iommu_init_table() */
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);

- /* The kernel owns the device now, we can restore the iommu bypass */
- if (tbl->set_bypass)
- tbl->set_bypass(tbl, true);
+ for (i = 0; i < tbl->nr_pools; i++)
+ spin_unlock(&tbl->pools[i].lock);
+ spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);

int iommu_add_device(struct device *dev)
{
struct iommu_table *tbl;
+ struct iommu_table_group_link *tgl;

/*
* The sysfs entries should be populated before
@@ -1137,15 +1083,22 @@ int iommu_add_device(struct device *dev)
}

tbl = get_iommu_table_base(dev);
- if (!tbl || !tbl->it_group) {
+ if (!tbl) {
pr_debug("%s: Skipping device %s with no tbl\n",
__func__, dev_name(dev));
return 0;
}

+ tgl = list_first_entry_or_null(&tbl->it_group_list,
+ struct iommu_table_group_link, next);
+ if (!tgl) {
+ pr_debug("%s: Skipping device %s with no group\n",
+ __func__, dev_name(dev));
+ return 0;
+ }
pr_debug("%s: Adding %s to iommu group %d\n",
__func__, dev_name(dev),
- iommu_group_id(tbl->it_group));
+ iommu_group_id(tgl->table_group->group));

if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
@@ -1154,7 +1107,7 @@ int iommu_add_device(struct device *dev)
return -EINVAL;
}

- return iommu_group_add_device(tbl->it_group, dev);
+ return iommu_group_add_device(tgl->table_group->group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
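For context, the group-related hunks above replace the old one-to-one tbl->it_group pointer with an iommu_table_group that owns the iommu_group, reached from a table through the iommu_table_group_link entries on tbl->it_group_list. A rough sketch of the relationship as this patch uses it; only the fields actually dereferenced here (next, table_group, group, it_group_list) are taken from the patch, anything else would be an assumption:

/* Sketch of the linkage assumed by iommu_add_device() and
 * iommu_register_group() above. */
struct iommu_table_group_link {
	struct list_head next;			/* linked on tbl->it_group_list */
	struct iommu_table_group *table_group;
};

struct iommu_table_group {
	struct iommu_group *group;		/* set by iommu_register_group() */
	/* platform-specific members, e.g. the tables themselves (assumption) */
};

/* Typical lookup, as done in iommu_add_device():
 *	tgl = list_first_entry_or_null(&tbl->it_group_list,
 *			struct iommu_table_group_link, next);
 *	grp = tgl ? tgl->table_group->group : NULL;
 */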