|
@@ -39,6 +39,7 @@
|
|
|
#include <asm/page.h>
|
|
|
#include <linux/cache.h>
|
|
|
|
|
|
+#include "t4_values.h"
|
|
|
#include "csio_hw.h"
|
|
|
#include "csio_wr.h"
|
|
|
#include "csio_mb.h"
|
|
@@ -1309,8 +1310,11 @@ csio_wr_fixup_host_params(struct csio_hw *hw)
|
|
|
struct csio_sge *sge = &wrm->sge;
|
|
|
uint32_t clsz = L1_CACHE_BYTES;
|
|
|
uint32_t s_hps = PAGE_SHIFT - 10;
|
|
|
- uint32_t ingpad = 0;
|
|
|
uint32_t stat_len = clsz > 64 ? 128 : 64;
|
|
|
+ u32 fl_align = clsz < 32 ? 32 : clsz;
|
|
|
+ u32 pack_align;
|
|
|
+ u32 ingpad, ingpack;
|
|
|
+ int pcie_cap;
|
|
|
|
|
|
csio_wr_reg32(hw, HOSTPAGESIZEPF0_V(s_hps) | HOSTPAGESIZEPF1_V(s_hps) |
|
|
|
HOSTPAGESIZEPF2_V(s_hps) | HOSTPAGESIZEPF3_V(s_hps) |
|
|
@@ -1318,14 +1322,82 @@ csio_wr_fixup_host_params(struct csio_hw *hw)
|
|
|
HOSTPAGESIZEPF6_V(s_hps) | HOSTPAGESIZEPF7_V(s_hps),
|
|
|
SGE_HOST_PAGE_SIZE_A);
|
|
|
|
|
|
- sge->csio_fl_align = clsz < 32 ? 32 : clsz;
|
|
|
- ingpad = ilog2(sge->csio_fl_align) - 5;
|
|
|
+ /* T5 introduced the separation of the Free List Padding and
|
|
|
+ * Packing Boundaries. Thus, we can select a smaller Padding
|
|
|
+ * Boundary to avoid uselessly chewing up PCIe Link and Memory
|
|
|
+ * Bandwidth, and use a Packing Boundary which is large enough
|
|
|
+ * to avoid false sharing between CPUs, etc.
|
|
|
+ *
|
|
|
+ * For the PCI Link, the smaller the Padding Boundary the
|
|
|
+ * better. For the Memory Controller, a smaller Padding
|
|
|
+ * Boundary is better until we cross under the Memory Line
|
|
|
+ * Size (the minimum unit of transfer to/from Memory). If we
|
|
|
+ * have a Padding Boundary which is smaller than the Memory
|
|
|
+ * Line Size, that'll involve a Read-Modify-Write cycle on the
|
|
|
+ * Memory Controller which is never good.
|
|
|
+ */
|
|
|
+
|
|
|
+ /* We want the Packing Boundary to be based on the Cache Line
|
|
|
+ * Size in order to help avoid False Sharing performance
|
|
|
+ * issues between CPUs, etc. We also want the Packing
|
|
|
+ * Boundary to incorporate the PCI-E Maximum Payload Size. We
|
|
|
+ * get best performance when the Packing Boundary is a
|
|
|
+ * multiple of the Maximum Payload Size.
|
|
|
+ */
|
|
|
+ pack_align = fl_align;
|
|
|
+ pcie_cap = pci_find_capability(hw->pdev, PCI_CAP_ID_EXP);
|
|
|
+ if (pcie_cap) {
|
|
|
+ u32 mps, mps_log;
|
|
|
+ u16 devctl;
|
|
|
+
|
|
|
+ /* The PCIe Device Control Maximum Payload Size field
|
|
|
+ * [bits 7:5] encodes sizes as powers of 2 starting at
|
|
|
+ * 128 bytes.
|
|
|
+ */
|
|
|
+ pci_read_config_word(hw->pdev,
|
|
|
+ pcie_cap + PCI_EXP_DEVCTL,
|
|
|
+ &devctl);
|
|
|
+ mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
|
|
|
+ mps = 1 << mps_log;
|
|
|
+ if (mps > pack_align)
|
|
|
+ pack_align = mps;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* T5/T6 have a special interpretation of the "0"
|
|
|
+ * value for the Packing Boundary. This corresponds to 16
|
|
|
+ * bytes instead of the expected 32 bytes.
|
|
|
+ */
|
|
|
+ if (pack_align <= 16) {
|
|
|
+ ingpack = INGPACKBOUNDARY_16B_X;
|
|
|
+ fl_align = 16;
|
|
|
+ } else if (pack_align == 32) {
|
|
|
+ ingpack = INGPACKBOUNDARY_64B_X;
|
|
|
+ fl_align = 64;
|
|
|
+ } else {
|
|
|
+ u32 pack_align_log = fls(pack_align) - 1;
|
|
|
+
|
|
|
+ ingpack = pack_align_log - INGPACKBOUNDARY_SHIFT_X;
|
|
|
+ fl_align = pack_align;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Use the smallest Ingress Padding which isn't smaller than
|
|
|
+ * the Memory Controller Read/Write Size. We'll take that as
|
|
|
+ * being 8 bytes since we don't know of any system with a
|
|
|
+ * wider Memory Controller Bus Width.
|
|
|
+ */
|
|
|
+ if (csio_is_t5(hw->pdev->device & CSIO_HW_CHIP_MASK))
|
|
|
+ ingpad = INGPADBOUNDARY_32B_X;
|
|
|
+ else
|
|
|
+ ingpad = T6_INGPADBOUNDARY_8B_X;
|
|
|
|
|
|
csio_set_reg_field(hw, SGE_CONTROL_A,
|
|
|
INGPADBOUNDARY_V(INGPADBOUNDARY_M) |
|
|
|
EGRSTATUSPAGESIZE_F,
|
|
|
INGPADBOUNDARY_V(ingpad) |
|
|
|
EGRSTATUSPAGESIZE_V(stat_len != 64));
|
|
|
+ csio_set_reg_field(hw, SGE_CONTROL2_A,
|
|
|
+ INGPACKBOUNDARY_V(INGPACKBOUNDARY_M),
|
|
|
+ INGPACKBOUNDARY_V(ingpack));
|
|
|
|
|
|
/* FL BUFFER SIZE#0 is Page size i,e already aligned to cache line */
|
|
|
csio_wr_reg32(hw, PAGE_SIZE, SGE_FL_BUFFER_SIZE0_A);
|
|
@@ -1337,14 +1409,16 @@ csio_wr_fixup_host_params(struct csio_hw *hw)
|
|
|
if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS) {
|
|
|
csio_wr_reg32(hw,
|
|
|
(csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE2_A) +
|
|
|
- sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
|
|
|
+ fl_align - 1) & ~(fl_align - 1),
|
|
|
SGE_FL_BUFFER_SIZE2_A);
|
|
|
csio_wr_reg32(hw,
|
|
|
(csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE3_A) +
|
|
|
- sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
|
|
|
+ fl_align - 1) & ~(fl_align - 1),
|
|
|
SGE_FL_BUFFER_SIZE3_A);
|
|
|
}
|
|
|
|
|
|
+ sge->csio_fl_align = fl_align;
|
|
|
+
|
|
|
csio_wr_reg32(hw, HPZ0_V(PAGE_SHIFT - 12), ULP_RX_TDDP_PSZ_A);
|
|
|
|
|
|
/* default value of rx_dma_offset of the NIC driver */
|