
Merge tag 'iwlwifi-next-for-kalle-2016-09-15-2' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-next

* work for new hardware support continues
* dynamic queue allocation stabilization
* improvements in the MSIx code
* multiqueue support work continues
* new firmware version support
* general cleanups and improvements
Kalle Valo 9 years ago
parent
commit
6833d0700d
26 changed files with 897 additions and 459 deletions
  1. + 1 - 10     drivers/net/wireless/intel/iwlwifi/dvm/ucode.c
  2. + 2 - 2      drivers/net/wireless/intel/iwlwifi/iwl-7000.c
  3. + 13 - 2     drivers/net/wireless/intel/iwlwifi/iwl-8000.c
  4. + 12 - 1     drivers/net/wireless/intel/iwlwifi/iwl-9000.c
  5. + 1 - 1      drivers/net/wireless/intel/iwlwifi/iwl-a000.c
  6. + 2 - 0      drivers/net/wireless/intel/iwlwifi/iwl-config.h
  7. + 2 - 0      drivers/net/wireless/intel/iwlwifi/iwl-csr.h
  8. + 45 - 12    drivers/net/wireless/intel/iwlwifi/iwl-fh.h
  9. + 9 - 0      drivers/net/wireless/intel/iwlwifi/iwl-fw.h
  10. + 1 - 1     drivers/net/wireless/intel/iwlwifi/iwl-io.c
  11. + 1 - 0     drivers/net/wireless/intel/iwlwifi/iwl-trans.c
  12. + 11 - 4    drivers/net/wireless/intel/iwlwifi/iwl-trans.h
  13. + 79 - 0    drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
  14. + 19 - 3    drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h
  15. + 1 - 3     drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
  16. + 58 - 51   drivers/net/wireless/intel/iwlwifi/mvm/fw.c
  17. + 1 - 8     drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
  18. + 1 - 1     drivers/net/wireless/intel/iwlwifi/mvm/ops.c
  19. + 4 - 4     drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
  20. + 27 - 9    drivers/net/wireless/intel/iwlwifi/mvm/sta.c
  21. + 117 - 58  drivers/net/wireless/intel/iwlwifi/mvm/tx.c
  22. + 5 - 0     drivers/net/wireless/intel/iwlwifi/pcie/drv.c
  23. + 76 - 51   drivers/net/wireless/intel/iwlwifi/pcie/internal.h
  24. + 15 - 1    drivers/net/wireless/intel/iwlwifi/pcie/rx.c
  25. + 167 - 80  drivers/net/wireless/intel/iwlwifi/pcie/trans.c
  26. + 227 - 157 drivers/net/wireless/intel/iwlwifi/pcie/tx.c

+ 1 - 10
drivers/net/wireless/intel/iwlwifi/dvm/ucode.c

@@ -46,15 +46,6 @@
  *
  ******************************************************************************/
 
-static inline const struct fw_img *
-iwl_get_ucode_image(struct iwl_priv *priv, enum iwl_ucode_type ucode_type)
-{
-	if (ucode_type >= IWL_UCODE_TYPE_MAX)
-		return NULL;
-
-	return &priv->fw->img[ucode_type];
-}
-
 /*
  *  Calibration
  */
@@ -330,7 +321,7 @@ int iwl_load_ucode_wait_alive(struct iwl_priv *priv,
 	enum iwl_ucode_type old_type;
 	static const u16 alive_cmd[] = { REPLY_ALIVE };
 
-	fw = iwl_get_ucode_image(priv, ucode_type);
+	fw = iwl_get_ucode_image(priv->fw, ucode_type);
 	if (WARN_ON(!fw))
 		return -EINVAL;
 

+ 2 - 2
drivers/net/wireless/intel/iwlwifi/iwl-7000.c

@@ -73,8 +73,8 @@
 /* Highest firmware API version supported */
 #define IWL7260_UCODE_API_MAX	17
 #define IWL7265_UCODE_API_MAX	17
-#define IWL7265D_UCODE_API_MAX	24
-#define IWL3168_UCODE_API_MAX	24
+#define IWL7265D_UCODE_API_MAX	26
+#define IWL3168_UCODE_API_MAX	26
 
 /* Lowest firmware API version supported */
 #define IWL7260_UCODE_API_MIN	16

+ 13 - 2
drivers/net/wireless/intel/iwlwifi/iwl-8000.c

@@ -70,8 +70,8 @@
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL8000_UCODE_API_MAX	24
-#define IWL8265_UCODE_API_MAX	24
+#define IWL8000_UCODE_API_MAX	26
+#define IWL8265_UCODE_API_MAX	26
 
 /* Lowest firmware API version supported */
 #define IWL8000_UCODE_API_MIN	16
@@ -212,6 +212,17 @@ const struct iwl_cfg iwl8265_2ac_cfg = {
 	.vht_mu_mimo_supported = true,
 };
 
+const struct iwl_cfg iwl8275_2ac_cfg = {
+	.name = "Intel(R) Dual Band Wireless AC 8275",
+	.fw_name_pre = IWL8265_FW_PRE,
+	IWL_DEVICE_8265,
+	.ht_params = &iwl8000_ht_params,
+	.nvm_ver = IWL8000_NVM_VERSION,
+	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.vht_mu_mimo_supported = true,
+};
+
 const struct iwl_cfg iwl4165_2ac_cfg = {
 	.name = "Intel(R) Dual Band Wireless AC 4165",
 	.fw_name_pre = IWL8000_FW_PRE,

+ 12 - 1
drivers/net/wireless/intel/iwlwifi/iwl-9000.c

@@ -55,7 +55,7 @@
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL9000_UCODE_API_MAX	24
+#define IWL9000_UCODE_API_MAX	26
 
 /* Lowest firmware API version supported */
 #define IWL9000_UCODE_API_MIN	16
@@ -187,6 +187,17 @@ const struct iwl_cfg iwl9460_2ac_cfg = {
 	.integrated = true,
 };
 
+const struct iwl_cfg iwl9560_2ac_cfg = {
+	.name = "Intel(R) Dual Band Wireless AC 9560",
+	.fw_name_pre = IWL9000_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.integrated = true,
+};
+
 /*
  * TODO the struct below is for internal testing only this should be
  * removed by EO 2016~

+ 1 - 1
drivers/net/wireless/intel/iwlwifi/iwl-a000.c

@@ -55,7 +55,7 @@
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL_A000_UCODE_API_MAX	24
+#define IWL_A000_UCODE_API_MAX	26
 
 /* Lowest firmware API version supported */
 #define IWL_A000_UCODE_API_MIN	24

+ 2 - 0
drivers/net/wireless/intel/iwlwifi/iwl-config.h

@@ -445,6 +445,7 @@ extern const struct iwl_cfg iwl7265d_n_cfg;
 extern const struct iwl_cfg iwl8260_2n_cfg;
 extern const struct iwl_cfg iwl8260_2ac_cfg;
 extern const struct iwl_cfg iwl8265_2ac_cfg;
+extern const struct iwl_cfg iwl8275_2ac_cfg;
 extern const struct iwl_cfg iwl4165_2ac_cfg;
 extern const struct iwl_cfg iwl8260_2ac_sdio_cfg;
 extern const struct iwl_cfg iwl8265_2ac_sdio_cfg;
@@ -454,6 +455,7 @@ extern const struct iwl_cfg iwl9160_2ac_cfg;
 extern const struct iwl_cfg iwl9260_2ac_cfg;
 extern const struct iwl_cfg iwl9270_2ac_cfg;
 extern const struct iwl_cfg iwl9460_2ac_cfg;
+extern const struct iwl_cfg iwl9560_2ac_cfg;
 extern const struct iwl_cfg iwla000_2ac_cfg;
 #endif /* CONFIG_IWLMVM */
 

+ 2 - 0
drivers/net/wireless/intel/iwlwifi/iwl-csr.h

@@ -589,6 +589,8 @@ enum dtd_diode_reg {
  * Causes for the FH register interrupts
  */
 enum msix_fh_int_causes {
+	MSIX_FH_INT_CAUSES_Q0			= BIT(0),
+	MSIX_FH_INT_CAUSES_Q1			= BIT(1),
 	MSIX_FH_INT_CAUSES_D2S_CH0_NUM		= BIT(16),
 	MSIX_FH_INT_CAUSES_D2S_CH1_NUM		= BIT(17),
 	MSIX_FH_INT_CAUSES_S2D			= BIT(19),

+ 45 - 12
drivers/net/wireless/intel/iwlwifi/iwl-fh.h

@@ -643,6 +643,7 @@ struct iwl_rb_status {
 #define TFD_QUEUE_BC_SIZE	(TFD_QUEUE_SIZE_MAX + TFD_QUEUE_SIZE_BC_DUP)
 #define IWL_TX_DMA_MASK        DMA_BIT_MASK(36)
 #define IWL_NUM_OF_TBS		20
+#define IWL_TFH_NUM_TBS		25
 
 static inline u8 iwl_get_dma_hi_addr(dma_addr_t addr)
 {
@@ -664,25 +665,29 @@ struct iwl_tfd_tb {
 } __packed;
 
 /**
- * struct iwl_tfd
+ * struct iwl_tfh_tb transmit buffer descriptor within transmit frame descriptor
  *
- * Transmit Frame Descriptor (TFD)
- *
- * @ __reserved1[3] reserved
- * @ num_tbs 0-4 number of active tbs
- *	     5   reserved
- * 	     6-7 padding (not used)
- * @ tbs[20]	transmit frame buffer descriptors
- * @ __pad 	padding
+ * This structure contains dma address and length of transmission address
  *
+ * @tb_len length of the tx buffer
+ * @addr 64 bits dma address
+ */
+struct iwl_tfh_tb {
+	__le16 tb_len;
+	__le64 addr;
+} __packed;
+
+/**
  * Each Tx queue uses a circular buffer of 256 TFDs stored in host DRAM.
  * Both driver and device share these circular buffers, each of which must be
- * contiguous 256 TFDs x 128 bytes-per-TFD = 32 KBytes
+ * contiguous 256 TFDs.
+ * For pre a000 HW it is 256 x 128 bytes-per-TFD = 32 KBytes
+ * For a000 HW and on it is 256 x 256 bytes-per-TFD = 65 KBytes
  *
  * Driver must indicate the physical address of the base of each
  * circular buffer via the FH_MEM_CBBC_QUEUE registers.
  *
- * Each TFD contains pointer/size information for up to 20 data buffers
+ * Each TFD contains pointer/size information for up to 20 / 25 data buffers
  * in host DRAM.  These buffers collectively contain the (one) frame described
  * by the TFD.  Each buffer must be a single contiguous block of memory within
  * itself, but buffers may be scattered in host DRAM.  Each buffer has max size
@@ -691,6 +696,16 @@ struct iwl_tfd_tb {
  *
  * A maximum of 255 (not 256!) TFDs may be on a queue waiting for Tx.
  */
+
+/**
+ * struct iwl_tfd - Transmit Frame Descriptor (TFD)
+ * @ __reserved1[3] reserved
+ * @ num_tbs 0-4 number of active tbs
+ *	     5   reserved
+ *	     6-7 padding (not used)
+ * @ tbs[20]	transmit frame buffer descriptors
+ * @ __pad	padding
+ */
 struct iwl_tfd {
 	u8 __reserved1[3];
 	u8 num_tbs;
@@ -698,6 +713,19 @@ struct iwl_tfd {
 	__le32 __pad;
 } __packed;
 
+/**
+ * struct iwl_tfh_tfd - Transmit Frame Descriptor (TFD)
+ * @ num_tbs 0-4 number of active tbs
+ *	     5 -15   reserved
+ * @ tbs[25]	transmit frame buffer descriptors
+ * @ __pad	padding
+ */
+struct iwl_tfh_tfd {
+	__le16 num_tbs;
+	struct iwl_tfh_tb tbs[IWL_TFH_NUM_TBS];
+	__le32 __pad;
+} __packed;
+
 /* Keep Warm Size */
 #define IWL_KW_SIZE 0x1000	/* 4k */
 
@@ -706,8 +734,13 @@ struct iwl_tfd {
 /**
  * struct iwlagn_schedq_bc_tbl scheduler byte count table
  *	base physical address provided by SCD_DRAM_BASE_ADDR
+ * For devices up to a000:
+ * @tfd_offset  0-12 - tx command byte count
+ *		12-16 - station index
+ * For a000 and on:
  * @tfd_offset  0-12 - tx command byte count
- *	       12-16 - station index
+ *		12-13 - number of 64 byte chunks
+ *		14-16 - reserved
  */
 struct iwlagn_scd_bc_tbl {
 	__le16 tfd_offset[TFD_QUEUE_BC_SIZE];
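
[Note: to make the layout change above concrete, here is a small standalone C model (not the kernel code) of the two TFD formats: the legacy TFD packs a 36-bit address and a 12-bit length into each of 20 TB slots (128 bytes per TFD), while the a000 TFH TFD carries 25 TBs with explicit 16-bit lengths and 64-bit addresses (256 bytes per TFD). Names are illustrative; in-kernel the fields are little-endian (__le16/__le32/__le64) and go through le*_to_cpu().]

#include <stdint.h>
#include <stdio.h>

#define NUM_OF_TBS  20 /* legacy TFD */
#define TFH_NUM_TBS 25 /* a000 TFH TFD */

struct tb {                       /* legacy TB: 36-bit addr, packed length */
	uint32_t lo;              /* low 32 bits of the DMA address */
	uint16_t hi_n_len;        /* bits 0-3: addr bits 32-35, 4-15: length */
} __attribute__((packed));

struct tfd {
	uint8_t  reserved[3];
	uint8_t  num_tbs;         /* bits 0-4: number of active TBs */
	struct tb tbs[NUM_OF_TBS];
	uint32_t pad;
} __attribute__((packed));

struct tfh_tb {                   /* a000 TB: explicit length, 64-bit addr */
	uint16_t tb_len;
	uint64_t addr;
} __attribute__((packed));

struct tfh_tfd {
	uint16_t num_tbs;         /* bits 0-4 used, 5-15 reserved */
	struct tfh_tb tbs[TFH_NUM_TBS];
	uint32_t pad;
} __attribute__((packed));

/* Mirrors the shape of iwl_pcie_tfd_tb_get_len() further below: the TB
 * length lives in different places depending on the layout in use. */
static uint16_t tb_get_len(int use_tfh, const void *tfd, int idx)
{
	if (use_tfh)
		return ((const struct tfh_tfd *)tfd)->tbs[idx].tb_len;
	return ((const struct tfd *)tfd)->tbs[idx].hi_n_len >> 4;
}

int main(void)
{
	printf("legacy TFD: %zu bytes, TFH TFD: %zu bytes\n",
	       sizeof(struct tfd), sizeof(struct tfh_tfd)); /* 128 / 256 */
	struct tfh_tfd t = { .num_tbs = 1, .tbs = { { .tb_len = 1200 } } };
	printf("TB0 len = %u\n", tb_get_len(1, &t, 0));      /* -> 1200 */
	return 0;
}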

+ 9 - 0
drivers/net/wireless/intel/iwlwifi/iwl-fw.h

@@ -329,4 +329,13 @@ iwl_fw_dbg_conf_usniffer(const struct iwl_fw *fw, u8 id)
 	return conf_tlv->usniffer;
 }
 
+static inline const struct fw_img *
+iwl_get_ucode_image(const struct iwl_fw *fw, enum iwl_ucode_type ucode_type)
+{
+	if (ucode_type >= IWL_UCODE_TYPE_MAX)
+		return NULL;
+
+	return &fw->img[ucode_type];
+}
+
 #endif  /* __iwl_fw_h__ */
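
[Note: the point of this move is that both op modes can now share one bounds-checked lookup instead of keeping private copies: the dvm copy is deleted above and the mvm copy further down in mvm/fw.c. A minimal standalone sketch of the pattern, with illustrative names:]

#include <stdio.h>

/* Index a fixed array of firmware images, returning NULL for
 * out-of-range types, so every caller gets the same bounds check. */
enum ucode_type { UCODE_REGULAR, UCODE_INIT, UCODE_REGULAR_USNIFFER,
		  UCODE_TYPE_MAX };

struct fw_img { const char *name; };
struct fw     { struct fw_img img[UCODE_TYPE_MAX]; };

static const struct fw_img *get_ucode_image(const struct fw *fw,
					    enum ucode_type type)
{
	if (type >= UCODE_TYPE_MAX)
		return NULL;
	return &fw->img[type];
}

int main(void)
{
	struct fw fw = { .img = { { "regular" }, { "init" }, { "usniffer" } } };
	const struct fw_img *img = get_ucode_image(&fw, UCODE_INIT);

	printf("%s\n", img ? img->name : "(out of range)");
	return 0;
}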

+ 1 - 1
drivers/net/wireless/intel/iwlwifi/iwl-io.c

@@ -267,7 +267,7 @@ static const char *get_rfh_string(int cmd)
 		IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_WIDX, i);
 		IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_RIDX, i);
 		IWL_CMD_MQ(cmd, RFH_Q_URBD_STTS_WPTR_LSB, i);
-	};
+	}
 
 	switch (cmd) {
 	IWL_CMD(RFH_RXF_DMA_CFG);

+ 1 - 0
drivers/net/wireless/intel/iwlwifi/iwl-trans.c

@@ -65,6 +65,7 @@
 
 #include "iwl-trans.h"
 #include "iwl-drv.h"
+#include "iwl-fh.h"
 
 struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
 				  struct device *dev,

+ 11 - 4
drivers/net/wireless/intel/iwlwifi/iwl-trans.h

@@ -262,8 +262,6 @@ static inline u32 iwl_rx_packet_payload_len(const struct iwl_rx_packet *pkt)
  *	(i.e. mark it as non-idle).
  * @CMD_WANT_ASYNC_CALLBACK: the op_mode's async callback function must be
  *	called after this command completes. Valid only with CMD_ASYNC.
- * @CMD_TB_BITMAP_POS: Position of the first bit for the TB bitmap. We need to
- *	check that we leave enough room for the TBs bitmap which needs 20 bits.
  */
 enum CMD_MODE {
 	CMD_ASYNC		= BIT(0),
@@ -274,8 +272,6 @@ enum CMD_MODE {
 	CMD_MAKE_TRANS_IDLE	= BIT(5),
 	CMD_WAKE_UP_TRANS	= BIT(6),
 	CMD_WANT_ASYNC_CALLBACK	= BIT(7),
-
-	CMD_TB_BITMAP_POS	= 11,
 };
 
 #define DEF_CMD_PAYLOAD_SIZE 320
@@ -649,6 +645,8 @@ struct iwl_trans_ops {
 	void (*txq_set_shared_mode)(struct iwl_trans *trans, u32 txq_id,
 				    bool shared);
 
+	dma_addr_t (*get_txq_byte_table)(struct iwl_trans *trans, int txq_id);
+
 	int (*wait_tx_queue_empty)(struct iwl_trans *trans, u32 txq_bm);
 	void (*freeze_txq_timer)(struct iwl_trans *trans, unsigned long txqs,
 				 bool freeze);
@@ -1073,6 +1071,15 @@ static inline void iwl_trans_txq_set_shared_mode(struct iwl_trans *trans,
 		trans->ops->txq_set_shared_mode(trans, queue, shared_mode);
 }
 
+static inline dma_addr_t iwl_trans_get_txq_byte_table(struct iwl_trans *trans,
+						      int queue)
+{
+	/* we should never be called if the trans doesn't support it */
+	BUG_ON(!trans->ops->get_txq_byte_table);
+
+	return trans->ops->get_txq_byte_table(trans, queue);
+}
+
 static inline void iwl_trans_txq_enable(struct iwl_trans *trans, int queue,
 					int fifo, int sta_id, int tid,
 					int frame_limit, u16 ssn,

+ 79 - 0
drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h

@@ -577,6 +577,85 @@ struct iwl_mvm_ba_notif {
 	u8 reserved1;
 } __packed;
 
+/**
+ * struct iwl_mvm_compressed_ba_tfd - progress of a TFD queue
+ * @q_num: TFD queue number
+ * @tfd_index: Index of first un-acked frame in the  TFD queue
+ */
+struct iwl_mvm_compressed_ba_tfd {
+	u8 q_num;
+	u8 reserved;
+	__le16 tfd_index;
+} __packed; /* COMPRESSED_BA_TFD_API_S_VER_1 */
+
+/**
+ * struct iwl_mvm_compressed_ba_ratid - progress of a RA TID queue
+ * @q_num: RA TID queue number
+ * @tid: TID of the queue
+ * @ssn: BA window current SSN
+ */
+struct iwl_mvm_compressed_ba_ratid {
+	u8 q_num;
+	u8 tid;
+	__le16 ssn;
+} __packed; /* COMPRESSED_BA_RATID_API_S_VER_1 */
+
+/*
+ * enum iwl_mvm_ba_resp_flags - TX aggregation status
+ * @IWL_MVM_BA_RESP_TX_AGG: generated due to BA
+ * @IWL_MVM_BA_RESP_TX_BAR: generated due to BA after BAR
+ * @IWL_MVM_BA_RESP_TX_AGG_FAIL: aggregation didn't receive BA
+ * @IWL_MVM_BA_RESP_TX_UNDERRUN: aggregation got underrun
+ * @IWL_MVM_BA_RESP_TX_BT_KILL: aggregation got BT-kill
+ * @IWL_MVM_BA_RESP_TX_DSP_TIMEOUT: aggregation didn't finish within the
+ *	expected time
+ */
+enum iwl_mvm_ba_resp_flags {
+	IWL_MVM_BA_RESP_TX_AGG,
+	IWL_MVM_BA_RESP_TX_BAR,
+	IWL_MVM_BA_RESP_TX_AGG_FAIL,
+	IWL_MVM_BA_RESP_TX_UNDERRUN,
+	IWL_MVM_BA_RESP_TX_BT_KILL,
+	IWL_MVM_BA_RESP_TX_DSP_TIMEOUT
+};
+
+/**
+ * struct iwl_mvm_compressed_ba_notif - notifies about reception of BA
+ * ( BA_NOTIF = 0xc5 )
+ * @flags: status flag, see the &iwl_mvm_ba_resp_flags
+ * @sta_id: Index of recipient (BA-sending) station in fw's station table
+ * @reduced_txp: power reduced according to TPC. This is the actual value and
+ *	not a copy from the LQ command. Thus, if not the first rate was used
+ *	for Tx-ing then this value will be set to 0 by FW.
+ * @initial_rate: TLC rate info, initial rate index, TLC table color
+ * @retry_cnt: retry count
+ * @query_byte_cnt: SCD query byte count
+ * @query_frame_cnt: SCD query frame count
+ * @txed: number of frames sent in the aggregation (all-TIDs)
+ * @done: number of frames that were Acked by the BA (all-TIDs)
+ * @wireless_time: Wireless-media time
+ * @tx_rate: the rate the aggregation was sent at
+ * @tfd_cnt: number of TFD-Q elements
+ * @ra_tid_cnt: number of RATID-Q elements
+ */
+struct iwl_mvm_compressed_ba_notif {
+	__le32 flags;
+	u8 sta_id;
+	u8 reduced_txp;
+	u8 initial_rate;
+	u8 retry_cnt;
+	__le32 query_byte_cnt;
+	__le16 query_frame_cnt;
+	__le16 txed;
+	__le16 done;
+	__le32 wireless_time;
+	__le32 tx_rate;
+	__le16 tfd_cnt;
+	__le16 ra_tid_cnt;
+	struct iwl_mvm_compressed_ba_tfd tfd[1];
+	struct iwl_mvm_compressed_ba_ratid ra_tid[0];
+} __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */
+
 /**
  * struct iwl_mac_beacon_cmd_v6 - beacon template command
  * @tx: the tx commands associated with the beacon frame
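
[Note: tfd[1] followed by ra_tid[0] above is the old-style flexible-trailer idiom: the notification really ends with tfd_cnt TFD entries and then ra_tid_cnt RA/TID entries, so the ra_tid array does not sit at a fixed offset. A userspace model of locating it follows (host-endian stand-ins for the __le16 fields, illustrative names; note the driver currently warns unless tfd_cnt == 1, see mvm/tx.c below):]

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ba_tfd   { uint8_t q_num, reserved; uint16_t tfd_index; } __attribute__((packed));
struct ba_ratid { uint8_t q_num, tid;      uint16_t ssn;       } __attribute__((packed));

/* Only the variable-length tail of the notification is modeled. */
struct ba_notif_tail {
	uint16_t tfd_cnt;
	uint16_t ra_tid_cnt;
	struct ba_tfd tfd[];    /* tfd_cnt entries, then the ra_tids */
} __attribute__((packed));

/* The ra_tid array starts right after the last TFD entry. */
static struct ba_ratid *ba_ratid(struct ba_notif_tail *n)
{
	return (struct ba_ratid *)&n->tfd[n->tfd_cnt];
}

int main(void)
{
	size_t sz = sizeof(struct ba_notif_tail) +
		    2 * sizeof(struct ba_tfd) + sizeof(struct ba_ratid);
	struct ba_notif_tail *n = calloc(1, sz);

	if (!n)
		return 1;
	n->tfd_cnt = 2;
	n->ra_tid_cnt = 1;
	ba_ratid(n)->tid = 5;
	printf("ra_tid[0].tid = %u\n", ba_ratid(n)->tid); /* -> 5 */
	free(n);
	return 0;
}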

+ 19 - 3
drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h

@@ -1977,8 +1977,9 @@ struct iwl_tdls_config_res {
 	struct iwl_tdls_config_sta_info_res sta_info[IWL_MVM_TDLS_STA_COUNT];
 } __packed; /* TDLS_CONFIG_RSP_API_S_VER_1 */
 
-#define TX_FIFO_MAX_NUM		8
-#define RX_FIFO_MAX_NUM		2
+#define TX_FIFO_MAX_NUM_9000		8
+#define TX_FIFO_MAX_NUM			15
+#define RX_FIFO_MAX_NUM			2
 #define TX_FIFO_INTERNAL_MAX_NUM	6
 
 /**
@@ -2004,6 +2005,21 @@ struct iwl_tdls_config_res {
  * NOTE: on firmware that don't have IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG
  *	 set, the last 3 members don't exist.
  */
+struct iwl_shared_mem_cfg_v1 {
+	__le32 shared_mem_addr;
+	__le32 shared_mem_size;
+	__le32 sample_buff_addr;
+	__le32 sample_buff_size;
+	__le32 txfifo_addr;
+	__le32 txfifo_size[TX_FIFO_MAX_NUM_9000];
+	__le32 rxfifo_size[RX_FIFO_MAX_NUM];
+	__le32 page_buff_addr;
+	__le32 page_buff_size;
+	__le32 rxfifo_addr;
+	__le32 internal_txfifo_addr;
+	__le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
+} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */
+
 struct iwl_shared_mem_cfg {
 	__le32 shared_mem_addr;
 	__le32 shared_mem_size;
@@ -2017,7 +2033,7 @@ struct iwl_shared_mem_cfg {
 	__le32 rxfifo_addr;
 	__le32 internal_txfifo_addr;
 	__le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
-} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */
+} __packed; /* SHARED_MEM_ALLOC_API_S_VER_3 */
 
 /**
  * VHT MU-MIMO group configuration

+ 1 - 3
drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c

@@ -440,14 +440,12 @@ static const struct iwl_prph_range iwl_prph_dump_addr_comm[] = {
 	{ .start = 0x00a04560, .end = 0x00a0457c },
 	{ .start = 0x00a04590, .end = 0x00a04598 },
 	{ .start = 0x00a045c0, .end = 0x00a045f4 },
-	{ .start = 0x00a44000, .end = 0x00a7bf80 },
 };
 
 static const struct iwl_prph_range iwl_prph_dump_addr_9000[] = {
 	{ .start = 0x00a05c00, .end = 0x00a05c18 },
 	{ .start = 0x00a05400, .end = 0x00a056e8 },
 	{ .start = 0x00a08000, .end = 0x00a098bc },
-	{ .start = 0x00adfc00, .end = 0x00adfd1c },
 	{ .start = 0x00a02400, .end = 0x00a02758 },
 };
 
@@ -559,7 +557,7 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
 					 sizeof(struct iwl_fw_error_dump_fifo);
 		}
 
-		for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) {
+		for (i = 0; i < mem_cfg->num_txfifo_entries; i++) {
 			if (!mem_cfg->txfifo_size[i])
 				continue;
 

+ 58 - 51
drivers/net/wireless/intel/iwlwifi/mvm/fw.c

@@ -90,15 +90,6 @@ struct iwl_mvm_alive_data {
 	u32 scd_base_addr;
 };
 
-static inline const struct fw_img *
-iwl_get_ucode_image(struct iwl_mvm *mvm, enum iwl_ucode_type ucode_type)
-{
-	if (ucode_type >= IWL_UCODE_TYPE_MAX)
-		return NULL;
-
-	return &mvm->fw->img[ucode_type];
-}
-
 static int iwl_send_tx_ant_cfg(struct iwl_mvm *mvm, u8 valid_tx_ant)
 {
 	struct iwl_tx_ant_cfg_cmd tx_ant_cmd = {
@@ -592,9 +583,9 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
 	    iwl_fw_dbg_conf_usniffer(mvm->fw, FW_DBG_START_FROM_ALIVE) &&
 	    !(fw_has_capa(&mvm->fw->ucode_capa,
 			  IWL_UCODE_TLV_CAPA_USNIFFER_UNIFIED)))
-		fw = iwl_get_ucode_image(mvm, IWL_UCODE_REGULAR_USNIFFER);
+		fw = iwl_get_ucode_image(mvm->fw, IWL_UCODE_REGULAR_USNIFFER);
 	else
-		fw = iwl_get_ucode_image(mvm, ucode_type);
+		fw = iwl_get_ucode_image(mvm->fw, ucode_type);
 	if (WARN_ON(!fw))
 		return -EINVAL;
 	mvm->cur_ucode = ucode_type;
@@ -838,59 +829,48 @@ out:
 	return ret;
 }
 
-static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm)
+static void iwl_mvm_parse_shared_mem_a000(struct iwl_mvm *mvm,
+					  struct iwl_rx_packet *pkt)
 {
-	struct iwl_host_cmd cmd = {
-		.flags = CMD_WANT_SKB,
-		.data = { NULL, },
-		.len = { 0, },
-	};
-	struct iwl_shared_mem_cfg *mem_cfg;
-	struct iwl_rx_packet *pkt;
-	u32 i;
+	struct iwl_shared_mem_cfg *mem_cfg = (void *)pkt->data;
+	int i;
 
-	lockdep_assert_held(&mvm->mutex);
+	mvm->shared_mem_cfg.num_txfifo_entries =
+		ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size);
+	for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++)
+		mvm->shared_mem_cfg.txfifo_size[i] =
+			le32_to_cpu(mem_cfg->txfifo_size[i]);
+	for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++)
+		mvm->shared_mem_cfg.rxfifo_size[i] =
+			le32_to_cpu(mem_cfg->rxfifo_size[i]);
 
-	if (fw_has_capa(&mvm->fw->ucode_capa,
-			IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG))
-		cmd.id = iwl_cmd_id(SHARED_MEM_CFG_CMD, SYSTEM_GROUP, 0);
-	else
-		cmd.id = SHARED_MEM_CFG;
+	BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) !=
+		     sizeof(mem_cfg->internal_txfifo_size));
 
-	if (WARN_ON(iwl_mvm_send_cmd(mvm, &cmd)))
-		return;
+	for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size);
+	     i++)
+		mvm->shared_mem_cfg.internal_txfifo_size[i] =
+			le32_to_cpu(mem_cfg->internal_txfifo_size[i]);
+}
 
-	pkt = cmd.resp_pkt;
-	mem_cfg = (void *)pkt->data;
-
-	mvm->shared_mem_cfg.shared_mem_addr =
-		le32_to_cpu(mem_cfg->shared_mem_addr);
-	mvm->shared_mem_cfg.shared_mem_size =
-		le32_to_cpu(mem_cfg->shared_mem_size);
-	mvm->shared_mem_cfg.sample_buff_addr =
-		le32_to_cpu(mem_cfg->sample_buff_addr);
-	mvm->shared_mem_cfg.sample_buff_size =
-		le32_to_cpu(mem_cfg->sample_buff_size);
-	mvm->shared_mem_cfg.txfifo_addr = le32_to_cpu(mem_cfg->txfifo_addr);
-	for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); i++)
+static void iwl_mvm_parse_shared_mem(struct iwl_mvm *mvm,
+				     struct iwl_rx_packet *pkt)
+{
+	struct iwl_shared_mem_cfg_v1 *mem_cfg = (void *)pkt->data;
+	int i;
+
+	mvm->shared_mem_cfg.num_txfifo_entries =
+		ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size);
+	for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++)
 		mvm->shared_mem_cfg.txfifo_size[i] =
 			le32_to_cpu(mem_cfg->txfifo_size[i]);
 	for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++)
 		mvm->shared_mem_cfg.rxfifo_size[i] =
 			le32_to_cpu(mem_cfg->rxfifo_size[i]);
-	mvm->shared_mem_cfg.page_buff_addr =
-		le32_to_cpu(mem_cfg->page_buff_addr);
-	mvm->shared_mem_cfg.page_buff_size =
-		le32_to_cpu(mem_cfg->page_buff_size);
 
-	/* new API has more data */
+	/* new API has more data, from rxfifo_addr field and on */
 	if (fw_has_capa(&mvm->fw->ucode_capa,
 			IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) {
-		mvm->shared_mem_cfg.rxfifo_addr =
-			le32_to_cpu(mem_cfg->rxfifo_addr);
-		mvm->shared_mem_cfg.internal_txfifo_addr =
-			le32_to_cpu(mem_cfg->internal_txfifo_addr);
-
 		BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) !=
 			     sizeof(mem_cfg->internal_txfifo_size));
 
@@ -900,6 +880,33 @@ static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm)
 			mvm->shared_mem_cfg.internal_txfifo_size[i] =
 				le32_to_cpu(mem_cfg->internal_txfifo_size[i]);
 	}
+}
+
+static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm)
+{
+	struct iwl_host_cmd cmd = {
+		.flags = CMD_WANT_SKB,
+		.data = { NULL, },
+		.len = { 0, },
+	};
+	struct iwl_rx_packet *pkt;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	if (fw_has_capa(&mvm->fw->ucode_capa,
+			IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG))
+		cmd.id = iwl_cmd_id(SHARED_MEM_CFG_CMD, SYSTEM_GROUP, 0);
+	else
+		cmd.id = SHARED_MEM_CFG;
+
+	if (WARN_ON(iwl_mvm_send_cmd(mvm, &cmd)))
+		return;
+
+	pkt = cmd.resp_pkt;
+	if (iwl_mvm_has_new_tx_api(mvm))
+		iwl_mvm_parse_shared_mem_a000(mvm, pkt);
+	else
+		iwl_mvm_parse_shared_mem(mvm, pkt);
 
 	IWL_DEBUG_INFO(mvm, "SHARED MEM CFG: got memory offsets/sizes\n");
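
[Note: the split above follows the same pattern used throughout this series: pick the wire struct by capability or HW generation, then copy each little-endian field into the host-endian driver config. A minimal standalone model of that copy step, with le32toh() from <endian.h> standing in for le32_to_cpu() and an illustrative FIFO count:]

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define N_TXFIFO 8

struct wire_cfg { uint32_t txfifo_size[N_TXFIFO]; }; /* little-endian, as received */
struct host_cfg { uint32_t txfifo_size[N_TXFIFO]; }; /* CPU byte order */

static void parse_shared_mem(struct host_cfg *out, const struct wire_cfg *in)
{
	for (size_t i = 0; i < N_TXFIFO; i++)
		out->txfifo_size[i] = le32toh(in->txfifo_size[i]);
}

int main(void)
{
	struct wire_cfg wire = { .txfifo_size = { htole32(2048) } };
	struct host_cfg host;

	parse_shared_mem(&host, &wire);
	printf("txfifo[0] = %u\n", host.txfifo_size[0]); /* 2048 on any endianness */
	return 0;
}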
 

+ 1 - 8
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h

@@ -604,16 +604,9 @@ enum iwl_mvm_tdls_cs_state {
 };
 
 struct iwl_mvm_shared_mem_cfg {
-	u32 shared_mem_addr;
-	u32 shared_mem_size;
-	u32 sample_buff_addr;
-	u32 sample_buff_size;
-	u32 txfifo_addr;
+	int num_txfifo_entries;
 	u32 txfifo_size[TX_FIFO_MAX_NUM];
 	u32 rxfifo_size[RX_FIFO_MAX_NUM];
-	u32 page_buff_addr;
-	u32 page_buff_size;
-	u32 rxfifo_addr;
 	u32 internal_txfifo_addr;
 	u32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
 };

+ 1 - 1
drivers/net/wireless/intel/iwlwifi/mvm/ops.c

@@ -1672,7 +1672,7 @@ static void iwl_mvm_rx_mq_rss(struct iwl_op_mode *op_mode,
 	else if (unlikely(pkt->hdr.cmd == RX_QUEUES_NOTIFICATION &&
 			  pkt->hdr.group_id == DATA_PATH_GROUP))
 		iwl_mvm_rx_queue_notif(mvm, rxb, queue);
-	else
+	else if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD))
 		iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, queue);
 }
 

+ 4 - 4
drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c

@@ -452,10 +452,10 @@ void iwl_mvm_reorder_timer_expired(unsigned long data)
 	u16 sn = 0, index = 0;
 	bool expired = false;
 
-	spin_lock_bh(&buf->lock);
+	spin_lock(&buf->lock);
 
 	if (!buf->num_stored || buf->removed) {
-		spin_unlock_bh(&buf->lock);
+		spin_unlock(&buf->lock);
 		return;
 	}
 
@@ -492,7 +492,7 @@ void iwl_mvm_reorder_timer_expired(unsigned long data)
 			  buf->reorder_time[index] +
 			  1 + RX_REORDER_BUF_TIMEOUT_MQ);
 	}
-	spin_unlock_bh(&buf->lock);
+	spin_unlock(&buf->lock);
 }
 
 static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue,
@@ -503,7 +503,7 @@ static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue,
 	struct iwl_mvm_reorder_buffer *reorder_buf;
 	u8 baid = data->baid;
 
-	if (WARN_ON_ONCE(baid >= IWL_RX_REORDER_DATA_INVALID_BAID))
+	if (WARN_ONCE(baid >= IWL_MAX_BAID, "invalid BAID: %x\n", baid))
 		return;
 
 	rcu_read_lock();

+ 27 - 9
drivers/net/wireless/intel/iwlwifi/mvm/sta.c

@@ -588,9 +588,7 @@ int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid,
 			ret);
 
 	/* Make sure the SCD wrptr is correctly set before reconfiguring */
-	iwl_trans_txq_enable(mvm->trans, queue, iwl_mvm_ac_to_tx_fifo[ac],
-			     cmd.sta_id, tid, LINK_QUAL_AGG_FRAME_LIMIT_DEF,
-			     ssn, wdg_timeout);
+	iwl_trans_txq_enable_cfg(mvm->trans, queue, ssn, NULL, wdg_timeout);
 
 	/* Update the TID "owner" of the queue */
 	spin_lock_bh(&mvm->queue_info_lock);
@@ -1498,9 +1496,31 @@ int iwl_mvm_rm_sta(struct iwl_mvm *mvm,
 		ret = iwl_mvm_drain_sta(mvm, mvm_sta, false);
 
 		/* If DQA is supported - the queues can be disabled now */
-		if (iwl_mvm_is_dqa_supported(mvm))
+		if (iwl_mvm_is_dqa_supported(mvm)) {
+			u8 reserved_txq = mvm_sta->reserved_queue;
+			enum iwl_mvm_queue_status *status;
+
 			iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta);
 
+			/*
+			 * If no traffic has gone through the reserved TXQ - it
+			 * is still marked as IWL_MVM_QUEUE_RESERVED, and
+			 * should be manually marked as free again
+			 */
+			spin_lock_bh(&mvm->queue_info_lock);
+			status = &mvm->queue_info[reserved_txq].status;
+			if (WARN((*status != IWL_MVM_QUEUE_RESERVED) &&
+				 (*status != IWL_MVM_QUEUE_FREE),
+				 "sta_id %d reserved txq %d status %d",
+				 mvm_sta->sta_id, reserved_txq, *status)) {
+				spin_unlock_bh(&mvm->queue_info_lock);
+				return -EINVAL;
+			}
+
+			*status = IWL_MVM_QUEUE_FREE;
+			spin_unlock_bh(&mvm->queue_info_lock);
+		}
+
 		if (vif->type == NL80211_IFTYPE_STATION &&
 		    mvmvif->ap_sta_id == mvm_sta->sta_id) {
 			/* if associated - we can't remove the AP STA now */
@@ -2030,11 +2050,9 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 		baid_data->baid = baid;
 		baid_data->timeout = timeout;
 		baid_data->last_rx = jiffies;
-		init_timer(&baid_data->session_timer);
-		baid_data->session_timer.function =
-			iwl_mvm_rx_agg_session_expired;
-		baid_data->session_timer.data =
-			(unsigned long)&mvm->baid_map[baid];
+		setup_timer(&baid_data->session_timer,
+			    iwl_mvm_rx_agg_session_expired,
+			    (unsigned long)&mvm->baid_map[baid]);
 		baid_data->mvm = mvm;
 		baid_data->tid = tid;
 		baid_data->sta_id = mvm_sta->sta_id;
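
[Note: setup_timer(), used above, is in this kernel era shorthand for init_timer() plus assigning .function and .data by hand, which is exactly the three-statement sequence being replaced. A trivial userspace model of the consolidation:]

#include <stdio.h>

struct timer_list {
	void (*function)(unsigned long);
	unsigned long data;
};

/* One call instead of init_timer() plus two field assignments. */
static void setup_timer(struct timer_list *t,
			void (*fn)(unsigned long), unsigned long data)
{
	t->function = fn;
	t->data = data;
}

static void expired(unsigned long data)
{
	printf("timer fired, data=%lu\n", data);
}

int main(void)
{
	struct timer_list t;

	setup_timer(&t, expired, 42);
	t.function(t.data); /* simulate expiry */
	return 0;
}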

+ 117 - 58
drivers/net/wireless/intel/iwlwifi/mvm/tx.c

@@ -920,9 +920,13 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
 		tid = IWL_MAX_TID_COUNT;
 	}
 
-	if (iwl_mvm_is_dqa_supported(mvm))
+	if (iwl_mvm_is_dqa_supported(mvm)) {
 		txq_id = mvmsta->tid_data[tid].txq_id;
 
+		if (ieee80211_is_mgmt(fc))
+			tx_cmd->tid_tspec = IWL_TID_NON_QOS;
+	}
+
 	/* Copy MAC header from skb into command buffer */
 	memcpy(tx_cmd->hdr, hdr, hdrlen);
 
@@ -1100,9 +1104,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm,
 		IWL_DEBUG_TX_QUEUES(mvm,
 				    "Can continue DELBA flow ssn = next_recl = %d\n",
 				    tid_data->next_reclaimed);
-		iwl_mvm_disable_txq(mvm, tid_data->txq_id,
-				    vif->hw_queue[tid_to_mac80211_ac[tid]], tid,
-				    CMD_ASYNC);
+		if (!iwl_mvm_is_dqa_supported(mvm)) {
+			u8 mac80211_ac = tid_to_mac80211_ac[tid];
+
+			iwl_mvm_disable_txq(mvm, tid_data->txq_id,
+					    vif->hw_queue[mac80211_ac], tid,
+					    CMD_ASYNC);
+		}
 		tid_data->state = IWL_AGG_OFF;
 		ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
 		break;
@@ -1580,41 +1588,16 @@ void iwl_mvm_rx_tx_cmd(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
 		iwl_mvm_rx_tx_cmd_agg(mvm, pkt);
 }
 
-static void iwl_mvm_tx_info_from_ba_notif(struct ieee80211_tx_info *info,
-					  struct iwl_mvm_ba_notif *ba_notif,
-					  struct iwl_mvm_tid_data *tid_data)
-{
-	info->flags |= IEEE80211_TX_STAT_AMPDU;
-	info->status.ampdu_ack_len = ba_notif->txed_2_done;
-	info->status.ampdu_len = ba_notif->txed;
-	iwl_mvm_hwrate_to_tx_status(tid_data->rate_n_flags,
-				    info);
-	/* TODO: not accounted if the whole A-MPDU failed */
-	info->status.tx_time = tid_data->tx_time;
-	info->status.status_driver_data[0] =
-		(void *)(uintptr_t)ba_notif->reduced_txp;
-	info->status.status_driver_data[1] =
-		(void *)(uintptr_t)tid_data->rate_n_flags;
-}
-
-void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
+static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid,
+			       int txq, int index,
+			       struct ieee80211_tx_info *ba_info, u32 rate)
 {
-	struct iwl_rx_packet *pkt = rxb_addr(rxb);
-	struct iwl_mvm_ba_notif *ba_notif = (void *)pkt->data;
 	struct sk_buff_head reclaimed_skbs;
 	struct iwl_mvm_tid_data *tid_data;
 	struct ieee80211_sta *sta;
 	struct iwl_mvm_sta *mvmsta;
 	struct sk_buff *skb;
-	int sta_id, tid, freed;
-	/* "flow" corresponds to Tx queue */
-	u16 scd_flow = le16_to_cpu(ba_notif->scd_flow);
-	/* "ssn" is start of block-ack Tx window, corresponds to index
-	 * (in Tx queue's circular buffer) of first TFD/frame in window */
-	u16 ba_resp_scd_ssn = le16_to_cpu(ba_notif->scd_ssn);
-
-	sta_id = ba_notif->sta_id;
-	tid = ba_notif->tid;
+	int freed;
 
 	if (WARN_ONCE(sta_id >= IWL_MVM_STATION_COUNT ||
 		      tid >= IWL_MAX_TID_COUNT,
@@ -1634,10 +1617,10 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
 	mvmsta = iwl_mvm_sta_from_mac80211(sta);
 	tid_data = &mvmsta->tid_data[tid];
 
-	if (tid_data->txq_id != scd_flow) {
+	if (tid_data->txq_id != txq) {
 		IWL_ERR(mvm,
-			"invalid BA notification: Q %d, tid %d, flow %d\n",
-			tid_data->txq_id, tid, scd_flow);
+			"invalid BA notification: Q %d, tid %d\n",
+			tid_data->txq_id, tid);
 		rcu_read_unlock();
 		return;
 	}
@@ -1651,27 +1634,14 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
 	 * block-ack window (we assume that they've been successfully
 	 * transmitted ... if not, it's too late anyway).
 	 */
-	iwl_trans_reclaim(mvm->trans, scd_flow, ba_resp_scd_ssn,
-			  &reclaimed_skbs);
-
-	IWL_DEBUG_TX_REPLY(mvm,
-			   "BA_NOTIFICATION Received from %pM, sta_id = %d\n",
-			   (u8 *)&ba_notif->sta_addr_lo32,
-			   ba_notif->sta_id);
-	IWL_DEBUG_TX_REPLY(mvm,
-			   "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n",
-			   ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl),
-			   (unsigned long long)le64_to_cpu(ba_notif->bitmap),
-			   scd_flow, ba_resp_scd_ssn, ba_notif->txed,
-			   ba_notif->txed_2_done);
+	iwl_trans_reclaim(mvm->trans, txq, index, &reclaimed_skbs);
 
-	IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n",
-			   ba_notif->reduced_txp);
-	tid_data->next_reclaimed = ba_resp_scd_ssn;
+	tid_data->next_reclaimed = index;
 
 	iwl_mvm_check_ratid_empty(mvm, sta, tid);
 
 	freed = 0;
+	ba_info->status.status_driver_data[1] = (void *)(uintptr_t)rate;
 
 	skb_queue_walk(&reclaimed_skbs, skb) {
 		struct ieee80211_hdr *hdr = (void *)skb->data;
@@ -1693,8 +1663,12 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
 
 		/* this is the first skb we deliver in this batch */
 		/* put the rate scaling data there */
-		if (freed == 1)
-			iwl_mvm_tx_info_from_ba_notif(info, ba_notif, tid_data);
+		if (freed == 1) {
+			info->flags |= IEEE80211_TX_STAT_AMPDU;
+			memcpy(&info->status, &ba_info->status,
+			       sizeof(ba_info->status));
+			iwl_mvm_hwrate_to_tx_status(rate, info);
+		}
 	}
 
 	spin_unlock_bh(&mvmsta->lock);
@@ -1704,7 +1678,6 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
 	 * Still it's important to update RS about sent vs. acked.
 	 */
 	if (skb_queue_empty(&reclaimed_skbs)) {
-		struct ieee80211_tx_info ba_info = {};
 		struct ieee80211_chanctx_conf *chanctx_conf = NULL;
 
 		if (mvmsta->vif)
@@ -1714,11 +1687,11 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
 		if (WARN_ON_ONCE(!chanctx_conf))
 			goto out;
 
-		ba_info.band = chanctx_conf->def.chan->band;
-		iwl_mvm_tx_info_from_ba_notif(&ba_info, ba_notif, tid_data);
+		ba_info->band = chanctx_conf->def.chan->band;
+		iwl_mvm_hwrate_to_tx_status(rate, ba_info);
 
 		IWL_DEBUG_TX_REPLY(mvm, "No reclaim. Update rs directly\n");
-		iwl_mvm_rs_tx_status(mvm, sta, tid, &ba_info, false);
+		iwl_mvm_rs_tx_status(mvm, sta, tid, ba_info, false);
 	}
 
 out:
@@ -1730,6 +1703,92 @@ out:
 	}
 }
 
+void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
+{
+	struct iwl_rx_packet *pkt = rxb_addr(rxb);
+	int sta_id, tid, txq, index;
+	struct ieee80211_tx_info ba_info = {};
+	struct iwl_mvm_ba_notif *ba_notif;
+	struct iwl_mvm_tid_data *tid_data;
+	struct iwl_mvm_sta *mvmsta;
+
+	if (iwl_mvm_has_new_tx_api(mvm)) {
+		struct iwl_mvm_compressed_ba_notif *ba_res =
+			(void *)pkt->data;
+
+		sta_id = ba_res->sta_id;
+		ba_info.status.ampdu_ack_len = (u8)le16_to_cpu(ba_res->done);
+		ba_info.status.ampdu_len = (u8)le16_to_cpu(ba_res->txed);
+		ba_info.status.tx_time =
+			(u16)le32_to_cpu(ba_res->wireless_time);
+		ba_info.status.status_driver_data[0] =
+			(void *)(uintptr_t)ba_res->reduced_txp;
+
+		/*
+		 * TODO:
+		 * When supporting multi TID aggregations - we need to move
+		 * next_reclaimed to be per TXQ and not per TID or handle it
+		 * in a different way.
+		 * This will go together with SN and AddBA offload and cannot
+		 * be handled properly for now.
+		 */
+		WARN_ON(le16_to_cpu(ba_res->tfd_cnt) != 1);
+		iwl_mvm_tx_reclaim(mvm, sta_id, ba_res->ra_tid[0].tid,
+				   (int)ba_res->tfd[0].q_num,
+				   le16_to_cpu(ba_res->tfd[0].tfd_index),
+				   &ba_info, le32_to_cpu(ba_res->tx_rate));
+
+		IWL_DEBUG_TX_REPLY(mvm,
+				   "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n",
+				   sta_id, le32_to_cpu(ba_res->flags),
+				   le16_to_cpu(ba_res->txed),
+				   le16_to_cpu(ba_res->done));
+		return;
+	}
+
+	ba_notif = (void *)pkt->data;
+	sta_id = ba_notif->sta_id;
+	tid = ba_notif->tid;
+	/* "flow" corresponds to Tx queue */
+	txq = le16_to_cpu(ba_notif->scd_flow);
+	/* "ssn" is start of block-ack Tx window, corresponds to index
+	 * (in Tx queue's circular buffer) of first TFD/frame in window */
+	index = le16_to_cpu(ba_notif->scd_ssn);
+
+	rcu_read_lock();
+	mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id);
+	if (WARN_ON_ONCE(!mvmsta)) {
+		rcu_read_unlock();
+		return;
+	}
+
+	tid_data = &mvmsta->tid_data[tid];
+
+	ba_info.status.ampdu_ack_len = ba_notif->txed_2_done;
+	ba_info.status.ampdu_len = ba_notif->txed;
+	ba_info.status.tx_time = tid_data->tx_time;
+	ba_info.status.status_driver_data[0] =
+		(void *)(uintptr_t)ba_notif->reduced_txp;
+
+	rcu_read_unlock();
+
+	iwl_mvm_tx_reclaim(mvm, sta_id, tid, txq, index, &ba_info,
+			   tid_data->rate_n_flags);
+
+	IWL_DEBUG_TX_REPLY(mvm,
+			   "BA_NOTIFICATION Received from %pM, sta_id = %d\n",
+			   (u8 *)&ba_notif->sta_addr_lo32, ba_notif->sta_id);
+
+	IWL_DEBUG_TX_REPLY(mvm,
+			   "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n",
+			   ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl),
+			   le64_to_cpu(ba_notif->bitmap), txq, index,
+			   ba_notif->txed, ba_notif->txed_2_done);
+
+	IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n",
+			   ba_notif->reduced_txp);
+}
+
 /*
  * Note that there are transports that buffer frames before they reach
  * the firmware. This means that after flush_tx_path is called, the
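
[Note: the shape of the tx.c refactor above, as a standalone sketch: both BA notification formats are decoded into the same (sta_id, tid, txq, index, rate) tuple and funnel into one reclaim helper; only the legacy path needs the driver's per-TID rate, since the compressed format reports tx_rate itself. Struct names and fields are illustrative, not the wire layouts:]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct legacy_ba     { uint8_t sta_id, tid; uint16_t scd_flow, scd_ssn; };
struct compressed_ba { uint8_t sta_id, tid, q_num;
		       uint16_t tfd_index; uint32_t tx_rate; };

/* Common reclaim path shared by both notification formats. */
static void tx_reclaim(int sta_id, int tid, int txq, int index, uint32_t rate)
{
	printf("reclaim sta %d tid %d txq %d up to %d (rate 0x%x)\n",
	       sta_id, tid, txq, index, rate);
}

static void rx_ba_notif(const void *pkt, bool new_tx_api, uint32_t legacy_rate)
{
	if (new_tx_api) {
		const struct compressed_ba *ba = pkt;

		/* rate comes from the notification itself */
		tx_reclaim(ba->sta_id, ba->tid, ba->q_num, ba->tfd_index,
			   ba->tx_rate);
		return;
	}
	const struct legacy_ba *ba = pkt;

	/* legacy path: rate is tracked per-TID by the driver */
	tx_reclaim(ba->sta_id, ba->tid, ba->scd_flow, ba->scd_ssn, legacy_rate);
}

int main(void)
{
	struct compressed_ba ba = { .sta_id = 3, .q_num = 10,
				    .tfd_index = 42, .tx_rate = 0x4103 };

	rx_ba_notif(&ba, true, 0);
	return 0;
}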

+ 5 - 0
drivers/net/wireless/intel/iwlwifi/pcie/drv.c

@@ -500,6 +500,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x24FD, 0x0930, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x0950, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x0850, iwl8265_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24FD, 0x0012, iwl8275_2ac_cfg)},
 
 /* 9000 Series */
 	{IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)},
@@ -523,6 +524,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)},
 	{IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)},
 
 /* a000 Series */
 	{IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)},

+ 76 - 51
drivers/net/wireless/intel/iwlwifi/pcie/internal.h

@@ -37,6 +37,7 @@
 #include <linux/wait.h>
 #include <linux/pci.h>
 #include <linux/timer.h>
+#include <linux/cpu.h>
 
 #include "iwl-fh.h"
 #include "iwl-csr.h"
@@ -49,7 +50,7 @@
  * be needed for potential data in the SKB's head. The remaining ones can
  * be used for frags.
  */
-#define IWL_PCIE_MAX_FRAGS (IWL_NUM_OF_TBS - 3)
+#define IWL_PCIE_MAX_FRAGS(x) (x->max_tbs - 3)
 
 /*
  * RX related structures and functions
@@ -192,41 +193,9 @@ struct iwl_cmd_meta {
 	/* only for SYNC commands, iff the reply skb is wanted */
 	struct iwl_host_cmd *source;
 	u32 flags;
+	u32 tbs;
 };
 
-/*
- * Generic queue structure
- *
- * Contains common data for Rx and Tx queues.
- *
- * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware
- * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless
- * there might be HW changes in the future). For the normal TX
- * queues, n_window, which is the size of the software queue data
- * is also 256; however, for the command queue, n_window is only
- * 32 since we don't need so many commands pending. Since the HW
- * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256. As a result,
- * the software buffers (in the variables @meta, @txb in struct
- * iwl_txq) only have 32 entries, while the HW buffers (@tfds in
- * the same struct) have 256.
- * This means that we end up with the following:
- *  HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 |
- *  SW entries:           | 0      | ... | 31          |
- * where N is a number between 0 and 7. This means that the SW
- * data is a window overlayed over the HW queue.
- */
-struct iwl_queue {
-	int write_ptr;       /* 1-st empty entry (index) host_w*/
-	int read_ptr;         /* last used entry (index) host_r*/
-	/* use for monitoring and recovering the stuck queue */
-	dma_addr_t dma_addr;   /* physical addr for BD's */
-	int n_window;	       /* safe queue window */
-	u32 id;
-	int low_mark;	       /* low watermark, resume queue if free
-				* space more than this */
-	int high_mark;         /* high watermark, stop queue if free
-				* space less than this */
-};
 
 #define TFD_TX_CMD_SLOTS 256
 #define TFD_CMD_SLOTS 32
@@ -273,13 +242,32 @@ struct iwl_pcie_first_tb_buf {
  * @wd_timeout: queue watchdog timeout (jiffies) - per queue
  * @frozen: tx stuck queue timer is frozen
  * @frozen_expiry_remainder: remember how long until the timer fires
+ * @write_ptr: 1-st empty entry (index) host_w
+ * @read_ptr: last used entry (index) host_r
+ * @dma_addr:  physical addr for BD's
+ * @n_window: safe queue window
+ * @id: queue id
+ * @low_mark: low watermark, resume queue if free space more than this
+ * @high_mark: high watermark, stop queue if free space less than this
  *
  * A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame
  * descriptors) and required locking structures.
+ *
+ * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware
+ * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless
+ * there might be HW changes in the future). For the normal TX
+ * queues, n_window, which is the size of the software queue data
+ * is also 256; however, for the command queue, n_window is only
+ * 32 since we don't need so many commands pending. Since the HW
+ * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256.
+ * This means that we end up with the following:
+ *  HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 |
+ *  SW entries:           | 0      | ... | 31          |
+ * where N is a number between 0 and 7. This means that the SW
+ * data is a window overlayed over the HW queue.
  */
 struct iwl_txq {
-	struct iwl_queue q;
-	struct iwl_tfd *tfds;
+	void *tfds;
 	struct iwl_pcie_first_tb_buf *first_tb_bufs;
 	dma_addr_t first_tb_dma;
 	struct iwl_pcie_txq_entry *entries;
@@ -294,6 +282,14 @@ struct iwl_txq {
 	bool block;
 	unsigned long wd_timeout;
 	struct sk_buff_head overflow_q;
+
+	int write_ptr;
+	int read_ptr;
+	dma_addr_t dma_addr;
+	int n_window;
+	u32 id;
+	int low_mark;
+	int high_mark;
 };
 
 static inline dma_addr_t
@@ -308,6 +304,16 @@ struct iwl_tso_hdr_page {
 	u8 *pos;
 };
 
+/**
+ * enum iwl_shared_irq_flags - level of sharing for irq
+ * @IWL_SHARED_IRQ_NON_RX: interrupt vector serves non rx causes.
+ * @IWL_SHARED_IRQ_FIRST_RSS: interrupt vector serves first RSS queue.
+ */
+enum iwl_shared_irq_flags {
+	IWL_SHARED_IRQ_NON_RX		= BIT(0),
+	IWL_SHARED_IRQ_FIRST_RSS	= BIT(1),
+};
+
 /**
  * struct iwl_trans_pcie - PCIe transport specific data
  * @rxq: all the RX queue data
@@ -338,8 +344,10 @@ struct iwl_tso_hdr_page {
  * @fw_mon_size: size of the buffer for the firmware monitor
  * @msix_entries: array of MSI-X entries
  * @msix_enabled: true if managed to enable MSI-X
- * @allocated_vector: the number of interrupt vector allocated by the OS
- * @default_irq_num: default irq for non rx interrupt
+ * @shared_vec_mask: the type of causes the shared vector handles
+ *	(see iwl_shared_irq_flags).
+ * @alloc_vecs: the number of interrupt vectors allocated by the OS
+ * @def_irq: default irq for non rx causes
  * @fh_init_mask: initial unmasked fh causes
  * @hw_init_mask: initial unmasked hw causes
  * @fh_mask: current unmasked fh causes
@@ -391,6 +399,8 @@ struct iwl_trans_pcie {
 	unsigned int cmd_q_wdg_timeout;
 	u8 n_no_reclaim_cmds;
 	u8 no_reclaim_cmds[MAX_NO_RECLAIM_CMDS];
+	u8 max_tbs;
+	u16 tfd_size;
 
 	enum iwl_amsdu_size rx_buf_size;
 	bool bc_table_dword;
@@ -410,12 +420,14 @@ struct iwl_trans_pcie {
 
 	struct msix_entry msix_entries[IWL_MAX_RX_HW_QUEUES];
 	bool msix_enabled;
-	u32 allocated_vector;
-	u32 default_irq_num;
+	u8 shared_vec_mask;
+	u32 alloc_vecs;
+	u32 def_irq;
 	u32 fh_init_mask;
 	u32 hw_init_mask;
 	u32 fh_mask;
 	u32 hw_mask;
+	cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES];
 };
 
 static inline struct iwl_trans_pcie *
@@ -474,6 +486,7 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int queue,
 				bool configure_scd);
 void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id,
 					bool shared_mode);
+dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq);
 void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans,
 				  struct iwl_txq *txq);
 int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
@@ -486,9 +499,21 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 			    struct sk_buff_head *skbs);
 void iwl_trans_pcie_tx_reset(struct iwl_trans *trans);
 
-static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_tfd *tfd, u8 idx)
+static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *tfd,
+					  u8 idx)
 {
-	struct iwl_tfd_tb *tb = &tfd->tbs[idx];
+	struct iwl_tfd *tfd_fh;
+	struct iwl_tfd_tb *tb;
+
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
+		struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx];
+
+		return le16_to_cpu(tb->tb_len);
+	}
+
+	tfd_fh = (void *)tfd;
+	tb = &tfd_fh->tbs[idx];
 
 	return le16_to_cpu(tb->hi_n_len) >> 4;
 }
@@ -617,9 +642,9 @@ static inline void iwl_wake_queue(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
-	if (test_and_clear_bit(txq->q.id, trans_pcie->queue_stopped)) {
-		IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->q.id);
-		iwl_op_mode_queue_not_full(trans->op_mode, txq->q.id);
+	if (test_and_clear_bit(txq->id, trans_pcie->queue_stopped)) {
+		IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->id);
+		iwl_op_mode_queue_not_full(trans->op_mode, txq->id);
 	}
 }
 
@@ -628,22 +653,22 @@ static inline void iwl_stop_queue(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
-	if (!test_and_set_bit(txq->q.id, trans_pcie->queue_stopped)) {
-		iwl_op_mode_queue_full(trans->op_mode, txq->q.id);
-		IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->q.id);
+	if (!test_and_set_bit(txq->id, trans_pcie->queue_stopped)) {
+		iwl_op_mode_queue_full(trans->op_mode, txq->id);
+		IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->id);
 	} else
 		IWL_DEBUG_TX_QUEUES(trans, "hwq %d already stopped\n",
-				    txq->q.id);
+				    txq->id);
 }
 
-static inline bool iwl_queue_used(const struct iwl_queue *q, int i)
+static inline bool iwl_queue_used(const struct iwl_txq *q, int i)
 {
 	return q->write_ptr >= q->read_ptr ?
 		(i >= q->read_ptr && i < q->write_ptr) :
 		!(i < q->read_ptr && i >= q->write_ptr);
 }
 
-static inline u8 get_cmd_index(struct iwl_queue *q, u32 index)
+static inline u8 get_cmd_index(struct iwl_txq *q, u32 index)
 {
 	return index & (q->n_window - 1);
 }
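
[Note: get_cmd_index() above relies on n_window being a power of two (32 for the command queue, 256 otherwise), so the AND is an exact modulo that maps the 256-entry HW ring onto the SW window. A worked standalone example:]

#include <stdio.h>

#define TFD_QUEUE_SIZE_MAX 256 /* HW ring entries, fixed */
#define TFD_CMD_SLOTS       32 /* SW window for the command queue */

/* Valid because n_window is a power of two, so masking with
 * (n_window - 1) equals index % n_window. */
static unsigned int cmd_index(unsigned int index, unsigned int n_window)
{
	return index & (n_window - 1);
}

int main(void)
{
	/* HW descriptor 200 lands in SW slot 8 of the 32-entry window:
	 * 200 = 6 * 32 + 8. */
	printf("%u\n", cmd_index(200, TFD_CMD_SLOTS)); /* -> 8 */
	return 0;
}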

+ 15 - 1
drivers/net/wireless/intel/iwlwifi/pcie/rx.c

@@ -1142,7 +1142,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
 
 		sequence = le16_to_cpu(pkt->hdr.sequence);
 		index = SEQ_TO_INDEX(sequence);
-		cmd_index = get_cmd_index(&txq->q, index);
+		cmd_index = get_cmd_index(txq, index);
 
 		if (rxq->id == 0)
 			iwl_op_mode_rx(trans->op_mode, &rxq->napi,
@@ -1885,6 +1885,20 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
 			      inta_fh,
 			      iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD));
 
+	if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX) &&
+	    inta_fh & MSIX_FH_INT_CAUSES_Q0) {
+		local_bh_disable();
+		iwl_pcie_rx_handle(trans, 0);
+		local_bh_enable();
+	}
+
+	if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS) &&
+	    inta_fh & MSIX_FH_INT_CAUSES_Q1) {
+		local_bh_disable();
+		iwl_pcie_rx_handle(trans, 1);
+		local_bh_enable();
+	}
+
 	/* This "Tx" DMA channel is used only for loading uCode */
 	if (inta_fh & MSIX_FH_INT_CAUSES_D2S_CH0_NUM) {
 		IWL_DEBUG_ISR(trans, "uCode load interrupt\n");
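
[Note: the two new blocks above are what makes vector sharing work: when the OS granted fewer MSI-X vectors than requested, the default (non-RX) vector also carries the Q0 and/or Q1 RX causes, so the non-RX handler must poll those queues too. A standalone model of that dispatch; in-kernel the calls run under local_bh_disable(), and the names here are illustrative:]

#include <stdint.h>
#include <stdio.h>

#define CAUSE_Q0         (1u << 0)
#define CAUSE_Q1         (1u << 1)
#define SHARED_NON_RX    (1u << 0)
#define SHARED_FIRST_RSS (1u << 1)

static void rx_handle(int q) { printf("handle rxq %d\n", q); }

static void msix_handler(uint32_t inta_fh, uint8_t shared_vec_mask)
{
	/* default vector also serves RX queue 0 (fallback queue) */
	if ((shared_vec_mask & SHARED_NON_RX) && (inta_fh & CAUSE_Q0))
		rx_handle(0);
	/* and, if sharing goes further, the first RSS queue too */
	if ((shared_vec_mask & SHARED_FIRST_RSS) && (inta_fh & CAUSE_Q1))
		rx_handle(1);
}

int main(void)
{
	msix_handler(CAUSE_Q0 | CAUSE_Q1, SHARED_NON_RX | SHARED_FIRST_RSS);
	return 0;
}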

+ 167 - 80
drivers/net/wireless/intel/iwlwifi/pcie/trans.c

@@ -1170,7 +1170,7 @@ static void iwl_pcie_synchronize_irqs(struct iwl_trans *trans)
 	if (trans_pcie->msix_enabled) {
 		int i;
 
-		for (i = 0; i < trans_pcie->allocated_vector; i++)
+		for (i = 0; i < trans_pcie->alloc_vecs; i++)
 			synchronize_irq(trans_pcie->msix_entries[i].vector);
 	} else {
 		synchronize_irq(trans_pcie->pci_dev->irq);
@@ -1429,13 +1429,58 @@ static struct iwl_causes_list causes_list[] = {
 	{MSIX_HW_INT_CAUSES_REG_HAP,		CSR_MSIX_HW_INT_MASK_AD, 0x2E},
 };
 
+static void iwl_pcie_map_non_rx_causes(struct iwl_trans *trans)
+{
+	struct iwl_trans_pcie *trans_pcie =  IWL_TRANS_GET_PCIE_TRANS(trans);
+	int val = trans_pcie->def_irq | MSIX_NON_AUTO_CLEAR_CAUSE;
+	int i;
+
+	/*
+	 * Access all non RX causes and map them to the default irq.
+	 * In case we are missing at least one interrupt vector,
+	 * the first interrupt vector will serve non-RX and FBQ causes.
+	 */
+	for (i = 0; i < ARRAY_SIZE(causes_list); i++) {
+		iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val);
+		iwl_clear_bit(trans, causes_list[i].mask_reg,
+			      causes_list[i].cause_num);
+	}
+}
+
+static void iwl_pcie_map_rx_causes(struct iwl_trans *trans)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	u32 offset =
+		trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 1 : 0;
+	u32 val, idx;
+
+	/*
+	 * The first RX queue - fallback queue, which is designated for
+	 * management frame, command responses etc, is always mapped to the
+	 * first interrupt vector. The other RX queues are mapped to
+	 * the other (N - 2) interrupt vectors.
+	 */
+	val = BIT(MSIX_FH_INT_CAUSES_Q(0));
+	for (idx = 1; idx < trans->num_rx_queues; idx++) {
+		iwl_write8(trans, CSR_MSIX_RX_IVAR(idx),
+			   MSIX_FH_INT_CAUSES_Q(idx - offset));
+		val |= BIT(MSIX_FH_INT_CAUSES_Q(idx));
+	}
+	iwl_write32(trans, CSR_MSIX_FH_INT_MASK_AD, ~val);
+
+	val = MSIX_FH_INT_CAUSES_Q(0);
+	if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX)
+		val |= MSIX_NON_AUTO_CLEAR_CAUSE;
+	iwl_write8(trans, CSR_MSIX_RX_IVAR(0), val);
+
+	if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS)
+		iwl_write8(trans, CSR_MSIX_RX_IVAR(1), val);
+}
+
 static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie)
 {
-	u32 val, max_rx_vector, i;
 	struct iwl_trans *trans = trans_pcie->trans;
 
-	max_rx_vector = trans_pcie->allocated_vector - 1;
-
 	if (!trans_pcie->msix_enabled) {
 		if (trans->cfg->mq_rx_supported)
 			iwl_write_prph(trans, UREG_CHICK,
@@ -1446,25 +1491,16 @@ static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie)
 	iwl_write_prph(trans, UREG_CHICK, UREG_CHICK_MSIX_ENABLE);
 
 	/*
-	 * Each cause from the list above and the RX causes is represented as
-	 * a byte in the IVAR table. We access the first (N - 1) bytes and map
-	 * them to the (N - 1) vectors so these vectors will be used as rx
-	 * vectors. Then access all non rx causes and map them to the
-	 * default queue (N'th queue).
+	 * Each cause from the causes list above and the RX causes is
+	 * represented as a byte in the IVAR table. The first nibble
+	 * represents the bound interrupt vector of the cause, the second
+	 * represents no auto clear for this cause. This will be set if its
+	 * interrupt vector is bound to serve other causes.
 	 */
-	for (i = 0; i < max_rx_vector; i++) {
-		iwl_write8(trans, CSR_MSIX_RX_IVAR(i), MSIX_FH_INT_CAUSES_Q(i));
-		iwl_clear_bit(trans, CSR_MSIX_FH_INT_MASK_AD,
-			      BIT(MSIX_FH_INT_CAUSES_Q(i)));
-	}
+	iwl_pcie_map_rx_causes(trans);
+
+	iwl_pcie_map_non_rx_causes(trans);
 
-	for (i = 0; i < ARRAY_SIZE(causes_list); i++) {
-		val = trans_pcie->default_irq_num |
-			MSIX_NON_AUTO_CLEAR_CAUSE;
-		iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val);
-		iwl_clear_bit(trans, causes_list[i].mask_reg,
-			      causes_list[i].cause_num);
-	}
 	trans_pcie->fh_init_mask =
 		~iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD);
 	trans_pcie->fh_mask = trans_pcie->fh_init_mask;
@@ -1477,40 +1513,55 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev,
 					struct iwl_trans *trans)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	int max_irqs, num_irqs, i, ret, nr_online_cpus;
 	u16 pci_cmd;
-	int max_vector;
-	int ret, i;
-
-	if (trans->cfg->mq_rx_supported) {
-		max_vector = min_t(u32, (num_possible_cpus() + 2),
-				   IWL_MAX_RX_HW_QUEUES);
-		for (i = 0; i < max_vector; i++)
-			trans_pcie->msix_entries[i].entry = i;
-
-		ret = pci_enable_msix_range(pdev, trans_pcie->msix_entries,
-					    MSIX_MIN_INTERRUPT_VECTORS,
-					    max_vector);
-		if (ret > 1) {
-			IWL_DEBUG_INFO(trans,
-				       "Enable MSI-X allocate %d interrupt vector\n",
-				       ret);
-			trans_pcie->allocated_vector = ret;
-			trans_pcie->default_irq_num =
-				trans_pcie->allocated_vector - 1;
-			trans_pcie->trans->num_rx_queues =
-				trans_pcie->allocated_vector - 1;
-			trans_pcie->msix_enabled = true;
-
-			return;
-		}
+
+	if (!trans->cfg->mq_rx_supported)
+		goto enable_msi;
+
+	nr_online_cpus = num_online_cpus();
+	max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES);
+	for (i = 0; i < max_irqs; i++)
+		trans_pcie->msix_entries[i].entry = i;
+
+	num_irqs = pci_enable_msix_range(pdev, trans_pcie->msix_entries,
+					 MSIX_MIN_INTERRUPT_VECTORS,
+					 max_irqs);
+	if (num_irqs < 0) {
 		IWL_DEBUG_INFO(trans,
-			       "ret = %d %s move to msi mode\n", ret,
-			       (ret == 1) ?
-			       "can't allocate more than 1 interrupt vector" :
-			       "failed to enable msi-x mode");
-		pci_disable_msix(pdev);
+			       "Failed to enable msi-x mode (ret %d). Moving to msi mode.\n",
+			       num_irqs);
+		goto enable_msi;
+	}
+	trans_pcie->def_irq = (num_irqs == max_irqs) ? num_irqs - 1 : 0;
+
+	IWL_DEBUG_INFO(trans,
+		       "MSI-X enabled. %d interrupt vectors were allocated\n",
+		       num_irqs);
+
+	/*
+	 * In case the OS provides fewer interrupts than requested, different
+	 * causes will share the same interrupt vector as follows:
+	 * One interrupt less: non rx causes shared with FBQ.
+	 * Two interrupts less: non rx causes shared with FBQ and RSS.
+	 * More than two interrupts: we will use fewer RSS queues.
+	 */
+	if (num_irqs <= nr_online_cpus) {
+		trans_pcie->trans->num_rx_queues = num_irqs + 1;
+		trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX |
+			IWL_SHARED_IRQ_FIRST_RSS;
+	} else if (num_irqs == nr_online_cpus + 1) {
+		trans_pcie->trans->num_rx_queues = num_irqs;
+		trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX;
+	} else {
+		trans_pcie->trans->num_rx_queues = num_irqs - 1;
 	}
 
+	trans_pcie->alloc_vecs = num_irqs;
+	trans_pcie->msix_enabled = true;
+	return;
+
+enable_msi:
 	ret = pci_enable_msi(pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", ret);
@@ -1523,19 +1574,41 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev,
 	}
 }
 
+static void iwl_pcie_irq_set_affinity(struct iwl_trans *trans)
+{
+	int iter_rx_q, i, ret, cpu, offset;
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+	i = trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 0 : 1;
+	iter_rx_q = trans_pcie->trans->num_rx_queues - 1 + i;
+	offset = 1 + i;
+	for (; i < iter_rx_q ; i++) {
+		/*
+		 * Get the cpu prior to the place to search
+		 * (i.e. return will be > i - 1).
+		 */
+		cpu = cpumask_next(i - offset, cpu_online_mask);
+		cpumask_set_cpu(cpu, &trans_pcie->affinity_mask[i]);
+		ret = irq_set_affinity_hint(trans_pcie->msix_entries[i].vector,
+					    &trans_pcie->affinity_mask[i]);
+		if (ret)
+			IWL_ERR(trans_pcie->trans,
+				"Failed to set affinity mask for IRQ %d\n",
+				i);
+	}
+}
+
 static int iwl_pcie_init_msix_handler(struct pci_dev *pdev,
 				      struct iwl_trans_pcie *trans_pcie)
 {
-	int i, last_vector;
-
-	last_vector = trans_pcie->trans->num_rx_queues;
+	int i;
 
-	for (i = 0; i < trans_pcie->allocated_vector; i++) {
+	for (i = 0; i < trans_pcie->alloc_vecs; i++) {
 		int ret;
 
 		ret = request_threaded_irq(trans_pcie->msix_entries[i].vector,
 					   iwl_pcie_msix_isr,
-					   (i == last_vector) ?
+					   (i == trans_pcie->def_irq) ?
 					   iwl_pcie_irq_msix_handler :
 					   iwl_pcie_irq_rx_msix_handler,
 					   IRQF_SHARED,
@@ -1553,6 +1626,7 @@ static int iwl_pcie_init_msix_handler(struct pci_dev *pdev,
 			return ret;
 		}
 	}
+	iwl_pcie_irq_set_affinity(trans_pcie->trans);
 
 	return 0;
 }
@@ -1712,9 +1786,14 @@ void iwl_trans_pcie_free(struct iwl_trans *trans)
 	iwl_pcie_rx_free(trans);
 
 	if (trans_pcie->msix_enabled) {
-		for (i = 0; i < trans_pcie->allocated_vector; i++)
+		for (i = 0; i < trans_pcie->alloc_vecs; i++) {
+			irq_set_affinity_hint(
+				trans_pcie->msix_entries[i].vector,
+				NULL);
+
 			free_irq(trans_pcie->msix_entries[i].vector,
 				 &trans_pcie->msix_entries[i]);
+		}
 
 		pci_disable_msix(trans_pcie->pci_dev);
 		trans_pcie->msix_enabled = false;
@@ -1899,7 +1978,7 @@ static void iwl_trans_pcie_freeze_txq_timer(struct iwl_trans *trans,
 
 		txq->frozen = freeze;
 
-		if (txq->q.read_ptr == txq->q.write_ptr)
+		if (txq->read_ptr == txq->write_ptr)
 			goto next_queue;
 
 		if (freeze) {
@@ -1947,7 +2026,7 @@ static void iwl_trans_pcie_block_txq_ptrs(struct iwl_trans *trans, bool block)
 			txq->block--;
 			if (!txq->block) {
 				iwl_write32(trans, HBUS_TARG_WRPTR,
-					    txq->q.write_ptr | (i << 8));
+					    txq->write_ptr | (i << 8));
 			}
 		} else if (block) {
 			txq->block++;
@@ -1967,14 +2046,14 @@ void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq)
 	int cnt;
 
 	IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n",
-		txq->q.read_ptr, txq->q.write_ptr);
+		txq->read_ptr, txq->write_ptr);
 
 	if (trans->cfg->use_tfh)
 		/* TODO: access new SCD registers and dump them */
 		return;
 
 	scd_sram_addr = trans_pcie->scd_base_addr +
-			SCD_TX_STTS_QUEUE_OFFSET(txq->q.id);
+			SCD_TX_STTS_QUEUE_OFFSET(txq->id);
 	iwl_trans_read_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf));
 
 	iwl_print_hex_error(trans, buf, sizeof(buf));
@@ -2009,7 +2088,6 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq;
-	struct iwl_queue *q;
 	int cnt;
 	unsigned long now = jiffies;
 	int ret = 0;
@@ -2027,13 +2105,12 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm)
 
 		IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", cnt);
 		txq = &trans_pcie->txq[cnt];
-		q = &txq->q;
-		wr_ptr = ACCESS_ONCE(q->write_ptr);
+		wr_ptr = ACCESS_ONCE(txq->write_ptr);
 
-		while (q->read_ptr != ACCESS_ONCE(q->write_ptr) &&
+		while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) &&
 		       !time_after(jiffies,
 				   now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) {
-			u8 write_ptr = ACCESS_ONCE(q->write_ptr);
+			u8 write_ptr = ACCESS_ONCE(txq->write_ptr);
 
 			if (WARN_ONCE(wr_ptr != write_ptr,
 				      "WR pointer moved while flushing %d -> %d\n",
@@ -2042,7 +2119,7 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm)
 			usleep_range(1000, 2000);
 		}
 
-		if (q->read_ptr != q->write_ptr) {
+		if (txq->read_ptr != txq->write_ptr) {
 			IWL_ERR(trans,
 				"fail to flush all tx fifo queues Q %d\n", cnt);
 			ret = -ETIMEDOUT;
@@ -2210,7 +2287,6 @@ static ssize_t iwl_dbgfs_tx_queue_read(struct file *file,
 	struct iwl_trans *trans = file->private_data;
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq;
-	struct iwl_queue *q;
 	char *buf;
 	int pos = 0;
 	int cnt;
@@ -2228,10 +2304,9 @@ static ssize_t iwl_dbgfs_tx_queue_read(struct file *file,
 
 	for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) {
 		txq = &trans_pcie->txq[cnt];
-		q = &txq->q;
 		pos += scnprintf(buf + pos, bufsz - pos,
 				"hwq %.2d: read=%u write=%u use=%d stop=%d need_update=%d frozen=%d%s\n",
-				cnt, q->read_ptr, q->write_ptr,
+				cnt, txq->read_ptr, txq->write_ptr,
 				!!test_bit(cnt, trans_pcie->queue_used),
 				 !!test_bit(cnt, trans_pcie->queue_stopped),
 				 txq->need_update, txq->frozen,
@@ -2437,13 +2512,14 @@ err:
 }
 #endif /*CONFIG_IWLWIFI_DEBUGFS */
 
-static u32 iwl_trans_pcie_get_cmdlen(struct iwl_tfd *tfd)
+static u32 iwl_trans_pcie_get_cmdlen(struct iwl_trans *trans, void *tfd)
 {
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u32 cmdlen = 0;
 	int i;
 
-	for (i = 0; i < IWL_NUM_OF_TBS; i++)
-		cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i);
+	for (i = 0; i < trans_pcie->max_tbs; i++)
+		cmdlen += iwl_pcie_tfd_tb_get_len(trans, tfd, i);
 
 	return cmdlen;
 }
@@ -2658,7 +2734,7 @@ static struct iwl_trans_dump_data
 
 	/* host commands */
 	len += sizeof(*data) +
-		cmdq->q.n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE);
+		cmdq->n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE);
 
 	/* FW monitor */
 	if (trans_pcie->fw_mon_page) {
@@ -2726,12 +2802,13 @@ static struct iwl_trans_dump_data
 	data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_TXCMD);
 	txcmd = (void *)data->data;
 	spin_lock_bh(&cmdq->lock);
-	ptr = cmdq->q.write_ptr;
-	for (i = 0; i < cmdq->q.n_window; i++) {
-		u8 idx = get_cmd_index(&cmdq->q, ptr);
+	ptr = cmdq->write_ptr;
+	for (i = 0; i < cmdq->n_window; i++) {
+		u8 idx = get_cmd_index(cmdq, ptr);
 		u32 caplen, cmdlen;
 
-		cmdlen = iwl_trans_pcie_get_cmdlen(&cmdq->tfds[ptr]);
+		cmdlen = iwl_trans_pcie_get_cmdlen(trans, cmdq->tfds +
+						   trans_pcie->tfd_size * ptr);
 		caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen);
 
 		if (cmdlen) {
@@ -2801,6 +2878,8 @@ static const struct iwl_trans_ops trans_ops_pcie = {
 	.txq_disable = iwl_trans_pcie_txq_disable,
 	.txq_enable = iwl_trans_pcie_txq_enable,
 
+	.get_txq_byte_table = iwl_trans_pcie_get_txq_byte_table,
+
 	.txq_set_shared_mode = iwl_trans_pcie_txq_set_shared_mode,
 
 	.wait_tx_queue_empty = iwl_trans_pcie_wait_txq_empty,
@@ -2839,8 +2918,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
 	if (!trans)
 		return ERR_PTR(-ENOMEM);
 
-	trans->max_skb_frags = IWL_PCIE_MAX_FRAGS;
-
 	trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
 	trans_pcie->trans = trans;
@@ -2874,6 +2951,16 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
 	else
 		addr_size = 36;
 
+	if (cfg->use_tfh) {
+		trans_pcie->max_tbs = IWL_TFH_NUM_TBS;
+		trans_pcie->tfd_size = sizeof(struct iwl_tfh_tfd);
+	} else {
+		trans_pcie->max_tbs = IWL_NUM_OF_TBS;
+		trans_pcie->tfd_size = sizeof(struct iwl_tfd);
+	}
+	trans->max_skb_frags = IWL_PCIE_MAX_FRAGS(trans_pcie);
+
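
Since the TFD format is now chosen at runtime, the fragment budget follows max_tbs rather than the old fixed constant. A plausible shape for the reworked macro (an assumption here; the real definition lives in pcie/internal.h, outside this hunk):

/*
 * Sketch: three TBs are already spoken for -- the bi-directional first
 * TB, the remainder of the Tx command/header, and the linear part of
 * the SKB -- leaving max_tbs - 3 for page fragments.
 */
#define IWL_PCIE_MAX_FRAGS(trans_pcie) ((trans_pcie)->max_tbs - 3)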
 	pci_set_master(pdev);
 
 	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size));

+ 227 - 157
drivers/net/wireless/intel/iwlwifi/pcie/tx.c

@@ -71,7 +71,7 @@
  *
  ***************************************************/
 
-static int iwl_queue_space(const struct iwl_queue *q)
+static int iwl_queue_space(const struct iwl_txq *q)
 {
 	unsigned int max;
 	unsigned int used;
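
The body of iwl_queue_space() is untouched by this hunk; for orientation, the ring arithmetic it performs looks roughly like the sketch below (reconstructed against the merged struct iwl_txq, so treat the field usage as illustrative):

/* Sketch of free-slot accounting for a power-of-two ring. */
static int queue_space_sketch(const struct iwl_txq *q)
{
	unsigned int max, used;

	/* Keep at least one slot free so the full and empty ring
	 * states remain distinguishable. */
	if (q->n_window < TFD_QUEUE_SIZE_MAX)
		max = q->n_window;
	else
		max = TFD_QUEUE_SIZE_MAX - 1;

	/* TFD_QUEUE_SIZE_MAX is a power of 2, so masking is a modulo. */
	used = (q->write_ptr - q->read_ptr) & (TFD_QUEUE_SIZE_MAX - 1);

	return used > max ? 0 : max - used;
}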
@@ -102,7 +102,7 @@ static int iwl_queue_space(const struct iwl_queue *q)
 /*
  * iwl_queue_init - Initialize queue's high/low-water and read/write indexes
  */
-static int iwl_queue_init(struct iwl_queue *q, int slots_num, u32 id)
+static int iwl_queue_init(struct iwl_txq *q, int slots_num, u32 id)
 {
 	q->n_window = slots_num;
 	q->id = id;
@@ -158,13 +158,13 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data)
 
 	spin_lock(&txq->lock);
 	/* check if triggered erroneously */
-	if (txq->q.read_ptr == txq->q.write_ptr) {
+	if (txq->read_ptr == txq->write_ptr) {
 		spin_unlock(&txq->lock);
 		return;
 	}
 	spin_unlock(&txq->lock);
 
-	IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->q.id,
+	IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->id,
 		jiffies_to_msecs(txq->wd_timeout));
 
 	iwl_trans_pcie_log_scd_error(trans, txq);
@@ -176,22 +176,21 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data)
  * iwl_pcie_txq_update_byte_cnt_tbl - Set up entry in Tx byte-count array
  */
 static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
-					     struct iwl_txq *txq, u16 byte_cnt)
+					     struct iwl_txq *txq, u16 byte_cnt,
+					     int num_tbs)
 {
 	struct iwlagn_scd_bc_tbl *scd_bc_tbl;
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	int write_ptr = txq->q.write_ptr;
-	int txq_id = txq->q.id;
+	int write_ptr = txq->write_ptr;
+	int txq_id = txq->id;
 	u8 sec_ctl = 0;
-	u8 sta_id = 0;
 	u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE;
 	__le16 bc_ent;
 	struct iwl_tx_cmd *tx_cmd =
-		(void *) txq->entries[txq->q.write_ptr].cmd->payload;
+		(void *)txq->entries[txq->write_ptr].cmd->payload;
 
 	scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
 
-	sta_id = tx_cmd->sta_id;
 	sec_ctl = tx_cmd->sec_ctl;
 
 	switch (sec_ctl & TX_CMD_SEC_MSK) {
@@ -205,14 +204,32 @@ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
 		len += IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN;
 		break;
 	}
-
 	if (trans_pcie->bc_table_dword)
 		len = DIV_ROUND_UP(len, 4);
 
 	if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX))
 		return;
 
-	bc_ent = cpu_to_le16(len | (sta_id << 12));
+	if (trans->cfg->use_tfh) {
+		u8 filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
+				     num_tbs * sizeof(struct iwl_tfh_tb);
+		/*
+		 * filled_tfd_size contains the number of filled bytes in the
+		 * TFD.
+		 * Dividing it by 64 gives the number of chunks to fetch
+		 * to SRAM: 0 for one chunk, 1 for two, and so on.
+		 * If, for example, the TFD contains only 3 TBs, then 32
+		 * bytes of the TFD are used and only one 64-byte chunk
+		 * needs to be fetched.
+		 */
+		u8 num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;
+
+		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
+	} else {
+		u8 sta_id = tx_cmd->sta_id;
+
+		bc_ent = cpu_to_le16(len | (sta_id << 12));
+	}
 
 	scd_bc_tbl[txq_id].tfd_offset[write_ptr] = bc_ent;
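
A quick worked check of the chunk computation in the use_tfh branch above (pure arithmetic; the 64-byte fetch granularity is the only input):

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* filled_tfd_size -> extra 64-byte chunks beyond the first fetch */
static unsigned int num_fetch_chunks(unsigned int filled_tfd_size)
{
	return DIV_ROUND_UP(filled_tfd_size, 64) - 1;
}

/*
 * num_fetch_chunks(32)  == 0   one 64-byte fetch covers the TFD
 * num_fetch_chunks(64)  == 0   still a single chunk
 * num_fetch_chunks(65)  == 1   two chunks
 * num_fetch_chunks(200) == 3   four chunks
 */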
 
@@ -227,12 +244,12 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
 	struct iwl_trans_pcie *trans_pcie =
 		IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
-	int txq_id = txq->q.id;
-	int read_ptr = txq->q.read_ptr;
+	int txq_id = txq->id;
+	int read_ptr = txq->read_ptr;
 	u8 sta_id = 0;
 	__le16 bc_ent;
 	struct iwl_tx_cmd *tx_cmd =
-		(void *)txq->entries[txq->q.read_ptr].cmd->payload;
+		(void *)txq->entries[read_ptr].cmd->payload;
 
 	WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
 
@@ -240,6 +257,7 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
 		sta_id = tx_cmd->sta_id;
 
 	bc_ent = cpu_to_le16(1 | (sta_id << 12));
+
 	scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent;
 
 	if (read_ptr < TFD_QUEUE_SIZE_BC_DUP)
@@ -255,7 +273,7 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u32 reg = 0;
-	int txq_id = txq->q.id;
+	int txq_id = txq->id;
 
 	lockdep_assert_held(&txq->lock);
 
@@ -289,10 +307,10 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
 	 * if not in power-save mode, uCode will never sleep when we're
 	 * trying to tx (during RFKILL, we're not trying to tx).
 	 */
-	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->q.write_ptr);
+	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr);
 	if (!txq->block)
 		iwl_write32(trans, HBUS_TARG_WRPTR,
-			    txq->q.write_ptr | (txq_id << 8));
+			    txq->write_ptr | (txq_id << 8));
 }
 
 void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
@@ -312,11 +330,30 @@ void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
 	}
 }
 
-static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx)
+static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
+				     struct iwl_txq *txq, int idx)
+{
+	return txq->tfds + trans_pcie->tfd_size * idx;
+}
+
+static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans,
+						  void *tfd, u8 idx)
 {
-	struct iwl_tfd_tb *tb = &tfd->tbs[idx];
+	struct iwl_tfd *tfd_fh;
+	struct iwl_tfd_tb *tb;
+	dma_addr_t addr;
+
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
+		struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx];
+
+		return (dma_addr_t)(le64_to_cpu(tb->addr));
+	}
+
+	tfd_fh = (void *)tfd;
+	tb = &tfd_fh->tbs[idx];
+	addr = get_unaligned_le32(&tb->lo);
 
-	dma_addr_t addr = get_unaligned_le32(&tb->lo);
 	if (sizeof(dma_addr_t) > sizeof(u32))
 		addr |=
 		((dma_addr_t)(le16_to_cpu(tb->hi_n_len) & 0xF) << 16) << 16;
@@ -324,37 +361,59 @@ static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx)
 	return addr;
 }
 
-static inline void iwl_pcie_tfd_set_tb(struct iwl_tfd *tfd, u8 idx,
-				       dma_addr_t addr, u16 len)
+static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd,
+				       u8 idx, dma_addr_t addr, u16 len)
 {
-	struct iwl_tfd_tb *tb = &tfd->tbs[idx];
-	u16 hi_n_len = len << 4;
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
+		struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx];
 
-	put_unaligned_le32(addr, &tb->lo);
-	if (sizeof(dma_addr_t) > sizeof(u32))
-		hi_n_len |= ((addr >> 16) >> 16) & 0xF;
+		put_unaligned_le64(addr, &tb->addr);
+		tb->tb_len = cpu_to_le16(len);
+
+		tfd_fh->num_tbs = cpu_to_le16(idx + 1);
+	} else {
+		struct iwl_tfd *tfd_fh = (void *)tfd;
+		struct iwl_tfd_tb *tb = &tfd_fh->tbs[idx];
+
+		u16 hi_n_len = len << 4;
+
+		put_unaligned_le32(addr, &tb->lo);
+		if (sizeof(dma_addr_t) > sizeof(u32))
+			hi_n_len |= ((addr >> 16) >> 16) & 0xF;
 
-	tb->hi_n_len = cpu_to_le16(hi_n_len);
+		tb->hi_n_len = cpu_to_le16(hi_n_len);
 
-	tfd->num_tbs = idx + 1;
+		tfd_fh->num_tbs = idx + 1;
+	}
 }
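
For the legacy TFD, the set/get pair above splits a 36-bit DMA address between tb->lo (low 32 bits) and the low nibble of tb->hi_n_len, whose upper 12 bits carry the length. A self-contained round-trip sketch of that packing (endianness handling omitted):

#include <assert.h>
#include <stdint.h>

/* Pack a 36-bit address and 12-bit length like a legacy iwl_tfd_tb. */
static void pack_tb(uint64_t addr, uint16_t len,
		    uint32_t *lo, uint16_t *hi_n_len)
{
	*lo = (uint32_t)addr;
	*hi_n_len = (uint16_t)(len << 4);
	*hi_n_len |= (addr >> 32) & 0xF;	/* address bits 32..35 */
}

static uint64_t unpack_addr(uint32_t lo, uint16_t hi_n_len)
{
	return (uint64_t)lo | ((uint64_t)(hi_n_len & 0xF) << 32);
}

int main(void)
{
	uint64_t addr = 0xD12345678ULL;		/* 36-bit address */
	uint32_t lo;
	uint16_t hi_n_len;

	pack_tb(addr, 0x123, &lo, &hi_n_len);
	assert(unpack_addr(lo, hi_n_len) == addr);
	assert(hi_n_len >> 4 == 0x123);
	return 0;
}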
 
-static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_tfd *tfd)
+static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *tfd)
 {
-	return tfd->num_tbs & 0x1f;
+	struct iwl_tfd *tfd_fh;
+
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
+
+		return le16_to_cpu(tfd_fh->num_tbs) & 0x1f;
+	}
+
+	tfd_fh = (void *)tfd;
+	return tfd_fh->num_tbs & 0x1f;
 }
 
 static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
 			       struct iwl_cmd_meta *meta,
-			       struct iwl_tfd *tfd)
+			       struct iwl_txq *txq, int index)
 {
-	int i;
-	int num_tbs;
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	int i, num_tbs;
+	void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
 
 	/* Sanity check on number of chunks */
-	num_tbs = iwl_pcie_tfd_get_num_tbs(tfd);
+	num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
 
-	if (num_tbs >= IWL_NUM_OF_TBS) {
+	if (num_tbs >= trans_pcie->max_tbs) {
 		IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
 		/* @todo issue fatal error, this is quite a serious situation */
 		return;
@@ -363,18 +422,30 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
 	/* first TB is never freed - it's the bidirectional DMA data */
 
 	for (i = 1; i < num_tbs; i++) {
-		if (meta->flags & BIT(i + CMD_TB_BITMAP_POS))
+		if (meta->tbs & BIT(i))
 			dma_unmap_page(trans->dev,
-				       iwl_pcie_tfd_tb_get_addr(tfd, i),
-				       iwl_pcie_tfd_tb_get_len(tfd, i),
+				       iwl_pcie_tfd_tb_get_addr(trans, tfd, i),
+				       iwl_pcie_tfd_tb_get_len(trans, tfd, i),
 				       DMA_TO_DEVICE);
 		else
 			dma_unmap_single(trans->dev,
-					 iwl_pcie_tfd_tb_get_addr(tfd, i),
-					 iwl_pcie_tfd_tb_get_len(tfd, i),
+					 iwl_pcie_tfd_tb_get_addr(trans, tfd,
+								  i),
+					 iwl_pcie_tfd_tb_get_len(trans, tfd,
+								 i),
 					 DMA_TO_DEVICE);
 	}
-	tfd->num_tbs = 0;
+
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
+
+		tfd_fh->num_tbs = 0;
+	} else {
+		struct iwl_tfd *tfd_fh = (void *)tfd;
+
+		tfd_fh->num_tbs = 0;
+	}
 }
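
The unmap loop above uses the new out_meta->tbs bitmap to decide, per TB, between dma_unmap_page() (page fragments) and dma_unmap_single() (linearly mapped data). A tiny runnable illustration of that dispatch (bit positions invented for the example):

#include <stdint.h>
#include <stdio.h>

#define BIT(n)	(1u << (n))

int main(void)
{
	uint16_t tbs = 0;	/* stands in for out_meta->tbs */
	int i;

	/* Suppose TBs 2 and 3 were mapped from page fragments. */
	tbs |= BIT(2);
	tbs |= BIT(3);

	for (i = 1; i < 5; i++)	/* TB 0 is never freed */
		printf("TB %d: %s\n", i,
		       (tbs & BIT(i)) ? "dma_unmap_page" :
					"dma_unmap_single");
	return 0;
}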
 
 /*
@@ -388,20 +459,18 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
  */
 static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
 {
-	struct iwl_tfd *tfd_tmp = txq->tfds;
-
 	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
 	 * idx is bounded by n_window
 	 */
-	int rd_ptr = txq->q.read_ptr;
-	int idx = get_cmd_index(&txq->q, rd_ptr);
+	int rd_ptr = txq->read_ptr;
+	int idx = get_cmd_index(txq, rd_ptr);
 
 	lockdep_assert_held(&txq->lock);
 
 	/* We have only txq->n_window txq->entries, but we use
 	 * TFD_QUEUE_SIZE_MAX tfds
 	 */
-	iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr]);
+	iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr);
 
 	/* free SKB */
 	if (txq->entries) {
@@ -423,23 +492,21 @@ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
 static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
 				  dma_addr_t addr, u16 len, bool reset)
 {
-	struct iwl_queue *q;
-	struct iwl_tfd *tfd, *tfd_tmp;
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	void *tfd;
 	u32 num_tbs;
 
-	q = &txq->q;
-	tfd_tmp = txq->tfds;
-	tfd = &tfd_tmp[q->write_ptr];
+	tfd = txq->tfds + trans_pcie->tfd_size * txq->write_ptr;
 
 	if (reset)
-		memset(tfd, 0, sizeof(*tfd));
+		memset(tfd, 0, trans_pcie->tfd_size);
 
-	num_tbs = iwl_pcie_tfd_get_num_tbs(tfd);
+	num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
 
-	/* Each TFD can point to a maximum 20 Tx buffers */
-	if (num_tbs >= IWL_NUM_OF_TBS) {
+	/* Each TFD can point to a maximum of max_tbs Tx buffers */
+	if (num_tbs >= trans_pcie->max_tbs) {
 		IWL_ERR(trans, "Error can not send more than %d chunks\n",
-			IWL_NUM_OF_TBS);
+			trans_pcie->max_tbs);
 		return -EINVAL;
 	}
 
@@ -447,7 +514,7 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
 		 "Unaligned address = %llx\n", (unsigned long long)addr))
 		return -EINVAL;
 
-	iwl_pcie_tfd_set_tb(tfd, num_tbs, addr, len);
+	iwl_pcie_tfd_set_tb(trans, tfd, num_tbs, addr, len);
 
 	return num_tbs;
 }
@@ -457,7 +524,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 			       u32 txq_id)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX;
+	size_t tfd_sz = trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX;
 	size_t tb0_buf_sz;
 	int i;
 
@@ -468,7 +535,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 		    (unsigned long)txq);
 	txq->trans_pcie = trans_pcie;
 
-	txq->q.n_window = slots_num;
+	txq->n_window = slots_num;
 
 	txq->entries = kcalloc(slots_num,
 			       sizeof(struct iwl_pcie_txq_entry),
@@ -489,7 +556,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 	/* Circular buffer of transmit frame descriptors (TFDs),
 	 * shared with device */
 	txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz,
-				       &txq->q.dma_addr, GFP_KERNEL);
+				       &txq->dma_addr, GFP_KERNEL);
 	if (!txq->tfds)
 		goto error;
 
@@ -503,11 +570,11 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
 	if (!txq->first_tb_bufs)
 		goto err_free_tfds;
 
-	txq->q.id = txq_id;
+	txq->id = txq_id;
 
 	return 0;
 err_free_tfds:
-	dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->q.dma_addr);
+	dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr);
 error:
 	if (txq->entries && txq_id == trans_pcie->cmd_queue)
 		for (i = 0; i < slots_num; i++)
@@ -531,7 +598,7 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
 	BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1));
 
 	/* Initialize queue's high/low-water marks, and head/tail indexes */
-	ret = iwl_queue_init(&txq->q, slots_num, txq_id);
+	ret = iwl_queue_init(txq, slots_num, txq_id);
 	if (ret)
 		return ret;
 
@@ -545,10 +612,10 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
 	if (trans->cfg->use_tfh)
 		iwl_write_direct64(trans,
 				   FH_MEM_CBBC_QUEUE(trans, txq_id),
-				   txq->q.dma_addr);
+				   txq->dma_addr);
 	else
 		iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id),
-				   txq->q.dma_addr >> 8);
+				   txq->dma_addr >> 8);
 
 	return 0;
 }
@@ -595,15 +662,14 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-	struct iwl_queue *q = &txq->q;
 
 	spin_lock_bh(&txq->lock);
-	while (q->write_ptr != q->read_ptr) {
+	while (txq->write_ptr != txq->read_ptr) {
 		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
-				   txq_id, q->read_ptr);
+				   txq_id, txq->read_ptr);
 
 		if (txq_id != trans_pcie->cmd_queue) {
-			struct sk_buff *skb = txq->entries[q->read_ptr].skb;
+			struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
 
 			if (WARN_ON_ONCE(!skb))
 				continue;
@@ -611,15 +677,15 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
 			iwl_pcie_free_tso_page(trans_pcie, skb);
 		}
 		iwl_pcie_txq_free_tfd(trans, txq);
-		q->read_ptr = iwl_queue_inc_wrap(q->read_ptr);
+		txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr);
 
-		if (q->read_ptr == q->write_ptr) {
+		if (txq->read_ptr == txq->write_ptr) {
 			unsigned long flags;
 
 			spin_lock_irqsave(&trans_pcie->reg_lock, flags);
 			if (txq_id != trans_pcie->cmd_queue) {
 				IWL_DEBUG_RPM(trans, "Q %d - last tx freed\n",
-					      q->id);
+					      txq->id);
 				iwl_trans_unref(trans);
 			} else {
 				iwl_pcie_clear_cmd_in_flight(trans);
@@ -663,7 +729,7 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 
 	/* De-alloc array of command/tx buffers */
 	if (txq_id == trans_pcie->cmd_queue)
-		for (i = 0; i < txq->q.n_window; i++) {
+		for (i = 0; i < txq->n_window; i++) {
 			kzfree(txq->entries[i].cmd);
 			kzfree(txq->entries[i].free_buf);
 		}
@@ -671,13 +737,13 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 	/* De-alloc circular buffer of TFDs */
 	if (txq->tfds) {
 		dma_free_coherent(dev,
-				  sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX,
-				  txq->tfds, txq->q.dma_addr);
-		txq->q.dma_addr = 0;
+				  trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX,
+				  txq->tfds, txq->dma_addr);
+		txq->dma_addr = 0;
 		txq->tfds = NULL;
 
 		dma_free_coherent(dev,
-				  sizeof(*txq->first_tb_bufs) * txq->q.n_window,
+				  sizeof(*txq->first_tb_bufs) * txq->n_window,
 				  txq->first_tb_bufs, txq->first_tb_dma);
 	}
 
@@ -761,14 +827,14 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans)
 		if (trans->cfg->use_tfh)
 			iwl_write_direct64(trans,
 					   FH_MEM_CBBC_QUEUE(trans, txq_id),
-					   txq->q.dma_addr);
+					   txq->dma_addr);
 		else
 			iwl_write_direct32(trans,
 					   FH_MEM_CBBC_QUEUE(trans, txq_id),
-					   txq->q.dma_addr >> 8);
+					   txq->dma_addr >> 8);
 		iwl_pcie_txq_unmap(trans, txq_id);
-		txq->q.read_ptr = 0;
-		txq->q.write_ptr = 0;
+		txq->read_ptr = 0;
+		txq->write_ptr = 0;
 	}
 
 	/* Tell NIC where to find the "keep warm" buffer */
@@ -1012,7 +1078,7 @@ static inline void iwl_pcie_txq_progress(struct iwl_txq *txq)
 	 * if empty delete timer, otherwise move timer forward
 	 * since we're making progress on this queue
 	 */
-	if (txq->q.read_ptr == txq->q.write_ptr)
+	if (txq->read_ptr == txq->write_ptr)
 		del_timer(&txq->stuck_timer);
 	else
 		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
@@ -1025,7 +1091,6 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[txq_id];
 	int tfd_num = ssn & (TFD_QUEUE_SIZE_MAX - 1);
-	struct iwl_queue *q = &txq->q;
 	int last_to_free;
 
 	/* This function is not meant to release cmd queue*/
@@ -1040,21 +1105,21 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 		goto out;
 	}
 
-	if (txq->q.read_ptr == tfd_num)
+	if (txq->read_ptr == tfd_num)
 		goto out;
 
 	IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n",
-			   txq_id, txq->q.read_ptr, tfd_num, ssn);
+			   txq_id, txq->read_ptr, tfd_num, ssn);
 
 	/* Since we free until index _not_ inclusive, the one before index is
 	 * the last we will free. This one must be used */
 	last_to_free = iwl_queue_dec_wrap(tfd_num);
 
-	if (!iwl_queue_used(q, last_to_free)) {
+	if (!iwl_queue_used(txq, last_to_free)) {
 		IWL_ERR(trans,
 			"%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
 			__func__, txq_id, last_to_free, TFD_QUEUE_SIZE_MAX,
-			q->write_ptr, q->read_ptr);
+			txq->write_ptr, txq->read_ptr);
 		goto out;
 	}
 
@@ -1062,9 +1127,9 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 		goto out;
 
 	for (;
-	     q->read_ptr != tfd_num;
-	     q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) {
-		struct sk_buff *skb = txq->entries[txq->q.read_ptr].skb;
+	     txq->read_ptr != tfd_num;
+	     txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) {
+		struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
 
 		if (WARN_ON_ONCE(!skb))
 			continue;
@@ -1073,16 +1138,17 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 
 		__skb_queue_tail(skbs, skb);
 
-		txq->entries[txq->q.read_ptr].skb = NULL;
+		txq->entries[txq->read_ptr].skb = NULL;
 
-		iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
+		if (!trans->cfg->use_tfh)
+			iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
 
 		iwl_pcie_txq_free_tfd(trans, txq);
 	}
 
 	iwl_pcie_txq_progress(txq);
 
-	if (iwl_queue_space(&txq->q) > txq->q.low_mark &&
+	if (iwl_queue_space(txq) > txq->low_mark &&
 	    test_bit(txq_id, trans_pcie->queue_stopped)) {
 		struct sk_buff_head overflow_skbs;
 
@@ -1114,12 +1180,12 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 		}
 		spin_lock_bh(&txq->lock);
 
-		if (iwl_queue_space(&txq->q) > txq->q.low_mark)
+		if (iwl_queue_space(txq) > txq->low_mark)
 			iwl_wake_queue(trans, txq);
 	}
 
-	if (q->read_ptr == q->write_ptr) {
-		IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", q->id);
+	if (txq->read_ptr == txq->write_ptr) {
+		IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", txq->id);
 		iwl_trans_unref(trans);
 	}
 
@@ -1181,31 +1247,30 @@ static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-	struct iwl_queue *q = &txq->q;
 	unsigned long flags;
 	int nfreed = 0;
 
 	lockdep_assert_held(&txq->lock);
 
-	if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(q, idx))) {
+	if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(txq, idx))) {
 		IWL_ERR(trans,
 			"%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
 			__func__, txq_id, idx, TFD_QUEUE_SIZE_MAX,
-			q->write_ptr, q->read_ptr);
+			txq->write_ptr, txq->read_ptr);
 		return;
 	}
 
-	for (idx = iwl_queue_inc_wrap(idx); q->read_ptr != idx;
-	     q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) {
+	for (idx = iwl_queue_inc_wrap(idx); txq->read_ptr != idx;
+	     txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) {
 
 		if (nfreed++ > 0) {
 			IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n",
-				idx, q->write_ptr, q->read_ptr);
+				idx, txq->write_ptr, txq->read_ptr);
 			iwl_force_nmi(trans);
 		}
 	}
 
-	if (q->read_ptr == q->write_ptr) {
+	if (txq->read_ptr == txq->write_ptr) {
 		spin_lock_irqsave(&trans_pcie->reg_lock, flags);
 		iwl_pcie_clear_cmd_in_flight(trans);
 		spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
@@ -1291,14 +1356,14 @@ void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
 			 */
 			iwl_scd_txq_disable_agg(trans, txq_id);
 
-			ssn = txq->q.read_ptr;
+			ssn = txq->read_ptr;
 		}
 	}
 
 	/* Place first TFD at index corresponding to start sequence number.
 	 * Assumes that ssn_idx is valid (!= 0xFFF) */
-	txq->q.read_ptr = (ssn & 0xff);
-	txq->q.write_ptr = (ssn & 0xff);
+	txq->read_ptr = (ssn & 0xff);
+	txq->write_ptr = (ssn & 0xff);
 	iwl_write_direct32(trans, HBUS_TARG_WRPTR,
 			   (ssn & 0xff) | (txq_id << 8));
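
Only the low byte of the start sequence number survives into the ring pointers and the write-pointer register, e.g.:

#include <stdio.h>

int main(void)
{
	unsigned int ssn = 0x1a7, txq_id = 5;

	printf("read/write ptr: 0x%02x\n", ssn & 0xff);	/* 0xa7 */
	printf("HBUS_TARG_WRPTR: 0x%04x\n",
	       (ssn & 0xff) | (txq_id << 8));		/* 0x05a7 */
	return 0;
}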
 
@@ -1351,6 +1416,14 @@ void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id,
 	txq->ampdu = !shared_mode;
 }
 
+dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+	return trans_pcie->scd_bc_tbls.dma +
+	       txq * sizeof(struct iwlagn_scd_bc_tbl);
+}
+
 void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
 				bool configure_scd)
 {
@@ -1406,7 +1479,6 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
-	struct iwl_queue *q = &txq->q;
 	struct iwl_device_cmd *out_cmd;
 	struct iwl_cmd_meta *out_meta;
 	unsigned long flags;
@@ -1505,7 +1577,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 
 	spin_lock_bh(&txq->lock);
 
-	if (iwl_queue_space(q) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
+	if (iwl_queue_space(txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
 		spin_unlock_bh(&txq->lock);
 
 		IWL_ERR(trans, "No space in command queue\n");
@@ -1514,7 +1586,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		goto free_dup_buf;
 	}
 
-	idx = get_cmd_index(q, q->write_ptr);
+	idx = get_cmd_index(txq, txq->write_ptr);
 	out_cmd = txq->entries[idx].cmd;
 	out_meta = &txq->entries[idx].meta;
 
@@ -1533,7 +1605,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		out_cmd->hdr_wide.reserved = 0;
 		out_cmd->hdr_wide.sequence =
 			cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
-						 INDEX_TO_SEQ(q->write_ptr));
+						 INDEX_TO_SEQ(txq->write_ptr));
 
 		cmd_pos = sizeof(struct iwl_cmd_header_wide);
 		copy_size = sizeof(struct iwl_cmd_header_wide);
@@ -1541,7 +1613,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id);
 		out_cmd->hdr.sequence =
 			cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
-						 INDEX_TO_SEQ(q->write_ptr));
+						 INDEX_TO_SEQ(txq->write_ptr));
 		out_cmd->hdr.group_id = 0;
 
 		cmd_pos = sizeof(struct iwl_cmd_header);
@@ -1591,7 +1663,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		     iwl_get_cmd_string(trans, cmd->id),
 		     group_id, out_cmd->hdr.cmd,
 		     le16_to_cpu(out_cmd->hdr.sequence),
-		     cmd_size, q->write_ptr, idx, trans_pcie->cmd_queue);
+		     cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue);
 
 	/* start the TFD with the minimum copy bytes */
 	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
@@ -1607,8 +1679,8 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 					   copy_size - tb0_size,
 					   DMA_TO_DEVICE);
 		if (dma_mapping_error(trans->dev, phys_addr)) {
-			iwl_pcie_tfd_unmap(trans, out_meta,
-					   &txq->tfds[q->write_ptr]);
+			iwl_pcie_tfd_unmap(trans, out_meta, txq,
+					   txq->write_ptr);
 			idx = -ENOMEM;
 			goto out;
 		}
@@ -1631,8 +1703,8 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		phys_addr = dma_map_single(trans->dev, (void *)data,
 					   cmdlen[i], DMA_TO_DEVICE);
 		if (dma_mapping_error(trans->dev, phys_addr)) {
-			iwl_pcie_tfd_unmap(trans, out_meta,
-					   &txq->tfds[q->write_ptr]);
+			iwl_pcie_tfd_unmap(trans, out_meta, txq,
+					   txq->write_ptr);
 			idx = -ENOMEM;
 			goto out;
 		}
@@ -1640,8 +1712,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false);
 	}
 
-	BUILD_BUG_ON(IWL_NUM_OF_TBS + CMD_TB_BITMAP_POS >
-		     sizeof(out_meta->flags) * BITS_PER_BYTE);
+	BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
 	out_meta->flags = cmd->flags;
 	if (WARN_ON_ONCE(txq->entries[idx].free_buf))
 		kzfree(txq->entries[idx].free_buf);
@@ -1650,7 +1721,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);
 
 	/* start timer if queue currently empty */
-	if (q->read_ptr == q->write_ptr && txq->wd_timeout)
+	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
 		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
 
 	spin_lock_irqsave(&trans_pcie->reg_lock, flags);
@@ -1662,7 +1733,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	}
 
 	/* Increment and update queue's write index */
-	q->write_ptr = iwl_queue_inc_wrap(q->write_ptr);
+	txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr);
 	iwl_pcie_txq_inc_wr_ptr(trans, txq);
 
 	spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
@@ -1700,20 +1771,20 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
 	if (WARN(txq_id != trans_pcie->cmd_queue,
 		 "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d\n",
 		 txq_id, trans_pcie->cmd_queue, sequence,
-		 trans_pcie->txq[trans_pcie->cmd_queue].q.read_ptr,
-		 trans_pcie->txq[trans_pcie->cmd_queue].q.write_ptr)) {
+		 trans_pcie->txq[trans_pcie->cmd_queue].read_ptr,
+		 trans_pcie->txq[trans_pcie->cmd_queue].write_ptr)) {
 		iwl_print_hex_error(trans, pkt, 32);
 		return;
 	}
 
 	spin_lock_bh(&txq->lock);
 
-	cmd_index = get_cmd_index(&txq->q, index);
+	cmd_index = get_cmd_index(txq, index);
 	cmd = txq->entries[cmd_index].cmd;
 	meta = &txq->entries[cmd_index].meta;
 	cmd_id = iwl_cmd_id(cmd->hdr.cmd, group_id, 0);
 
-	iwl_pcie_tfd_unmap(trans, meta, &txq->tfds[index]);
+	iwl_pcie_tfd_unmap(trans, meta, txq, index);
 
 	/* Input error checking is done when commands are added to queue. */
 	if (meta->flags & CMD_WANT_SKB) {
@@ -1826,14 +1897,13 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans,
 				 HOST_COMPLETE_TIMEOUT);
 	if (!ret) {
 		struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
-		struct iwl_queue *q = &txq->q;
 
 		IWL_ERR(trans, "Error sending %s: time out after %dms.\n",
 			iwl_get_cmd_string(trans, cmd->id),
 			jiffies_to_msecs(HOST_COMPLETE_TIMEOUT));
 
 		IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n",
-			q->read_ptr, q->write_ptr);
+			txq->read_ptr, txq->write_ptr);
 
 		clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
 		IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
@@ -1911,7 +1981,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
 			     struct iwl_cmd_meta *out_meta,
 			     struct iwl_device_cmd *dev_cmd, u16 tb1_len)
 {
-	struct iwl_queue *q = &txq->q;
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u16 tb2_len;
 	int i;
 
@@ -1926,8 +1996,8 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
 						     skb->data + hdr_len,
 						     tb2_len, DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) {
-			iwl_pcie_tfd_unmap(trans, out_meta,
-					   &txq->tfds[q->write_ptr]);
+			iwl_pcie_tfd_unmap(trans, out_meta, txq,
+					   txq->write_ptr);
 			return -EINVAL;
 		}
 		iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false);
@@ -1946,19 +2016,19 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
 					   skb_frag_size(frag), DMA_TO_DEVICE);
 
 		if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
-			iwl_pcie_tfd_unmap(trans, out_meta,
-					   &txq->tfds[q->write_ptr]);
+			iwl_pcie_tfd_unmap(trans, out_meta, txq,
+					   txq->write_ptr);
 			return -EINVAL;
 		}
 		tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
 						skb_frag_size(frag), false);
 
-		out_meta->flags |= BIT(tb_idx + CMD_TB_BITMAP_POS);
+		out_meta->tbs |= BIT(tb_idx);
 	}
 
 	trace_iwlwifi_dev_tx(trans->dev, skb,
-			     &txq->tfds[txq->q.write_ptr],
-			     sizeof(struct iwl_tfd),
+			     iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+			     trans_pcie->tfd_size,
 			     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
 			     skb->data + hdr_len, tb2_len);
 	trace_iwlwifi_dev_tx_data(trans->dev, skb,
@@ -2019,7 +2089,6 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 	struct ieee80211_hdr *hdr = (void *)skb->data;
 	unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
 	unsigned int mss = skb_shinfo(skb)->gso_size;
-	struct iwl_queue *q = &txq->q;
 	u16 length, iv_len, amsdu_pad;
 	u8 *start_hdr;
 	struct iwl_tso_hdr_page *hdr_page;
@@ -2033,8 +2102,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 		IEEE80211_CCMP_HDR_LEN : 0;
 
 	trace_iwlwifi_dev_tx(trans->dev, skb,
-			     &txq->tfds[txq->q.write_ptr],
-			     sizeof(struct iwl_tfd),
+			     iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+			     trans_pcie->tfd_size,
 			     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
 			     NULL, 0);
 
@@ -2190,7 +2259,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 	return 0;
 
 out_unmap:
-	iwl_pcie_tfd_unmap(trans, out_meta, &txq->tfds[q->write_ptr]);
+	iwl_pcie_tfd_unmap(trans, out_meta, txq, txq->write_ptr);
 	return ret;
 }
 #else /* CONFIG_INET */
@@ -2214,9 +2283,9 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload;
 	struct iwl_cmd_meta *out_meta;
 	struct iwl_txq *txq;
-	struct iwl_queue *q;
 	dma_addr_t tb0_phys, tb1_phys, scratch_phys;
 	void *tb1_addr;
+	void *tfd;
 	u16 len, tb1_len;
 	bool wait_write_ptr;
 	__le16 fc;
@@ -2225,7 +2294,6 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	bool amsdu;
 
 	txq = &trans_pcie->txq[txq_id];
-	q = &txq->q;
 
 	if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used),
 		      "TX on unused queue %d\n", txq_id))
@@ -2247,7 +2315,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	}
 
 	if (skb_is_nonlinear(skb) &&
-	    skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS &&
+	    skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) &&
 	    __skb_linearize(skb))
 		return -ENOMEM;
 
@@ -2260,11 +2328,11 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 
 	spin_lock(&txq->lock);
 
-	if (iwl_queue_space(q) < q->high_mark) {
+	if (iwl_queue_space(txq) < txq->high_mark) {
 		iwl_stop_queue(trans, txq);
 
 		/* don't put the packet on the ring, if there is no room */
-		if (unlikely(iwl_queue_space(q) < 3)) {
+		if (unlikely(iwl_queue_space(txq) < 3)) {
 			struct iwl_device_cmd **dev_cmd_ptr;
 
 			dev_cmd_ptr = (void *)((u8 *)skb->cb +
@@ -2285,19 +2353,19 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	 */
 	wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
 	WARN_ONCE(txq->ampdu &&
-		  (wifi_seq & 0xff) != q->write_ptr,
+		  (wifi_seq & 0xff) != txq->write_ptr,
 		  "Q: %d WiFi Seq %d tfdNum %d",
-		  txq_id, wifi_seq, q->write_ptr);
+		  txq_id, wifi_seq, txq->write_ptr);
 
 	/* Set up driver data for this TFD */
-	txq->entries[q->write_ptr].skb = skb;
-	txq->entries[q->write_ptr].cmd = dev_cmd;
+	txq->entries[txq->write_ptr].skb = skb;
+	txq->entries[txq->write_ptr].cmd = dev_cmd;
 
 	dev_cmd->hdr.sequence =
 		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
-			    INDEX_TO_SEQ(q->write_ptr)));
+			    INDEX_TO_SEQ(txq->write_ptr)));
 
-	tb0_phys = iwl_pcie_get_first_tb_dma(txq, q->write_ptr);
+	tb0_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr);
 	scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
 		       offsetof(struct iwl_tx_cmd, scratch);
 
@@ -2305,7 +2373,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
 
 	/* Set up first empty entry in queue's array of Tx/cmd buffers */
-	out_meta = &txq->entries[q->write_ptr].meta;
+	out_meta = &txq->entries[txq->write_ptr].meta;
 	out_meta->flags = 0;
 
 	/*
@@ -2330,7 +2398,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	}
 
 	/* The first TB points to bi-directional DMA data */
-	memcpy(&txq->first_tb_bufs[q->write_ptr], &dev_cmd->hdr,
+	memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
 	       IWL_FIRST_TB_SIZE);
 	iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
 			       IWL_FIRST_TB_SIZE, true);
@@ -2355,13 +2423,15 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 		goto out_err;
 	}
 
+	tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
 	/* Set up entry for this TFD in Tx byte-count array */
-	iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len));
+	iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
+					 iwl_pcie_tfd_get_num_tbs(trans, tfd));
 
 	wait_write_ptr = ieee80211_has_morefrags(fc);
 
 	/* start timer if queue currently empty */
-	if (q->read_ptr == q->write_ptr) {
+	if (txq->read_ptr == txq->write_ptr) {
 		if (txq->wd_timeout) {
 			/*
 			 * If the TXQ is active, then set the timer, if not,
@@ -2375,12 +2445,12 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 			else
 				txq->frozen_expiry_remainder = txq->wd_timeout;
 		}
-		IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", q->id);
+		IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", txq->id);
 		iwl_trans_ref(trans);
 	}
 
 	/* Tell device the write index *just past* this latest filled TFD */
-	q->write_ptr = iwl_queue_inc_wrap(q->write_ptr);
+	txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr);
 	if (!wait_write_ptr)
 		iwl_pcie_txq_inc_wr_ptr(trans, txq);