Prechádzať zdrojové kódy

Merge tag 'drm-amdkfd-next-fixes-2015-06-10' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Make the reset-wavefronts action per process per device instead of
  per process, because one device can be stuck while another is not

- Add some missing properties to the CZ device_info structure

- Rename non-Kconfig symbols so that they no longer use the CONFIG_ prefix

- Some more cleanups and debug prints

* tag 'drm-amdkfd-next-fixes-2015-06-10' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: remove not used defines from cik_regs.h
  drm/amdkfd: Add missing properties to CZ device info
  drm/amdkfd: make reset wavefronts per process per device
  drm/amdkfd: add debug print to kfd_events.c
  drm/amdkfd: avoid CONFIG_ prefix for non-Kconfig symbols
Dave Airlie 10 rokov pred
rodič
commit
933ea180a4

+ 13 - 164
drivers/gpu/drm/amd/amdkfd/cik_regs.h

@@ -23,33 +23,11 @@
 #ifndef CIK_REGS_H
 #define CIK_REGS_H
 
-#define IH_VMID_0_LUT					0x3D40u
-
-#define BIF_DOORBELL_CNTL				0x530Cu
-
-#define	SRBM_GFX_CNTL					0xE44
-#define	PIPEID(x)					((x) << 0)
-#define	MEID(x)						((x) << 2)
-#define	VMID(x)						((x) << 4)
-#define	QUEUEID(x)					((x) << 8)
-
-#define	SQ_CONFIG					0x8C00
-
-#define	SH_MEM_BASES					0x8C28
 /* if PTR32, these are the bases for scratch and lds */
 #define	PRIVATE_BASE(x)					((x) << 0) /* scratch */
 #define	SHARED_BASE(x)					((x) << 16) /* LDS */
-#define	SH_MEM_APE1_BASE				0x8C2C
-/* if PTR32, this is the base location of GPUVM */
-#define	SH_MEM_APE1_LIMIT				0x8C30
-/* if PTR32, this is the upper limit of GPUVM */
-#define	SH_MEM_CONFIG					0x8C34
 #define	PTR32						(1 << 0)
-#define PRIVATE_ATC					(1 << 1)
 #define	ALIGNMENT_MODE(x)				((x) << 2)
-#define	SH_MEM_ALIGNMENT_MODE_DWORD			0
-#define	SH_MEM_ALIGNMENT_MODE_DWORD_STRICT		1
-#define	SH_MEM_ALIGNMENT_MODE_STRICT			2
 #define	SH_MEM_ALIGNMENT_MODE_UNALIGNED			3
 #define	DEFAULT_MTYPE(x)				((x) << 4)
 #define	APE1_MTYPE(x)					((x) << 7)
@@ -58,137 +36,34 @@
 #define	MTYPE_CACHED					0
 #define	MTYPE_NONCACHED					3
 
-
-#define SH_STATIC_MEM_CONFIG				0x9604u
-
-#define	TC_CFG_L1_LOAD_POLICY0				0xAC68
-#define	TC_CFG_L1_LOAD_POLICY1				0xAC6C
-#define	TC_CFG_L1_STORE_POLICY				0xAC70
-#define	TC_CFG_L2_LOAD_POLICY0				0xAC74
-#define	TC_CFG_L2_LOAD_POLICY1				0xAC78
-#define	TC_CFG_L2_STORE_POLICY0				0xAC7C
-#define	TC_CFG_L2_STORE_POLICY1				0xAC80
-#define	TC_CFG_L2_ATOMIC_POLICY				0xAC84
-#define	TC_CFG_L1_VOLATILE				0xAC88
-#define	TC_CFG_L2_VOLATILE				0xAC8C
-
-#define CP_PQ_WPTR_POLL_CNTL				0xC20C
-#define	WPTR_POLL_EN					(1 << 31)
-
-#define CPC_INT_CNTL					0xC2D0
-#define CP_ME1_PIPE0_INT_CNTL				0xC214
-#define CP_ME1_PIPE1_INT_CNTL				0xC218
-#define CP_ME1_PIPE2_INT_CNTL				0xC21C
-#define CP_ME1_PIPE3_INT_CNTL				0xC220
-#define CP_ME2_PIPE0_INT_CNTL				0xC224
-#define CP_ME2_PIPE1_INT_CNTL				0xC228
-#define CP_ME2_PIPE2_INT_CNTL				0xC22C
-#define CP_ME2_PIPE3_INT_CNTL				0xC230
-#define DEQUEUE_REQUEST_INT_ENABLE			(1 << 13)
-#define WRM_POLL_TIMEOUT_INT_ENABLE			(1 << 17)
-#define PRIV_REG_INT_ENABLE				(1 << 23)
-#define TIME_STAMP_INT_ENABLE				(1 << 26)
-#define GENERIC2_INT_ENABLE				(1 << 29)
-#define GENERIC1_INT_ENABLE				(1 << 30)
-#define GENERIC0_INT_ENABLE				(1 << 31)
-#define CP_ME1_PIPE0_INT_STATUS				0xC214
-#define CP_ME1_PIPE1_INT_STATUS				0xC218
-#define CP_ME1_PIPE2_INT_STATUS				0xC21C
-#define CP_ME1_PIPE3_INT_STATUS				0xC220
-#define CP_ME2_PIPE0_INT_STATUS				0xC224
-#define CP_ME2_PIPE1_INT_STATUS				0xC228
-#define CP_ME2_PIPE2_INT_STATUS				0xC22C
-#define CP_ME2_PIPE3_INT_STATUS				0xC230
-#define DEQUEUE_REQUEST_INT_STATUS			(1 << 13)
-#define WRM_POLL_TIMEOUT_INT_STATUS			(1 << 17)
-#define PRIV_REG_INT_STATUS				(1 << 23)
-#define TIME_STAMP_INT_STATUS				(1 << 26)
-#define GENERIC2_INT_STATUS				(1 << 29)
-#define GENERIC1_INT_STATUS				(1 << 30)
-#define GENERIC0_INT_STATUS				(1 << 31)
-
-#define CP_HPD_EOP_BASE_ADDR				0xC904
-#define CP_HPD_EOP_BASE_ADDR_HI				0xC908
-#define CP_HPD_EOP_VMID					0xC90C
-#define CP_HPD_EOP_CONTROL				0xC910
-#define	EOP_SIZE(x)					((x) << 0)
-#define	EOP_SIZE_MASK					(0x3f << 0)
-#define CP_MQD_BASE_ADDR				0xC914
-#define CP_MQD_BASE_ADDR_HI				0xC918
-#define CP_HQD_ACTIVE					0xC91C
-#define CP_HQD_VMID					0xC920
-
-#define CP_HQD_PERSISTENT_STATE				0xC924u
 #define	DEFAULT_CP_HQD_PERSISTENT_STATE			(0x33U << 8)
 #define	PRELOAD_REQ					(1 << 0)
 
-#define CP_HQD_PIPE_PRIORITY				0xC928u
-#define CP_HQD_QUEUE_PRIORITY				0xC92Cu
-#define CP_HQD_QUANTUM					0xC930u
+#define	MQD_CONTROL_PRIV_STATE_EN			(1U << 8)
+
+#define	DEFAULT_MIN_IB_AVAIL_SIZE			(3U << 20)
+
+#define	IB_ATC_EN					(1U << 23)
+
 #define	QUANTUM_EN					1U
 #define	QUANTUM_SCALE_1MS				(1U << 4)
 #define	QUANTUM_DURATION(x)				((x) << 8)
 
-#define CP_HQD_PQ_BASE					0xC934
-#define CP_HQD_PQ_BASE_HI				0xC938
-#define CP_HQD_PQ_RPTR					0xC93C
-#define CP_HQD_PQ_RPTR_REPORT_ADDR			0xC940
-#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI			0xC944
-#define CP_HQD_PQ_WPTR_POLL_ADDR			0xC948
-#define CP_HQD_PQ_WPTR_POLL_ADDR_HI			0xC94C
-#define CP_HQD_PQ_DOORBELL_CONTROL			0xC950
-#define	DOORBELL_OFFSET(x)				((x) << 2)
-#define	DOORBELL_OFFSET_MASK				(0x1fffff << 2)
-#define	DOORBELL_SOURCE					(1 << 28)
-#define	DOORBELL_SCHD_HIT				(1 << 29)
-#define	DOORBELL_EN					(1 << 30)
-#define	DOORBELL_HIT					(1 << 31)
-#define CP_HQD_PQ_WPTR					0xC954
-#define CP_HQD_PQ_CONTROL				0xC958
-#define	QUEUE_SIZE(x)					((x) << 0)
-#define	QUEUE_SIZE_MASK					(0x3f << 0)
 #define	RPTR_BLOCK_SIZE(x)				((x) << 8)
-#define	RPTR_BLOCK_SIZE_MASK				(0x3f << 8)
 #define	MIN_AVAIL_SIZE(x)				((x) << 20)
-#define	PQ_ATC_EN					(1 << 23)
-#define	PQ_VOLATILE					(1 << 26)
-#define	NO_UPDATE_RPTR					(1 << 27)
-#define	UNORD_DISPATCH					(1 << 28)
-#define	ROQ_PQ_IB_FLIP					(1 << 29)
-#define	PRIV_STATE					(1 << 30)
-#define	KMD_QUEUE					(1 << 31)
-
 #define	DEFAULT_RPTR_BLOCK_SIZE				RPTR_BLOCK_SIZE(5)
 #define	DEFAULT_MIN_AVAIL_SIZE				MIN_AVAIL_SIZE(3)
 
-#define CP_HQD_IB_BASE_ADDR				0xC95Cu
-#define CP_HQD_IB_BASE_ADDR_HI				0xC960u
-#define CP_HQD_IB_RPTR					0xC964u
-#define CP_HQD_IB_CONTROL				0xC968u
-#define	IB_ATC_EN					(1U << 23)
-#define	DEFAULT_MIN_IB_AVAIL_SIZE			(3U << 20)
-
-#define	AQL_ENABLE					1
-
-#define CP_HQD_DEQUEUE_REQUEST				0xC974
-#define	DEQUEUE_REQUEST_DRAIN				1
-#define DEQUEUE_REQUEST_RESET				2
-#define		DEQUEUE_INT					(1U << 8)
+#define	PQ_ATC_EN					(1 << 23)
+#define	NO_UPDATE_RPTR					(1 << 27)
 
-#define CP_HQD_SEMA_CMD					0xC97Cu
-#define CP_HQD_MSG_TYPE					0xC980u
-#define CP_HQD_ATOMIC0_PREOP_LO				0xC984u
-#define CP_HQD_ATOMIC0_PREOP_HI				0xC988u
-#define CP_HQD_ATOMIC1_PREOP_LO				0xC98Cu
-#define CP_HQD_ATOMIC1_PREOP_HI				0xC990u
-#define CP_HQD_HQ_SCHEDULER0				0xC994u
-#define CP_HQD_HQ_SCHEDULER1				0xC998u
+#define	DOORBELL_OFFSET(x)				((x) << 2)
+#define	DOORBELL_EN					(1 << 30)
 
+#define	PRIV_STATE					(1 << 30)
+#define	KMD_QUEUE					(1 << 31)
 
-#define CP_MQD_CONTROL					0xC99C
-#define	MQD_VMID(x)					((x) << 0)
-#define	MQD_VMID_MASK					(0xf << 0)
-#define	MQD_CONTROL_PRIV_STATE_EN			(1U << 8)
+#define	AQL_ENABLE					1
 
 #define	SDMA_RB_VMID(x)					(x << 24)
 #define	SDMA_RB_ENABLE					(1 << 0)
@@ -202,33 +77,7 @@
 #define	SDMA_VA_SHARED_BASE(x)				(x << 8)
 
 #define GRBM_GFX_INDEX					0x30800
-#define	INSTANCE_INDEX(x)				((x) << 0)
-#define	SH_INDEX(x)					((x) << 8)
-#define	SE_INDEX(x)					((x) << 16)
-#define	SH_BROADCAST_WRITES				(1 << 29)
-#define	INSTANCE_BROADCAST_WRITES			(1 << 30)
-#define	SE_BROADCAST_WRITES				(1 << 31)
-
-#define SQC_CACHES					0x30d20
-#define SQC_POLICY					0x8C38u
-#define SQC_VOLATILE					0x8C3Cu
 
-#define CP_PERFMON_CNTL					0x36020
-
-#define ATC_VMID0_PASID_MAPPING				0x339Cu
-#define	ATC_VMID_PASID_MAPPING_UPDATE_STATUS		0x3398u
 #define	ATC_VMID_PASID_MAPPING_VALID			(1U << 31)
 
-#define ATC_VM_APERTURE0_CNTL				0x3310u
-#define	ATS_ACCESS_MODE_NEVER				0
-#define	ATS_ACCESS_MODE_ALWAYS				1
-
-#define ATC_VM_APERTURE0_CNTL2				0x3318u
-#define ATC_VM_APERTURE0_HIGH_ADDR			0x3308u
-#define ATC_VM_APERTURE0_LOW_ADDR			0x3300u
-#define ATC_VM_APERTURE1_CNTL				0x3314u
-#define ATC_VM_APERTURE1_CNTL2				0x331Cu
-#define ATC_VM_APERTURE1_HIGH_ADDR			0x330Cu
-#define ATC_VM_APERTURE1_LOW_ADDR			0x3304u
-
 #endif

+ 5 - 5
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c

@@ -445,7 +445,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 		aw_reg_add_dword /= sizeof(uint32_t);
 
 		packets_vec[0].bitfields2.reg_offset =
-					aw_reg_add_dword - CONFIG_REG_BASE;
+					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 
 		packets_vec[0].reg_data[0] = cntl.u32All;
 
@@ -458,7 +458,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 		aw_reg_add_dword /= sizeof(uint32_t);
 
 		packets_vec[1].bitfields2.reg_offset =
-					aw_reg_add_dword - CONFIG_REG_BASE;
+					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 		packets_vec[1].reg_data[0] = addrHi.u32All;
 
 		aw_reg_add_dword =
@@ -470,7 +470,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 		aw_reg_add_dword /= sizeof(uint32_t);
 
 		packets_vec[2].bitfields2.reg_offset =
-				aw_reg_add_dword - CONFIG_REG_BASE;
+				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 		packets_vec[2].reg_data[0] = addrLo.u32All;
 
 		/* enable watch flag if address is not zero*/
@@ -488,7 +488,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 		aw_reg_add_dword /= sizeof(uint32_t);
 
 		packets_vec[3].bitfields2.reg_offset =
-					aw_reg_add_dword - CONFIG_REG_BASE;
+					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 		packets_vec[3].reg_data[0] = cntl.u32All;
 
 		status = dbgdev_diq_submit_ib(
@@ -690,7 +690,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
 	packets_vec[1].header.type = PM4_TYPE_3;
 	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
-						CONFIG_REG_BASE;
+						AMD_CONFIG_REG_BASE;
 
 	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
 	packets_vec[1].bitfields2.insert_vmid = 1;

+ 3 - 3
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h

@@ -48,9 +48,9 @@ enum {
 
 /* CONFIG reg space definition */
 enum {
-	CONFIG_REG_BASE = 0x2000,	/* in dwords */
-	CONFIG_REG_END = 0x2B00,
-	CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE
+	AMD_CONFIG_REG_BASE = 0x2000,	/* in dwords */
+	AMD_CONFIG_REG_END = 0x2B00,
+	AMD_CONFIG_REG_SIZE = AMD_CONFIG_REG_END - AMD_CONFIG_REG_BASE
 };
 
 /* SH reg space definition */

+ 3 - 0
drivers/gpu/drm/amd/amdkfd/kfd_device.c

@@ -44,7 +44,10 @@ static const struct kfd_device_info kaveri_device_info = {
 static const struct kfd_device_info carrizo_device_info = {
 	.asic_family = CHIP_CARRIZO,
 	.max_pasid_bits = 16,
+	/* max num of queues for CZ.TODO should be a dynamic value */
+	.max_no_of_hqd	= 24,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED
 };

+ 4 - 3
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

@@ -946,7 +946,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
 {
 	int retval;
 	enum kfd_preempt_type_filter preempt_type;
-	struct kfd_process *p;
+	struct kfd_process_device *pdd;
 
 	BUG_ON(!dqm);
 
@@ -981,8 +981,9 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
 	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
 				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
 	if (retval != 0) {
-		p = kfd_get_process(current);
-		p->reset_wavefronts = true;
+		pdd = kfd_get_process_device_data(dqm->dev,
+				kfd_get_process(current));
+		pdd->reset_wavefronts = true;
 		goto out;
 	}
 	pm_release_ib(&dqm->packets);

+ 4 - 0
drivers/gpu/drm/amd/amdkfd/kfd_events.c

@@ -313,6 +313,10 @@ static int create_signal_event(struct file *devkfd,
 			p->signal_event_count, ev->event_id,
 			ev->user_signal_address);
 
+	pr_debug("signal event number %zu created with id %d, address %p\n",
+			p->signal_event_count, ev->event_id,
+			ev->user_signal_address);
+
 	return 0;
 }
 

+ 5 - 5
drivers/gpu/drm/amd/amdkfd/kfd_priv.h

@@ -463,6 +463,11 @@ struct kfd_process_device {
 
 	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
 	bool bound;
+
+	/* This flag tells if we should reset all
+	 * wavefronts on process termination
+	 */
+	bool reset_wavefronts;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -519,11 +524,6 @@ struct kfd_process {
 								event_pages */
 	u32 next_nonsignal_event_id;
 	size_t signal_event_count;
-	/*
-	 * This flag tells if we should reset all wavefronts on
-	 * process termination
-	 */
-	bool reset_wavefronts;
 };
 
 /**

+ 16 - 5
drivers/gpu/drm/amd/amdkfd/kfd_process.c

@@ -173,7 +173,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
 				pdd->dev->id, p->pasid);
 
-		if (p->reset_wavefronts)
+		if (pdd->reset_wavefronts)
 			dbgdev_wave_reset_wavefronts(pdd->dev, p);
 
 		amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
@@ -222,6 +222,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 					struct mm_struct *mm)
 {
 	struct kfd_process *p;
+	struct kfd_process_device *pdd = NULL;
 
 	/*
 	 * The kfd_process structure can not be free because the
@@ -240,6 +241,15 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	/* In case our notifier is called before IOMMU notifier */
 	pqm_uninit(&p->pqm);
 
+	/* Iterate over all process device data structure and check
+	 * if we should reset all wavefronts */
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+		if (pdd->reset_wavefronts) {
+			pr_warn("amdkfd: Resetting all wave fronts\n");
+			dbgdev_wave_reset_wavefronts(pdd->dev, p);
+			pdd->reset_wavefronts = false;
+		}
+
 	mutex_unlock(&p->mutex);
 
 	/*
@@ -305,8 +315,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (kfd_init_apertures(process) != 0)
 		goto err_init_apretures;
 
-	process->reset_wavefronts = false;
-
 	return process;
 
 err_init_apretures:
@@ -348,6 +356,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 		INIT_LIST_HEAD(&pdd->qpd.queues_list);
 		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
 		pdd->qpd.dqm = dev->dqm;
+		pdd->reset_wavefronts = false;
 		list_add(&pdd->per_device_list, &p->per_device_data);
 	}
 
@@ -409,10 +418,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
 		kfd_dbgmgr_destroy(dev->dbgmgr);
 
 	pqm_uninit(&p->pqm);
-	if (p->reset_wavefronts)
-		dbgdev_wave_reset_wavefronts(dev, p);
 
 	pdd = kfd_get_process_device_data(dev, p);
+	if (pdd->reset_wavefronts) {
+		dbgdev_wave_reset_wavefronts(pdd->dev, p);
+		pdd->reset_wavefronts = false;
+	}
 
 	/*
 	 * Just mark pdd as unbound, because we still need it to call