|
@@ -0,0 +1,425 @@
|
|
|
+/*
|
|
|
+ * Copyright 2016 Advanced Micro Devices, Inc.
|
|
|
+ * All Rights Reserved.
|
|
|
+ *
|
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
+ * copy of this software and associated documentation files (the
|
|
|
+ * "Software"), to deal in the Software without restriction, including
|
|
|
+ * without limitation the rights to use, copy, modify, merge, publish,
|
|
|
+ * distribute, sub license, and/or sell copies of the Software, and to
|
|
|
+ * permit persons to whom the Software is furnished to do so, subject to
|
|
|
+ * the following conditions:
|
|
|
+ *
|
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
|
|
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
|
|
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
|
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
|
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
+ *
|
|
|
+ * The above copyright notice and this permission notice (including the
|
|
|
+ * next paragraph) shall be included in all copies or substantial portions
|
|
|
+ * of the Software.
|
|
|
+ *
|
|
|
+ */
|
|
|
+
|
|
|
+#include <linux/firmware.h>
|
|
|
+#include <linux/module.h>
|
|
|
+#include <drm/drmP.h>
|
|
|
+#include <drm/drm.h>
|
|
|
+
|
|
|
+#include "amdgpu.h"
|
|
|
+#include "amdgpu_pm.h"
|
|
|
+#include "amdgpu_vcn.h"
|
|
|
+#include "soc15d.h"
|
|
|
+#include "soc15_common.h"
|
|
|
+
|
|
|
+#include "vega10/soc15ip.h"
|
|
|
+#include "raven1/VCN/vcn_1_0_offset.h"
|
|
|
+
|
|
|
+/* 1 second timeout */
|
|
|
+#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
|
|
|
+
|
|
|
+/* Firmware Names */
|
|
|
+#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
|
|
|
+
|
|
|
+MODULE_FIRMWARE(FIRMWARE_RAVEN);
|
|
|
+
|
|
|
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
|
|
|
+
|
|
|
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
|
|
|
+{
|
|
|
+ struct amdgpu_ring *ring;
|
|
|
+ struct amd_sched_rq *rq;
|
|
|
+ unsigned long bo_size;
|
|
|
+ const char *fw_name;
|
|
|
+ const struct common_firmware_header *hdr;
|
|
|
+ unsigned version_major, version_minor, family_id;
|
|
|
+ int r;
|
|
|
+
|
|
|
+ INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
|
|
|
+
|
|
|
+ switch (adev->asic_type) {
|
|
|
+ case CHIP_RAVEN:
|
|
|
+ fw_name = FIRMWARE_RAVEN;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ return -EINVAL;
|
|
|
+ }
|
|
|
+
|
|
|
+ r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
|
|
|
+ if (r) {
|
|
|
+ dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
|
|
|
+ fw_name);
|
|
|
+ return r;
|
|
|
+ }
|
|
|
+
|
|
|
+ r = amdgpu_ucode_validate(adev->vcn.fw);
|
|
|
+ if (r) {
|
|
|
+ dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
|
|
|
+ fw_name);
|
|
|
+ release_firmware(adev->vcn.fw);
|
|
|
+ adev->vcn.fw = NULL;
|
|
|
+ return r;
|
|
|
+ }
|
|
|
+
|
|
|
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
|
|
|
+ family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
|
|
|
+ version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
|
|
|
+ version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
|
|
|
+ DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
|
|
|
+ version_major, version_minor, family_id);
|
|
|
+
|
|
|
+
|
|
|
+ bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
|
|
|
+ + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
|
|
|
+ + AMDGPU_VCN_SESSION_SIZE * 40;
|
|
|
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
|
|
|
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
|
|
|
+ &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
|
|
|
+ if (r) {
|
|
|
+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
|
|
|
+ return r;
|
|
|
+ }
|
|
|
+
|
|
|
+ ring = &adev->vcn.ring_dec;
|
|
|
+ rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
|
|
|
+ r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
|
|
|
+ rq, amdgpu_sched_jobs);
|
|
|
+ if (r != 0) {
|
|
|
+ DRM_ERROR("Failed setting up VCN dec run queue.\n");
|
|
|
+ return r;
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
|
|
|
+{
|
|
|
+ kfree(adev->vcn.saved_bo);
|
|
|
+
|
|
|
+ amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
|
|
|
+
|
|
|
+ amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
|
|
|
+ &adev->vcn.gpu_addr,
|
|
|
+ (void **)&adev->vcn.cpu_addr);
|
|
|
+
|
|
|
+ amdgpu_ring_fini(&adev->vcn.ring_dec);
|
|
|
+
|
|
|
+ release_firmware(adev->vcn.fw);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+int amdgpu_vcn_suspend(struct amdgpu_device *adev)
|
|
|
+{
|
|
|
+ unsigned size;
|
|
|
+ void *ptr;
|
|
|
+
|
|
|
+ if (adev->vcn.vcpu_bo == NULL)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
|
|
|
+
|
|
|
+ size = amdgpu_bo_size(adev->vcn.vcpu_bo);
|
|
|
+ ptr = adev->vcn.cpu_addr;
|
|
|
+
|
|
|
+ adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
|
|
|
+ if (!adev->vcn.saved_bo)
|
|
|
+ return -ENOMEM;
|
|
|
+
|
|
|
+ memcpy_fromio(adev->vcn.saved_bo, ptr, size);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+int amdgpu_vcn_resume(struct amdgpu_device *adev)
|
|
|
+{
|
|
|
+ unsigned size;
|
|
|
+ void *ptr;
|
|
|
+
|
|
|
+ if (adev->vcn.vcpu_bo == NULL)
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ size = amdgpu_bo_size(adev->vcn.vcpu_bo);
|
|
|
+ ptr = adev->vcn.cpu_addr;
|
|
|
+
|
|
|
+ if (adev->vcn.saved_bo != NULL) {
|
|
|
+ memcpy_toio(ptr, adev->vcn.saved_bo, size);
|
|
|
+ kfree(adev->vcn.saved_bo);
|
|
|
+ adev->vcn.saved_bo = NULL;
|
|
|
+ } else {
|
|
|
+ const struct common_firmware_header *hdr;
|
|
|
+ unsigned offset;
|
|
|
+
|
|
|
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
|
|
|
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
|
|
|
+ memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
|
|
|
+ le32_to_cpu(hdr->ucode_size_bytes));
|
|
|
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
|
|
|
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
|
|
|
+ memset_io(ptr, 0, size);
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
|
|
|
+ bool direct, struct dma_fence **fence)
|
|
|
+{
|
|
|
+ struct ttm_validate_buffer tv;
|
|
|
+ struct ww_acquire_ctx ticket;
|
|
|
+ struct list_head head;
|
|
|
+ struct amdgpu_job *job;
|
|
|
+ struct amdgpu_ib *ib;
|
|
|
+ struct dma_fence *f = NULL;
|
|
|
+ struct amdgpu_device *adev = ring->adev;
|
|
|
+ uint64_t addr;
|
|
|
+ int i, r;
|
|
|
+
|
|
|
+ memset(&tv, 0, sizeof(tv));
|
|
|
+ tv.bo = &bo->tbo;
|
|
|
+
|
|
|
+ INIT_LIST_HEAD(&head);
|
|
|
+ list_add(&tv.head, &head);
|
|
|
+
|
|
|
+ r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
|
|
|
+ if (r)
|
|
|
+ return r;
|
|
|
+
|
|
|
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
|
|
|
+ if (r)
|
|
|
+ goto err;
|
|
|
+
|
|
|
+ r = amdgpu_job_alloc_with_ib(adev, 64, &job);
|
|
|
+ if (r)
|
|
|
+ goto err;
|
|
|
+
|
|
|
+ ib = &job->ibs[0];
|
|
|
+ addr = amdgpu_bo_gpu_offset(bo);
|
|
|
+ ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
|
|
|
+ ib->ptr[1] = addr;
|
|
|
+ ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
|
|
|
+ ib->ptr[3] = addr >> 32;
|
|
|
+ ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
|
|
|
+ ib->ptr[5] = 0;
|
|
|
+ for (i = 6; i < 16; i += 2) {
|
|
|
+ ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
|
|
|
+ ib->ptr[i+1] = 0;
|
|
|
+ }
|
|
|
+ ib->length_dw = 16;
|
|
|
+
|
|
|
+ if (direct) {
|
|
|
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
|
|
|
+ job->fence = dma_fence_get(f);
|
|
|
+ if (r)
|
|
|
+ goto err_free;
|
|
|
+
|
|
|
+ amdgpu_job_free(job);
|
|
|
+ } else {
|
|
|
+ r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
|
|
|
+ AMDGPU_FENCE_OWNER_UNDEFINED, &f);
|
|
|
+ if (r)
|
|
|
+ goto err_free;
|
|
|
+ }
|
|
|
+
|
|
|
+ ttm_eu_fence_buffer_objects(&ticket, &head, f);
|
|
|
+
|
|
|
+ if (fence)
|
|
|
+ *fence = dma_fence_get(f);
|
|
|
+ amdgpu_bo_unref(&bo);
|
|
|
+ dma_fence_put(f);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+
|
|
|
+err_free:
|
|
|
+ amdgpu_job_free(job);
|
|
|
+
|
|
|
+err:
|
|
|
+ ttm_eu_backoff_reservation(&ticket, &head);
|
|
|
+ return r;
|
|
|
+}
|
|
|
+
|
|
|
+static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
|
|
|
+ struct dma_fence **fence)
|
|
|
+{
|
|
|
+ struct amdgpu_device *adev = ring->adev;
|
|
|
+ struct amdgpu_bo *bo;
|
|
|
+ uint32_t *msg;
|
|
|
+ int r, i;
|
|
|
+
|
|
|
+ r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
|
|
|
+ AMDGPU_GEM_DOMAIN_VRAM,
|
|
|
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
|
|
|
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
|
|
|
+ NULL, NULL, &bo);
|
|
|
+ if (r)
|
|
|
+ return r;
|
|
|
+
|
|
|
+ r = amdgpu_bo_reserve(bo, false);
|
|
|
+ if (r) {
|
|
|
+ amdgpu_bo_unref(&bo);
|
|
|
+ return r;
|
|
|
+ }
|
|
|
+
|
|
|
+ r = amdgpu_bo_kmap(bo, (void **)&msg);
|
|
|
+ if (r) {
|
|
|
+ amdgpu_bo_unreserve(bo);
|
|
|
+ amdgpu_bo_unref(&bo);
|
|
|
+ return r;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* stitch together an vcn create msg */
|
|
|
+ msg[0] = cpu_to_le32(0x00000de4);
|
|
|
+ msg[1] = cpu_to_le32(0x00000000);
|
|
|
+ msg[2] = cpu_to_le32(handle);
|
|
|
+ msg[3] = cpu_to_le32(0x00000000);
|
|
|
+ msg[4] = cpu_to_le32(0x00000000);
|
|
|
+ msg[5] = cpu_to_le32(0x00000000);
|
|
|
+ msg[6] = cpu_to_le32(0x00000000);
|
|
|
+ msg[7] = cpu_to_le32(0x00000780);
|
|
|
+ msg[8] = cpu_to_le32(0x00000440);
|
|
|
+ msg[9] = cpu_to_le32(0x00000000);
|
|
|
+ msg[10] = cpu_to_le32(0x01b37000);
|
|
|
+ for (i = 11; i < 1024; ++i)
|
|
|
+ msg[i] = cpu_to_le32(0x0);
|
|
|
+
|
|
|
+ amdgpu_bo_kunmap(bo);
|
|
|
+ amdgpu_bo_unreserve(bo);
|
|
|
+
|
|
|
+ return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
|
|
|
+}
|
|
|
+
|
|
|
+static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
|
|
|
+ bool direct, struct dma_fence **fence)
|
|
|
+{
|
|
|
+ struct amdgpu_device *adev = ring->adev;
|
|
|
+ struct amdgpu_bo *bo;
|
|
|
+ uint32_t *msg;
|
|
|
+ int r, i;
|
|
|
+
|
|
|
+ r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
|
|
|
+ AMDGPU_GEM_DOMAIN_VRAM,
|
|
|
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
|
|
|
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
|
|
|
+ NULL, NULL, &bo);
|
|
|
+ if (r)
|
|
|
+ return r;
|
|
|
+
|
|
|
+ r = amdgpu_bo_reserve(bo, false);
|
|
|
+ if (r) {
|
|
|
+ amdgpu_bo_unref(&bo);
|
|
|
+ return r;
|
|
|
+ }
|
|
|
+
|
|
|
+ r = amdgpu_bo_kmap(bo, (void **)&msg);
|
|
|
+ if (r) {
|
|
|
+ amdgpu_bo_unreserve(bo);
|
|
|
+ amdgpu_bo_unref(&bo);
|
|
|
+ return r;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* stitch together an vcn destroy msg */
|
|
|
+ msg[0] = cpu_to_le32(0x00000de4);
|
|
|
+ msg[1] = cpu_to_le32(0x00000002);
|
|
|
+ msg[2] = cpu_to_le32(handle);
|
|
|
+ msg[3] = cpu_to_le32(0x00000000);
|
|
|
+ for (i = 4; i < 1024; ++i)
|
|
|
+ msg[i] = cpu_to_le32(0x0);
|
|
|
+
|
|
|
+ amdgpu_bo_kunmap(bo);
|
|
|
+ amdgpu_bo_unreserve(bo);
|
|
|
+
|
|
|
+ return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
|
|
|
+}
|
|
|
+
|
|
|
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
|
|
|
+{
|
|
|
+ struct amdgpu_device *adev =
|
|
|
+ container_of(work, struct amdgpu_device, vcn.idle_work.work);
|
|
|
+ unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
|
|
|
+
|
|
|
+ if (fences == 0) {
|
|
|
+ if (adev->pm.dpm_enabled) {
|
|
|
+ amdgpu_dpm_enable_uvd(adev, false);
|
|
|
+ } else {
|
|
|
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
|
|
|
+{
|
|
|
+ struct amdgpu_device *adev = ring->adev;
|
|
|
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
|
|
|
+
|
|
|
+ if (set_clocks) {
|
|
|
+ if (adev->pm.dpm_enabled) {
|
|
|
+ amdgpu_dpm_enable_uvd(adev, true);
|
|
|
+ } else {
|
|
|
+ amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
|
|
|
+{
|
|
|
+ schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
|
|
|
+}
|
|
|
+
|
|
|
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|
|
+{
|
|
|
+ struct dma_fence *fence;
|
|
|
+ long r;
|
|
|
+
|
|
|
+ r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
|
|
|
+ if (r) {
|
|
|
+ DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
|
|
|
+ goto error;
|
|
|
+ }
|
|
|
+
|
|
|
+ r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
|
|
|
+ if (r) {
|
|
|
+ DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
|
|
|
+ goto error;
|
|
|
+ }
|
|
|
+
|
|
|
+ r = dma_fence_wait_timeout(fence, false, timeout);
|
|
|
+ if (r == 0) {
|
|
|
+ DRM_ERROR("amdgpu: IB test timed out.\n");
|
|
|
+ r = -ETIMEDOUT;
|
|
|
+ } else if (r < 0) {
|
|
|
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
|
|
+ } else {
|
|
|
+ DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
|
|
|
+ r = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ dma_fence_put(fence);
|
|
|
+
|
|
|
+error:
|
|
|
+ return r;
|
|
|
+}
|