@@ -28,17 +28,19 @@
 #include "i915_drv.h"
 #include "intel_renderstate.h"
 
-struct render_state {
+struct intel_render_state {
 	const struct intel_renderstate_rodata *rodata;
 	struct i915_vma *vma;
-	u32 aux_batch_size;
-	u32 aux_batch_offset;
+	u32 batch_offset;
+	u32 batch_size;
+	u32 aux_offset;
+	u32 aux_size;
 };
 
 static const struct intel_renderstate_rodata *
-render_state_get_rodata(const struct drm_i915_gem_request *req)
+render_state_get_rodata(const struct intel_engine_cs *engine)
 {
-	switch (INTEL_GEN(req->i915)) {
+	switch (INTEL_GEN(engine->i915)) {
 	case 6:
 		return &gen6_null_state;
 	case 7:
@@ -63,29 +65,27 @@ render_state_get_rodata(const struct drm_i915_gem_request *req)
  */
 #define OUT_BATCH(batch, i, val) \
 	do { \
-		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) { \
-			ret = -ENOSPC; \
-			goto err_out; \
-		} \
+		if ((i) >= PAGE_SIZE / sizeof(u32)) \
+			goto err; \
 		(batch)[(i)++] = (val); \
 	} while(0)
 
-static int render_state_setup(struct render_state *so)
+static int render_state_setup(struct intel_render_state *so,
+			      struct drm_i915_private *i915)
 {
-	struct drm_i915_private *dev_priv = to_i915(so->vma->vm->dev);
 	const struct intel_renderstate_rodata *rodata = so->rodata;
-	const bool has_64bit_reloc = INTEL_GEN(dev_priv) >= 8;
+	const bool has_64bit_reloc = INTEL_GEN(i915) >= 8;
+	struct drm_i915_gem_object *obj = so->vma->obj;
 	unsigned int i = 0, reloc_index = 0;
-	struct page *page;
+	unsigned int needs_clflush;
 	u32 *d;
 	int ret;
 
-	ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true);
+	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
-	page = i915_gem_object_get_dirty_page(so->vma->obj, 0);
-	d = kmap(page);
+	d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
 
 	while (i < rodata->batch_items) {
 		u32 s = rodata->batch[i];
@@ -95,10 +95,8 @@ static int render_state_setup(struct render_state *so)
 			s = lower_32_bits(r);
 			if (has_64bit_reloc) {
 				if (i + 1 >= rodata->batch_items ||
-				    rodata->batch[i + 1] != 0) {
-					ret = -EINVAL;
-					goto err_out;
-				}
+				    rodata->batch[i + 1] != 0)
+					goto err;
 
 				d[i++] = s;
 				s = upper_32_bits(r);
@@ -110,12 +108,20 @@ static int render_state_setup(struct render_state *so)
 		d[i++] = s;
 	}
 
+	if (rodata->reloc[reloc_index] != -1) {
+		DRM_ERROR("only %d relocs resolved\n", reloc_index);
+		goto err;
+	}
+
+	so->batch_offset = so->vma->node.start;
+	so->batch_size = rodata->batch_items * sizeof(u32);
+
 	while (i % CACHELINE_DWORDS)
 		OUT_BATCH(d, i, MI_NOOP);
 
-	so->aux_batch_offset = i * sizeof(u32);
+	so->aux_offset = i * sizeof(u32);
 
-	if (HAS_POOLED_EU(dev_priv)) {
+	if (HAS_POOLED_EU(i915)) {
 		/*
 		 * We always program 3x6 pool config but depending upon which
 		 * subslice is disabled HW drops down to appropriate config
@@ -143,89 +149,131 @@ static int render_state_setup(struct render_state *so)
 	}
 
 	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
-	so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
-
+	so->aux_size = i * sizeof(u32) - so->aux_offset;
+	so->aux_offset += so->batch_offset;
 	/*
 	 * Since we are sending length, we need to strictly conform to
 	 * all requirements. For Gen2 this must be a multiple of 8.
 	 */
-	so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
-
-	kunmap(page);
-
-	ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false);
-	if (ret)
-		return ret;
-
-	if (rodata->reloc[reloc_index] != -1) {
-		DRM_ERROR("only %d relocs resolved\n", reloc_index);
-		return -EINVAL;
-	}
+	so->aux_size = ALIGN(so->aux_size, 8);
 
-	return 0;
+	if (needs_clflush)
+		drm_clflush_virt_range(d, i * sizeof(u32));
+	kunmap_atomic(d);
 
-err_out:
-	kunmap(page);
+	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+out:
+	i915_gem_obj_finish_shmem_access(obj);
 	return ret;
+
+err:
+	kunmap_atomic(d);
+	ret = -EINVAL;
+	goto out;
 }
 
 #undef OUT_BATCH
 
-int i915_gem_render_state_init(struct drm_i915_gem_request *req)
+int i915_gem_render_state_init(struct intel_engine_cs *engine)
 {
-	struct render_state so;
+	struct intel_render_state *so;
+	const struct intel_renderstate_rodata *rodata;
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	if (WARN_ON(req->engine->id != RCS))
-		return -ENOENT;
+	if (engine->id != RCS)
+		return 0;
 
-	so.rodata = render_state_get_rodata(req);
-	if (!so.rodata)
+	rodata = render_state_get_rodata(engine);
+	if (!rodata)
 		return 0;
 
-	if (so.rodata->batch_items * 4 > 4096)
+	if (rodata->batch_items * 4 > 4096)
 		return -EINVAL;
 
-	obj = i915_gem_object_create_internal(req->i915, 4096);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+	so = kmalloc(sizeof(*so), GFP_KERNEL);
+	if (!so)
+		return -ENOMEM;
 
-	so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL);
-	if (IS_ERR(so.vma)) {
-		ret = PTR_ERR(so.vma);
-		goto err_obj;
+	obj = i915_gem_object_create_internal(engine->i915, 4096);
+	if (IS_ERR(obj)) {
+		ret = PTR_ERR(obj);
+		goto err_free;
 	}
 
-	ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL);
-	if (ret)
+	so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
+	if (IS_ERR(so->vma)) {
+		ret = PTR_ERR(so->vma);
 		goto err_obj;
+	}
+
+	so->rodata = rodata;
+	engine->render_state = so;
+	return 0;
 
-	ret = render_state_setup(&so);
+err_obj:
+	i915_gem_object_put(obj);
+err_free:
+	kfree(so);
+	return ret;
+}
+
+int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
+{
+	struct intel_render_state *so;
+	int ret;
+
+	so = req->engine->render_state;
+	if (!so)
+		return 0;
+
+	/* Recreate the page after shrinking */
+	if (!so->vma->obj->pages)
+		so->batch_offset = -1;
+
+	ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
-	ret = req->engine->emit_bb_start(req, so.vma->node.start,
-					 so.rodata->batch_items * 4,
+	if (so->vma->node.start != so->batch_offset) {
+		ret = render_state_setup(so, req->i915);
+		if (ret)
+			goto err_unpin;
+	}
+
+	ret = req->engine->emit_bb_start(req,
+					 so->batch_offset, so->batch_size,
 					 I915_DISPATCH_SECURE);
 	if (ret)
 		goto err_unpin;
 
-	if (so.aux_batch_size > 8) {
+	if (so->aux_size > 8) {
 		ret = req->engine->emit_bb_start(req,
-						 (so.vma->node.start +
-						  so.aux_batch_offset),
-						 so.aux_batch_size,
+						 so->aux_offset, so->aux_size,
 						 I915_DISPATCH_SECURE);
 		if (ret)
 			goto err_unpin;
 	}
 
-	i915_vma_move_to_active(so.vma, req, 0);
+	i915_vma_move_to_active(so->vma, req, 0);
 err_unpin:
-	i915_vma_unpin(so.vma);
-	i915_vma_close(so.vma);
-err_obj:
-	__i915_gem_object_release_unless_active(obj);
+	i915_vma_unpin(so->vma);
 	return ret;
 }
+
+void i915_gem_render_state_fini(struct intel_engine_cs *engine)
+{
+	struct intel_render_state *so;
+	struct drm_i915_gem_object *obj;
+
+	so = fetch_and_zero(&engine->render_state);
+	if (!so)
+		return;
+
+	obj = so->vma->obj;
+
+	i915_vma_close(so->vma);
+	__i915_gem_object_release_unless_active(obj);
+
+	kfree(so);
+}
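
For context, a minimal sketch (not part of the patch; the example_* call
sites are illustrative assumptions, not real i915 functions) of how the
three entry points now pair up over an engine's lifetime:
i915_gem_render_state_init() runs once at engine setup and only
allocates, i915_gem_render_state_emit() lazily writes the batch via
render_state_setup() once the vma has been pinned at a GGTT address,
and i915_gem_render_state_fini() releases everything at teardown.

	/* Hypothetical call sites, for illustration only. */
	static int example_engine_setup(struct intel_engine_cs *engine)
	{
		/* Allocates so/obj/vma; nothing is pinned or written yet. */
		return i915_gem_render_state_init(engine);
	}

	static int example_first_request(struct drm_i915_gem_request *req)
	{
		/*
		 * No-op when engine->render_state is NULL (non-render
		 * engines, or gens without a null state). Rewrites the
		 * batch only if the vma moved since the last emit.
		 */
		return i915_gem_render_state_emit(req);
	}

	static void example_engine_teardown(struct intel_engine_cs *engine)
	{
		/* Safe even if init was skipped: fetch_and_zero() sees NULL. */
		i915_gem_render_state_fini(engine);
	}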