@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+	unsigned long t;
+
+	/* Cheaply and approximately convert from nanoseconds to microseconds.
+	 * The result and subsequent calculations are also defined in the same
+	 * approximate microseconds units. The principal source of timing
+	 * error here is from the simple truncation.
+	 *
+	 * Note that local_clock() is only defined wrt to the current CPU;
+	 * the comparisons are no longer valid if we switch CPUs. Instead of
+	 * blocking preemption for the entire busywait, we can detect the CPU
+	 * switch and use that as indicator of system load and a reason to
+	 * stop busywaiting, see busywait_stop().
+	 */
+	*cpu = get_cpu();
+	t = local_clock() >> 10;
+	put_cpu();
+
+	return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+	unsigned this_cpu;
+
+	if (time_after(local_clock_us(&this_cpu), timeout))
+		return true;
+
+	return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 {
 	unsigned long timeout;
+	unsigned cpu;
+
+	/* When waiting for high frequency requests, e.g. during synchronous
+	 * rendering split between the CPU and GPU, the finite amount of time
+	 * required to set up the irq and wait upon it limits the response
+	 * rate. By busywaiting on the request completion for a short while we
+	 * can service the high frequency waits as quick as possible. However,
+	 * if it is a slow request, we want to sleep as quickly as possible.
+	 * The tradeoff between waiting and sleeping is roughly the time it
+	 * takes to sleep on a request, on the order of a microsecond.
+	 */
 
-	if (i915_gem_request_get_ring(req)->irq_refcount)
+	if (req->ring->irq_refcount)
 		return -EBUSY;
 
-	timeout = jiffies + 1;
+	/* Only spin if we know the GPU is processing this request */
+	if (!i915_gem_request_started(req, true))
+		return -EAGAIN;
+
+	timeout = local_clock_us(&cpu) + 5;
 	while (!need_resched()) {
 		if (i915_gem_request_completed(req, true))
 			return 0;
 
-		if (time_after_eq(jiffies, timeout))
+		if (signal_pending_state(state, current))
+			break;
+
+		if (busywait_stop(timeout, cpu))
 			break;
 
 		cpu_relax_lowlatency();
 	}
+
 	if (i915_gem_request_completed(req, false))
 		return 0;
 
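The hunk above is the heart of the change: instead of optimistically spinning for up to a whole jiffy (10ms at HZ=100), the busywait is now bounded to roughly 5us of wall time measured via local_clock(), and it bails out early if the waiter needs to reschedule, has a signal pending, or is migrated to another CPU (a hint that the system is loaded). A minimal userspace sketch of the same bounded-spin pattern, assuming clock_gettime()/sched_getcpu() as stand-ins for local_clock() and get_cpu(); the helper names are illustrative, not part of the patch:

#define _GNU_SOURCE		/* for sched_getcpu() */
#include <sched.h>
#include <stdbool.h>
#include <time.h>

/* Stand-in for local_clock() >> 10: cheap, truncating ns -> ~us conversion,
 * also reporting which CPU we sampled the clock on. */
static unsigned long long clock_us(int *cpu)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	*cpu = sched_getcpu();
	return ((unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec) >> 10;
}

/* Spin on done() for at most ~budget_us microseconds, giving up early if we
 * get bounced to another CPU, mirroring busywait_stop() above. */
static bool spin_wait_us(bool (*done)(void *), void *arg,
			 unsigned long long budget_us)
{
	int cpu, this_cpu;
	unsigned long long timeout = clock_us(&cpu) + budget_us;

	do {
		if (done(arg))
			return true;
	} while (clock_us(&this_cpu) <= timeout && this_cpu == cpu);

	return done(arg);	/* final check before falling back to sleeping */
}

/* Toy usage: the "work" is already complete, so the spin returns at once. */
static bool flag_set(void *arg)
{
	return *(int *)arg != 0;
}

int main(void)
{
	int flag = 1;

	return spin_wait_us(flag_set, &flag, 5) ? 0 : 1;
}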
@@ -1197,6 +1248,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
 	s64 before, now;
@@ -1229,7 +1281,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	before = ktime_get_raw_ns();
 
 	/* Optimistic spin for the next jiffie before touching IRQs */
-	ret = __i915_spin_request(req);
+	ret = __i915_spin_request(req, state);
 	if (ret == 0)
 		goto out;
 
@@ -1241,8 +1293,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	for (;;) {
 		struct timer_list timer;
 
-		prepare_to_wait(&ring->irq_queue, &wait,
-				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(&ring->irq_queue, &wait, state);
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
@@ -1260,7 +1311,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 			break;
 		}
 
-		if (interruptible && signal_pending(current)) {
+		if (signal_pending_state(state, current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
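Threading the sleep state through to __i915_spin_request() means the spin loop honours signals with exactly the same policy as the prepare_to_wait() sleep below it. For reference, signal_pending_state() reduces to roughly the following (paraphrased from include/linux/sched.h of this era, not something this patch adds; check your tree for the exact definition):

/* Paraphrase of signal_pending_state() for reference only. */
static inline int signal_pending_state(long state, struct task_struct *p)
{
	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
		return 0;	/* TASK_UNINTERRUPTIBLE: signals are ignored */
	if (!signal_pending(p))
		return 0;

	/* interruptible waits break on any signal, killable only on fatal */
	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}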
@@ -2554,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	request->batch_obj = obj;
 
 	request->emitted_jiffies = jiffies;
+	request->previous_seqno = ring->last_submitted_seqno;
 	ring->last_submitted_seqno = request->seqno;
 	list_add_tail(&request->list, &ring->request_list);
 
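Recording previous_seqno at submission is what makes the "only spin if the GPU has actually started this request" check in the first hunk workable: a request has started once the hardware seqno has advanced past everything queued before it, i.e. past request->previous_seqno. The comparison must tolerate 32-bit seqno wraparound, which the driver handles with signed arithmetic; a self-contained sketch of that style of check (helper names are illustrative, not the driver functions):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Wrap-safe "has the hardware seqno passed the wanted seqno?" check,
 * in the style of i915_seqno_passed(). */
static bool seqno_passed(uint32_t hw_seqno, uint32_t wanted)
{
	return (int32_t)(hw_seqno - wanted) >= 0;
}

/* A request has started once the hardware has moved past everything
 * submitted before it, i.e. past its recorded previous_seqno. */
static bool request_started(uint32_t hw_seqno, uint32_t previous_seqno)
{
	return seqno_passed(hw_seqno, previous_seqno);
}

int main(void)
{
	assert(request_started(10, 9));			/* hw already past previous */
	assert(!request_started(8, 9));			/* not yet */
	assert(request_started(2, 0xfffffffeu));	/* still correct across wraparound */
	return 0;
}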
@@ -4080,6 +4132,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	return false;
 }
 
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	bool mappable, fenceable;
+	u32 fence_size, fence_alignment;
+
+	fence_size = i915_gem_get_gtt_size(obj->base.dev,
+					   obj->base.size,
+					   obj->tiling_mode);
+	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+						     obj->base.size,
+						     obj->tiling_mode,
+						     true);
+
+	fenceable = (vma->node.size == fence_size &&
+		     (vma->node.start & (fence_alignment - 1)) == 0);
+
+	mappable = (vma->node.start + fence_size <=
+		    to_i915(obj->base.dev)->gtt.mappable_end);
+
+	obj->map_and_fenceable = mappable && fenceable;
+}
+
 static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
@@ -4147,25 +4222,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 	    (bound ^ vma->bound) & GLOBAL_BIND) {
-		bool mappable, fenceable;
-		u32 fence_size, fence_alignment;
-
-		fence_size = i915_gem_get_gtt_size(obj->base.dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-
-		fenceable = (vma->node.size == fence_size &&
-			     (vma->node.start & (fence_alignment - 1)) == 0);
-
-		mappable = (vma->node.start + fence_size <=
-			    dev_priv->gtt.mappable_end);
-
-		obj->map_and_fenceable = mappable && fenceable;
-
+		__i915_vma_set_map_and_fenceable(vma);
 		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 	}
 
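The last two hunks are a straight refactor: the open-coded mappable/fenceable computation moves into __i915_vma_set_map_and_fenceable() so it can be reused by other callers, with the GTT now reached via to_i915(obj->base.dev) instead of the local dev_priv. The fenceable test leans on fence_alignment being a power of two, so masking with (fence_alignment - 1) exposes any misaligned low bits; a small self-contained illustration of that idiom (example values only, not taken from the patch):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Power-of-two alignment check as used above: a value is aligned iff none of
 * the low bits below the alignment are set. */
static bool is_aligned(uint64_t start, uint64_t alignment)
{
	assert(alignment && (alignment & (alignment - 1)) == 0);
	return (start & (alignment - 1)) == 0;
}

int main(void)
{
	assert(is_aligned(0x00100000, 0x1000));		/* 1MiB offset, 4KiB alignment */
	assert(!is_aligned(0x00100200, 0x1000));	/* misaligned by 0x200 */
	return 0;
}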