8 жил өмнө · 4a525bad68
--- a/Documentation/gpu/index.rst
+++ b/Documentation/gpu/index.rst
@@ -13,6 +13,7 @@ Linux GPU Driver Developer's Guide
 
				    i915
			
 
				    meson
			
 
				    pl111
			
 
				+   tegra
			
 
				    tinydrm
			
 
				    vc4
			
 
				    vga-switcheroo
			
--- a/Documentation/gpu/tegra.rst
+++ b/Documentation/gpu/tegra.rst
@@ -0,0 +1,178 @@
 
				+===============================================
			
 
				+ drm/tegra NVIDIA Tegra GPU and display driver
			
 
				+===============================================
			
 
				+
			
 
				+NVIDIA Tegra SoCs support a set of display, graphics and video functions via
			
 
				+the host1x controller. host1x supplies command streams, gathered from a push
			
 
				+buffer provided directly by the CPU, to its clients via channels. Software,
			
 
				+or blocks amongst themselves, can use syncpoints for synchronization.
			
 
				+
			
 
				+Up until, but not including, Tegra124 (aka Tegra K1) the drm/tegra driver
			
 
				+supports the built-in GPU, comprised of the gr2d and gr3d engines. Starting
			
 
				+with Tegra124 the GPU is based on the NVIDIA desktop GPU architecture and
			
 
				+supported by the drm/nouveau driver.
			
 
				+
			
 
				+The drm/tegra driver supports NVIDIA Tegra SoC generations since Tegra20. It
			
 
				+has three parts:
			
 
				+
			
 
				+  - A host1x driver that provides infrastructure and access to the host1x
			
 
				+    services.
			
 
				+
			
 
				+  - A KMS driver that supports the display controllers as well as a number of
			
 
				+    outputs, such as RGB, HDMI, DSI, and DisplayPort.
			
 
				+
			
 
				+  - A set of custom userspace IOCTLs that can be used to submit jobs to the
			
 
				+    GPU and video engines via host1x.
			
 
				+
			
 
				+Driver Infrastructure
			
 
				+=====================
			
 
				+
			
 
				+The various host1x clients need to be bound together into a logical device in
			
 
				+order to expose their functionality to users. The infrastructure that supports
			
 
				+this is implemented in the host1x driver. When a driver is registered with the
			
 
				+infrastructure it provides a list of compatible strings specifying the devices
			
 
				+that it needs. The infrastructure creates a logical device and scans the device
			
 
				+tree for matching device nodes, adding the required clients to a list. Drivers
			
 
				+for individual clients register with the infrastructure as well and are added
			
 
				+to the logical host1x device.
			
 
				+
			
 
				+Once all clients are available, the infrastructure will initialize the logical
			
 
				+device using a driver-provided function which will set up the bits specific to
			
 
				+the subsystem and in turn initialize each of its clients.
			
 
				+
			
 
				+Similarly, when one of the clients is unregistered, the infrastructure will
			
 
				+destroy the logical device by calling back into the driver, which ensures that
			
 
				+the subsystem specific bits are torn down and the clients destroyed in turn.
			
 
				+
			
 
				+Host1x Infrastructure Reference
			
 
				+-------------------------------
			
 
				+
			
 
				+.. kernel-doc:: include/linux/host1x.h
			
 
				+
			
 
				+.. kernel-doc:: drivers/gpu/host1x/bus.c
			
 
				+   :export:
			
 
				+
			
 
				+Host1x Syncpoint Reference
			
 
				+--------------------------
			
 
				+
			
 
				+.. kernel-doc:: drivers/gpu/host1x/syncpt.c
			
 
				+   :export:
			
 
				+
			
 
				+KMS driver
			
 
				+==========
			
 
				+
			
 
				+The display hardware has remained mostly backwards compatible over the various
			
 
				+Tegra SoC generations, up until Tegra186 which introduces several changes that
			
 
				+make it difficult to support with a parameterized driver.
			
 
				+
			
 
				+Display Controllers
			
 
				+-------------------
			
 
				+
			
 
				+Tegra SoCs have two display controllers, each of which can be associated with
			
 
				+zero or more outputs. Outputs can also share a single display controller, but
			
 
				+only if they run with compatible display timings. Two display controllers can
			
 
				+also share a single framebuffer, allowing cloned configurations even if modes
			
 
				+on two outputs don't match. A display controller is modelled as a CRTC in KMS
			
 
				+terms.
			
 
				+
			
 
				+On Tegra186, the number of display controllers has been increased to three. A
			
 
				+display controller can no longer drive all of the outputs. While two of these
			
 
				+controllers can drive both DSI outputs and both SOR outputs, the third cannot
			
 
				+drive any DSI.
			
 
				+
			
 
				+Windows
			
 
				+~~~~~~~
			
 
				+
			
 
				+A display controller controls a set of windows that can be used to composite
			
 
				+multiple buffers onto the screen. While it is possible to assign arbitrary Z
			
 
				+ordering to individual windows (by programming the corresponding blending
			
 
				+registers), this is currently not supported by the driver. Instead, it will
			
 
				+assume a fixed Z ordering of the windows (window A is the root window, that
			
 
				+is, the lowest, while windows B and C are overlaid on top of window A). The
			
 
				+overlay windows support multiple pixel formats and can automatically convert
			
 
				+from YUV to RGB at scanout time. This makes them useful for displaying video
			
 
				+content. In KMS, each window is modelled as a plane. Each display controller
			
 
				+has a hardware cursor that is exposed as a cursor plane.
			
 
				+
			
 
				+Outputs
			
 
				+-------
			
 
				+
			
 
				+The type and number of supported outputs varies between Tegra SoC generations.
			
 
				+All generations support at least HDMI. While earlier generations supported the
			
 
				+very simple RGB interfaces (one per display controller), recent generations no
			
 
				+longer do and instead provide standard interfaces such as DSI and eDP/DP.
			
 
				+
			
 
				+Outputs are modelled as a composite encoder/connector pair.
			
 
				+
			
 
				+RGB/LVDS
			
 
				+~~~~~~~~
			
 
				+
			
 
				+This interface is no longer available since Tegra124. It has been replaced by
			
 
				+the more standard DSI and eDP interfaces.
			
 
				+
			
 
				+HDMI
			
 
				+~~~~
			
 
				+
			
 
				+HDMI is supported on all Tegra SoCs. Starting with Tegra210, HDMI is provided
			
 
				+by the versatile SOR output, which supports eDP, DP and HDMI. The SOR is able
			
 
				+to support HDMI 2.0, though support for this is currently not merged.
			
 
				+
			
 
				+DSI
			
 
				+~~~
			
 
				+
			
 
				+Although Tegra has supported DSI since Tegra30, the controller has changed in
			
 
				+several ways in Tegra114. Since none of the publicly available development
			
 
				+boards prior to Dalmore (Tegra114) have made use of DSI, only Tegra114 and
			
 
				+later are supported by the drm/tegra driver.
			
 
				+
			
 
				+eDP/DP
			
 
				+~~~~~~
			
 
				+
			
 
				+eDP was first introduced in Tegra124 where it was used to drive the display
			
 
				+panel for notebook form factors. Tegra210 added full DisplayPort
			
 
				+support, though this is currently not implemented in the drm/tegra driver.
			
 
				+
			
 
				+Userspace Interface
			
 
				+===================
			
 
				+
			
 
				+The userspace interface provided by drm/tegra allows applications to create
			
 
				+GEM buffers, access and control syncpoints as well as submit command streams
			
 
				+to host1x.
			
 
				+
			
 
				+GEM Buffers
			
 
				+-----------
			
 
				+
			
 
				+The ``DRM_IOCTL_TEGRA_GEM_CREATE`` IOCTL is used to create a GEM buffer object
			
 
				+with Tegra-specific flags. This is useful for buffers that should be tiled, or
			
 
				+that are to be scanned out upside down (useful for 3D content).
			
 
				+
			
 
				+After a GEM buffer object has been created, its memory can be mapped by an
			
 
				+application using the mmap offset returned by the ``DRM_IOCTL_TEGRA_GEM_MMAP``
			
 
				+IOCTL.
			
 
				+
			
 
				+Syncpoints
			
 
				+----------
			
 
				+
			
 
				+The current value of a syncpoint can be obtained by executing the
			
 
				+``DRM_IOCTL_TEGRA_SYNCPT_READ`` IOCTL. Incrementing the syncpoint is achieved
			
 
				+using the ``DRM_IOCTL_TEGRA_SYNCPT_INCR`` IOCTL.
			
 
				+
			
 
				+Userspace can also request blocking on a syncpoint. To do so, it needs to
			
 
				+execute the ``DRM_IOCTL_TEGRA_SYNCPT_WAIT`` IOCTL, specifying the value of
			
 
				+the syncpoint to wait for. The kernel will release the application when the
			
 
				+syncpoint reaches that value or after a specified timeout.
			
 
				+
			
 
				+Command Stream Submission
			
 
				+-------------------------
			
 
				+
			
 
				+Before an application can submit command streams to host1x it needs to open a
			
 
				+channel to an engine using the ``DRM_IOCTL_TEGRA_OPEN_CHANNEL`` IOCTL. Client
			
 
				+IDs are used to identify the target of the channel. When a channel is no
			
 
				+longer needed, it can be closed using the ``DRM_IOCTL_TEGRA_CLOSE_CHANNEL``
			
 
				+IOCTL. To retrieve the syncpoint associated with a channel, an application
			
 
				+can use the ``DRM_IOCTL_TEGRA_GET_SYNCPT`` IOCTL.
			
 
				+
			
 
				+After opening a channel, submitting command streams is easy. The application
			
 
				+writes commands into the memory backing a GEM buffer object and passes these
			
 
				+to the ``DRM_IOCTL_TEGRA_SUBMIT`` IOCTL along with various other parameters,
			
 
				+such as the syncpoints or relocations used in the job submission.
			
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -30,6 +30,7 @@ struct tegra_dc_soc_info {
 
				 	bool supports_block_linear;
			
 
				 	unsigned int pitch_align;
			
 
				 	bool has_powergate;
			
 
				+	bool broken_reset;
			
 
				 };
			
 
				 
			
 
				 struct tegra_plane {
			
@@ -485,12 +486,25 @@ static int tegra_plane_state_add(struct tegra_plane *plane,
 
				 {
			
 
				 	struct drm_crtc_state *crtc_state;
			
 
				 	struct tegra_dc_state *tegra;
			
 
				+	struct drm_rect clip;
			
 
				+	int err;
			
 
				 
			
 
				 	/* Propagate errors from allocation or locking failures. */
			
 
				 	crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc);
			
 
				 	if (IS_ERR(crtc_state))
			
 
				 		return PTR_ERR(crtc_state);
			
 
				 
			
 
				+	clip.x1 = 0;
			
 
				+	clip.y1 = 0;
			
 
				+	clip.x2 = crtc_state->mode.hdisplay;
			
 
				+	clip.y2 = crtc_state->mode.vdisplay;
			
 
				+
			
 
				+	/* Check plane state for visibility and calculate clipping bounds */
			
 
				+	err = drm_plane_helper_check_state(state, &clip, 0, INT_MAX,
			
 
				+					   true, true);
			
 
				+	if (err < 0)
			
 
				+		return err;
			
 
				+
			
 
				 	tegra = to_dc_state(crtc_state);
			
 
				 
			
 
				 	tegra->planes |= WIN_A_ACT_REQ << plane->index;
			
@@ -545,6 +559,23 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static void tegra_dc_disable_window(struct tegra_dc *dc, int index)
			
 
				+{
			
 
				+	unsigned long flags;
			
 
				+	u32 value;
			
 
				+
			
 
				+	spin_lock_irqsave(&dc->lock, flags);
			
 
				+
			
 
				+	value = WINDOW_A_SELECT << index;
			
 
				+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
			
 
				+
			
 
				+	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
			
 
				+	value &= ~WIN_ENABLE;
			
 
				+	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
			
 
				+
			
 
				+	spin_unlock_irqrestore(&dc->lock, flags);
			
 
				+}
			
 
				+
			
 
				 static void tegra_plane_atomic_update(struct drm_plane *plane,
			
 
				 				      struct drm_plane_state *old_state)
			
 
				 {
			
@@ -559,15 +590,18 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 
				 	if (!plane->state->crtc || !plane->state->fb)
			
 
				 		return;
			
 
				 
			
 
				+	if (!plane->state->visible)
			
 
				+		return tegra_dc_disable_window(dc, p->index);
			
 
				+
			
 
				 	memset(&window, 0, sizeof(window));
			
 
				-	window.src.x = plane->state->src_x >> 16;
			
 
				-	window.src.y = plane->state->src_y >> 16;
			
 
				-	window.src.w = plane->state->src_w >> 16;
			
 
				-	window.src.h = plane->state->src_h >> 16;
			
 
				-	window.dst.x = plane->state->crtc_x;
			
 
				-	window.dst.y = plane->state->crtc_y;
			
 
				-	window.dst.w = plane->state->crtc_w;
			
 
				-	window.dst.h = plane->state->crtc_h;
			
 
				+	window.src.x = plane->state->src.x1 >> 16;
			
 
				+	window.src.y = plane->state->src.y1 >> 16;
			
 
				+	window.src.w = drm_rect_width(&plane->state->src) >> 16;
			
 
				+	window.src.h = drm_rect_height(&plane->state->src) >> 16;
			
 
				+	window.dst.x = plane->state->dst.x1;
			
 
				+	window.dst.y = plane->state->dst.y1;
			
 
				+	window.dst.w = drm_rect_width(&plane->state->dst);
			
 
				+	window.dst.h = drm_rect_height(&plane->state->dst);
			
 
				 	window.bits_per_pixel = fb->format->cpp[0] * 8;
			
 
				 	window.bottom_up = tegra_fb_is_bottom_up(fb);
			
 
				 
			
@@ -598,8 +632,6 @@ static void tegra_plane_atomic_disable(struct drm_plane *plane,
 
				 {
			
 
				 	struct tegra_plane *p = to_tegra_plane(plane);
			
 
				 	struct tegra_dc *dc;
			
 
				-	unsigned long flags;
			
 
				-	u32 value;
			
 
				 
			
 
				 	/* rien ne va plus */
			
 
				 	if (!old_state || !old_state->crtc)
			
@@ -607,16 +639,7 @@ static void tegra_plane_atomic_disable(struct drm_plane *plane,
 
				 
			
 
				 	dc = to_tegra_dc(old_state->crtc);
			
 
				 
			
 
				-	spin_lock_irqsave(&dc->lock, flags);
			
 
				-
			
 
				-	value = WINDOW_A_SELECT << p->index;
			
 
				-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
			
 
				-
			
 
				-	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
			
 
				-	value &= ~WIN_ENABLE;
			
 
				-	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
			
 
				-
			
 
				-	spin_unlock_irqrestore(&dc->lock, flags);
			
 
				+	tegra_dc_disable_window(dc, p->index);
			
 
				 }
			
 
				 
			
 
				 static const struct drm_plane_helper_funcs tegra_primary_plane_helper_funcs = {
			
@@ -1856,6 +1879,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
 
				 	.supports_block_linear = false,
			
 
				 	.pitch_align = 8,
			
 
				 	.has_powergate = false,
			
 
				+	.broken_reset = true,
			
 
				 };
			
 
				 
			
 
				 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
			
@@ -1865,6 +1889,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
 
				 	.supports_block_linear = false,
			
 
				 	.pitch_align = 8,
			
 
				 	.has_powergate = false,
			
 
				+	.broken_reset = false,
			
 
				 };
			
 
				 
			
 
				 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
			
@@ -1874,6 +1899,7 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
 
				 	.supports_block_linear = false,
			
 
				 	.pitch_align = 64,
			
 
				 	.has_powergate = true,
			
 
				+	.broken_reset = false,
			
 
				 };
			
 
				 
			
 
				 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
			
@@ -1883,6 +1909,7 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
 
				 	.supports_block_linear = true,
			
 
				 	.pitch_align = 64,
			
 
				 	.has_powergate = true,
			
 
				+	.broken_reset = false,
			
 
				 };
			
 
				 
			
 
				 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
			
@@ -1892,6 +1919,7 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
 
				 	.supports_block_linear = true,
			
 
				 	.pitch_align = 64,
			
 
				 	.has_powergate = true,
			
 
				+	.broken_reset = false,
			
 
				 };
			
 
				 
			
 
				 static const struct of_device_id tegra_dc_of_match[] = {
			
@@ -1989,7 +2017,8 @@ static int tegra_dc_probe(struct platform_device *pdev)
 
				 		return PTR_ERR(dc->rst);
			
 
				 	}
			
 
				 
			
 
				-	reset_control_assert(dc->rst);
			
 
				+	if (!dc->soc->broken_reset)
			
 
				+		reset_control_assert(dc->rst);
			
 
				 
			
 
				 	if (dc->soc->has_powergate) {
			
 
				 		if (dc->pipe == 0)
			
@@ -2063,10 +2092,12 @@ static int tegra_dc_suspend(struct device *dev)
 
				 	struct tegra_dc *dc = dev_get_drvdata(dev);
			
 
				 	int err;
			
 
				 
			
 
				-	err = reset_control_assert(dc->rst);
			
 
				-	if (err < 0) {
			
 
				-		dev_err(dev, "failed to assert reset: %d\n", err);
			
 
				-		return err;
			
 
				+	if (!dc->soc->broken_reset) {
			
 
				+		err = reset_control_assert(dc->rst);
			
 
				+		if (err < 0) {
			
 
				+			dev_err(dev, "failed to assert reset: %d\n", err);
			
 
				+			return err;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	if (dc->soc->has_powergate)
			
@@ -2096,10 +2127,13 @@ static int tegra_dc_resume(struct device *dev)
 
				 			return err;
			
 
				 		}
			
 
				 
			
 
				-		err = reset_control_deassert(dc->rst);
			
 
				-		if (err < 0) {
			
 
				-			dev_err(dev, "failed to deassert reset: %d\n", err);
			
 
				-			return err;
			
 
				+		if (!dc->soc->broken_reset) {
			
 
				+			err = reset_control_deassert(dc->rst);
			
 
				+			if (err < 0) {
			
 
				+				dev_err(dev,
			
 
				+					"failed to deassert reset: %d\n", err);
			
 
				+				return err;
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -26,6 +26,7 @@
 
				 #define DRIVER_PATCHLEVEL 0
			
 
				 
			
 
				 #define CARVEOUT_SZ SZ_64M
			
 
				+#define CDMA_GATHER_FETCHES_MAX_NB 16383
			
 
				 
			
 
				 struct tegra_drm_file {
			
 
				 	struct idr contexts;
			
@@ -348,6 +349,36 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static int host1x_waitchk_copy_from_user(struct host1x_waitchk *dest,
			
 
				+					 struct drm_tegra_waitchk __user *src,
			
 
				+					 struct drm_file *file)
			
 
				+{
			
 
				+	u32 cmdbuf;
			
 
				+	int err;
			
 
				+
			
 
				+	err = get_user(cmdbuf, &src->handle);
			
 
				+	if (err < 0)
			
 
				+		return err;
			
 
				+
			
 
				+	err = get_user(dest->offset, &src->offset);
			
 
				+	if (err < 0)
			
 
				+		return err;
			
 
				+
			
 
				+	err = get_user(dest->syncpt_id, &src->syncpt);
			
 
				+	if (err < 0)
			
 
				+		return err;
			
 
				+
			
 
				+	err = get_user(dest->thresh, &src->thresh);
			
 
				+	if (err < 0)
			
 
				+		return err;
			
 
				+
			
 
				+	dest->bo = host1x_bo_lookup(file, cmdbuf);
			
 
				+	if (!dest->bo)
			
 
				+		return -ENOENT;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 int tegra_drm_submit(struct tegra_drm_context *context,
			
 
				 		     struct drm_tegra_submit *args, struct drm_device *drm,
			
 
				 		     struct drm_file *file)
			
@@ -362,6 +393,8 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
				 	struct drm_tegra_waitchk __user *waitchks =
			
 
				 		(void __user *)(uintptr_t)args->waitchks;
			
 
				 	struct drm_tegra_syncpt syncpt;
			
 
				+	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
			
 
				+	struct host1x_syncpt *sp;
			
 
				 	struct host1x_job *job;
			
 
				 	int err;
			
 
				 
			
@@ -369,6 +402,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
				 	if (args->num_syncpts != 1)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				+	/* We don't yet support waitchks */
			
 
				+	if (args->num_waitchks != 0)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				 	job = host1x_job_alloc(context->channel, args->num_cmdbufs,
			
 
				 			       args->num_relocs, args->num_waitchks);
			
 
				 	if (!job)
			
@@ -383,18 +420,42 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
				 	while (num_cmdbufs) {
			
 
				 		struct drm_tegra_cmdbuf cmdbuf;
			
 
				 		struct host1x_bo *bo;
			
 
				+		struct tegra_bo *obj;
			
 
				+		u64 offset;
			
 
				 
			
 
				 		if (copy_from_user(&cmdbuf, cmdbufs, sizeof(cmdbuf))) {
			
 
				 			err = -EFAULT;
			
 
				 			goto fail;
			
 
				 		}
			
 
				 
			
 
				+		/*
			
 
				+		 * The maximum number of CDMA gather fetches is 16383, a higher
			
 
				+		 * value means the words count is malformed.
			
 
				+		 */
			
 
				+		if (cmdbuf.words > CDMA_GATHER_FETCHES_MAX_NB) {
			
 
				+			err = -EINVAL;
			
 
				+			goto fail;
			
 
				+		}
			
 
				+
			
 
				 		bo = host1x_bo_lookup(file, cmdbuf.handle);
			
 
				 		if (!bo) {
			
 
				 			err = -ENOENT;
			
 
				 			goto fail;
			
 
				 		}
			
 
				 
			
 
				+		offset = (u64)cmdbuf.offset + (u64)cmdbuf.words * sizeof(u32);
			
 
				+		obj = host1x_to_tegra_bo(bo);
			
 
				+
			
 
				+		/*
			
 
				+		 * Gather buffer base address must be 4-bytes aligned,
			
 
				+		 * an unaligned offset is malformed and causes command stream
			
 
				+		 * corruption on the buffer address relocation.
			
 
				+		 */
			
 
				+		if (offset & 3 || offset >= obj->gem.size) {
			
 
				+			err = -EINVAL;
			
 
				+			goto fail;
			
 
				+		}
			
 
				+
			
 
				 		host1x_job_add_gather(job, bo, cmdbuf.words, cmdbuf.offset);
			
 
				 		num_cmdbufs--;
			
 
				 		cmdbufs++;
			
@@ -402,17 +463,59 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
				 
			
 
				 	/* copy and resolve relocations from submit */
			
 
				 	while (num_relocs--) {
			
 
				+		struct host1x_reloc *reloc;
			
 
				+		struct tegra_bo *obj;
			
 
				+
			
 
				 		err = host1x_reloc_copy_from_user(&job->relocarray[num_relocs],
			
 
				 						  &relocs[num_relocs], drm,
			
 
				 						  file);
			
 
				 		if (err < 0)
			
 
				 			goto fail;
			
 
				+
			
 
				+		reloc = &job->relocarray[num_relocs];
			
 
				+		obj = host1x_to_tegra_bo(reloc->cmdbuf.bo);
			
 
				+
			
 
				+		/*
			
 
				+		 * The unaligned cmdbuf offset will cause an unaligned write
			
 
				+		 * during relocation patching, corrupting the command
			
 
				+		 * stream.
			
 
				+		 */
			
 
				+		if (reloc->cmdbuf.offset & 3 ||
			
 
				+		    reloc->cmdbuf.offset >= obj->gem.size) {
			
 
				+			err = -EINVAL;
			
 
				+			goto fail;
			
 
				+		}
			
 
				+
			
 
				+		obj = host1x_to_tegra_bo(reloc->target.bo);
			
 
				+
			
 
				+		if (reloc->target.offset >= obj->gem.size) {
			
 
				+			err = -EINVAL;
			
 
				+			goto fail;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				-	if (copy_from_user(job->waitchk, waitchks,
			
 
				-			   sizeof(*waitchks) * num_waitchks)) {
			
 
				-		err = -EFAULT;
			
 
				-		goto fail;
			
 
				+	/* copy and resolve waitchks from submit */
			
 
				+	while (num_waitchks--) {
			
 
				+		struct host1x_waitchk *wait = &job->waitchk[num_waitchks];
			
 
				+		struct tegra_bo *obj;
			
 
				+
			
 
				+		err = host1x_waitchk_copy_from_user(wait,
			
 
				+						    &waitchks[num_waitchks],
			
 
				+						    file);
			
 
				+		if (err < 0)
			
 
				+			goto fail;
			
 
				+
			
 
				+		obj = host1x_to_tegra_bo(wait->bo);
			
 
				+
			
 
				+		/*
			
 
				+		 * The unaligned offset will cause an unaligned write during
			
 
				+		 * waitchk patching, corrupting the command stream.
			
 
				+		 */
			
 
				+		if (wait->offset & 3 ||
			
 
				+		    wait->offset >= obj->gem.size) {
			
 
				+			err = -EINVAL;
			
 
				+			goto fail;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
			
@@ -421,7 +524,15 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
				 		goto fail;
			
 
				 	}
			
 
				 
			
 
				+	/* check whether syncpoint ID is valid */
			
 
				+	sp = host1x_syncpt_get(host1x, syncpt.id);
			
 
				+	if (!sp) {
			
 
				+		err = -ENOENT;
			
 
				+		goto fail;
			
 
				+	}
			
 
				+
			
 
				 	job->is_addr_reg = context->client->ops->is_addr_reg;
			
 
				+	job->is_valid_class = context->client->ops->is_valid_class;
			
 
				 	job->syncpt_incrs = syncpt.incrs;
			
 
				 	job->syncpt_id = syncpt.id;
			
 
				 	job->timeout = 10000;
			
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -83,6 +83,7 @@ struct tegra_drm_client_ops {
 
				 			    struct tegra_drm_context *context);
			
 
				 	void (*close_channel)(struct tegra_drm_context *context);
			
 
				 	int (*is_addr_reg)(struct device *dev, u32 class, u32 offset);
			
 
				+	int (*is_valid_class)(u32 class);
			
 
				 	int (*submit)(struct tegra_drm_context *context,
			
 
				 		      struct drm_tegra_submit *args, struct drm_device *drm,
			
 
				 		      struct drm_file *file);
			
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -20,11 +20,6 @@
 
				 #include "drm.h"
			
 
				 #include "gem.h"
			
 
				 
			
 
				-static inline struct tegra_bo *host1x_to_tegra_bo(struct host1x_bo *bo)
			
 
				-{
			
 
				-	return container_of(bo, struct tegra_bo, base);
			
 
				-}
			
 
				-
			
 
				 static void tegra_bo_put(struct host1x_bo *bo)
			
 
				 {
			
 
				 	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
			
--- a/drivers/gpu/drm/tegra/gem.h
+++ b/drivers/gpu/drm/tegra/gem.h
@@ -52,6 +52,11 @@ static inline struct tegra_bo *to_tegra_bo(struct drm_gem_object *gem)
 
				 	return container_of(gem, struct tegra_bo, gem);
			
 
				 }
			
 
				 
			
 
				+static inline struct tegra_bo *host1x_to_tegra_bo(struct host1x_bo *bo)
			
 
				+{
			
 
				+	return container_of(bo, struct tegra_bo, base);
			
 
				+}
			
 
				+
			
 
				 struct tegra_bo *tegra_bo_create(struct drm_device *drm, size_t size,
			
 
				 				 unsigned long flags);
			
 
				 struct tegra_bo *tegra_bo_create_with_handle(struct drm_file *file,
			
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -38,7 +38,7 @@ static int gr2d_init(struct host1x_client *client)
 
				 
			
 
				 	client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
			
 
				 	if (!client->syncpts[0]) {
			
 
				-		host1x_channel_free(gr2d->channel);
			
 
				+		host1x_channel_put(gr2d->channel);
			
 
				 		return -ENOMEM;
			
 
				 	}
			
 
				 
			
@@ -57,7 +57,7 @@ static int gr2d_exit(struct host1x_client *client)
 
				 		return err;
			
 
				 
			
 
				 	host1x_syncpt_free(client->syncpts[0]);
			
 
				-	host1x_channel_free(gr2d->channel);
			
 
				+	host1x_channel_put(gr2d->channel);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -109,10 +109,17 @@ static int gr2d_is_addr_reg(struct device *dev, u32 class, u32 offset)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static int gr2d_is_valid_class(u32 class)
			
 
				+{
			
 
				+	return (class == HOST1X_CLASS_GR2D ||
			
 
				+		class == HOST1X_CLASS_GR2D_SB);
			
 
				+}
			
 
				+
			
 
				 static const struct tegra_drm_client_ops gr2d_ops = {
			
 
				 	.open_channel = gr2d_open_channel,
			
 
				 	.close_channel = gr2d_close_channel,
			
 
				 	.is_addr_reg = gr2d_is_addr_reg,
			
 
				+	.is_valid_class = gr2d_is_valid_class,
			
 
				 	.submit = tegra_drm_submit,
			
 
				 };
			
 
				 
			
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -48,7 +48,7 @@ static int gr3d_init(struct host1x_client *client)
 
				 
			
 
				 	client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
			
 
				 	if (!client->syncpts[0]) {
			
 
				-		host1x_channel_free(gr3d->channel);
			
 
				+		host1x_channel_put(gr3d->channel);
			
 
				 		return -ENOMEM;
			
 
				 	}
			
 
				 
			
@@ -67,7 +67,7 @@ static int gr3d_exit(struct host1x_client *client)
 
				 		return err;
			
 
				 
			
 
				 	host1x_syncpt_free(client->syncpts[0]);
			
 
				-	host1x_channel_free(gr3d->channel);
			
 
				+	host1x_channel_put(gr3d->channel);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -182,7 +182,7 @@ static int vic_init(struct host1x_client *client)
 
				 free_syncpt:
			
 
				 	host1x_syncpt_free(client->syncpts[0]);
			
 
				 free_channel:
			
 
				-	host1x_channel_free(vic->channel);
			
 
				+	host1x_channel_put(vic->channel);
			
 
				 detach_device:
			
 
				 	if (tegra->domain)
			
 
				 		iommu_detach_device(tegra->domain, vic->dev);
			
@@ -203,7 +203,7 @@ static int vic_exit(struct host1x_client *client)
 
				 		return err;
			
 
				 
			
 
				 	host1x_syncpt_free(client->syncpts[0]);
			
 
				-	host1x_channel_free(vic->channel);
			
 
				+	host1x_channel_put(vic->channel);
			
 
				 
			
 
				 	if (vic->domain) {
			
 
				 		iommu_detach_device(vic->domain, vic->dev);
			
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -40,6 +40,9 @@ struct host1x_subdev {
 
				 
			
 
				 /**
			
 
				  * host1x_subdev_add() - add a new subdevice with an associated device node
			
 
				+ * @device: host1x device to add the subdevice to
			
 
				+ * @driver: host1x driver
			
 
				+ * @np: device node
			
 
				  */
			
 
				 static int host1x_subdev_add(struct host1x_device *device,
			
 
				 			     struct device_node *np)
			
@@ -62,6 +65,7 @@ static int host1x_subdev_add(struct host1x_device *device,
 
				 
			
 
				 /**
			
 
				  * host1x_subdev_del() - remove subdevice
			
 
				+ * @subdev: subdevice to remove
			
 
				  */
			
 
				 static void host1x_subdev_del(struct host1x_subdev *subdev)
			
 
				 {
			
@@ -72,6 +76,8 @@ static void host1x_subdev_del(struct host1x_subdev *subdev)
 
				 
			
 
				 /**
			
 
				  * host1x_device_parse_dt() - scan device tree and add matching subdevices
			
 
				+ * @device: host1x logical device
			
 
				+ * @driver: host1x driver
			
 
				  */
			
 
				 static int host1x_device_parse_dt(struct host1x_device *device,
			
 
				 				  struct host1x_driver *driver)
			
@@ -166,6 +172,16 @@ static void host1x_subdev_unregister(struct host1x_device *device,
 
				 	mutex_unlock(&device->subdevs_lock);
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * host1x_device_init() - initialize a host1x logical device
			
 
				+ * @device: host1x logical device
			
 
				+ *
			
 
				+ * The driver for the host1x logical device can call this during execution of
			
 
				+ * its &host1x_driver.probe implementation to initialize each of its clients.
			
 
				+ * The client drivers access the subsystem specific driver data using the
			
 
				+ * &host1x_client.parent field and driver data associated with it (usually by
			
 
				+ * calling dev_get_drvdata()).
			
 
				+ */
			
 
				 int host1x_device_init(struct host1x_device *device)
			
 
				 {
			
 
				 	struct host1x_client *client;
			
@@ -192,6 +208,15 @@ int host1x_device_init(struct host1x_device *device)
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_device_init);
			
 
				 
			
 
				+/**
			
 
				+ * host1x_device_exit() - uninitialize host1x logical device
			
 
				+ * @device: host1x logical device
			
 
				+ *
			
 
				+ * When the driver for a host1x logical device is unloaded, it can call this
			
 
				+ * function to tear down each of its clients. Typically this is done after a
			
 
				+ * subsystem-specific data structure is removed and the functionality can no
			
 
				+ * longer be used.
			
 
				+ */
			
 
				 int host1x_device_exit(struct host1x_device *device)
			
 
				 {
			
 
				 	struct host1x_client *client;
			
@@ -446,6 +471,14 @@ static void host1x_detach_driver(struct host1x *host1x,
 
				 	mutex_unlock(&host1x->devices_lock);
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * host1x_register() - register a host1x controller
			
 
				+ * @host1x: host1x controller
			
 
				+ *
			
 
				+ * The host1x controller driver uses this to register a host1x controller with
			
 
				+ * the infrastructure. Note that all Tegra SoC generations have only ever come
			
 
				+ * with a single host1x instance, so this function is somewhat academic.
			
 
				+ */
			
 
				 int host1x_register(struct host1x *host1x)
			
 
				 {
			
 
				 	struct host1x_driver *driver;
			
@@ -464,6 +497,13 @@ int host1x_register(struct host1x *host1x)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * host1x_unregister() - unregister a host1x controller
			
 
				+ * @host1x: host1x controller
			
 
				+ *
			
 
				+ * The host1x controller driver uses this to remove a host1x controller from
			
 
				+ * the infrastructure.
			
 
				+ */
			
 
				 int host1x_unregister(struct host1x *host1x)
			
 
				 {
			
 
				 	struct host1x_driver *driver;
			
@@ -513,6 +553,16 @@ static void host1x_device_shutdown(struct device *dev)
 
				 		driver->shutdown(device);
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * host1x_driver_register_full() - register a host1x driver
			
 
				+ * @driver: host1x driver
			
 
				+ * @owner: owner module
			
 
				+ *
			
 
				+ * Drivers for host1x logical devices call this function to register a driver
			
 
				+ * with the infrastructure. Note that since these drive logical devices, the
			
 
				+ * registration of the driver actually triggers the logical device creation.
			
 
				+ * A logical device will be created for each host1x instance.
			
 
				+ */
			
 
				 int host1x_driver_register_full(struct host1x_driver *driver,
			
 
				 				struct module *owner)
			
 
				 {
			
@@ -541,6 +591,13 @@ int host1x_driver_register_full(struct host1x_driver *driver,
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_driver_register_full);
			
 
				 
			
 
				+/**
			
 
				+ * host1x_driver_unregister() - unregister a host1x driver
			
 
				+ * @driver: host1x driver
			
 
				+ *
			
 
				+ * Unbinds the driver from each of the host1x logical devices that it is
			
 
				+ * bound to, effectively removing the subsystem devices that they represent.
			
 
				+ */
			
 
				 void host1x_driver_unregister(struct host1x_driver *driver)
			
 
				 {
			
 
				 	driver_unregister(&driver->driver);
			
@@ -551,6 +608,17 @@ void host1x_driver_unregister(struct host1x_driver *driver)
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_driver_unregister);
			
 
				 
			
 
				+/**
			
 
				+ * host1x_client_register() - register a host1x client
			
 
				+ * @client: host1x client
			
 
				+ *
			
 
				+ * Registers a host1x client with each host1x controller instance. Note that
			
 
				+ * each client will only match their parent host1x controller and will only be
			
 
				+ * associated with that instance. Once all clients have been registered with
			
 
				+ * their parent host1x controller, the infrastructure will set up the logical
			
 
				+ * device and call host1x_device_init(), which will in turn call each client's
			
 
				+ * &host1x_client_ops.init implementation.
			
 
				+ */
			
 
				 int host1x_client_register(struct host1x_client *client)
			
 
				 {
			
 
				 	struct host1x *host1x;
			
@@ -576,6 +644,13 @@ int host1x_client_register(struct host1x_client *client)
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_client_register);
			
 
				 
			
 
				+/**
			
 
				+ * host1x_client_unregister() - unregister a host1x client
			
 
				+ * @client: host1x client
			
 
				+ *
			
 
				+ * Removes a host1x client from its host1x controller instance. If a logical
			
 
				+ * device has already been initialized, it will be torn down.
			
 
				+ */
			
 
				 int host1x_client_unregister(struct host1x_client *client)
			
 
				 {
			
 
				 	struct host1x_client *c;
			
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -88,7 +88,6 @@ struct host1x_cdma {
 
				 
			
 
				 int host1x_cdma_init(struct host1x_cdma *cdma);
			
 
				 int host1x_cdma_deinit(struct host1x_cdma *cdma);
			
 
				-void host1x_cdma_stop(struct host1x_cdma *cdma);
			
 
				 int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job);
			
 
				 void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2);
			
 
				 void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job);
			
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -24,19 +24,33 @@
 
				 #include "job.h"
			
 
				 
			
 
				 /* Constructor for the host1x device list */
			
 
				-int host1x_channel_list_init(struct host1x *host)
			
 
				+int host1x_channel_list_init(struct host1x_channel_list *chlist,
			
 
				+			     unsigned int num_channels)
			
 
				 {
			
 
				-	INIT_LIST_HEAD(&host->chlist.list);
			
 
				-	mutex_init(&host->chlist_mutex);
			
 
				-
			
 
				-	if (host->info->nb_channels > BITS_PER_LONG) {
			
 
				-		WARN(1, "host1x hardware has more channels than supported by the driver\n");
			
 
				-		return -ENOSYS;
			
 
				+	chlist->channels = kcalloc(num_channels, sizeof(struct host1x_channel),
			
 
				+				   GFP_KERNEL);
			
 
				+	if (!chlist->channels)
			
 
				+		return -ENOMEM;
			
 
				+
			
 
				+	chlist->allocated_channels =
			
 
				+		kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long),
			
 
				+			GFP_KERNEL);
			
 
				+	if (!chlist->allocated_channels) {
			
 
				+		kfree(chlist->channels);
			
 
				+		return -ENOMEM;
			
 
				 	}
			
 
				 
			
 
				+	bitmap_zero(chlist->allocated_channels, num_channels);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+void host1x_channel_list_free(struct host1x_channel_list *chlist)
			
 
				+{
			
 
				+	kfree(chlist->allocated_channels);
			
 
				+	kfree(chlist->channels);
			
 
				+}
			
 
				+
			
 
				 int host1x_job_submit(struct host1x_job *job)
			
 
				 {
			
 
				 	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
			
@@ -47,86 +61,107 @@ EXPORT_SYMBOL(host1x_job_submit);
 
				 
			
 
				 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel)
			
 
				 {
			
 
				-	int err = 0;
			
 
				+	kref_get(&channel->refcount);
			
 
				 
			
 
				-	mutex_lock(&channel->reflock);
			
 
				+	return channel;
			
 
				+}
			
 
				+EXPORT_SYMBOL(host1x_channel_get);
			
 
				 
			
 
				-	if (channel->refcount == 0)
			
 
				-		err = host1x_cdma_init(&channel->cdma);
			
 
				+/**
			
 
				+ * host1x_channel_get_index() - Attempt to get channel reference by index
			
 
				+ * @host: Host1x device object
			
 
				+ * @index: Index of channel
			
 
				+ *
			
 
				+ * If channel number @index is currently allocated, increase its refcount
			
 
				+ * and return a pointer to it. Otherwise, return NULL.
			
 
				+ */
			
 
				+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
			
 
				+						unsigned int index)
			
 
				+{
			
 
				+	struct host1x_channel *ch = &host->channel_list.channels[index];
			
 
				 
			
 
				-	if (!err)
			
 
				-		channel->refcount++;
			
 
				+	if (!kref_get_unless_zero(&ch->refcount))
			
 
				+		return NULL;
			
 
				 
			
 
				-	mutex_unlock(&channel->reflock);
			
 
				+	return ch;
			
 
				+}
			
 
				+
			
 
				+static void release_channel(struct kref *kref)
			
 
				+{
			
 
				+	struct host1x_channel *channel =
			
 
				+		container_of(kref, struct host1x_channel, refcount);
			
 
				+	struct host1x *host = dev_get_drvdata(channel->dev->parent);
			
 
				+	struct host1x_channel_list *chlist = &host->channel_list;
			
 
				+
			
 
				+	host1x_hw_cdma_stop(host, &channel->cdma);
			
 
				+	host1x_cdma_deinit(&channel->cdma);
			
 
				 
			
 
				-	return err ? NULL : channel;
			
 
				+	clear_bit(channel->id, chlist->allocated_channels);
			
 
				 }
			
 
				-EXPORT_SYMBOL(host1x_channel_get);
			
 
				 
			
 
				 void host1x_channel_put(struct host1x_channel *channel)
			
 
				 {
			
 
				-	mutex_lock(&channel->reflock);
			
 
				+	kref_put(&channel->refcount, release_channel);
			
 
				+}
			
 
				+EXPORT_SYMBOL(host1x_channel_put);
			
 
				 
			
 
				-	if (channel->refcount == 1) {
			
 
				-		struct host1x *host = dev_get_drvdata(channel->dev->parent);
			
 
				+static struct host1x_channel *acquire_unused_channel(struct host1x *host)
			
 
				+{
			
 
				+	struct host1x_channel_list *chlist = &host->channel_list;
			
 
				+	unsigned int max_channels = host->info->nb_channels;
			
 
				+	unsigned int index;
			
 
				 
			
 
				-		host1x_hw_cdma_stop(host, &channel->cdma);
			
 
				-		host1x_cdma_deinit(&channel->cdma);
			
 
				+	index = find_first_zero_bit(chlist->allocated_channels, max_channels);
			
 
				+	if (index >= max_channels) {
			
 
				+		dev_err(host->dev, "failed to find free channel\n");
			
 
				+		return NULL;
			
 
				 	}
			
 
				 
			
 
				-	channel->refcount--;
			
 
				+	chlist->channels[index].id = index;
			
 
				 
			
 
				-	mutex_unlock(&channel->reflock);
			
 
				+	set_bit(index, chlist->allocated_channels);
			
 
				+
			
 
				+	return &chlist->channels[index];
			
 
				 }
			
 
				-EXPORT_SYMBOL(host1x_channel_put);
			
 
				 
			
 
				+/**
			
 
				+ * host1x_channel_request() - Allocate a channel
			
 
				+ * @dev: Host1x unit this channel will be used to send commands to
			
 
				+ *
			
 
				+ * Allocates a new host1x channel for @dev. Returns NULL if no free channel
			
 
				+ * is available or if initialization of the channel hardware or its CDMA
			
 
				+ * fails.
			
 
				+ */
			
 
				 struct host1x_channel *host1x_channel_request(struct device *dev)
			
 
				 {
			
 
				 	struct host1x *host = dev_get_drvdata(dev->parent);
			
 
				-	unsigned int max_channels = host->info->nb_channels;
			
 
				-	struct host1x_channel *channel = NULL;
			
 
				-	unsigned long index;
			
 
				+	struct host1x_channel_list *chlist = &host->channel_list;
			
 
				+	struct host1x_channel *channel;
			
 
				 	int err;
			
 
				 
			
 
				-	mutex_lock(&host->chlist_mutex);
			
 
				+	channel = acquire_unused_channel(host);
			
 
				+	if (!channel)
			
 
				+		return NULL;
			
 
				 
			
 
				-	index = find_first_zero_bit(&host->allocated_channels, max_channels);
			
 
				-	if (index >= max_channels)
			
 
				-		goto fail;
			
 
				+	kref_init(&channel->refcount);
			
 
				+	mutex_init(&channel->submitlock);
			
 
				+	channel->dev = dev;
			
 
				 
			
 
				-	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
			
 
				-	if (!channel)
			
 
				+	err = host1x_hw_channel_init(host, channel, channel->id);
			
 
				+	if (err < 0)
			
 
				 		goto fail;
			
 
				 
			
 
				-	err = host1x_hw_channel_init(host, channel, index);
			
 
				+	err = host1x_cdma_init(&channel->cdma);
			
 
				 	if (err < 0)
			
 
				 		goto fail;
			
 
				 
			
 
				-	/* Link device to host1x_channel */
			
 
				-	channel->dev = dev;
			
 
				-
			
 
				-	/* Add to channel list */
			
 
				-	list_add_tail(&channel->list, &host->chlist.list);
			
 
				-
			
 
				-	host->allocated_channels |= BIT(index);
			
 
				-
			
 
				-	mutex_unlock(&host->chlist_mutex);
			
 
				 	return channel;
			
 
				 
			
 
				 fail:
			
 
				-	dev_err(dev, "failed to init channel\n");
			
 
				-	kfree(channel);
			
 
				-	mutex_unlock(&host->chlist_mutex);
			
 
				-	return NULL;
			
 
				-}
			
 
				-EXPORT_SYMBOL(host1x_channel_request);
			
 
				+	clear_bit(channel->id, chlist->allocated_channels);
			
 
				 
			
 
				-void host1x_channel_free(struct host1x_channel *channel)
			
 
				-{
			
 
				-	struct host1x *host = dev_get_drvdata(channel->dev->parent);
			
 
				+	dev_err(dev, "failed to initialize channel\n");
			
 
				 
			
 
				-	host->allocated_channels &= ~BIT(channel->id);
			
 
				-	list_del(&channel->list);
			
 
				-	kfree(channel);
			
 
				+	return NULL;
			
 
				 }
			
 
				-EXPORT_SYMBOL(host1x_channel_free);
			
 
				+EXPORT_SYMBOL(host1x_channel_request);
			
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -20,17 +20,21 @@
 
				 #define __HOST1X_CHANNEL_H
			
 
				 
			
 
				 #include <linux/io.h>
			
 
				+#include <linux/kref.h>
			
 
				 
			
 
				 #include "cdma.h"
			
 
				 
			
 
				 struct host1x;
			
 
				+struct host1x_channel;
			
 
				 
			
 
				-struct host1x_channel {
			
 
				-	struct list_head list;
			
 
				+struct host1x_channel_list {
			
 
				+	struct host1x_channel *channels;
			
 
				+	unsigned long *allocated_channels;
			
 
				+};
			
 
				 
			
 
				-	unsigned int refcount;
			
 
				+struct host1x_channel {
			
 
				+	struct kref refcount;
			
 
				 	unsigned int id;
			
 
				-	struct mutex reflock;
			
 
				 	struct mutex submitlock;
			
 
				 	void __iomem *regs;
			
 
				 	struct device *dev;
			
@@ -38,9 +42,10 @@ struct host1x_channel {
 
				 };
			
 
				 
			
 
				 /* channel list operations */
			
 
				-int host1x_channel_list_init(struct host1x *host);
			
 
				-
			
 
				-#define host1x_for_each_channel(host, channel)				\
			
 
				-	list_for_each_entry(channel, &host->chlist.list, list)
			
 
				+int host1x_channel_list_init(struct host1x_channel_list *chlist,
			
 
				+			     unsigned int num_channels);
			
 
				+void host1x_channel_list_free(struct host1x_channel_list *chlist);
			
 
				+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
			
 
				+						unsigned int index);
			
 
				 
			
 
				 #endif
			
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -43,24 +43,19 @@ void host1x_debug_output(struct output *o, const char *fmt, ...)
 
				 	o->fn(o->ctx, o->buf, len);
			
 
				 }
			
 
				 
			
 
				-static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo)
			
 
				+static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
			
 
				 {
			
 
				 	struct host1x *m = dev_get_drvdata(ch->dev->parent);
			
 
				 	struct output *o = data;
			
 
				 
			
 
				-	mutex_lock(&ch->reflock);
			
 
				+	mutex_lock(&ch->cdma.lock);
			
 
				 
			
 
				-	if (ch->refcount) {
			
 
				-		mutex_lock(&ch->cdma.lock);
			
 
				+	if (show_fifo)
			
 
				+		host1x_hw_show_channel_fifo(m, ch, o);
			
 
				 
			
 
				-		if (show_fifo)
			
 
				-			host1x_hw_show_channel_fifo(m, ch, o);
			
 
				+	host1x_hw_show_channel_cdma(m, ch, o);
			
 
				 
			
 
				-		host1x_hw_show_channel_cdma(m, ch, o);
			
 
				-		mutex_unlock(&ch->cdma.lock);
			
 
				-	}
			
 
				-
			
 
				-	mutex_unlock(&ch->reflock);
			
 
				+	mutex_unlock(&ch->cdma.lock);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -94,28 +89,22 @@ static void show_syncpts(struct host1x *m, struct output *o)
 
				 	host1x_debug_output(o, "\n");
			
 
				 }
			
 
				 
			
 
				-static void show_all(struct host1x *m, struct output *o)
			
 
				+static void show_all(struct host1x *m, struct output *o, bool show_fifo)
			
 
				 {
			
 
				-	struct host1x_channel *ch;
			
 
				+	int i;
			
 
				 
			
 
				 	host1x_hw_show_mlocks(m, o);
			
 
				 	show_syncpts(m, o);
			
 
				 	host1x_debug_output(o, "---- channels ----\n");
			
 
				 
			
 
				-	host1x_for_each_channel(m, ch)
			
 
				-		show_channels(ch, o, true);
			
 
				-}
			
 
				-
			
 
				-static void show_all_no_fifo(struct host1x *host1x, struct output *o)
			
 
				-{
			
 
				-	struct host1x_channel *ch;
			
 
				-
			
 
				-	host1x_hw_show_mlocks(host1x, o);
			
 
				-	show_syncpts(host1x, o);
			
 
				-	host1x_debug_output(o, "---- channels ----\n");
			
 
				+	for (i = 0; i < m->info->nb_channels; ++i) {
			
 
				+		struct host1x_channel *ch = host1x_channel_get_index(m, i);
			
 
				 
			
 
				-	host1x_for_each_channel(host1x, ch)
			
 
				-		show_channels(ch, o, false);
			
 
				+		if (ch) {
			
 
				+			show_channel(ch, o, show_fifo);
			
 
				+			host1x_channel_put(ch);
			
 
				+		}
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static int host1x_debug_show_all(struct seq_file *s, void *unused)
			
@@ -125,7 +114,7 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused)
 
				 		.ctx = s
			
 
				 	};
			
 
				 
			
 
				-	show_all(s->private, &o);
			
 
				+	show_all(s->private, &o, true);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -137,7 +126,7 @@ static int host1x_debug_show(struct seq_file *s, void *unused)
 
				 		.ctx = s
			
 
				 	};
			
 
				 
			
 
				-	show_all_no_fifo(s->private, &o);
			
 
				+	show_all(s->private, &o, false);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -216,7 +205,7 @@ void host1x_debug_dump(struct host1x *host1x)
 
				 		.fn = write_to_printk
			
 
				 	};
			
 
				 
			
 
				-	show_all(host1x, &o);
			
 
				+	show_all(host1x, &o, true);
			
 
				 }
			
 
				 
			
 
				 void host1x_debug_dump_syncpts(struct host1x *host1x)
			
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -198,7 +198,8 @@ static int host1x_probe(struct platform_device *pdev)
 
				 		host->iova_end = geometry->aperture_end;
			
 
				 	}
			
 
				 
			
 
				-	err = host1x_channel_list_init(host);
			
 
				+	err = host1x_channel_list_init(&host->channel_list,
			
 
				+				       host->info->nb_channels);
			
 
				 	if (err) {
			
 
				 		dev_err(&pdev->dev, "failed to initialize channel list\n");
			
 
				 		goto fail_detach_device;
			
@@ -207,7 +208,7 @@ static int host1x_probe(struct platform_device *pdev)
 
				 	err = clk_prepare_enable(host->clk);
			
 
				 	if (err < 0) {
			
 
				 		dev_err(&pdev->dev, "failed to enable clock\n");
			
 
				-		goto fail_detach_device;
			
 
				+		goto fail_free_channels;
			
 
				 	}
			
 
				 
			
 
				 	err = reset_control_deassert(host->rst);
			
@@ -244,6 +245,8 @@ fail_reset_assert:
 
				 	reset_control_assert(host->rst);
			
 
				 fail_unprepare_disable:
			
 
				 	clk_disable_unprepare(host->clk);
			
 
				+fail_free_channels:
			
 
				+	host1x_channel_list_free(&host->channel_list);
			
 
				 fail_detach_device:
			
 
				 	if (host->domain) {
			
 
				 		put_iova_domain(&host->iova);
			
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -129,10 +129,8 @@ struct host1x {
 
				 	struct host1x_syncpt *nop_sp;
			
 
				 
			
 
				 	struct mutex syncpt_mutex;
			
 
				-	struct mutex chlist_mutex;
			
 
				-	struct host1x_channel chlist;
			
 
				-	unsigned long allocated_channels;
			
 
				-	unsigned int num_allocated_channels;
			
 
				+
			
 
				+	struct host1x_channel_list channel_list;
			
 
				 
			
 
				 	struct dentry *debugfs;
			
 
				 
			
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,6 @@ error:
 
				 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
			
 
				 			       unsigned int index)
			
 
				 {
			
 
				-	ch->id = index;
			
 
				-	mutex_init(&ch->reflock);
			
 
				-	mutex_init(&ch->submitlock);
			
 
				-
			
 
				 	ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
			
 
				 	return 0;
			
 
				 }
			
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -31,6 +31,8 @@
 
				 #include "job.h"
			
 
				 #include "syncpt.h"
			
 
				 
			
 
				+#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
			
 
				+
			
 
				 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
			
 
				 				    u32 num_cmdbufs, u32 num_relocs,
			
 
				 				    u32 num_waitchks)
			
@@ -137,8 +139,9 @@ static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp,
 
				  * avoid a wrap condition in the HW).
			
 
				  */
			
 
				 static int do_waitchks(struct host1x_job *job, struct host1x *host,
			
 
				-		       struct host1x_bo *patch)
			
 
				+		       struct host1x_job_gather *g)
			
 
				 {
			
 
				+	struct host1x_bo *patch = g->bo;
			
 
				 	int i;
			
 
				 
			
 
				 	/* compare syncpt vs wait threshold */
			
@@ -165,7 +168,8 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host,
 
				 				wait->syncpt_id, sp->name, wait->thresh,
			
 
				 				host1x_syncpt_read_min(sp));
			
 
				 
			
 
				-			host1x_syncpt_patch_offset(sp, patch, wait->offset);
			
 
				+			host1x_syncpt_patch_offset(sp, patch,
			
 
				+						   g->offset + wait->offset);
			
 
				 		}
			
 
				 
			
 
				 		wait->bo = NULL;
			
@@ -269,11 +273,12 @@ unpin:
 
				 	return err;
			
 
				 }
			
 
				 
			
 
				-static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
			
 
				+static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
			
 
				 {
			
 
				 	int i = 0;
			
 
				 	u32 last_page = ~0;
			
 
				 	void *cmdbuf_page_addr = NULL;
			
 
				+	struct host1x_bo *cmdbuf = g->bo;
			
 
				 
			
 
				 	/* pin & patch the relocs for one gather */
			
 
				 	for (i = 0; i < job->num_relocs; i++) {
			
@@ -286,6 +291,13 @@ static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
 
				 		if (cmdbuf != reloc->cmdbuf.bo)
			
 
				 			continue;
			
 
				 
			
 
				+		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
			
 
				+			target = (u32 *)job->gather_copy_mapped +
			
 
				+					reloc->cmdbuf.offset / sizeof(u32) +
			
 
				+						g->offset / sizeof(u32);
			
 
				+			goto patch_reloc;
			
 
				+		}
			
 
				+
			
 
				 		if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
			
 
				 			if (cmdbuf_page_addr)
			
 
				 				host1x_bo_kunmap(cmdbuf, last_page,
			
@@ -302,6 +314,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
 
				 		}
			
 
				 
			
 
				 		target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
			
 
				+patch_reloc:
			
 
				 		*target = reloc_addr;
			
 
				 	}
			
 
				 
			
@@ -319,6 +332,21 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
 
				 	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
			
 
				 		return false;
			
 
				 
			
 
				+	/* relocation shift value validation isn't implemented yet */
			
 
				+	if (reloc->shift)
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf,
			
 
				+		       unsigned int offset)
			
 
				+{
			
 
				+	offset *= sizeof(u32);
			
 
				+
			
 
				+	if (wait->bo != cmdbuf || wait->offset != offset)
			
 
				+		return false;
			
 
				+
			
 
				 	return true;
			
 
				 }
			
 
				 
			
@@ -329,6 +357,9 @@ struct host1x_firewall {
 
				 	unsigned int num_relocs;
			
 
				 	struct host1x_reloc *reloc;
			
 
				 
			
 
				+	unsigned int num_waitchks;
			
 
				+	struct host1x_waitchk *waitchk;
			
 
				+
			
 
				 	struct host1x_bo *cmdbuf;
			
 
				 	unsigned int offset;
			
 
				 
			
@@ -341,6 +372,9 @@ struct host1x_firewall {
 
				 
			
 
				 static int check_register(struct host1x_firewall *fw, unsigned long offset)
			
 
				 {
			
 
				+	if (!fw->job->is_addr_reg)
			
 
				+		return 0;
			
 
				+
			
 
				 	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
			
 
				 		if (!fw->num_relocs)
			
 
				 			return -EINVAL;
			
@@ -352,6 +386,33 @@ static int check_register(struct host1x_firewall *fw, unsigned long offset)
 
				 		fw->reloc++;
			
 
				 	}
			
 
				 
			
 
				+	if (offset == HOST1X_WAIT_SYNCPT_OFFSET) {
			
 
				+		if (fw->class != HOST1X_CLASS_HOST1X)
			
 
				+			return -EINVAL;
			
 
				+
			
 
				+		if (!fw->num_waitchks)
			
 
				+			return -EINVAL;
			
 
				+
			
 
				+		if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset))
			
 
				+			return -EINVAL;
			
 
				+
			
 
				+		fw->num_waitchks--;
			
 
				+		fw->waitchk++;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int check_class(struct host1x_firewall *fw, u32 class)
			
 
				+{
			
 
				+	if (!fw->job->is_valid_class) {
			
 
				+		if (fw->class != class)
			
 
				+			return -EINVAL;
			
 
				+	} else {
			
 
				+		if (!fw->job->is_valid_class(fw->class))
			
 
				+			return -EINVAL;
			
 
				+	}
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -428,11 +489,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 
				 {
			
 
				 	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
			
 
				 		(g->offset / sizeof(u32));
			
 
				+	u32 job_class = fw->class;
			
 
				 	int err = 0;
			
 
				 
			
 
				-	if (!fw->job->is_addr_reg)
			
 
				-		return 0;
			
 
				-
			
 
				 	fw->words = g->words;
			
 
				 	fw->cmdbuf = g->bo;
			
 
				 	fw->offset = 0;
			
@@ -452,7 +511,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 
				 			fw->class = word >> 6 & 0x3ff;
			
 
				 			fw->mask = word & 0x3f;
			
 
				 			fw->reg = word >> 16 & 0xfff;
			
 
				-			err = check_mask(fw);
			
 
				+			err = check_class(fw, job_class);
			
 
				+			if (!err)
			
 
				+				err = check_mask(fw);
			
 
				 			if (err)
			
 
				 				goto out;
			
 
				 			break;
			
@@ -480,7 +541,6 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 
				 				goto out;
			
 
				 			break;
			
 
				 		case 4:
			
 
				-		case 5:
			
 
				 		case 14:
			
 
				 			break;
			
 
				 		default:
			
@@ -504,7 +564,9 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 
				 	fw.dev = dev;
			
 
				 	fw.reloc = job->relocarray;
			
 
				 	fw.num_relocs = job->num_relocs;
			
 
				-	fw.class = 0;
			
 
				+	fw.waitchk = job->waitchk;
			
 
				+	fw.num_waitchks = job->num_waitchk;
			
 
				+	fw.class = job->class;
			
 
				 
			
 
				 	for (i = 0; i < job->num_gathers; i++) {
			
 
				 		struct host1x_job_gather *g = &job->gathers[i];
			
@@ -512,12 +574,20 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 
				 		size += g->words * sizeof(u32);
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * Try a non-blocking allocation from a higher priority pool first,
			
 
				+	 * as waiting for the allocation here is a major performance hit.
			
 
				+	 */
			
 
				 	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
			
 
				-					       GFP_KERNEL);
			
 
				-	if (!job->gather_copy_mapped) {
			
 
				-		job->gather_copy_mapped = NULL;
			
 
				+					       GFP_NOWAIT);
			
 
				+
			
 
				+	/* the higher priority allocation failed, fall back to blocking */
			
 
				+	if (!job->gather_copy_mapped)
			
 
				+		job->gather_copy_mapped = dma_alloc_wc(dev, size,
			
 
				+						       &job->gather_copy,
			
 
				+						       GFP_KERNEL);
			
 
				+	if (!job->gather_copy_mapped)
			
 
				 		return -ENOMEM;
			
 
				-	}
			
 
				 
			
 
				 	job->gather_copy_size = size;
			
 
				 
			
@@ -542,8 +612,8 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 
				 		offset += g->words * sizeof(u32);
			
 
				 	}
			
 
				 
			
 
				-	/* No relocs should remain at this point */
			
 
				-	if (fw.num_relocs)
			
 
				+	/* No relocs or waitchks should remain at this point */
			
 
				+	if (fw.num_relocs || fw.num_waitchks)
			
 
				 		return -EINVAL;
			
 
				 
			
 
				 	return 0;
			
@@ -573,6 +643,12 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 
				 	if (err)
			
 
				 		goto out;
			
 
				 
			
 
				+	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
			
 
				+		err = copy_gathers(job, dev);
			
 
				+		if (err)
			
 
				+			goto out;
			
 
				+	}
			
 
				+
			
 
				 	/* patch gathers */
			
 
				 	for (i = 0; i < job->num_gathers; i++) {
			
 
				 		struct host1x_job_gather *g = &job->gathers[i];
			
@@ -581,7 +657,9 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 
				 		if (g->handled)
			
 
				 			continue;
			
 
				 
			
 
				-		g->base = job->gather_addr_phys[i];
			
 
				+		/* copy_gathers() sets gathers base if firewall is enabled */
			
 
				+		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			
 
				+			g->base = job->gather_addr_phys[i];
			
 
				 
			
 
				 		for (j = i + 1; j < job->num_gathers; j++) {
			
 
				 			if (job->gathers[j].bo == g->bo) {
			
@@ -590,24 +668,18 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		err = do_relocs(job, g->bo);
			
 
				+		err = do_relocs(job, g);
			
 
				 		if (err)
			
 
				 			break;
			
 
				 
			
 
				-		err = do_waitchks(job, host, g->bo);
			
 
				+		err = do_waitchks(job, host, g);
			
 
				 		if (err)
			
 
				 			break;
			
 
				 	}
			
 
				 
			
 
				-	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !err) {
			
 
				-		err = copy_gathers(job, dev);
			
 
				-		if (err) {
			
 
				-			host1x_job_unpin(job);
			
 
				-			return err;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				 out:
			
 
				+	if (err)
			
 
				+		host1x_job_unpin(job);
			
 
				 	wmb();
			
 
				 
			
 
				 	return err;
			
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -27,20 +27,6 @@ struct host1x_job_gather {
 
				 	bool handled;
			
 
				 };
			
 
				 
			
 
				-struct host1x_cmdbuf {
			
 
				-	u32 handle;
			
 
				-	u32 offset;
			
 
				-	u32 words;
			
 
				-	u32 pad;
			
 
				-};
			
 
				-
			
 
				-struct host1x_waitchk {
			
 
				-	struct host1x_bo *bo;
			
 
				-	u32 offset;
			
 
				-	u32 syncpt_id;
			
 
				-	u32 thresh;
			
 
				-};
			
 
				-
			
 
				 struct host1x_job_unpin_data {
			
 
				 	struct host1x_bo *bo;
			
 
				 	struct sg_table *sgt;
			
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -99,14 +99,24 @@ unlock:
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * host1x_syncpt_id() - retrieve syncpoint ID
			
 
				+ * @sp: host1x syncpoint
			
 
				+ *
			
 
				+ * Given a pointer to a struct host1x_syncpt, retrieves its ID. This ID is
			
 
				+ * often used as a value to program into registers that control how hardware
			
 
				+ * blocks interact with syncpoints.
			
 
				+ */
			
 
				 u32 host1x_syncpt_id(struct host1x_syncpt *sp)
			
 
				 {
			
 
				 	return sp->id;
			
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_syncpt_id);
			
 
				 
			
 
				-/*
			
 
				- * Updates the value sent to hardware.
			
 
				+/**
			
 
				+ * host1x_syncpt_incr_max() - update the value sent to hardware
			
 
				+ * @sp: host1x syncpoint
			
 
				+ * @incrs: number of increments
			
 
				  */
			
 
				 u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs)
			
 
				 {
			
@@ -175,8 +185,9 @@ u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp)
 
				 	return sp->base_val;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Increment syncpoint value from cpu, updating cache
			
 
				+/**
			
 
				+ * host1x_syncpt_incr() - increment syncpoint value from CPU, updating cache
			
 
				+ * @sp: host1x syncpoint
			
 
				  */
			
 
				 int host1x_syncpt_incr(struct host1x_syncpt *sp)
			
 
				 {
			
@@ -195,8 +206,12 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
 
				 	return host1x_syncpt_is_expired(sp, thresh);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Main entrypoint for syncpoint value waits.
			
 
				+/**
			
 
				+ * host1x_syncpt_wait() - wait for a syncpoint to reach a given value
			
 
				+ * @sp: host1x syncpoint
			
 
				+ * @thresh: threshold
			
 
				+ * @timeout: maximum time to wait for the syncpoint to reach the given value
			
 
				+ * @value: return location for the syncpoint value
			
 
				  */
			
 
				 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
			
 
				 		       u32 *value)
			
@@ -402,6 +417,16 @@ int host1x_syncpt_init(struct host1x *host)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * host1x_syncpt_request() - request a syncpoint
			
 
				+ * @dev: device requesting the syncpoint
			
 
				+ * @flags: flags
			
 
				+ *
			
 
				+ * host1x client drivers can use this function to allocate a syncpoint for
			
 
				+ * subsequent use. A syncpoint returned by this function will be reserved for
			
 
				+ * use by the client exclusively. When no longer using a syncpoint, a host1x
			
 
				+ * client driver needs to release it using host1x_syncpt_free().
			
 
				+ */
			
 
				 struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
			
 
				 					    unsigned long flags)
			
 
				 {
			
@@ -411,6 +436,16 @@ struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_syncpt_request);
			
 
				 
			
 
				+/**
			
 
				+ * host1x_syncpt_free() - free a requested syncpoint
			
 
				+ * @sp: host1x syncpoint
			
 
				+ *
			
 
				+ * Release a syncpoint previously allocated using host1x_syncpt_request(). A
			
 
				+ * host1x client driver should call this when the syncpoint is no longer in
			
 
				+ * use. Note that client drivers must ensure that the syncpoint doesn't remain
			
 
				+ * under the control of hardware after calling this function, otherwise two
			
 
				+ * clients may end up trying to access the same syncpoint concurrently.
			
 
				+ */
			
 
				 void host1x_syncpt_free(struct host1x_syncpt *sp)
			
 
				 {
			
 
				 	if (!sp)
			
@@ -438,9 +473,12 @@ void host1x_syncpt_deinit(struct host1x *host)
 
				 		kfree(sp->name);
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Read max. It indicates how many operations there are in queue, either in
			
 
				- * channel or in a software thread.
			
 
				+/**
			
 
				+ * host1x_syncpt_read_max() - read maximum syncpoint value
			
 
				+ * @sp: host1x syncpoint
			
 
				+ *
			
 
				+ * The maximum syncpoint value indicates how many operations there are in
			
 
				+ * queue, either in a channel or in a software thread.
			
 
				  */
			
 
				 u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
			
 
				 {
			
@@ -450,8 +488,12 @@ u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_syncpt_read_max);
			
 
				 
			
 
				-/*
			
 
				- * Read min, which is a shadow of the current sync point value in hardware.
			
 
				+/**
			
 
				+ * host1x_syncpt_read_min() - read minimum syncpoint value
			
 
				+ * @sp: host1x syncpoint
			
 
				+ *
			
 
				+ * The minimum syncpoint value is a shadow of the current syncpoint value in
			
 
				+ * hardware.
			
 
				  */
			
 
				 u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
			
 
				 {
			
@@ -461,6 +503,10 @@ u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_syncpt_read_min);
			
 
				 
			
 
				+/**
			
 
				+ * host1x_syncpt_read() - read the current syncpoint value
			
 
				+ * @sp: host1x syncpoint
			
 
				+ */
			
 
				 u32 host1x_syncpt_read(struct host1x_syncpt *sp)
			
 
				 {
			
 
				 	return host1x_syncpt_load(sp);
			
@@ -482,6 +528,11 @@ unsigned int host1x_syncpt_nb_mlocks(struct host1x *host)
 
				 	return host->info->nb_mlocks;
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * host1x_syncpt_get() - obtain a syncpoint by ID
			
 
				+ * @host: host1x controller
			
 
				+ * @id: syncpoint ID
			
 
				+ */
			
 
				 struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id)
			
 
				 {
			
 
				 	if (id >= host->info->nb_pts)
			
@@ -491,12 +542,20 @@ struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id)
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_syncpt_get);
			
 
				 
			
 
				+/**
			
 
				+ * host1x_syncpt_get_base() - obtain the wait base associated with a syncpoint
			
 
				+ * @sp: host1x syncpoint
			
 
				+ */
			
 
				 struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp)
			
 
				 {
			
 
				 	return sp ? sp->base : NULL;
			
 
				 }
			
 
				 EXPORT_SYMBOL(host1x_syncpt_get_base);
			
 
				 
			
 
				+/**
			
 
				+ * host1x_syncpt_base_id() - retrieve the ID of a syncpoint wait base
			
 
				+ * @base: host1x syncpoint wait base
			
 
				+ */
			
 
				 u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base)
			
 
				 {
			
 
				 	return base->id;
			
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -32,11 +32,27 @@ enum host1x_class {
 
				 
			
 
				 struct host1x_client;
			
 
				 
			
 
				+/**
			
 
				+ * struct host1x_client_ops - host1x client operations
			
 
				+ * @init: host1x client initialization code
			
 
				+ * @exit: host1x client tear down code
			
 
				+ */
			
 
				 struct host1x_client_ops {
			
 
				 	int (*init)(struct host1x_client *client);
			
 
				 	int (*exit)(struct host1x_client *client);
			
 
				 };
			
 
				 
			
 
				+/**
			
 
				+ * struct host1x_client - host1x client structure
			
 
				+ * @list: list node for the host1x client
			
 
				+ * @parent: pointer to struct device representing the host1x controller
			
 
				+ * @dev: pointer to struct device backing this host1x client
			
 
				+ * @ops: host1x client operations
			
 
				+ * @class: host1x class represented by this client
			
 
				+ * @channel: host1x channel associated with this client
			
 
				+ * @syncpts: array of syncpoints requested for this client
			
 
				+ * @num_syncpts: number of syncpoints requested for this client
			
 
				+ */
			
 
				 struct host1x_client {
			
 
				 	struct list_head list;
			
 
				 	struct device *parent;
			
@@ -156,7 +172,6 @@ struct host1x_channel;
 
				 struct host1x_job;
			
 
				 
			
 
				 struct host1x_channel *host1x_channel_request(struct device *dev);
			
 
				-void host1x_channel_free(struct host1x_channel *channel);
			
 
				 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
			
 
				 void host1x_channel_put(struct host1x_channel *channel);
			
 
				 int host1x_job_submit(struct host1x_job *job);
			
@@ -177,6 +192,13 @@ struct host1x_reloc {
 
				 	unsigned long shift;
			
 
				 };
			
 
				 
			
 
				+struct host1x_waitchk {
			
 
				+	struct host1x_bo *bo;
			
 
				+	u32 offset;
			
 
				+	u32 syncpt_id;
			
 
				+	u32 thresh;
			
 
				+};
			
 
				+
			
 
				 struct host1x_job {
			
 
				 	/* When refcount goes to zero, job can be freed */
			
 
				 	struct kref ref;
			
@@ -226,7 +248,10 @@ struct host1x_job {
 
				 	u8 *gather_copy_mapped;
			
 
				 
			
 
				 	/* Check if register is marked as an address reg */
			
 
				-	int (*is_addr_reg)(struct device *dev, u32 reg, u32 class);
			
 
				+	int (*is_addr_reg)(struct device *dev, u32 class, u32 reg);
			
 
				+
			
 
				+	/* Check if class belongs to the unit */
			
 
				+	int (*is_valid_class)(u32 class);
			
 
				 
			
 
				 	/* Request a SETCLASS to this class */
			
 
				 	u32 class;
			
@@ -251,6 +276,15 @@ void host1x_job_unpin(struct host1x_job *job);
 
				 
			
 
				 struct host1x_device;
			
 
				 
			
 
				+/**
			
 
				+ * struct host1x_driver - host1x logical device driver
			
 
				+ * @driver: core driver
			
 
				+ * @subdevs: table of OF device IDs matching subdevices for this driver
			
 
				+ * @list: list node for the driver
			
 
				+ * @probe: called when the host1x logical device is probed
			
 
				+ * @remove: called when the host1x logical device is removed
			
 
				+ * @shutdown: called when the host1x logical device is shut down
			
 
				+ */
			
 
				 struct host1x_driver {
			
 
				 	struct device_driver driver;