Browse Source

Merge branch 'drm-radeon-kms' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6

* 'drm-radeon-kms' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6: (35 commits)
  drm/radeon: set fb aperture sizes for framebuffer handoff.
  drm/ttm: fix highuser vs dma32 confusion.
  drm/radeon: Fix size used for benchmarking BO copies.
  drm/radeon: Add radeon.test parameter for running BO GPU copy tests.
  drm/radeon/kms: allow interruptible waits for objects.
  drm/ttm: powerpc: Fix Highmem cache flushing.
  x86: Export kmap_atomic_prot() needed for TTM.
  drm/ttm: Fix ttm in-kernel copying of pages with non-standard caching attributes.
  drm/ttm: Fix an oops and sync object leak.
  drm/radeon/kms: vram sizing on certain r100 chips needs workaround.
  drm/radeon: Pay more attention to object placement requested by userspace.
  drm/radeon: Fall back to evicting BOs with memcpy if necessary.
  drm/radeon: Don't unreserve twice on failure to validate.
  drm/radeon/kms: fix bandwidth computation on avivo hardware
  drm/radeon/kms: add initial colortiling support.
  drm/radeon/kms: fix hotspot handling on pre-avivo chips
  drm/radeon/kms: enable frac fb divs on rs600/rs690/rs740
  drm/radeon/kms: add PLL flag to prefer frequencies <= the target freq
  drm/radeon/kms: block RN50 from using 3D engine.
  drm/radeon/kms: fix VRAM sizing like DDX does it.
  ...
Linus Torvalds, cách đây 16 năm
mục cha
commit
84210aeb4a
46 tập tin đã thay đổi với 3576 bổ sung và 1529 xóa
  1. 1 0
      arch/x86/mm/highmem_32.c
  2. 2 1
      drivers/gpu/drm/radeon/Makefile
  3. 146 147
      drivers/gpu/drm/radeon/atombios_crtc.c
  4. 757 13
      drivers/gpu/drm/radeon/r100.c
  5. 70 8
      drivers/gpu/drm/radeon/r300.c
  6. 3 1
      drivers/gpu/drm/radeon/r300_reg.h
  7. 2 0
      drivers/gpu/drm/radeon/r500_reg.h
  8. 17 5
      drivers/gpu/drm/radeon/r520.c
  9. 3 2
      drivers/gpu/drm/radeon/r600.c
  10. 82 5
      drivers/gpu/drm/radeon/radeon.h
  11. 32 0
      drivers/gpu/drm/radeon/radeon_asic.h
  12. 6 3
      drivers/gpu/drm/radeon/radeon_atombios.c
  13. 2 2
      drivers/gpu/drm/radeon/radeon_benchmark.c
  14. 7 1
      drivers/gpu/drm/radeon/radeon_cs.c
  15. 7 2
      drivers/gpu/drm/radeon/radeon_cursor.c
  16. 37 22
      drivers/gpu/drm/radeon/radeon_device.c
  17. 47 27
      drivers/gpu/drm/radeon/radeon_display.c
  18. 4 0
      drivers/gpu/drm/radeon/radeon_drv.c
  19. 2 357
      drivers/gpu/drm/radeon/radeon_encoders.c
  20. 64 6
      drivers/gpu/drm/radeon/radeon_fb.c
  21. 1 1
      drivers/gpu/drm/radeon/radeon_fence.c
  22. 1 1
      drivers/gpu/drm/radeon/radeon_gart.c
  23. 43 2
      drivers/gpu/drm/radeon/radeon_gem.c
  24. 2 0
      drivers/gpu/drm/radeon/radeon_kms.c
  25. 197 490
      drivers/gpu/drm/radeon/radeon_legacy_crtc.c
  26. 4 184
      drivers/gpu/drm/radeon/radeon_legacy_encoders.c
  27. 27 24
      drivers/gpu/drm/radeon/radeon_mode.h
  28. 142 16
      drivers/gpu/drm/radeon/radeon_object.c
  29. 1 14
      drivers/gpu/drm/radeon/radeon_ring.c
  30. 39 0
      drivers/gpu/drm/radeon/radeon_share.h
  31. 209 0
      drivers/gpu/drm/radeon/radeon_test.c
  32. 15 9
      drivers/gpu/drm/radeon/radeon_ttm.c
  33. 15 15
      drivers/gpu/drm/radeon/rs400.c
  34. 6 1
      drivers/gpu/drm/radeon/rs600.c
  35. 476 3
      drivers/gpu/drm/radeon/rs690.c
  36. 99 0
      drivers/gpu/drm/radeon/rs690r.h
  37. 668 130
      drivers/gpu/drm/radeon/rv515.c
  38. 170 0
      drivers/gpu/drm/radeon/rv515r.h
  39. 1 1
      drivers/gpu/drm/radeon/rv770.c
  40. 49 14
      drivers/gpu/drm/ttm/ttm_bo.c
  41. 56 13
      drivers/gpu/drm/ttm/ttm_bo_util.c
  42. 3 0
      drivers/gpu/drm/ttm/ttm_bo_vm.c
  43. 19 6
      drivers/gpu/drm/ttm/ttm_tt.c
  44. 22 1
      include/drm/radeon_drm.h
  45. 19 1
      include/drm/ttm/ttm_bo_driver.h
  46. 1 1
      include/drm/ttm/ttm_module.h

+ 1 - 0
arch/x86/mm/highmem_32.c

@@ -103,6 +103,7 @@ EXPORT_SYMBOL(kmap);
 EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kmap_atomic_prot);
 
 void __init set_highmem_pages_init(void)
 {

+ 2 - 1
drivers/gpu/drm/radeon/Makefile

@@ -13,7 +13,8 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \
 	radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \
 	radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \
 	radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
-	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o
+	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \
+	radeon_test.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 

+ 146 - 147
drivers/gpu/drm/radeon/atombios_crtc.c

@@ -31,6 +31,132 @@
 #include "atom.h"
 #include "atom-bits.h"
 
+/**
+ * atombios_overscan_setup() - program CRTC overscan borders via AtomBIOS
+ * @crtc:		CRTC to program; its rmx_type selects the border layout.
+ * @mode:		mode whose crtc_hdisplay/crtc_vdisplay give the image size.
+ * @adjusted_mode:	mode whose crtc_hdisplay/crtc_vdisplay give the
+ *			scanned-out size.
+ *
+ * Fills a SET_CRTC_OVERSCAN_PS_ALLOCATION and executes the
+ * SetCRTC_OverScan command table:
+ *  - RMX_CENTER: the size difference is split evenly between
+ *    left/right and top/bottom.
+ *  - RMX_ASPECT: only the axis with spare room gets borders
+ *    (left/right when the image is relatively taller, top/bottom
+ *    when it is relatively wider, none when the ratios match).
+ *  - RMX_FULL and any other value: no borders.
+ */
+static void atombios_overscan_setup(struct drm_crtc *crtc,
+				    struct drm_display_mode *mode,
+				    struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	SET_CRTC_OVERSCAN_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_OverScan);
+	int a1, a2;
+
+	/* all four overscan fields start out as zero, i.e. no borders */
+	memset(&args, 0, sizeof(args));
+	args.ucCRTC = radeon_crtc->crtc_id;
+
+	switch (radeon_crtc->rmx_type) {
+	case RMX_CENTER:
+		args.usOverscanTop = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
+		args.usOverscanBottom = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
+		args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
+		args.usOverscanRight = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
+		break;
+	case RMX_ASPECT:
+		/* cross-multiplied aspect ratios, to avoid fractions */
+		a1 = mode->crtc_vdisplay * adjusted_mode->crtc_hdisplay;
+		a2 = adjusted_mode->crtc_vdisplay * mode->crtc_hdisplay;
+
+		if (a1 > a2) {
+			/* image is relatively taller: borders left and right */
+			args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
+			args.usOverscanRight = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
+		} else if (a2 > a1) {
+			/*
+			 * image is relatively wider: the spare room is vertical,
+			 * so the borders go on top and bottom, not left/right
+			 */
+			args.usOverscanTop = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
+			args.usOverscanBottom = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
+		}
+		break;
+	case RMX_FULL:
+	default:
+		/* no borders: keep the zeroed overscan values */
+		break;
+	}
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+/*
+ * Configure the scaler of @crtc through the AtomBIOS EnableScaler table.
+ *
+ * On non-AVIVO ASICs only crtc_id 0 is handled; other CRTCs return early.
+ * TV and CV devices select a TV standard and multitap mode; every other
+ * device picks the scaler mode from the CRTC's rmx_type.
+ */
+static void atombios_scaler_setup(struct drm_crtc *crtc)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_device *rdev = crtc->dev->dev_private;
+	int index = GetIndexIntoMasterTable(COMMAND, EnableScaler);
+	ENABLE_SCALER_PS_ALLOCATION args;
+	/* fixme - fill in enc_priv for atom dac */
+	enum radeon_tv_std tv_std = TV_STD_NTSC;
+	bool is_tv, is_cv;
+
+	if (radeon_crtc->crtc_id && !ASIC_IS_AVIVO(rdev))
+		return;
+
+	is_tv = (radeon_crtc->devices & (ATOM_DEVICE_TV_SUPPORT)) != 0;
+	is_cv = (radeon_crtc->devices & (ATOM_DEVICE_CV_SUPPORT)) != 0;
+
+	memset(&args, 0, sizeof(args));
+	args.ucScaler = radeon_crtc->crtc_id;
+
+	if (is_tv) {
+		args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
+		switch (tv_std) {
+		case TV_STD_PAL:
+		case TV_STD_SCART_PAL:	/* ??? */
+			args.ucTVStandard = ATOM_TV_PAL;
+			break;
+		case TV_STD_PAL_M:
+			args.ucTVStandard = ATOM_TV_PALM;
+			break;
+		case TV_STD_PAL_60:
+			args.ucTVStandard = ATOM_TV_PAL60;
+			break;
+		case TV_STD_NTSC_J:
+			args.ucTVStandard = ATOM_TV_NTSCJ;
+			break;
+		case TV_STD_SECAM:
+			args.ucTVStandard = ATOM_TV_SECAM;
+			break;
+		case TV_STD_PAL_CN:
+			args.ucTVStandard = ATOM_TV_PALCN;
+			break;
+		case TV_STD_NTSC:
+		default:
+			args.ucTVStandard = ATOM_TV_NTSC;
+			break;
+		}
+	} else if (is_cv) {
+		args.ucTVStandard = ATOM_TV_CV;
+		args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
+	} else {
+		switch (radeon_crtc->rmx_type) {
+		case RMX_FULL:
+		case RMX_ASPECT:
+			args.ucEnable = ATOM_SCALER_EXPANSION;
+			break;
+		case RMX_CENTER:
+			args.ucEnable = ATOM_SCALER_CENTER;
+			break;
+		default:
+			args.ucEnable = ASIC_IS_AVIVO(rdev) ?
+				ATOM_SCALER_DISABLE : ATOM_SCALER_CENTER;
+			break;
+		}
+	}
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+	/* TV/CV outputs on RV515..RV570 additionally get the forced TV scaler */
+	if ((is_tv || is_cv) &&
+	    rdev->family >= CHIP_RV515 && rdev->family <= CHIP_RV570)
+		atom_rv515_force_tv_scaler(rdev);
+}
+
 static void atombios_lock_crtc(struct drm_crtc *crtc, int lock)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
@@ -203,6 +329,12 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
 	if (ASIC_IS_AVIVO(rdev)) {
 		uint32_t ss_cntl;
 
+		if ((rdev->family == CHIP_RS600) ||
+		    (rdev->family == CHIP_RS690) ||
+		    (rdev->family == CHIP_RS740))
+			pll_flags |= (RADEON_PLL_USE_FRAC_FB_DIV |
+				      RADEON_PLL_PREFER_CLOSEST_LOWER);
+
 		if (ASIC_IS_DCE32(rdev) && mode->clock > 200000)	/* range limits??? */
 			pll_flags |= RADEON_PLL_PREFER_HIGH_FB_DIV;
 		else
@@ -321,7 +453,7 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 	struct drm_gem_object *obj;
 	struct drm_radeon_gem_object *obj_priv;
 	uint64_t fb_location;
-	uint32_t fb_format, fb_pitch_pixels;
+	uint32_t fb_format, fb_pitch_pixels, tiling_flags;
 
 	if (!crtc->fb)
 		return -EINVAL;
@@ -358,7 +490,14 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 		return -EINVAL;
 	}
 
-	/* TODO tiling */
+	radeon_object_get_tiling_flags(obj->driver_private,
+				       &tiling_flags, NULL);
+	if (tiling_flags & RADEON_TILING_MACRO)
+		fb_format |= AVIVO_D1GRPH_MACRO_ADDRESS_MODE;
+
+	if (tiling_flags & RADEON_TILING_MICRO)
+		fb_format |= AVIVO_D1GRPH_TILED;
+
 	if (radeon_crtc->crtc_id == 0)
 		WREG32(AVIVO_D1VGA_CONTROL, 0);
 	else
@@ -509,6 +648,9 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc,
 		radeon_crtc_set_base(crtc, x, y, old_fb);
 		radeon_legacy_atom_set_surface(crtc);
 	}
+	atombios_overscan_setup(crtc, mode, adjusted_mode);
+	atombios_scaler_setup(crtc);
+	radeon_bandwidth_update(rdev);
 	return 0;
 }
 
@@ -516,6 +658,8 @@ static bool atombios_crtc_mode_fixup(struct drm_crtc *crtc,
 				     struct drm_display_mode *mode,
 				     struct drm_display_mode *adjusted_mode)
 {
+	if (!radeon_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+		return false;
 	return true;
 }
 
@@ -548,148 +692,3 @@ void radeon_atombios_init_crtc(struct drm_device *dev,
 		    AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL;
 	drm_crtc_helper_add(&radeon_crtc->base, &atombios_helper_funcs);
 }
-
-void radeon_init_disp_bw_avivo(struct drm_device *dev,
-			       struct drm_display_mode *mode1,
-			       uint32_t pixel_bytes1,
-			       struct drm_display_mode *mode2,
-			       uint32_t pixel_bytes2)
-{
-	struct radeon_device *rdev = dev->dev_private;
-	fixed20_12 min_mem_eff;
-	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
-	fixed20_12 sclk_ff, mclk_ff;
-	uint32_t dc_lb_memory_split, temp;
-
-	min_mem_eff.full = rfixed_const_8(0);
-	if (rdev->disp_priority == 2) {
-		uint32_t mc_init_misc_lat_timer = 0;
-		if (rdev->family == CHIP_RV515)
-			mc_init_misc_lat_timer =
-			    RREG32_MC(RV515_MC_INIT_MISC_LAT_TIMER);
-		else if (rdev->family == CHIP_RS690)
-			mc_init_misc_lat_timer =
-			    RREG32_MC(RS690_MC_INIT_MISC_LAT_TIMER);
-
-		mc_init_misc_lat_timer &=
-		    ~(R300_MC_DISP1R_INIT_LAT_MASK <<
-		      R300_MC_DISP1R_INIT_LAT_SHIFT);
-		mc_init_misc_lat_timer &=
-		    ~(R300_MC_DISP0R_INIT_LAT_MASK <<
-		      R300_MC_DISP0R_INIT_LAT_SHIFT);
-
-		if (mode2)
-			mc_init_misc_lat_timer |=
-			    (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
-		if (mode1)
-			mc_init_misc_lat_timer |=
-			    (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
-
-		if (rdev->family == CHIP_RV515)
-			WREG32_MC(RV515_MC_INIT_MISC_LAT_TIMER,
-				  mc_init_misc_lat_timer);
-		else if (rdev->family == CHIP_RS690)
-			WREG32_MC(RS690_MC_INIT_MISC_LAT_TIMER,
-				  mc_init_misc_lat_timer);
-	}
-
-	/*
-	 * determine is there is enough bw for current mode
-	 */
-	temp_ff.full = rfixed_const(100);
-	mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
-	mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
-	sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
-	sclk_ff.full = rfixed_div(sclk_ff, temp_ff);
-
-	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
-	temp_ff.full = rfixed_const(temp);
-	mem_bw.full = rfixed_mul(mclk_ff, temp_ff);
-	mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
-
-	pix_clk.full = 0;
-	pix_clk2.full = 0;
-	peak_disp_bw.full = 0;
-	if (mode1) {
-		temp_ff.full = rfixed_const(1000);
-		pix_clk.full = rfixed_const(mode1->clock);	/* convert to fixed point */
-		pix_clk.full = rfixed_div(pix_clk, temp_ff);
-		temp_ff.full = rfixed_const(pixel_bytes1);
-		peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
-	}
-	if (mode2) {
-		temp_ff.full = rfixed_const(1000);
-		pix_clk2.full = rfixed_const(mode2->clock);	/* convert to fixed point */
-		pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
-		temp_ff.full = rfixed_const(pixel_bytes2);
-		peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
-	}
-
-	if (peak_disp_bw.full >= mem_bw.full) {
-		DRM_ERROR
-		    ("You may not have enough display bandwidth for current mode\n"
-		     "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
-		printk("peak disp bw %d, mem_bw %d\n",
-		       rfixed_trunc(peak_disp_bw), rfixed_trunc(mem_bw));
-	}
-
-	/*
-	 * Line Buffer Setup
-	 * There is a single line buffer shared by both display controllers.
-	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between the display
-	 * controllers.  The paritioning can either be done manually or via one of four
-	 * preset allocations specified in bits 1:0:
-	 * 0 - line buffer is divided in half and shared between each display controller
-	 * 1 - D1 gets 3/4 of the line buffer, D2 gets 1/4
-	 * 2 - D1 gets the whole buffer
-	 * 3 - D1 gets 1/4 of the line buffer, D2 gets 3/4
-	 * Setting bit 2 of DC_LB_MEMORY_SPLIT controls switches to manual allocation mode.
-	 * In manual allocation mode, D1 always starts at 0, D1 end/2 is specified in bits
-	 * 14:4; D2 allocation follows D1.
-	 */
-
-	/* is auto or manual better ? */
-	dc_lb_memory_split =
-	    RREG32(AVIVO_DC_LB_MEMORY_SPLIT) & ~AVIVO_DC_LB_MEMORY_SPLIT_MASK;
-	dc_lb_memory_split &= ~AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE;
-#if 1
-	/* auto */
-	if (mode1 && mode2) {
-		if (mode1->hdisplay > mode2->hdisplay) {
-			if (mode1->hdisplay > 2560)
-				dc_lb_memory_split |=
-				    AVIVO_DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q;
-			else
-				dc_lb_memory_split |=
-				    AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
-		} else if (mode2->hdisplay > mode1->hdisplay) {
-			if (mode2->hdisplay > 2560)
-				dc_lb_memory_split |=
-				    AVIVO_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
-			else
-				dc_lb_memory_split |=
-				    AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
-		} else
-			dc_lb_memory_split |=
-			    AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
-	} else if (mode1) {
-		dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_D1_ONLY;
-	} else if (mode2) {
-		dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
-	}
-#else
-	/* manual */
-	dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE;
-	dc_lb_memory_split &=
-	    ~(AVIVO_DC_LB_DISP1_END_ADR_MASK <<
-	      AVIVO_DC_LB_DISP1_END_ADR_SHIFT);
-	if (mode1) {
-		dc_lb_memory_split |=
-		    ((((mode1->hdisplay / 2) + 64) & AVIVO_DC_LB_DISP1_END_ADR_MASK)
-		     << AVIVO_DC_LB_DISP1_END_ADR_SHIFT);
-	} else if (mode2) {
-		dc_lb_memory_split |= (0 << AVIVO_DC_LB_DISP1_END_ADR_SHIFT);
-	}
-#endif
-	WREG32(AVIVO_DC_LB_MEMORY_SPLIT, dc_lb_memory_split);
-}

+ 757 - 13
drivers/gpu/drm/radeon/r100.c

@@ -110,7 +110,7 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
 	if (i < 0 || i > rdev->gart.num_gpu_pages) {
 		return -EINVAL;
 	}
-	rdev->gart.table.ram.ptr[i] = cpu_to_le32((uint32_t)addr);
+	rdev->gart.table.ram.ptr[i] = cpu_to_le32(lower_32_bits(addr));
 	return 0;
 }
 
@@ -173,8 +173,12 @@ void r100_mc_setup(struct radeon_device *rdev)
 		DRM_ERROR("Failed to register debugfs file for R100 MC !\n");
 	}
 	/* Write VRAM size in case we are limiting it */
-	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
-	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
+	/* Novell bug 204882 for RN50/M6/M7 with 8/16/32MB VRAM,
+	 * if the aperture is 64MB but we have 32MB VRAM
+	 * we report only 32MB VRAM but we have to set MC_FB_LOCATION
+	 * to 64MB, otherwise the gpu accidentally dies */
+	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
 	tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
 	tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
 	WREG32(RADEON_MC_FB_LOCATION, tmp);
@@ -215,7 +219,6 @@ int r100_mc_init(struct radeon_device *rdev)
 	r100_pci_gart_disable(rdev);
 
 	/* Setup GPU memory space */
-	rdev->mc.vram_location = 0xFFFFFFFFUL;
 	rdev->mc.gtt_location = 0xFFFFFFFFUL;
 	if (rdev->flags & RADEON_IS_AGP) {
 		r = radeon_agp_init(rdev);
@@ -752,6 +755,102 @@ int r100_cs_packet_parse(struct radeon_cs_parser *p,
 	return 0;
 }
 
+/**
+ * r100_cs_packet_parse_vline() - parse userspace VLINE packet
+ * @p:		parser structure holding parsing context.
+ *
+ * Userspace sends a special sequence for VLINE waits.
+ * PACKET0 - VLINE_START_END + value
+ * PACKET0 - WAIT_UNTIL + value
+ * RELOC (P3) - crtc_id in reloc.
+ *
+ * This function parses this and relocates the VLINE START END
+ * and WAIT UNTIL packets to the correct crtc.
+ * It also detects a switched off crtc and nulls out the
+ * wait in that case.
+ *
+ * Returns 0 on success, the error returned by r100_cs_packet_parse()
+ * if either packet fails to parse, or -EINVAL if the sequence is
+ * malformed, the crtc cannot be found, or the register is unknown.
+ */
+int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct drm_mode_object *obj;
+	struct drm_crtc *crtc;
+	struct radeon_crtc *radeon_crtc;
+	struct radeon_cs_packet p3reloc, waitreloc;
+	int crtc_id;
+	int r;
+	uint32_t header, h_idx, reg;
+
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+
+	/* parse the wait until */
+	r = r100_cs_packet_parse(p, &waitreloc, p->idx);
+	if (r)
+		return r;
+
+	/* check it's a wait until and only 1 count */
+	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
+	    waitreloc.count != 0) {
+		DRM_ERROR("vline wait had illegal wait until segment\n");
+		r = -EINVAL;
+		return r;
+	}
+
+	if (ib_chunk->kdata[waitreloc.idx + 1] != RADEON_WAIT_CRTC_VLINE) {
+		DRM_ERROR("vline wait had illegal wait until\n");
+		r = -EINVAL;
+		return r;
+	}
+
+	/* jump over the NOP */
+	r = r100_cs_packet_parse(p, &p3reloc, p->idx);
+	if (r)
+		return r;
+
+	/*
+	 * NOTE(review): h_idx and the +2/+3/+5 offsets used below assume
+	 * the fixed packet layout described in the header comment and
+	 * depend on how r100_cs_packet_parse() treats p->idx - confirm.
+	 */
+	h_idx = p->idx - 2;
+	p->idx += waitreloc.count;
+	p->idx += p3reloc.count;
+
+	header = ib_chunk->kdata[h_idx];
+	/* NOTE(review): no bounds check on h_idx + 5 is visible here - confirm */
+	crtc_id = ib_chunk->kdata[h_idx + 5];
+	reg = ib_chunk->kdata[h_idx] >> 2;
+	/* the crtc lookup and the ->enabled check run under mode_config.mutex */
+	mutex_lock(&p->rdev->ddev->mode_config.mutex);
+	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		DRM_ERROR("cannot find crtc %d\n", crtc_id);
+		r = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+	radeon_crtc = to_radeon_crtc(crtc);
+	/* from here on crtc_id is the radeon crtc index, not the mode object id */
+	crtc_id = radeon_crtc->crtc_id;
+
+	if (!crtc->enabled) {
+		/* if the CRTC isn't enabled - we need to nop out the wait until */
+		ib_chunk->kdata[h_idx + 2] = PACKET2(0);
+		ib_chunk->kdata[h_idx + 3] = PACKET2(0);
+	} else if (crtc_id == 1) {
+		/* second crtc: retarget the header at the CRTC2/D2 register */
+		switch (reg) {
+		case AVIVO_D1MODE_VLINE_START_END:
+			header &= R300_CP_PACKET0_REG_MASK;
+			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
+			break;
+		case RADEON_CRTC_GUI_TRIG_VLINE:
+			header &= R300_CP_PACKET0_REG_MASK;
+			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
+			break;
+		default:
+			DRM_ERROR("unknown crtc reloc\n");
+			r = -EINVAL;
+			goto out;
+		}
+		ib_chunk->kdata[h_idx] = header;
+		ib_chunk->kdata[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
+	}
+out:
+	/* on the success path r is still 0 from the last successful parse */
+	mutex_unlock(&p->rdev->ddev->mode_config.mutex);
+	return r;
+}
+
 /**
  * r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3
  * @parser:		parser structure holding parsing context.
@@ -814,6 +913,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 	unsigned idx;
 	bool onereg;
 	int r;
+	u32 tile_flags = 0;
 
 	ib = p->ib->ptr;
 	ib_chunk = &p->chunks[p->chunk_ib_idx];
@@ -825,6 +925,15 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 	}
 	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
 		switch (reg) {
+		case RADEON_CRTC_GUI_TRIG_VLINE:
+			r = r100_cs_packet_parse_vline(p);
+			if (r) {
+				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+						idx, reg);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+			break;
 		/* FIXME: only allow PACKET3 blit? easier to check for out of
 		 * range access */
 		case RADEON_DST_PITCH_OFFSET:
@@ -838,7 +947,20 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			}
 			tmp = ib_chunk->kdata[idx] & 0x003fffff;
 			tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-			ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
+
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+				tile_flags |= RADEON_DST_TILE_MACRO;
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+				if (reg == RADEON_SRC_PITCH_OFFSET) {
+					DRM_ERROR("Cannot src blit from microtiled surface\n");
+					r100_cs_dump_packet(p, pkt);
+					return -EINVAL;
+				}
+				tile_flags |= RADEON_DST_TILE_MICRO;
+			}
+
+			tmp |= tile_flags;
+			ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
 			break;
 		case RADEON_RB3D_DEPTHOFFSET:
 		case RADEON_RB3D_COLOROFFSET:
@@ -869,6 +991,11 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 		case R300_TX_OFFSET_0+52:
 		case R300_TX_OFFSET_0+56:
 		case R300_TX_OFFSET_0+60:
+			/* rn50 has no 3D engine so fail on any 3d setup */
+			if (ASIC_IS_RN50(p->rdev)) {
+				DRM_ERROR("attempt to use RN50 3D engine failed\n");
+				return -EINVAL;
+			}
 			r = r100_cs_packet_next_reloc(p, &reloc);
 			if (r) {
 				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
@@ -878,6 +1005,25 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			}
 			ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
 			break;
+		case R300_RB3D_COLORPITCH0:
+		case RADEON_RB3D_COLORPITCH:
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					  idx, reg);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+				tile_flags |= RADEON_COLOR_TILE_ENABLE;
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
+
+			tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
+			tmp |= tile_flags;
+			ib[idx] = tmp;
+			break;
 		default:
 			/* FIXME: we don't want to allow anyothers packet */
 			break;
@@ -1256,29 +1402,100 @@ static void r100_vram_get_type(struct radeon_device *rdev)
 	}
 }
 
-void r100_vram_info(struct radeon_device *rdev)
+/*
+ * Work out how much VRAM is reachable, starting from the value read
+ * from RADEON_CONFIG_APER_SIZE.
+ *
+ * Returns twice that value when RADEON_HDP_APER_CNTL is written here
+ * (RV280 and RV350 or newer) or is already set in RADEON_HOST_PATH_CNTL
+ * on an older single-function card; otherwise returns the value
+ * unchanged.
+ */
+static u32 r100_get_accessible_vram(struct radeon_device *rdev)
 {
-	r100_vram_get_type(rdev);
+	u32 aper_size;
+	u8 byte;
+
+	aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
+
+	/* Set HDP_APER_CNTL only on cards that are known not to be broken,
+	 * that is, cards that have the 2nd generation multifunction PCI
+	 * interface
+	 */
+	if (rdev->family == CHIP_RV280 ||
+	    rdev->family >= CHIP_RV350) {
+		WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
+		       ~RADEON_HDP_APER_CNTL);
+		DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
+		return aper_size * 2;
+	}
+
+	/* Older cards have all sorts of funny issues to deal with. First
+	 * check if it's a multifunction card by reading the PCI config
+	 * header type (config offset 0xe, bit 0x80)... Limit those to one
+	 * aperture size
+	 */
+	pci_read_config_byte(rdev->pdev, 0xe, &byte);
+	if (byte & 0x80) {
+		DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
+		DRM_INFO("Limiting VRAM to one aperture\n");
+		return aper_size;
+	}
+
+	/* Single function older card. We read HDP_APER_CNTL to see how the BIOS
+	 * has set it up. We don't write this as it's broken on some ASICs but
+	 * we expect the BIOS to have done the right thing (might be too optimistic...)
+	 */
+	if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
+		return aper_size * 2;
+	return aper_size;
+}
+
+/*
+ * Fill in rdev->mc.real_vram_size, mc_vram_size, vram_location,
+ * aper_base and aper_size.
+ *
+ * IGPs derive size and location from RADEON_NB_TOM; other chips read
+ * RADEON_CONFIG_MEMSIZE and ask for VRAM to be placed by the driver
+ * (vram_location = 0xFFFFFFFF).  Both sizes are finally clamped to the
+ * size of PCI resource 0.
+ */
+void r100_vram_init_sizes(struct radeon_device *rdev)
+{
+	u64 config_aper_size;
+	u32 accessible;
+
+	config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
 
 	if (rdev->flags & RADEON_IS_IGP) {
 		uint32_t tom;
 		/* read NB_TOM to get the amount of ram stolen for the GPU */
 		tom = RREG32(RADEON_NB_TOM);
-		rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
-		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
+		rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
+		/* for IGPs we need to keep VRAM where it was put by the BIOS */
+		rdev->mc.vram_location = (tom & 0xffff) << 16;
+		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
+		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
 	} else {
-		rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
+		rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
 		/* Some production boards of m6 will report 0
 		 * if it's 8 MB
 		 */
-		if (rdev->mc.vram_size == 0) {
-			rdev->mc.vram_size = 8192 * 1024;
-			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
+		if (rdev->mc.real_vram_size == 0) {
+			rdev->mc.real_vram_size = 8192 * 1024;
+			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
 		}
+		/* let driver place VRAM */
+		rdev->mc.vram_location = 0xFFFFFFFFUL;
+		/* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
+		 * Novell bug 204882 + along with lots of ubuntu ones */
+		if (config_aper_size > rdev->mc.real_vram_size)
+			rdev->mc.mc_vram_size = config_aper_size;
+		else
+			rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
 	}
 
+	/* work out accessible VRAM */
+	/* the call may also write RADEON_HOST_PATH_CNTL as a side effect */
+	accessible = r100_get_accessible_vram(rdev);
+
 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+
+	/* NOTE(review): accessible is clamped here but never stored or used
+	 * afterwards in this function - confirm whether that is intended */
+	if (accessible > rdev->mc.aper_size)
+		accessible = rdev->mc.aper_size;
+
+	if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
+		rdev->mc.mc_vram_size = rdev->mc.aper_size;
+
+	if (rdev->mc.real_vram_size > rdev->mc.aper_size)
+		rdev->mc.real_vram_size = rdev->mc.aper_size;
+}
+
+/*
+ * Gather VRAM information for @rdev: first the memory type
+ * (r100_vram_get_type), then the size/location fields
+ * (r100_vram_init_sizes).
+ */
+void r100_vram_info(struct radeon_device *rdev)
+{
+	r100_vram_get_type(rdev);
+
+	r100_vram_init_sizes(rdev);
 }
 
 
@@ -1533,3 +1750,530 @@ int r100_debugfs_mc_info_init(struct radeon_device *rdev)
 	return 0;
 #endif
 }
+
+/*
+ * r100_set_surface_reg - program one surface register
+ * @rdev: device whose registers are written
+ * @reg: surface register index; each index is 16 bytes further from the
+ *       SURFACE0 registers
+ * @tiling_flags: RADEON_TILING_MACRO / RADEON_TILING_MICRO bits
+ * @pitch: surface pitch; divided by 16 before R300 and by 8 otherwise
+ * @offset: value written to the lower-bound register
+ * @obj_size: size used to derive the upper bound (offset + obj_size - 1)
+ *
+ * Writes the INFO, LOWER_BOUND and UPPER_BOUND words of the selected
+ * surface register. Always returns 0.
+ */
+int r100_set_surface_reg(struct radeon_device *rdev, int reg,
+			 uint32_t tiling_flags, uint32_t pitch,
+			 uint32_t offset, uint32_t obj_size)
+{
+	int surf_index = reg * 16;
+	int flags = 0;
+
+	/* r100/r200 divide by 16 */
+	if (rdev->family < CHIP_R300)
+		flags = pitch / 16;
+	else
+		flags = pitch / 8;
+
+	/* flags now holds the scaled pitch; the per-family tiling bits are OR'ed in below */
+	if (rdev->family <= CHIP_RS200) {
+		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
+				 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
+			flags |= RADEON_SURF_TILE_COLOR_BOTH;
+		if (tiling_flags & RADEON_TILING_MACRO)
+			flags |= RADEON_SURF_TILE_COLOR_MACRO;
+	} else if (rdev->family <= CHIP_RV280) {
+		if (tiling_flags & (RADEON_TILING_MACRO))
+			flags |= R200_SURF_TILE_COLOR_MACRO;
+		if (tiling_flags & RADEON_TILING_MICRO)
+			flags |= R200_SURF_TILE_COLOR_MICRO;
+	} else {
+		if (tiling_flags & RADEON_TILING_MACRO)
+			flags |= R300_SURF_TILE_MACRO;
+		if (tiling_flags & RADEON_TILING_MICRO)
+			flags |= R300_SURF_TILE_MICRO;
+	}
+
+	DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
+	/* the upper bound is inclusive: last byte covered by the object */
+	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
+	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
+	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
+	return 0;
+}
+
+/*
+ * r100_clear_surface_reg - clear one surface register
+ * @rdev: device whose registers are written
+ * @reg: surface register index (same 16-byte spacing as in
+ *       r100_set_surface_reg())
+ *
+ * Writes 0 to the INFO word of the selected surface register.
+ * Returns 0; the int return type matches the prototype in radeon_asic.h
+ * and the clear_surface_reg hook in struct radeon_asic, so the function
+ * is never called through a pointer of an incompatible type.
+ */
+int r100_clear_surface_reg(struct radeon_device *rdev, int reg)
+{
+	int surf_index = reg * 16;
+
+	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
+	return 0;
+}
+
+/*
+ * r100_bandwidth_update - program display buffer request levels
+ * @rdev: device to update
+ *
+ * For each enabled CRTC (crtcs[0] and crtcs[1]) this:
+ *  - compares the peak display bandwidth against the available memory
+ *    bandwidth and logs an error when the display may starve;
+ *  - decodes memory timings from MEM_TIMING_CNTL / MEM_SDRAM_MODE_REG
+ *    (and the R300 read-control registers) into a worst-case display
+ *    latency;
+ *  - writes the stop/start request levels and the critical point into
+ *    GRPH_BUFFER_CNTL (CRTC1) and GRPH2_BUFFER_CNTL (CRTC2).
+ *
+ * When disp_priority == 2 on R300-class parts it also rewrites
+ * R300_MC_INIT_MISC_LAT_TIMER, and on RS400/RS480 with CRTC2 enabled it
+ * writes fixed values to the RS400 display request registers.
+ *
+ * All arithmetic uses the fixed20_12 helpers.
+ */
+void r100_bandwidth_update(struct radeon_device *rdev)
+{
+	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
+	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
+	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
+	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
+	/* CAS latency lookup tables, indexed by a 3-bit register field */
+	fixed20_12 memtcas_ff[8] = {
+		fixed_init(1),
+		fixed_init(2),
+		fixed_init(3),
+		fixed_init(0),
+		fixed_init_half(1),
+		fixed_init_half(2),
+		fixed_init(0),
+	};
+	fixed20_12 memtcas_rs480_ff[8] = {
+		fixed_init(0),
+		fixed_init(1),
+		fixed_init(2),
+		fixed_init(3),
+		fixed_init(0),
+		fixed_init_half(1),
+		fixed_init_half(2),
+		fixed_init_half(3),
+	};
+	fixed20_12 memtcas2_ff[8] = {
+		fixed_init(0),
+		fixed_init(1),
+		fixed_init(2),
+		fixed_init(3),
+		fixed_init(4),
+		fixed_init(5),
+		fixed_init(6),
+		fixed_init(7),
+	};
+	/* Trbs lookup tables, indexed by the RBS position field */
+	fixed20_12 memtrbs[8] = {
+		fixed_init(1),
+		fixed_init_half(1),
+		fixed_init(2),
+		fixed_init_half(2),
+		fixed_init(3),
+		fixed_init_half(3),
+		fixed_init(4),
+		fixed_init_half(4)
+	};
+	fixed20_12 memtrbs_r4xx[8] = {
+		fixed_init(4),
+		fixed_init(5),
+		fixed_init(6),
+		fixed_init(7),
+		fixed_init(8),
+		fixed_init(9),
+		fixed_init(10),
+		fixed_init(11)
+	};
+	fixed20_12 min_mem_eff;
+	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
+	fixed20_12 cur_latency_mclk, cur_latency_sclk;
+	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
+		disp_drain_rate2, read_return_rate;
+	fixed20_12 time_disp1_drop_priority;
+	int c;
+	int cur_size = 16;       /* in octawords */
+	int critical_point = 0, critical_point2;
+/* 	uint32_t read_return_rate, time_disp1_drop_priority; */
+	int stop_req, max_stop_req;
+	struct drm_display_mode *mode1 = NULL;
+	struct drm_display_mode *mode2 = NULL;
+	uint32_t pixel_bytes1 = 0;
+	uint32_t pixel_bytes2 = 0;
+
+	/* mode1/mode2 stay NULL for a disabled CRTC; later code keys off that */
+	if (rdev->mode_info.crtcs[0]->base.enabled) {
+		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
+		pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
+	}
+	if (rdev->mode_info.crtcs[1]->base.enabled) {
+		mode2 = &rdev->mode_info.crtcs[1]->base.mode;
+		pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
+	}
+
+	min_mem_eff.full = rfixed_const_8(0);
+	/* get modes */
+	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
+		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
+		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
+		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
+		/* check crtc enables */
+		if (mode2)
+			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
+		if (mode1)
+			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
+		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
+	}
+
+	/*
+	 * determine if there is enough bw for current mode
+	 */
+	mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
+	temp_ff.full = rfixed_const(100);
+	mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
+	sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
+	sclk_ff.full = rfixed_div(sclk_ff, temp_ff);
+
+	/* bus width in bytes, doubled for DDR */
+	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
+	temp_ff.full = rfixed_const(temp);
+	mem_bw.full = rfixed_mul(mclk_ff, temp_ff);
+
+	pix_clk.full = 0;
+	pix_clk2.full = 0;
+	peak_disp_bw.full = 0;
+	if (mode1) {
+		temp_ff.full = rfixed_const(1000);
+		pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */
+		pix_clk.full = rfixed_div(pix_clk, temp_ff);
+		temp_ff.full = rfixed_const(pixel_bytes1);
+		peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
+	}
+	if (mode2) {
+		temp_ff.full = rfixed_const(1000);
+		pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */
+		pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
+		temp_ff.full = rfixed_const(pixel_bytes2);
+		peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
+	}
+
+	mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
+	if (peak_disp_bw.full >= mem_bw.full) {
+		DRM_ERROR("You may not have enough display bandwidth for current mode\n"
+			  "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
+	}
+
+	/*  Get values from the EXT_MEM_CNTL register...converting its contents. */
+	temp = RREG32(RADEON_MEM_TIMING_CNTL);
+	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
+		mem_trcd = ((temp >> 2) & 0x3) + 1;
+		mem_trp  = ((temp & 0x3)) + 1;
+		mem_tras = ((temp & 0x70) >> 4) + 1;
+	} else if (rdev->family == CHIP_R300 ||
+		   rdev->family == CHIP_R350) { /* r300, r350 */
+		mem_trcd = (temp & 0x7) + 1;
+		mem_trp = ((temp >> 8) & 0x7) + 1;
+		mem_tras = ((temp >> 11) & 0xf) + 4;
+	} else if (rdev->family == CHIP_RV350 ||
+		   rdev->family == CHIP_RV380) {
+		/* rv3x0
+		 * Exact matches only: a range test here would also swallow
+		 * the pre-R300 families handled by the final else branch.
+		 */
+		mem_trcd = (temp & 0x7) + 3;
+		mem_trp = ((temp >> 8) & 0x7) + 3;
+		mem_tras = ((temp >> 11) & 0xf) + 6;
+	} else if (rdev->family == CHIP_R420 ||
+		   rdev->family == CHIP_R423 ||
+		   rdev->family == CHIP_RV410) {
+		/* r4xx */
+		mem_trcd = (temp & 0xf) + 3;
+		if (mem_trcd > 15)
+			mem_trcd = 15;
+		mem_trp = ((temp >> 8) & 0xf) + 3;
+		if (mem_trp > 15)
+			mem_trp = 15;
+		mem_tras = ((temp >> 12) & 0x1f) + 6;
+		if (mem_tras > 31)
+			mem_tras = 31;
+	} else { /* RV200, R200 */
+		mem_trcd = (temp & 0x7) + 1;
+		mem_trp = ((temp >> 8) & 0x7) + 1;
+		mem_tras = ((temp >> 12) & 0xf) + 4;
+	}
+	/* convert to FF */
+	trcd_ff.full = rfixed_const(mem_trcd);
+	trp_ff.full = rfixed_const(mem_trp);
+	tras_ff.full = rfixed_const(mem_tras);
+
+	/* Get values from the MEM_SDRAM_MODE_REG register...converting its contents. */
+	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
+	data = (temp & (7 << 20)) >> 20;
+	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
+		if (rdev->family == CHIP_RS480) /* don't think rs400 */
+			tcas_ff = memtcas_rs480_ff[data];
+		else
+			tcas_ff = memtcas_ff[data];
+	} else
+		tcas_ff = memtcas2_ff[data];
+
+	if (rdev->family == CHIP_RS400 ||
+	    rdev->family == CHIP_RS480) {
+		/* extra cas latency stored in bits 23-25 0-4 clocks */
+		data = (temp >> 23) & 0x7;
+		if (data < 5)
+			tcas_ff.full += rfixed_const(data);
+	}
+
+	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
+		/* on the R300, Tcas is included in Trbs.
+		 */
+		temp = RREG32(RADEON_MEM_CNTL);
+		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
+		if (data == 1) {
+			if (R300_MEM_USE_CD_CH_ONLY & temp) {
+				/* indirect MC read: select the register, then read the data port */
+				temp = RREG32(R300_MC_IND_INDEX);
+				temp &= ~R300_MC_IND_ADDR_MASK;
+				temp |= R300_MC_READ_CNTL_CD_mcind;
+				WREG32(R300_MC_IND_INDEX, temp);
+				temp = RREG32(R300_MC_IND_DATA);
+				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
+			} else {
+				temp = RREG32(R300_MC_READ_CNTL_AB);
+				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
+			}
+		} else {
+			temp = RREG32(R300_MC_READ_CNTL_AB);
+			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
+		}
+		if (rdev->family == CHIP_RV410 ||
+		    rdev->family == CHIP_R420 ||
+		    rdev->family == CHIP_R423)
+			trbs_ff = memtrbs_r4xx[data];
+		else
+			trbs_ff = memtrbs[data];
+		tcas_ff.full += trbs_ff.full;
+	}
+
+	sclk_eff_ff.full = sclk_ff.full;
+
+	if (rdev->flags & RADEON_IS_AGP) {
+		fixed20_12 agpmode_ff;
+		agpmode_ff.full = rfixed_const(radeon_agpmode);
+		temp_ff.full = rfixed_const_666(16);
+		sclk_eff_ff.full -= rfixed_mul(agpmode_ff, temp_ff);
+	}
+	/* TODO PCIE lanes may affect this - agpmode == 16?? */
+
+	if (ASIC_IS_R300(rdev)) {
+		sclk_delay_ff.full = rfixed_const(250);
+	} else {
+		if ((rdev->family == CHIP_RV100) ||
+		    rdev->flags & RADEON_IS_IGP) {
+			if (rdev->mc.vram_is_ddr)
+				sclk_delay_ff.full = rfixed_const(41);
+			else
+				sclk_delay_ff.full = rfixed_const(33);
+		} else {
+			if (rdev->mc.vram_width == 128)
+				sclk_delay_ff.full = rfixed_const(57);
+			else
+				sclk_delay_ff.full = rfixed_const(41);
+		}
+	}
+
+	mc_latency_sclk.full = rfixed_div(sclk_delay_ff, sclk_eff_ff);
+
+	if (rdev->mc.vram_is_ddr) {
+		if (rdev->mc.vram_width == 32) {
+			k1.full = rfixed_const(40);
+			c  = 3;
+		} else {
+			k1.full = rfixed_const(20);
+			c  = 1;
+		}
+	} else {
+		k1.full = rfixed_const(40);
+		c  = 3;
+	}
+
+	/* mc_latency_mclk = (2*trcd + c*tcas + 4*tras + 4*trp + k1) / mclk + 4 / sclk_eff */
+	temp_ff.full = rfixed_const(2);
+	mc_latency_mclk.full = rfixed_mul(trcd_ff, temp_ff);
+	temp_ff.full = rfixed_const(c);
+	mc_latency_mclk.full += rfixed_mul(tcas_ff, temp_ff);
+	temp_ff.full = rfixed_const(4);
+	mc_latency_mclk.full += rfixed_mul(tras_ff, temp_ff);
+	mc_latency_mclk.full += rfixed_mul(trp_ff, temp_ff);
+	mc_latency_mclk.full += k1.full;
+
+	mc_latency_mclk.full = rfixed_div(mc_latency_mclk, mclk_ff);
+	mc_latency_mclk.full += rfixed_div(temp_ff, sclk_eff_ff);
+
+	/*
+	  HW cursor time assuming worst case of full size colour cursor.
+	*/
+	temp_ff.full = rfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
+	temp_ff.full += trcd_ff.full;
+	if (temp_ff.full < tras_ff.full)
+		temp_ff.full = tras_ff.full;
+	cur_latency_mclk.full = rfixed_div(temp_ff, mclk_ff);
+
+	temp_ff.full = rfixed_const(cur_size);
+	cur_latency_sclk.full = rfixed_div(temp_ff, sclk_eff_ff);
+	/*
+	  Find the total latency for the display data.
+	*/
+	disp_latency_overhead.full = rfixed_const(80);
+	disp_latency_overhead.full = rfixed_div(disp_latency_overhead, sclk_ff);
+	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
+	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
+
+	/* the slower of the two paths bounds the display latency */
+	if (mc_latency_mclk.full > mc_latency_sclk.full)
+		disp_latency.full = mc_latency_mclk.full;
+	else
+		disp_latency.full = mc_latency_sclk.full;
+
+	/* setup Max GRPH_STOP_REQ default value */
+	if (ASIC_IS_RV100(rdev))
+		max_stop_req = 0x5c;
+	else
+		max_stop_req = 0x7c;
+
+	if (mode1) {
+		/*  CRTC1
+		    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
+		    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
+		*/
+		stop_req = mode1->hdisplay * pixel_bytes1 / 16;
+
+		if (stop_req > max_stop_req)
+			stop_req = max_stop_req;
+
+		/*
+		  Find the drain rate of the display buffer.
+		*/
+		temp_ff.full = rfixed_const((16/pixel_bytes1));
+		disp_drain_rate.full = rfixed_div(pix_clk, temp_ff);
+
+		/*
+		  Find the critical point of the display buffer.
+		*/
+		crit_point_ff.full = rfixed_mul(disp_drain_rate, disp_latency);
+		crit_point_ff.full += rfixed_const_half(0);
+
+		critical_point = rfixed_trunc(crit_point_ff);
+
+		if (rdev->disp_priority == 2) {
+			critical_point = 0;
+		}
+
+		/*
+		  The critical point should never be above max_stop_req-4.  Setting
+		  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
+		*/
+		if (max_stop_req - critical_point < 4)
+			critical_point = 0;
+
+		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
+			/* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
+			critical_point = 0x10;
+		}
+
+		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
+		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
+		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
+		temp &= ~(RADEON_GRPH_START_REQ_MASK);
+		if ((rdev->family == CHIP_R350) &&
+		    (stop_req > 0x15)) {
+			stop_req -= 0x10;
+		}
+		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
+		temp |= RADEON_GRPH_BUFFER_SIZE;
+		temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
+			  RADEON_GRPH_CRITICAL_AT_SOF |
+			  RADEON_GRPH_STOP_CNTL);
+		/*
+		  Write the result into the register.
+		*/
+		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
+						       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
+
+#if 0
+		if ((rdev->family == CHIP_RS400) ||
+		    (rdev->family == CHIP_RS480)) {
+			/* attempt to program RS400 disp regs correctly ??? */
+			temp = RREG32(RS400_DISP1_REG_CNTL);
+			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
+				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
+			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
+						       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
+						       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
+			temp = RREG32(RS400_DMIF_MEM_CNTL1);
+			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
+				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
+			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
+						      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
+						      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
+		}
+#endif
+
+		DRM_DEBUG("GRPH_BUFFER_CNTL from to %x\n",
+			  /* 	  (unsigned int)info->SavedReg->grph_buffer_cntl, */
+			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
+	}
+
+	if (mode2) {
+		u32 grph2_cntl;
+		stop_req = mode2->hdisplay * pixel_bytes2 / 16;
+
+		if (stop_req > max_stop_req)
+			stop_req = max_stop_req;
+
+		/*
+		  Find the drain rate of the display buffer.
+		*/
+		temp_ff.full = rfixed_const((16/pixel_bytes2));
+		disp_drain_rate2.full = rfixed_div(pix_clk2, temp_ff);
+
+		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
+		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
+		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
+		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
+		if ((rdev->family == CHIP_R350) &&
+		    (stop_req > 0x15)) {
+			stop_req -= 0x10;
+		}
+		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
+		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
+		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
+			  RADEON_GRPH_CRITICAL_AT_SOF |
+			  RADEON_GRPH_STOP_CNTL);
+
+		if ((rdev->family == CHIP_RS100) ||
+		    (rdev->family == CHIP_RS200))
+			critical_point2 = 0;
+		else {
+			/*
+			 * Bus width is doubled for DDR before dividing by 128.
+			 * The parentheses matter: without them the "+ 1"
+			 * binds to the product and non-DDR memory yields 0.
+			 */
+			temp = (rdev->mc.vram_width * (rdev->mc.vram_is_ddr + 1)) / 128;
+			temp_ff.full = rfixed_const(temp);
+			temp_ff.full = rfixed_mul(mclk_ff, temp_ff);
+			if (sclk_ff.full < temp_ff.full)
+				temp_ff.full = sclk_ff.full;
+
+			read_return_rate.full = temp_ff.full;
+
+			/* crit_point_ff and disp_drain_rate are only valid when CRTC1 ran above */
+			if (mode1) {
+				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
+				time_disp1_drop_priority.full = rfixed_div(crit_point_ff, temp_ff);
+			} else {
+				time_disp1_drop_priority.full = 0;
+			}
+			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
+			crit_point_ff.full = rfixed_mul(crit_point_ff, disp_drain_rate2);
+			crit_point_ff.full += rfixed_const_half(0);
+
+			critical_point2 = rfixed_trunc(crit_point_ff);
+
+			if (rdev->disp_priority == 2) {
+				critical_point2 = 0;
+			}
+
+			if (max_stop_req - critical_point2 < 4)
+				critical_point2 = 0;
+
+		}
+
+		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
+			/* some R300 cards have problem with this set to 0 */
+			critical_point2 = 0x10;
+		}
+
+		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
+						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
+
+		if ((rdev->family == CHIP_RS400) ||
+		    (rdev->family == CHIP_RS480)) {
+#if 0
+			/* attempt to program RS400 disp2 regs correctly ??? */
+			temp = RREG32(RS400_DISP2_REQ_CNTL1);
+			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
+				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
+			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
+						       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
+						       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
+			temp = RREG32(RS400_DISP2_REQ_CNTL2);
+			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
+				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
+			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
+						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
+						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
+#endif
+			/* fixed register values used instead of the disabled computation above */
+			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
+			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
+			WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
+			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
+		}
+
+		DRM_DEBUG("GRPH2_BUFFER_CNTL from to %x\n",
+			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
+	}
+}

+ 70 - 8
drivers/gpu/drm/radeon/r300.c

@@ -30,6 +30,8 @@
 #include "drm.h"
 #include "radeon_reg.h"
 #include "radeon.h"
+#include "radeon_drm.h"
+#include "radeon_share.h"
 
 /* r300,r350,rv350,rv370,rv380 depends on : */
 void r100_hdp_reset(struct radeon_device *rdev);
@@ -44,6 +46,7 @@ int r100_gui_wait_for_idle(struct radeon_device *rdev);
 int r100_cs_packet_parse(struct radeon_cs_parser *p,
 			 struct radeon_cs_packet *pkt,
 			 unsigned idx);
+int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
 int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
 			      struct radeon_cs_reloc **cs_reloc);
 int r100_cs_parse_packet0(struct radeon_cs_parser *p,
@@ -150,8 +153,13 @@ int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
 	if (i < 0 || i > rdev->gart.num_gpu_pages) {
 		return -EINVAL;
 	}
-	addr = (((u32)addr) >> 8) | ((upper_32_bits(addr) & 0xff) << 4) | 0xC;
-	writel(cpu_to_le32(addr), ((void __iomem *)ptr) + (i * 4));
+	addr = (lower_32_bits(addr) >> 8) |
+	       ((upper_32_bits(addr) & 0xff) << 24) |
+	       0xc;
+	/* on x86 we want this to be CPU endian, on powerpc
+	 * on powerpc without HW swappers, it'll get swapped on way
+	 * into VRAM - so no need for cpu_to_le32 on VRAM tables */
+	writel(addr, ((void __iomem *)ptr) + (i * 4));
 	return 0;
 }
 
@@ -579,10 +587,8 @@ void r300_vram_info(struct radeon_device *rdev)
 	} else {
 		rdev->mc.vram_width = 64;
 	}
-	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
 
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	r100_vram_init_sizes(rdev);
 }
 
 
@@ -970,7 +976,7 @@ static inline void r300_cs_track_clear(struct r300_cs_track *track)
 
 static const unsigned r300_reg_safe_bm[159] = {
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
-	0xFFFFFFBF, 0xFFFFFFFF, 0xFFFFFFBF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
@@ -1019,7 +1025,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 	struct radeon_cs_reloc *reloc;
 	struct r300_cs_track *track;
 	volatile uint32_t *ib;
-	uint32_t tmp;
+	uint32_t tmp, tile_flags = 0;
 	unsigned i;
 	int r;
 
@@ -1027,6 +1033,16 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 	ib_chunk = &p->chunks[p->chunk_ib_idx];
 	track = (struct r300_cs_track*)p->track;
 	switch(reg) {
+	case AVIVO_D1MODE_VLINE_START_END:
+	case RADEON_CRTC_GUI_TRIG_VLINE:
+		r = r100_cs_packet_parse_vline(p);
+		if (r) {
+			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					idx, reg);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		break;
 	case RADEON_DST_PITCH_OFFSET:
 	case RADEON_SRC_PITCH_OFFSET:
 		r = r100_cs_packet_next_reloc(p, &reloc);
@@ -1038,7 +1054,19 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		}
 		tmp = ib_chunk->kdata[idx] & 0x003fffff;
 		tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-		ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
+
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			tile_flags |= RADEON_DST_TILE_MACRO;
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+			if (reg == RADEON_SRC_PITCH_OFFSET) {
+				DRM_ERROR("Cannot src blit from microtiled surface\n");
+				r100_cs_dump_packet(p, pkt);
+				return -EINVAL;
+			}
+			tile_flags |= RADEON_DST_TILE_MICRO;
+		}
+		tmp |= tile_flags;
+		ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
 		break;
 	case R300_RB3D_COLOROFFSET0:
 	case R300_RB3D_COLOROFFSET1:
@@ -1127,6 +1155,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		/* RB3D_COLORPITCH1 */
 		/* RB3D_COLORPITCH2 */
 		/* RB3D_COLORPITCH3 */
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+				  idx, reg);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			tile_flags |= R300_COLOR_TILE_ENABLE;
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			tile_flags |= R300_COLOR_MICROTILE_ENABLE;
+
+		tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
+		tmp |= tile_flags;
+		ib[idx] = tmp;
+
 		i = (reg - 0x4E38) >> 2;
 		track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
 		switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
@@ -1182,6 +1227,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		break;
 	case 0x4F24:
 		/* ZB_DEPTHPITCH */
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+				  idx, reg);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			tile_flags |= R300_DEPTHMACROTILE_ENABLE;
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			tile_flags |= R300_DEPTHMICROTILE_TILED;;
+
+		tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
+		tmp |= tile_flags;
+		ib[idx] = tmp;
+
 		track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
 		break;
 	case 0x4104:

+ 3 - 1
drivers/gpu/drm/radeon/r300_reg.h

@@ -27,7 +27,9 @@
 #ifndef _R300_REG_H_
 #define _R300_REG_H_
 
-
+/* Tiling bits OR'ed into the SURFACEn_INFO word by r100_set_surface_reg()
+ * for families newer than RV280; BOTH equals MACRO | MICRO. */
+#define R300_SURF_TILE_MACRO (1<<16)
+#define R300_SURF_TILE_MICRO (2<<16)
+#define R300_SURF_TILE_BOTH (3<<16)
 
 
 #define R300_MC_INIT_MISC_LAT_TIMER	0x180

+ 2 - 0
drivers/gpu/drm/radeon/r500_reg.h

@@ -445,6 +445,7 @@
 #define AVIVO_D1MODE_DATA_FORMAT                0x6528
 #       define AVIVO_D1MODE_INTERLEAVE_EN       (1 << 0)
 #define AVIVO_D1MODE_DESKTOP_HEIGHT             0x652C
+#define AVIVO_D1MODE_VLINE_START_END            0x6538
 #define AVIVO_D1MODE_VIEWPORT_START             0x6580
 #define AVIVO_D1MODE_VIEWPORT_SIZE              0x6584
 #define AVIVO_D1MODE_EXT_OVERSCAN_LEFT_RIGHT    0x6588
@@ -496,6 +497,7 @@
 #define AVIVO_D2CUR_SIZE                        0x6c10
 #define AVIVO_D2CUR_POSITION                    0x6c14
 
+#define AVIVO_D2MODE_VLINE_START_END            0x6d38
 #define AVIVO_D2MODE_VIEWPORT_START             0x6d80
 #define AVIVO_D2MODE_VIEWPORT_SIZE              0x6d84
 #define AVIVO_D2MODE_EXT_OVERSCAN_LEFT_RIGHT    0x6d88

+ 17 - 5
drivers/gpu/drm/radeon/r520.c

@@ -28,6 +28,7 @@
 #include "drmP.h"
 #include "radeon_reg.h"
 #include "radeon.h"
+#include "radeon_share.h"
 
 /* r520,rv530,rv560,rv570,r580 depends on : */
 void r100_hdp_reset(struct radeon_device *rdev);
@@ -94,8 +95,8 @@ int r520_mc_init(struct radeon_device *rdev)
 		       "programming pipes. Bad things might happen.\n");
 	}
 	/* Write VRAM size in case we are limiting it */
-	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
-	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
+	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
 	tmp = REG_SET(R520_MC_FB_TOP, tmp >> 16);
 	tmp |= REG_SET(R520_MC_FB_START, rdev->mc.vram_location >> 16);
 	WREG32_MC(R520_MC_FB_LOCATION, tmp);
@@ -226,9 +227,20 @@ static void r520_vram_get_type(struct radeon_device *rdev)
 
 void r520_vram_info(struct radeon_device *rdev)
 {
+	fixed20_12 a;
+
 	r520_vram_get_type(rdev);
-	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
 
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	r100_vram_init_sizes(rdev);
+	/* FIXME: we should enforce default clock in case GPU is not in
+	 * default setup
+	 */
+	a.full = rfixed_const(100);
+	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
+}
+
+/* Bandwidth hook for this file's ASICs: delegates entirely to
+ * rv515_bandwidth_avivo_update(). */
+void r520_bandwidth_update(struct radeon_device *rdev)
+{
+	rv515_bandwidth_avivo_update(rdev);
 }

+ 3 - 2
drivers/gpu/drm/radeon/r600.c

@@ -67,7 +67,7 @@ int r600_mc_init(struct radeon_device *rdev)
 		       "programming pipes. Bad things might happen.\n");
 	}
 
-	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
 	tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24);
 	tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24);
 	WREG32(R600_MC_VM_FB_LOCATION, tmp);
@@ -140,7 +140,8 @@ void r600_vram_get_type(struct radeon_device *rdev)
 void r600_vram_info(struct radeon_device *rdev)
 {
 	r600_vram_get_type(rdev);
-	rdev->mc.vram_size = RREG32(R600_CONFIG_MEMSIZE);
+	rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE);
+	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
 
 	/* Could aper size report 0 ? */
 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);

+ 82 - 5
drivers/gpu/drm/radeon/radeon.h

@@ -64,6 +64,7 @@ extern int radeon_agpmode;
 extern int radeon_vram_limit;
 extern int radeon_gart_size;
 extern int radeon_benchmarking;
+extern int radeon_testing;
 extern int radeon_connector_table;
 
 /*
@@ -113,6 +114,7 @@ enum radeon_family {
 	CHIP_RV770,
 	CHIP_RV730,
 	CHIP_RV710,
+	CHIP_RS880,
 	CHIP_LAST,
 };
 
@@ -201,6 +203,14 @@ int radeon_fence_wait_last(struct radeon_device *rdev);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
 
+/*
+ * Tiling registers
+ */
+/* Per-surface-register bookkeeping; struct radeon_device holds an array
+ * of RADEON_GEM_MAX_SURFACES of these. */
+struct radeon_surface_reg {
+	/* presumably the buffer object currently using this register - TODO confirm */
+	struct radeon_object *robj;
+};
+
+#define RADEON_GEM_MAX_SURFACES 8
 
 /*
  * Radeon buffer.
@@ -213,6 +223,7 @@ struct radeon_object_list {
 	uint64_t		gpu_offset;
 	unsigned		rdomain;
 	unsigned		wdomain;
+	uint32_t                tiling_flags;
 };
 
 int radeon_object_init(struct radeon_device *rdev);
@@ -242,8 +253,15 @@ void radeon_object_list_clean(struct list_head *head);
 int radeon_object_fbdev_mmap(struct radeon_object *robj,
 			     struct vm_area_struct *vma);
 unsigned long radeon_object_size(struct radeon_object *robj);
-
-
+void radeon_object_clear_surface_reg(struct radeon_object *robj);
+int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved,
+			       bool force_drop);
+void radeon_object_set_tiling_flags(struct radeon_object *robj,
+				    uint32_t tiling_flags, uint32_t pitch);
+void radeon_object_get_tiling_flags(struct radeon_object *robj, uint32_t *tiling_flags, uint32_t *pitch);
+void radeon_bo_move_notify(struct ttm_buffer_object *bo,
+			   struct ttm_mem_reg *mem);
+void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 /*
  * GEM objects.
  */
@@ -315,8 +333,11 @@ struct radeon_mc {
 	unsigned		gtt_location;
 	unsigned		gtt_size;
 	unsigned		vram_location;
-	unsigned		vram_size;
+	/* for some chips with <= 32MB we need to lie
+	 * about vram size near mc fb location */
+	unsigned		mc_vram_size;
 	unsigned		vram_width;
+	unsigned		real_vram_size;
 	int			vram_mtrr;
 	bool			vram_is_ddr;
 };
@@ -474,6 +495,39 @@ struct radeon_wb {
 	uint64_t		gpu_addr;
 };
 
+/**
+ * struct radeon_pm - power management data
+ * @max_bandwidth:      maximum bandwidth the gpu has (MByte/s)
+ * @igp_sideport_mclk:  sideport memory clock MHz (rs690,rs740,rs780,rs880)
+ * @igp_system_mclk:    system clock MHz (rs690,rs740,rs780,rs880)
+ * @igp_ht_link_clk:    ht link clock MHz (rs690,rs740,rs780,rs880)
+ * @igp_ht_link_width:  ht link width in bits (rs690,rs740,rs780,rs880)
+ * @k8_bandwidth:       k8 bandwidth the gpu has (MByte/s) (IGP)
+ * @sideport_bandwidth: sideport bandwidth the gpu has (MByte/s) (IGP)
+ * @ht_bandwidth:       ht bandwidth the gpu has (MByte/s) (IGP)
+ * @core_bandwidth:     core GPU bandwidth the gpu has (MByte/s) (IGP)
+ * @sclk:               GPU clock MHz (core bandwidth depends on this clock)
+ * @needed_bandwidth:   current bandwidth needs
+ *
+ * It keeps track of various data needed to take power management decisions.
+ * Bandwidth need is used to determine the minimum clock of the GPU and memory.
+ * The relation between gpu/memory clock and available bandwidth is hw dependent
+ * (type of memory, bus size, efficiency, ...)
+ */
+struct radeon_pm {
+	fixed20_12		max_bandwidth;
+	fixed20_12		igp_sideport_mclk;
+	fixed20_12		igp_system_mclk;
+	fixed20_12		igp_ht_link_clk;
+	fixed20_12		igp_ht_link_width;
+	fixed20_12		k8_bandwidth;
+	fixed20_12		sideport_bandwidth;
+	fixed20_12		ht_bandwidth;
+	fixed20_12		core_bandwidth;
+	fixed20_12		sclk;
+	fixed20_12		needed_bandwidth;
+};
+
 
 /*
  * Benchmarking
@@ -481,6 +535,12 @@ struct radeon_wb {
 void radeon_benchmark(struct radeon_device *rdev);
 
 
+/*
+ * Testing
+ */
+void radeon_test_moves(struct radeon_device *rdev);
+
+
 /*
  * Debugfs
  */
@@ -535,6 +595,11 @@ struct radeon_asic {
 	void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock);
 	void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
 	void (*set_clock_gating)(struct radeon_device *rdev, int enable);
+	int (*set_surface_reg)(struct radeon_device *rdev, int reg,
+			       uint32_t tiling_flags, uint32_t pitch,
+			       uint32_t offset, uint32_t obj_size);
+	int (*clear_surface_reg)(struct radeon_device *rdev, int reg);
+	void (*bandwidth_update)(struct radeon_device *rdev);
 };
 
 union radeon_asic_config {
@@ -566,6 +631,10 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *filp);
 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp);
+int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp);
 
 
 /*
@@ -594,8 +663,8 @@ struct radeon_device {
 	struct radeon_object		*fbdev_robj;
 	struct radeon_framebuffer	*fbdev_rfb;
 	/* Register mmio */
-	unsigned long			rmmio_base;
-	unsigned long			rmmio_size;
+	resource_size_t			rmmio_base;
+	resource_size_t			rmmio_size;
 	void				*rmmio;
 	radeon_rreg_t			mm_rreg;
 	radeon_wreg_t			mm_wreg;
@@ -619,11 +688,14 @@ struct radeon_device {
 	struct radeon_irq		irq;
 	struct radeon_asic		*asic;
 	struct radeon_gem		gem;
+	struct radeon_pm		pm;
 	struct mutex			cs_mutex;
 	struct radeon_wb		wb;
 	bool				gpu_lockup;
 	bool				shutdown;
 	bool				suspend;
+	bool				need_dma32;
+	struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -670,6 +742,8 @@ void r100_pll_errata_after_index(struct radeon_device *rdev);
 /*
  * ASICs helpers.
  */
+#define ASIC_IS_RN50(rdev) ((rdev->pdev->device == 0x515e) || \
+			    (rdev->pdev->device == 0x5969))
 #define ASIC_IS_RV100(rdev) ((rdev->family == CHIP_RV100) || \
 		(rdev->family == CHIP_RV200) || \
 		(rdev->family == CHIP_RS100) || \
@@ -796,5 +870,8 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 /* Dispatch to the per-ASIC memory clock hook (left NULL by ASICs that have none). */
 #define radeon_set_memory_clock(rdev, e) (rdev)->asic->set_memory_clock((rdev), (e))
 #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->set_pcie_lanes((rdev), (l))
 #define radeon_set_clock_gating(rdev, e) (rdev)->asic->set_clock_gating((rdev), (e))
+#define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->set_surface_reg((rdev), (r), (f), (p), (o), (s)))
+#define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->clear_surface_reg((rdev), (r)))
+/* Invoke the per-ASIC bandwidth_update hook; fully parenthesized like the surface-register macros. */
+#define radeon_bandwidth_update(rdev) ((rdev)->asic->bandwidth_update((rdev)))
 
 #endif

+ 32 - 0
drivers/gpu/drm/radeon/radeon_asic.h

@@ -71,6 +71,11 @@ int r100_copy_blit(struct radeon_device *rdev,
 		   uint64_t dst_offset,
 		   unsigned num_pages,
 		   struct radeon_fence *fence);
+int r100_set_surface_reg(struct radeon_device *rdev, int reg,
+			 uint32_t tiling_flags, uint32_t pitch,
+			 uint32_t offset, uint32_t obj_size);
+int r100_clear_surface_reg(struct radeon_device *rdev, int reg);
+void r100_bandwidth_update(struct radeon_device *rdev);
 
 static struct radeon_asic r100_asic = {
 	.init = &r100_init,
@@ -100,6 +105,9 @@ static struct radeon_asic r100_asic = {
 	.set_memory_clock = NULL,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_legacy_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &r100_bandwidth_update,
 };
 
 
@@ -128,6 +136,7 @@ int r300_copy_dma(struct radeon_device *rdev,
 		  uint64_t dst_offset,
 		  unsigned num_pages,
 		  struct radeon_fence *fence);
+
 static struct radeon_asic r300_asic = {
 	.init = &r300_init,
 	.errata = &r300_errata,
@@ -156,6 +165,9 @@ static struct radeon_asic r300_asic = {
 	.set_memory_clock = NULL,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_legacy_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &r100_bandwidth_update,
 };
 
 /*
@@ -193,6 +205,9 @@ static struct radeon_asic r420_asic = {
 	.set_memory_clock = &radeon_atom_set_memory_clock,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &r100_bandwidth_update,
 };
 
 
@@ -237,6 +252,9 @@ static struct radeon_asic rs400_asic = {
 	.set_memory_clock = NULL,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_legacy_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &r100_bandwidth_update,
 };
 
 
@@ -254,6 +272,7 @@ void rs600_gart_tlb_flush(struct radeon_device *rdev);
 int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
 uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+void rs600_bandwidth_update(struct radeon_device *rdev);
 static struct radeon_asic rs600_asic = {
 	.init = &r300_init,
 	.errata = &rs600_errata,
@@ -282,6 +301,7 @@ static struct radeon_asic rs600_asic = {
 	.set_memory_clock = &radeon_atom_set_memory_clock,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.bandwidth_update = &rs600_bandwidth_update,
 };
 
 
@@ -294,6 +314,7 @@ int rs690_mc_init(struct radeon_device *rdev);
 void rs690_mc_fini(struct radeon_device *rdev);
 uint32_t rs690_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+void rs690_bandwidth_update(struct radeon_device *rdev);
 static struct radeon_asic rs690_asic = {
 	.init = &r300_init,
 	.errata = &rs690_errata,
@@ -322,6 +343,9 @@ static struct radeon_asic rs690_asic = {
 	.set_memory_clock = &radeon_atom_set_memory_clock,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &rs690_bandwidth_update,
 };
 
 
@@ -339,6 +363,7 @@ void rv515_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rv515_ring_start(struct radeon_device *rdev);
 uint32_t rv515_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
 void rv515_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+void rv515_bandwidth_update(struct radeon_device *rdev);
 static struct radeon_asic rv515_asic = {
 	.init = &rv515_init,
 	.errata = &rv515_errata,
@@ -367,6 +392,9 @@ static struct radeon_asic rv515_asic = {
 	.set_memory_clock = &radeon_atom_set_memory_clock,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &rv515_bandwidth_update,
 };
 
 
@@ -377,6 +405,7 @@ void r520_errata(struct radeon_device *rdev);
 void r520_vram_info(struct radeon_device *rdev);
 int r520_mc_init(struct radeon_device *rdev);
 void r520_mc_fini(struct radeon_device *rdev);
+void r520_bandwidth_update(struct radeon_device *rdev);
 static struct radeon_asic r520_asic = {
 	.init = &rv515_init,
 	.errata = &r520_errata,
@@ -405,6 +434,9 @@ static struct radeon_asic r520_asic = {
 	.set_memory_clock = &radeon_atom_set_memory_clock,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &r520_bandwidth_update,
 };
 
 /*

+ 6 - 3
drivers/gpu/drm/radeon/radeon_atombios.c

@@ -103,7 +103,8 @@ static inline struct radeon_i2c_bus_rec radeon_lookup_gpio(struct drm_device
 static bool radeon_atom_apply_quirks(struct drm_device *dev,
 				     uint32_t supported_device,
 				     int *connector_type,
-				     struct radeon_i2c_bus_rec *i2c_bus)
+				     struct radeon_i2c_bus_rec *i2c_bus,
+				     uint8_t *line_mux)
 {
 
 	/* Asus M2A-VM HDMI board lists the DVI port as HDMI */
@@ -127,8 +128,10 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
 	if ((dev->pdev->device == 0x5653) &&
 	    (dev->pdev->subsystem_vendor == 0x1462) &&
 	    (dev->pdev->subsystem_device == 0x0291)) {
-		if (*connector_type == DRM_MODE_CONNECTOR_LVDS)
+		if (*connector_type == DRM_MODE_CONNECTOR_LVDS) {
 			i2c_bus->valid = false;
+			*line_mux = 53;
+		}
 	}
 
 	/* Funky macbooks */
@@ -526,7 +529,7 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct
 
 		if (!radeon_atom_apply_quirks
 		    (dev, (1 << i), &bios_connectors[i].connector_type,
-		     &bios_connectors[i].ddc_bus))
+		     &bios_connectors[i].ddc_bus, &bios_connectors[i].line_mux))
 			continue;
 
 		bios_connectors[i].valid = true;

+ 2 - 2
drivers/gpu/drm/radeon/radeon_benchmark.c

@@ -63,7 +63,7 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
 		if (r) {
 			goto out_cleanup;
 		}
-		r = radeon_copy_dma(rdev, saddr, daddr, size >> 14, fence);
+		r = radeon_copy_dma(rdev, saddr, daddr, size / 4096, fence);
 		if (r) {
 			goto out_cleanup;
 		}
@@ -88,7 +88,7 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
 		if (r) {
 			goto out_cleanup;
 		}
-		r = radeon_copy_blit(rdev, saddr, daddr, size >> 14, fence);
+		r = radeon_copy_blit(rdev, saddr, daddr, size / 4096, fence);
 		if (r) {
 			goto out_cleanup;
 		}

+ 7 - 1
drivers/gpu/drm/radeon/radeon_cs.c

@@ -127,17 +127,23 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 				       sizeof(struct drm_radeon_cs_chunk))) {
 			return -EFAULT;
 		}
+		p->chunks[i].length_dw = user_chunk.length_dw;
+		p->chunks[i].kdata = NULL;
 		p->chunks[i].chunk_id = user_chunk.chunk_id;
+
 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
 			p->chunk_relocs_idx = i;
 		}
 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
 			p->chunk_ib_idx = i;
+			/* zero length IB isn't useful */
+			if (p->chunks[i].length_dw == 0)
+				return -EINVAL;
 		}
+
 		p->chunks[i].length_dw = user_chunk.length_dw;
 		cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
 
-		p->chunks[i].kdata = NULL;
 		size = p->chunks[i].length_dw * sizeof(uint32_t);
 		p->chunks[i].kdata = kzalloc(size, GFP_KERNEL);
 		if (p->chunks[i].kdata == NULL) {

+ 7 - 2
drivers/gpu/drm/radeon/radeon_cursor.c

@@ -111,9 +111,11 @@ static void radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj,
 
 	if (ASIC_IS_AVIVO(rdev))
 		WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, gpu_addr);
-	else
+	else {
+		radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr;
 		/* offset is from DISP(2)_BASE_ADDRESS */
-		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, gpu_addr);
+		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset);
+	}
 }
 
 int radeon_crtc_cursor_set(struct drm_crtc *crtc,
@@ -245,6 +247,9 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
 		       (RADEON_CUR_LOCK
 			| ((xorigin ? 0 : x) << 16)
 			| (yorigin ? 0 : y)));
+		/* offset is from DISP(2)_BASE_ADDRESS */
+		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset +
+								      (yorigin * 256)));
 	}
 	radeon_lock_cursor(crtc, false);
 

+ 37 - 22
drivers/gpu/drm/radeon/radeon_device.c

@@ -48,6 +48,8 @@ static void radeon_surface_init(struct radeon_device *rdev)
 			       i * (RADEON_SURFACE1_INFO - RADEON_SURFACE0_INFO),
 			       0);
 		}
+		/* enable surfaces */
+		WREG32(RADEON_SURFACE_CNTL, 0);
 	}
 }
 
@@ -119,7 +121,7 @@ int radeon_mc_setup(struct radeon_device *rdev)
 	if (rdev->mc.vram_location != 0xFFFFFFFFUL) {
 		/* vram location was already setup try to put gtt after
 		 * if it fits */
-		tmp = rdev->mc.vram_location + rdev->mc.vram_size;
+		tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size;
 		tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
 		if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
 			rdev->mc.gtt_location = tmp;
@@ -134,13 +136,13 @@ int radeon_mc_setup(struct radeon_device *rdev)
 	} else if (rdev->mc.gtt_location != 0xFFFFFFFFUL) {
 		/* gtt location was already setup try to put vram before
 		 * if it fits */
-		if (rdev->mc.vram_size < rdev->mc.gtt_location) {
+		if (rdev->mc.mc_vram_size < rdev->mc.gtt_location) {
 			rdev->mc.vram_location = 0;
 		} else {
 			tmp = rdev->mc.gtt_location + rdev->mc.gtt_size;
-			tmp += (rdev->mc.vram_size - 1);
-			tmp &= ~(rdev->mc.vram_size - 1);
-			if ((0xFFFFFFFFUL - tmp) >= rdev->mc.vram_size) {
+			tmp += (rdev->mc.mc_vram_size - 1);
+			tmp &= ~(rdev->mc.mc_vram_size - 1);
+			if ((0xFFFFFFFFUL - tmp) >= rdev->mc.mc_vram_size) {
 				rdev->mc.vram_location = tmp;
 			} else {
 				printk(KERN_ERR "[drm] vram too big to fit "
@@ -150,12 +152,14 @@ int radeon_mc_setup(struct radeon_device *rdev)
 		}
 	} else {
 		rdev->mc.vram_location = 0;
-		rdev->mc.gtt_location = rdev->mc.vram_size;
+		rdev->mc.gtt_location = rdev->mc.mc_vram_size;
 	}
-	DRM_INFO("radeon: VRAM %uM\n", rdev->mc.vram_size >> 20);
+	DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20);
 	DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n",
 		 rdev->mc.vram_location,
-		 rdev->mc.vram_location + rdev->mc.vram_size - 1);
+		 rdev->mc.vram_location + rdev->mc.mc_vram_size - 1);
+	if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size)
+		DRM_INFO("radeon: VRAM less than aperture workaround enabled\n");
 	DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20);
 	DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n",
 		 rdev->mc.gtt_location,
@@ -450,6 +454,7 @@ int radeon_device_init(struct radeon_device *rdev,
 		       uint32_t flags)
 {
 	int r, ret;
+	int dma_bits;
 
 	DRM_INFO("radeon: Initializing kernel modesetting.\n");
 	rdev->shutdown = false;
@@ -492,8 +497,20 @@ int radeon_device_init(struct radeon_device *rdev,
 		return r;
 	}
 
-	/* Report DMA addressing limitation */
-	r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(32));
+	/* set DMA mask + need_dma32 flags.
+	 * PCIE - can handle 40-bits.
+	 * IGP - can handle 40-bits (in theory)
+	 * AGP - generally dma32 is safest
+	 * PCI - only dma32
+	 */
+	rdev->need_dma32 = false;
+	if (rdev->flags & RADEON_IS_AGP)
+		rdev->need_dma32 = true;
+	if (rdev->flags & RADEON_IS_PCI)
+		rdev->need_dma32 = true;
+
+	dma_bits = rdev->need_dma32 ? 32 : 40;
+	r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
 	if (r) {
 		printk(KERN_WARNING "radeon: No suitable DMA available.\n");
 	}
@@ -546,27 +563,22 @@ int radeon_device_init(struct radeon_device *rdev,
 			radeon_combios_asic_init(rdev->ddev);
 		}
 	}
+	/* Initialize clocks */
+	r = radeon_clocks_init(rdev);
+	if (r) {
+		return r;
+	}
 	/* Get vram informations */
 	radeon_vram_info(rdev);
-	/* Device is severly broken if aper size > vram size.
-	 * for RN50/M6/M7 - Novell bug 204882 ?
-	 */
-	if (rdev->mc.vram_size < rdev->mc.aper_size) {
-		rdev->mc.aper_size = rdev->mc.vram_size;
-	}
+
 	/* Add an MTRR for the VRAM */
 	rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
 				      MTRR_TYPE_WRCOMB, 1);
 	DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
-		 rdev->mc.vram_size >> 20,
+		 rdev->mc.real_vram_size >> 20,
 		 (unsigned)rdev->mc.aper_size >> 20);
 	DRM_INFO("RAM width %dbits %cDR\n",
 		 rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
-	/* Initialize clocks */
-	r = radeon_clocks_init(rdev);
-	if (r) {
-		return r;
-	}
 	/* Initialize memory controller (also test AGP) */
 	r = radeon_mc_init(rdev);
 	if (r) {
@@ -626,6 +638,9 @@ int radeon_device_init(struct radeon_device *rdev,
 	if (!ret) {
 		DRM_INFO("radeon: kernel modesetting successfully initialized.\n");
 	}
+	if (radeon_testing) {
+		radeon_test_moves(rdev);
+	}
 	if (radeon_benchmarking) {
 		radeon_benchmark(rdev);
 	}

+ 47 - 27
drivers/gpu/drm/radeon/radeon_display.c

@@ -187,6 +187,7 @@ static void radeon_crtc_init(struct drm_device *dev, int index)
 
 	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
 	radeon_crtc->crtc_id = index;
+	rdev->mode_info.crtcs[index] = radeon_crtc;
 
 	radeon_crtc->mode_set.crtc = &radeon_crtc->base;
 	radeon_crtc->mode_set.connectors = (struct drm_connector **)(radeon_crtc + 1);
@@ -491,7 +492,11 @@ void radeon_compute_pll(struct radeon_pll *pll,
 					tmp += (uint64_t)pll->reference_freq * 1000 * frac_feedback_div;
 					current_freq = radeon_div(tmp, ref_div * post_div);
 
-					error = abs(current_freq - freq);
+					if (flags & RADEON_PLL_PREFER_CLOSEST_LOWER) {
+						error = freq - current_freq;
+						error = error < 0 ? 0xffffffff : error;
+					} else
+						error = abs(current_freq - freq);
 					vco_diff = abs(vco - best_vco);
 
 					if ((best_vco == 0 && error < best_error) ||
@@ -657,36 +662,51 @@ void radeon_modeset_fini(struct radeon_device *rdev)
 	}
 }
 
-void radeon_init_disp_bandwidth(struct drm_device *dev)
+/*
+ * Collect the scaling (RMX) setup for a CRTC from the encoders driving it.
+ *
+ * Walks every encoder whose ->crtc is @crtc. The first match supplies the
+ * CRTC's rmx_type, devices mask and native panel mode; any later encoder
+ * with a different rmx_type makes the function fail.
+ *
+ * When scaling is enabled, vsc/hsc are set to the fixed-point ratio of the
+ * CRTC's current mode size to the native panel size (vertical against
+ * panel_yres, horizontal against panel_xres); otherwise both are 1.
+ *
+ * @mode and @adjusted_mode are not read here; crtc->mode is used instead.
+ *
+ * Returns true on success, false when the encoders disagree on rmx_type.
+ */
+bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
 {
-	struct radeon_device *rdev = dev->dev_private;
-	struct drm_display_mode *modes[2];
-	int pixel_bytes[2];
-	struct drm_crtc *crtc;
-
-	pixel_bytes[0] = pixel_bytes[1] = 0;
-	modes[0] = modes[1] = NULL;
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct drm_encoder *encoder;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_encoder *radeon_encoder;
+	bool first = true;
 
-		if (crtc->enabled && crtc->fb) {
-			modes[radeon_crtc->crtc_id] = &crtc->mode;
-			pixel_bytes[radeon_crtc->crtc_id] = crtc->fb->bits_per_pixel / 8;
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		radeon_encoder = to_radeon_encoder(encoder);
+		if (encoder->crtc != crtc)
+			continue;
+		if (first) {
+			radeon_crtc->rmx_type = radeon_encoder->rmx_type;
+			radeon_crtc->devices = radeon_encoder->devices;
+			memcpy(&radeon_crtc->native_mode,
+				&radeon_encoder->native_mode,
+				sizeof(struct radeon_native_mode));
+			first = false;
+		} else {
+			if (radeon_crtc->rmx_type != radeon_encoder->rmx_type) {
+				/* WARNING: Right now this can't happen but
+				 * in the future we need to check that scaling
+				 * is consistent across different encoders
+				 * (ie all encoders can work with the same
+				 *  scaling).
+				 */
+				DRM_ERROR("Scaling not consistent accross encoder.\n");
+				return false;
+			}
 		}
 	}
-
-	if (ASIC_IS_AVIVO(rdev)) {
-		radeon_init_disp_bw_avivo(dev,
-					  modes[0],
-					  pixel_bytes[0],
-					  modes[1],
-					  pixel_bytes[1]);
+	if (radeon_crtc->rmx_type != RMX_OFF) {
+		fixed20_12 a, b;
+		/* vertical ratio: mode height over native panel height */
+		a.full = rfixed_const(crtc->mode.vdisplay);
+		b.full = rfixed_const(radeon_crtc->native_mode.panel_yres);
+		radeon_crtc->vsc.full = rfixed_div(a, b);
+		/* horizontal ratio: mode width over native panel width */
+		a.full = rfixed_const(crtc->mode.hdisplay);
+		b.full = rfixed_const(radeon_crtc->native_mode.panel_xres);
+		radeon_crtc->hsc.full = rfixed_div(a, b);
 	} else {
-		radeon_init_disp_bw_legacy(dev,
-					   modes[0],
-					   pixel_bytes[0],
-					   modes[1],
-					   pixel_bytes[1]);
+		radeon_crtc->vsc.full = rfixed_const(1);
+		radeon_crtc->hsc.full = rfixed_const(1);
 	}
+	return true;
 }

+ 4 - 0
drivers/gpu/drm/radeon/radeon_drv.c

@@ -89,6 +89,7 @@ int radeon_agpmode = 0;
 int radeon_vram_limit = 0;
 int radeon_gart_size = 512; /* default gart size */
 int radeon_benchmarking = 0;
+int radeon_testing = 0;
 int radeon_connector_table = 0;
 #endif
 
@@ -117,6 +118,9 @@ module_param_named(gartsize, radeon_gart_size, int, 0600);
 MODULE_PARM_DESC(benchmark, "Run benchmark");
 module_param_named(benchmark, radeon_benchmarking, int, 0444);
 
+MODULE_PARM_DESC(test, "Run tests");
+module_param_named(test, radeon_testing, int, 0444);
+
 MODULE_PARM_DESC(connector_table, "Force connector table");
 module_param_named(connector_table, radeon_connector_table, int, 0444);
 #endif

+ 2 - 357
drivers/gpu/drm/radeon/radeon_encoders.c

@@ -154,7 +154,6 @@ void radeon_rmx_mode_fixup(struct drm_encoder *encoder,
 
 	if (mode->hdisplay < native_mode->panel_xres ||
 	    mode->vdisplay < native_mode->panel_yres) {
-		radeon_encoder->flags |= RADEON_USE_RMX;
 		if (ASIC_IS_AVIVO(rdev)) {
 			adjusted_mode->hdisplay = native_mode->panel_xres;
 			adjusted_mode->vdisplay = native_mode->panel_yres;
@@ -197,15 +196,13 @@ void radeon_rmx_mode_fixup(struct drm_encoder *encoder,
 	}
 }
 
+
 static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
 				   struct drm_display_mode *mode,
 				   struct drm_display_mode *adjusted_mode)
 {
-
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 
-	radeon_encoder->flags &= ~RADEON_USE_RMX;
-
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
 
 	if (radeon_encoder->rmx_type != RMX_OFF)
@@ -808,234 +805,6 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action)
 
 }
 
-static void atom_rv515_force_tv_scaler(struct radeon_device *rdev)
-{
-
-	WREG32(0x659C, 0x0);
-	WREG32(0x6594, 0x705);
-	WREG32(0x65A4, 0x10001);
-	WREG32(0x65D8, 0x0);
-	WREG32(0x65B0, 0x0);
-	WREG32(0x65C0, 0x0);
-	WREG32(0x65D4, 0x0);
-	WREG32(0x6578, 0x0);
-	WREG32(0x657C, 0x841880A8);
-	WREG32(0x6578, 0x1);
-	WREG32(0x657C, 0x84208680);
-	WREG32(0x6578, 0x2);
-	WREG32(0x657C, 0xBFF880B0);
-	WREG32(0x6578, 0x100);
-	WREG32(0x657C, 0x83D88088);
-	WREG32(0x6578, 0x101);
-	WREG32(0x657C, 0x84608680);
-	WREG32(0x6578, 0x102);
-	WREG32(0x657C, 0xBFF080D0);
-	WREG32(0x6578, 0x200);
-	WREG32(0x657C, 0x83988068);
-	WREG32(0x6578, 0x201);
-	WREG32(0x657C, 0x84A08680);
-	WREG32(0x6578, 0x202);
-	WREG32(0x657C, 0xBFF080F8);
-	WREG32(0x6578, 0x300);
-	WREG32(0x657C, 0x83588058);
-	WREG32(0x6578, 0x301);
-	WREG32(0x657C, 0x84E08660);
-	WREG32(0x6578, 0x302);
-	WREG32(0x657C, 0xBFF88120);
-	WREG32(0x6578, 0x400);
-	WREG32(0x657C, 0x83188040);
-	WREG32(0x6578, 0x401);
-	WREG32(0x657C, 0x85008660);
-	WREG32(0x6578, 0x402);
-	WREG32(0x657C, 0xBFF88150);
-	WREG32(0x6578, 0x500);
-	WREG32(0x657C, 0x82D88030);
-	WREG32(0x6578, 0x501);
-	WREG32(0x657C, 0x85408640);
-	WREG32(0x6578, 0x502);
-	WREG32(0x657C, 0xBFF88180);
-	WREG32(0x6578, 0x600);
-	WREG32(0x657C, 0x82A08018);
-	WREG32(0x6578, 0x601);
-	WREG32(0x657C, 0x85808620);
-	WREG32(0x6578, 0x602);
-	WREG32(0x657C, 0xBFF081B8);
-	WREG32(0x6578, 0x700);
-	WREG32(0x657C, 0x82608010);
-	WREG32(0x6578, 0x701);
-	WREG32(0x657C, 0x85A08600);
-	WREG32(0x6578, 0x702);
-	WREG32(0x657C, 0x800081F0);
-	WREG32(0x6578, 0x800);
-	WREG32(0x657C, 0x8228BFF8);
-	WREG32(0x6578, 0x801);
-	WREG32(0x657C, 0x85E085E0);
-	WREG32(0x6578, 0x802);
-	WREG32(0x657C, 0xBFF88228);
-	WREG32(0x6578, 0x10000);
-	WREG32(0x657C, 0x82A8BF00);
-	WREG32(0x6578, 0x10001);
-	WREG32(0x657C, 0x82A08CC0);
-	WREG32(0x6578, 0x10002);
-	WREG32(0x657C, 0x8008BEF8);
-	WREG32(0x6578, 0x10100);
-	WREG32(0x657C, 0x81F0BF28);
-	WREG32(0x6578, 0x10101);
-	WREG32(0x657C, 0x83608CA0);
-	WREG32(0x6578, 0x10102);
-	WREG32(0x657C, 0x8018BED0);
-	WREG32(0x6578, 0x10200);
-	WREG32(0x657C, 0x8148BF38);
-	WREG32(0x6578, 0x10201);
-	WREG32(0x657C, 0x84408C80);
-	WREG32(0x6578, 0x10202);
-	WREG32(0x657C, 0x8008BEB8);
-	WREG32(0x6578, 0x10300);
-	WREG32(0x657C, 0x80B0BF78);
-	WREG32(0x6578, 0x10301);
-	WREG32(0x657C, 0x85008C20);
-	WREG32(0x6578, 0x10302);
-	WREG32(0x657C, 0x8020BEA0);
-	WREG32(0x6578, 0x10400);
-	WREG32(0x657C, 0x8028BF90);
-	WREG32(0x6578, 0x10401);
-	WREG32(0x657C, 0x85E08BC0);
-	WREG32(0x6578, 0x10402);
-	WREG32(0x657C, 0x8018BE90);
-	WREG32(0x6578, 0x10500);
-	WREG32(0x657C, 0xBFB8BFB0);
-	WREG32(0x6578, 0x10501);
-	WREG32(0x657C, 0x86C08B40);
-	WREG32(0x6578, 0x10502);
-	WREG32(0x657C, 0x8010BE90);
-	WREG32(0x6578, 0x10600);
-	WREG32(0x657C, 0xBF58BFC8);
-	WREG32(0x6578, 0x10601);
-	WREG32(0x657C, 0x87A08AA0);
-	WREG32(0x6578, 0x10602);
-	WREG32(0x657C, 0x8010BE98);
-	WREG32(0x6578, 0x10700);
-	WREG32(0x657C, 0xBF10BFF0);
-	WREG32(0x6578, 0x10701);
-	WREG32(0x657C, 0x886089E0);
-	WREG32(0x6578, 0x10702);
-	WREG32(0x657C, 0x8018BEB0);
-	WREG32(0x6578, 0x10800);
-	WREG32(0x657C, 0xBED8BFE8);
-	WREG32(0x6578, 0x10801);
-	WREG32(0x657C, 0x89408940);
-	WREG32(0x6578, 0x10802);
-	WREG32(0x657C, 0xBFE8BED8);
-	WREG32(0x6578, 0x20000);
-	WREG32(0x657C, 0x80008000);
-	WREG32(0x6578, 0x20001);
-	WREG32(0x657C, 0x90008000);
-	WREG32(0x6578, 0x20002);
-	WREG32(0x657C, 0x80008000);
-	WREG32(0x6578, 0x20003);
-	WREG32(0x657C, 0x80008000);
-	WREG32(0x6578, 0x20100);
-	WREG32(0x657C, 0x80108000);
-	WREG32(0x6578, 0x20101);
-	WREG32(0x657C, 0x8FE0BF70);
-	WREG32(0x6578, 0x20102);
-	WREG32(0x657C, 0xBFE880C0);
-	WREG32(0x6578, 0x20103);
-	WREG32(0x657C, 0x80008000);
-	WREG32(0x6578, 0x20200);
-	WREG32(0x657C, 0x8018BFF8);
-	WREG32(0x6578, 0x20201);
-	WREG32(0x657C, 0x8F80BF08);
-	WREG32(0x6578, 0x20202);
-	WREG32(0x657C, 0xBFD081A0);
-	WREG32(0x6578, 0x20203);
-	WREG32(0x657C, 0xBFF88000);
-	WREG32(0x6578, 0x20300);
-	WREG32(0x657C, 0x80188000);
-	WREG32(0x6578, 0x20301);
-	WREG32(0x657C, 0x8EE0BEC0);
-	WREG32(0x6578, 0x20302);
-	WREG32(0x657C, 0xBFB082A0);
-	WREG32(0x6578, 0x20303);
-	WREG32(0x657C, 0x80008000);
-	WREG32(0x6578, 0x20400);
-	WREG32(0x657C, 0x80188000);
-	WREG32(0x6578, 0x20401);
-	WREG32(0x657C, 0x8E00BEA0);
-	WREG32(0x6578, 0x20402);
-	WREG32(0x657C, 0xBF8883C0);
-	WREG32(0x6578, 0x20403);
-	WREG32(0x657C, 0x80008000);
-	WREG32(0x6578, 0x20500);
-	WREG32(0x657C, 0x80188000);
-	WREG32(0x6578, 0x20501);
-	WREG32(0x657C, 0x8D00BE90);
-	WREG32(0x6578, 0x20502);
-	WREG32(0x657C, 0xBF588500);
-	WREG32(0x6578, 0x20503);
-	WREG32(0x657C, 0x80008008);
-	WREG32(0x6578, 0x20600);
-	WREG32(0x657C, 0x80188000);
-	WREG32(0x6578, 0x20601);
-	WREG32(0x657C, 0x8BC0BE98);
-	WREG32(0x6578, 0x20602);
-	WREG32(0x657C, 0xBF308660);
-	WREG32(0x6578, 0x20603);
-	WREG32(0x657C, 0x80008008);
-	WREG32(0x6578, 0x20700);
-	WREG32(0x657C, 0x80108000);
-	WREG32(0x6578, 0x20701);
-	WREG32(0x657C, 0x8A80BEB0);
-	WREG32(0x6578, 0x20702);
-	WREG32(0x657C, 0xBF0087C0);
-	WREG32(0x6578, 0x20703);
-	WREG32(0x657C, 0x80008008);
-	WREG32(0x6578, 0x20800);
-	WREG32(0x657C, 0x80108000);
-	WREG32(0x6578, 0x20801);
-	WREG32(0x657C, 0x8920BED0);
-	WREG32(0x6578, 0x20802);
-	WREG32(0x657C, 0xBED08920);
-	WREG32(0x6578, 0x20803);
-	WREG32(0x657C, 0x80008010);
-	WREG32(0x6578, 0x30000);
-	WREG32(0x657C, 0x90008000);
-	WREG32(0x6578, 0x30001);
-	WREG32(0x657C, 0x80008000);
-	WREG32(0x6578, 0x30100);
-	WREG32(0x657C, 0x8FE0BF90);
-	WREG32(0x6578, 0x30101);
-	WREG32(0x657C, 0xBFF880A0);
-	WREG32(0x6578, 0x30200);
-	WREG32(0x657C, 0x8F60BF40);
-	WREG32(0x6578, 0x30201);
-	WREG32(0x657C, 0xBFE88180);
-	WREG32(0x6578, 0x30300);
-	WREG32(0x657C, 0x8EC0BF00);
-	WREG32(0x6578, 0x30301);
-	WREG32(0x657C, 0xBFC88280);
-	WREG32(0x6578, 0x30400);
-	WREG32(0x657C, 0x8DE0BEE0);
-	WREG32(0x6578, 0x30401);
-	WREG32(0x657C, 0xBFA083A0);
-	WREG32(0x6578, 0x30500);
-	WREG32(0x657C, 0x8CE0BED0);
-	WREG32(0x6578, 0x30501);
-	WREG32(0x657C, 0xBF7884E0);
-	WREG32(0x6578, 0x30600);
-	WREG32(0x657C, 0x8BA0BED8);
-	WREG32(0x6578, 0x30601);
-	WREG32(0x657C, 0xBF508640);
-	WREG32(0x6578, 0x30700);
-	WREG32(0x657C, 0x8A60BEE8);
-	WREG32(0x6578, 0x30701);
-	WREG32(0x657C, 0xBF2087A0);
-	WREG32(0x6578, 0x30800);
-	WREG32(0x657C, 0x8900BF00);
-	WREG32(0x6578, 0x30801);
-	WREG32(0x657C, 0xBF008900);
-}
-
 static void
 atombios_yuv_setup(struct drm_encoder *encoder, bool enable)
 {
@@ -1073,129 +842,6 @@ atombios_yuv_setup(struct drm_encoder *encoder, bool enable)
 	WREG32(reg, temp);
 }
 
-static void
-atombios_overscan_setup(struct drm_encoder *encoder,
-			struct drm_display_mode *mode,
-			struct drm_display_mode *adjusted_mode)
-{
-	struct drm_device *dev = encoder->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
-	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
-	SET_CRTC_OVERSCAN_PS_ALLOCATION args;
-	int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_OverScan);
-
-	memset(&args, 0, sizeof(args));
-
-	args.usOverscanRight = 0;
-	args.usOverscanLeft = 0;
-	args.usOverscanBottom = 0;
-	args.usOverscanTop = 0;
-	args.ucCRTC = radeon_crtc->crtc_id;
-
-	if (radeon_encoder->flags & RADEON_USE_RMX) {
-		if (radeon_encoder->rmx_type == RMX_FULL) {
-			args.usOverscanRight = 0;
-			args.usOverscanLeft = 0;
-			args.usOverscanBottom = 0;
-			args.usOverscanTop = 0;
-		} else if (radeon_encoder->rmx_type == RMX_CENTER) {
-			args.usOverscanTop = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
-			args.usOverscanBottom = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
-			args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
-			args.usOverscanRight = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
-		} else if (radeon_encoder->rmx_type == RMX_ASPECT) {
-			int a1 = mode->crtc_vdisplay * adjusted_mode->crtc_hdisplay;
-			int a2 = adjusted_mode->crtc_vdisplay * mode->crtc_hdisplay;
-
-			if (a1 > a2) {
-				args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
-				args.usOverscanRight = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
-			} else if (a2 > a1) {
-				args.usOverscanLeft = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
-				args.usOverscanRight = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
-			}
-		}
-	}
-
-	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
-
-}
-
-static void
-atombios_scaler_setup(struct drm_encoder *encoder)
-{
-	struct drm_device *dev = encoder->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
-	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
-	ENABLE_SCALER_PS_ALLOCATION args;
-	int index = GetIndexIntoMasterTable(COMMAND, EnableScaler);
-	/* fixme - fill in enc_priv for atom dac */
-	enum radeon_tv_std tv_std = TV_STD_NTSC;
-
-	if (!ASIC_IS_AVIVO(rdev) && radeon_crtc->crtc_id)
-		return;
-
-	memset(&args, 0, sizeof(args));
-
-	args.ucScaler = radeon_crtc->crtc_id;
-
-	if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
-		switch (tv_std) {
-		case TV_STD_NTSC:
-		default:
-			args.ucTVStandard = ATOM_TV_NTSC;
-			break;
-		case TV_STD_PAL:
-			args.ucTVStandard = ATOM_TV_PAL;
-			break;
-		case TV_STD_PAL_M:
-			args.ucTVStandard = ATOM_TV_PALM;
-			break;
-		case TV_STD_PAL_60:
-			args.ucTVStandard = ATOM_TV_PAL60;
-			break;
-		case TV_STD_NTSC_J:
-			args.ucTVStandard = ATOM_TV_NTSCJ;
-			break;
-		case TV_STD_SCART_PAL:
-			args.ucTVStandard = ATOM_TV_PAL; /* ??? */
-			break;
-		case TV_STD_SECAM:
-			args.ucTVStandard = ATOM_TV_SECAM;
-			break;
-		case TV_STD_PAL_CN:
-			args.ucTVStandard = ATOM_TV_PALCN;
-			break;
-		}
-		args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
-	} else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT)) {
-		args.ucTVStandard = ATOM_TV_CV;
-		args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
-	} else if (radeon_encoder->flags & RADEON_USE_RMX) {
-		if (radeon_encoder->rmx_type == RMX_FULL)
-			args.ucEnable = ATOM_SCALER_EXPANSION;
-		else if (radeon_encoder->rmx_type == RMX_CENTER)
-			args.ucEnable = ATOM_SCALER_CENTER;
-		else if (radeon_encoder->rmx_type == RMX_ASPECT)
-			args.ucEnable = ATOM_SCALER_EXPANSION;
-	} else {
-		if (ASIC_IS_AVIVO(rdev))
-			args.ucEnable = ATOM_SCALER_DISABLE;
-		else
-			args.ucEnable = ATOM_SCALER_CENTER;
-	}
-
-	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
-
-	if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT | ATOM_DEVICE_TV_SUPPORT)
-	    && rdev->family >= CHIP_RV515 && rdev->family <= CHIP_RV570) {
-		atom_rv515_force_tv_scaler(rdev);
-	}
-
-}
-
 static void
 radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode)
 {
@@ -1448,8 +1094,6 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder,
 	radeon_encoder->pixel_clock = adjusted_mode->clock;
 
 	radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
-	atombios_overscan_setup(encoder, mode, adjusted_mode);
-	atombios_scaler_setup(encoder);
 	atombios_set_encoder_crtc_source(encoder);
 
 	if (ASIC_IS_AVIVO(rdev)) {
@@ -1667,6 +1311,7 @@ radeon_add_atom_encoder(struct drm_device *dev, uint32_t encoder_id, uint32_t su
 
 	radeon_encoder->encoder_id = encoder_id;
 	radeon_encoder->devices = supported_device;
+	radeon_encoder->rmx_type = RMX_OFF;
 
 	switch (radeon_encoder->encoder_id) {
 	case ENCODER_OBJECT_ID_INTERNAL_LVDS:

+ 64 - 6
drivers/gpu/drm/radeon/radeon_fb.c

@@ -101,9 +101,10 @@ static int radeonfb_setcolreg(unsigned regno,
 				break;
 			case 24:
 			case 32:
-				fb->pseudo_palette[regno] = ((red & 0xff00) << 8) |
-					(green & 0xff00) |
-					((blue  & 0xff00) >> 8);
+				fb->pseudo_palette[regno] =
+					(((red >> 8) & 0xff) << info->var.red.offset) |
+					(((green >> 8) & 0xff) << info->var.green.offset) |
+					(((blue >> 8) & 0xff) << info->var.blue.offset);
 				break;
 			}
 		}
@@ -154,6 +155,7 @@ static int radeonfb_check_var(struct fb_var_screeninfo *var,
 		var->transp.length = 0;
 		var->transp.offset = 0;
 		break;
+#ifdef __LITTLE_ENDIAN
 	case 15:
 		var->red.offset = 10;
 		var->green.offset = 5;
@@ -194,6 +196,28 @@ static int radeonfb_check_var(struct fb_var_screeninfo *var,
 		var->transp.length = 8;
 		var->transp.offset = 24;
 		break;
+#else
+	case 24:
+		var->red.offset = 8;
+		var->green.offset = 16;
+		var->blue.offset = 24;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.length = 0;
+		var->transp.offset = 0;
+		break;
+	case 32:
+		var->red.offset = 8;
+		var->green.offset = 16;
+		var->blue.offset = 24;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.length = 8;
+		var->transp.offset = 0;
+		break;
+#endif
 	default:
 		return -EINVAL;
 	}
@@ -447,10 +471,10 @@ static struct notifier_block paniced = {
 	.notifier_call = radeonfb_panic,
 };
 
-static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp)
+static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bool tiled)
 {
 	int aligned = width;
-	int align_large = (ASIC_IS_AVIVO(rdev));
+	int align_large = (ASIC_IS_AVIVO(rdev)) || tiled;
 	int pitch_mask = 0;
 
 	switch (bpp / 8) {
@@ -488,12 +512,13 @@ int radeonfb_create(struct radeon_device *rdev,
 	u64 fb_gpuaddr;
 	void *fbptr = NULL;
 	unsigned long tmp;
+	bool fb_tiled = false; /* useful for testing */
 
 	mode_cmd.width = surface_width;
 	mode_cmd.height = surface_height;
 	mode_cmd.bpp = 32;
 	/* need to align pitch with crtc limits */
-	mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp) * ((mode_cmd.bpp + 1) / 8);
+	mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp, fb_tiled) * ((mode_cmd.bpp + 1) / 8);
 	mode_cmd.depth = 24;
 
 	size = mode_cmd.pitch * mode_cmd.height;
@@ -511,6 +536,8 @@ int radeonfb_create(struct radeon_device *rdev,
 	}
 	robj = gobj->driver_private;
 
+	if (fb_tiled)
+		radeon_object_set_tiling_flags(robj, RADEON_TILING_MACRO|RADEON_TILING_SURFACE, mode_cmd.pitch);
 	mutex_lock(&rdev->ddev->struct_mutex);
 	fb = radeon_framebuffer_create(rdev->ddev, &mode_cmd, gobj);
 	if (fb == NULL) {
@@ -539,6 +566,9 @@ int radeonfb_create(struct radeon_device *rdev,
 	}
 	rfbdev = info->par;
 
+	if (fb_tiled)
+		radeon_object_check_tiling(robj, 0, 0);
+
 	ret = radeon_object_kmap(robj, &fbptr);
 	if (ret) {
 		goto out_unref;
@@ -572,6 +602,11 @@ int radeonfb_create(struct radeon_device *rdev,
 	info->var.width = -1;
 	info->var.xres = fb_width;
 	info->var.yres = fb_height;
+
+	/* setup aperture base/size for vesafb takeover */
+	info->aperture_base = rdev->ddev->mode_config.fb_base;
+	info->aperture_size = rdev->mc.real_vram_size;
+
 	info->fix.mmio_start = 0;
 	info->fix.mmio_len = 0;
 	info->pixmap.size = 64*1024;
@@ -600,6 +635,7 @@ int radeonfb_create(struct radeon_device *rdev,
 		info->var.transp.offset = 0;
 		info->var.transp.length = 0;
 		break;
+#ifdef __LITTLE_ENDIAN
 	case 15:
 		info->var.red.offset = 10;
 		info->var.green.offset = 5;
@@ -639,7 +675,29 @@ int radeonfb_create(struct radeon_device *rdev,
 		info->var.transp.offset = 24;
 		info->var.transp.length = 8;
 		break;
+#else
+	case 24:
+		info->var.red.offset = 8;
+		info->var.green.offset = 16;
+		info->var.blue.offset = 24;
+		info->var.red.length = 8;
+		info->var.green.length = 8;
+		info->var.blue.length = 8;
+		info->var.transp.offset = 0;
+		info->var.transp.length = 0;
+		break;
+	case 32:
+		info->var.red.offset = 8;
+		info->var.green.offset = 16;
+		info->var.blue.offset = 24;
+		info->var.red.length = 8;
+		info->var.green.length = 8;
+		info->var.blue.length = 8;
+		info->var.transp.offset = 0;
+		info->var.transp.length = 8;
+		break;
 	default:
+#endif
 		break;
 	}
 

+ 1 - 1
drivers/gpu/drm/radeon/radeon_fence.c

@@ -195,7 +195,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
 		r = wait_event_interruptible_timeout(rdev->fence_drv.queue,
 				radeon_fence_signaled(fence), timeout);
 		if (unlikely(r == -ERESTARTSYS)) {
-			return -ERESTART;
+			return -EBUSY;
 		}
 	} else {
 		r = wait_event_timeout(rdev->fence_drv.queue,

+ 1 - 1
drivers/gpu/drm/radeon/radeon_gart.c

@@ -177,7 +177,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 			return -ENOMEM;
 		}
 		rdev->gart.pages[p] = pagelist[i];
-		page_base = (uint32_t)rdev->gart.pages_addr[p];
+		page_base = rdev->gart.pages_addr[p];
 		for (j = 0; j < (PAGE_SIZE / 4096); j++, t++) {
 			radeon_gart_set_page(rdev, t, page_base);
 			page_base += 4096;

+ 43 - 2
drivers/gpu/drm/radeon/radeon_gem.c

@@ -157,9 +157,9 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
 	struct radeon_device *rdev = dev->dev_private;
 	struct drm_radeon_gem_info *args = data;
 
-	args->vram_size = rdev->mc.vram_size;
+	args->vram_size = rdev->mc.real_vram_size;
 	/* FIXME: report somethings that makes sense */
-	args->vram_visible = rdev->mc.vram_size - (4 * 1024 * 1024);
+	args->vram_visible = rdev->mc.real_vram_size - (4 * 1024 * 1024);
 	args->gart_size = rdev->mc.gtt_size;
 	return 0;
 }
@@ -285,3 +285,44 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	mutex_unlock(&dev->struct_mutex);
 	return r;
 }
+
+int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp)
+{	/* ioctl handler: hands args->tiling_flags and args->pitch to the object named by args->handle */
+	struct drm_radeon_gem_set_tiling *args = data;	/* ioctl payload; handle, tiling_flags and pitch are read */
+	struct drm_gem_object *gobj;
+	struct radeon_object *robj;
+	int r = 0;	/* never reassigned: every path past the lookup returns 0 */
+
+	DRM_DEBUG("%d \n", args->handle);
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL)	/* lookup produced no object for this handle */
+		return -EINVAL;
+	robj = gobj->driver_private;	/* radeon object is reached through the GEM object's driver_private */
+	radeon_object_set_tiling_flags(robj, args->tiling_flags, args->pitch);	/* any result of this call is not checked here */
+	mutex_lock(&dev->struct_mutex);	/* the unreference below runs with struct_mutex held */
+	drm_gem_object_unreference(gobj);	/* presumably balances the reference taken by the lookup - confirm */
+	mutex_unlock(&dev->struct_mutex);
+	return r;
+}
+
+int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp)
+{	/* ioctl handler: queries tiling flags and pitch of the object named by args->handle */
+	struct drm_radeon_gem_get_tiling *args = data;	/* ioctl payload; handle is read, tiling_flags and pitch are passed by address */
+	struct drm_gem_object *gobj;
+	struct radeon_object *robj;
+	int r = 0;	/* never reassigned: every path past the lookup returns 0 */
+
+	DRM_DEBUG("\n");
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL)	/* lookup produced no object for this handle */
+		return -EINVAL;
+	robj = gobj->driver_private;	/* radeon object is reached through the GEM object's driver_private */
+	radeon_object_get_tiling_flags(robj, &args->tiling_flags,	/* callee gets pointers straight into the ioctl payload */
+				       &args->pitch);
+	mutex_lock(&dev->struct_mutex);	/* the unreference below runs with struct_mutex held */
+	drm_gem_object_unreference(gobj);	/* presumably balances the reference taken by the lookup - confirm */
+	mutex_unlock(&dev->struct_mutex);
+	return r;
+}

+ 2 - 0
drivers/gpu/drm/radeon/radeon_kms.c

@@ -291,5 +291,7 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);

+ 197 - 490
drivers/gpu/drm/radeon/radeon_legacy_crtc.c

@@ -29,6 +29,171 @@
 #include "radeon_fixed.h"
 #include "radeon.h"
 
+static void radeon_legacy_rmx_mode_set(struct drm_crtc *crtc,
+				       struct drm_display_mode *mode,
+				       struct drm_display_mode *adjusted_mode)	/* adjusted_mode is not read anywhere below */
+{	/* Computes and writes the FP stretch, centering and FP timing registers from mode, the crtc's native_mode and its rmx_type. */
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	int xres = mode->hdisplay;	/* clamped to the panel size further down */
+	int yres = mode->vdisplay;
+	bool hscale = true, vscale = true;	/* cleared per axis when no stretch is needed */
+	int hsync_wid;
+	int vsync_wid;
+	int hsync_start;
+	int blank_width;
+	u32 scale, inc, crtc_more_cntl;
+	u32 fp_horz_stretch, fp_vert_stretch, fp_horz_vert_active;
+	u32 fp_h_sync_strt_wid, fp_crtc_h_total_disp;
+	u32 fp_v_sync_strt_wid, fp_crtc_v_total_disp;
+	struct radeon_native_mode *native_mode = &radeon_crtc->native_mode;
+
+	fp_vert_stretch = RREG32(RADEON_FP_VERT_STRETCH) &	/* start from the current register, keeping only the two masked bit groups */
+		(RADEON_VERT_STRETCH_RESERVED |
+		 RADEON_VERT_AUTO_RATIO_INC);
+	fp_horz_stretch = RREG32(RADEON_FP_HORZ_STRETCH) &	/* same for the horizontal register */
+		(RADEON_HORZ_FP_LOOP_STRETCH |
+		 RADEON_HORZ_AUTO_RATIO_INC);
+
+	crtc_more_cntl = 0;	/* built from scratch; the current register value is not read */
+	if ((rdev->family == CHIP_RS100) ||
+	    (rdev->family == CHIP_RS200)) {
+		/* Work around an ASIC bug affecting RMX: some BIOS versions
+		   do not initialize this register correctly. */
+		crtc_more_cntl |= RADEON_CRTC_H_CUTOFF_ACTIVE_EN;
+	}
+
+
+	fp_crtc_h_total_disp = ((((mode->crtc_htotal / 8) - 1) & 0x3ff)	/* default timings; the RMX_CENTER case recomputes all four timing words */
+				| ((((mode->crtc_hdisplay / 8) - 1) & 0x1ff) << 16));
+
+	hsync_wid = (mode->crtc_hsync_end - mode->crtc_hsync_start) / 8;
+	if (!hsync_wid)	/* never program a zero sync width */
+		hsync_wid = 1;
+	hsync_start = mode->crtc_hsync_start - 8;
+
+	fp_h_sync_strt_wid = ((hsync_start & 0x1fff)
+			      | ((hsync_wid & 0x3f) << 16)
+			      | ((mode->flags & DRM_MODE_FLAG_NHSYNC)
+				 ? RADEON_CRTC_H_SYNC_POL
+				 : 0));
+
+	fp_crtc_v_total_disp = (((mode->crtc_vtotal - 1) & 0xffff)
+				| ((mode->crtc_vdisplay - 1) << 16));	/* note: the upper field is shifted in without a mask */
+
+	vsync_wid = mode->crtc_vsync_end - mode->crtc_vsync_start;
+	if (!vsync_wid)	/* never program a zero sync width */
+		vsync_wid = 1;
+
+	fp_v_sync_strt_wid = (((mode->crtc_vsync_start - 1) & 0xfff)
+			      | ((vsync_wid & 0x1f) << 16)
+			      | ((mode->flags & DRM_MODE_FLAG_NVSYNC)
+				 ? RADEON_CRTC_V_SYNC_POL
+				 : 0));
+
+	fp_horz_vert_active = 0;	/* only the RMX_CENTER case sets this to a non-zero value */
+
+	if (native_mode->panel_xres == 0 ||
+	    native_mode->panel_yres == 0) {	/* a zero panel dimension disables scaling on both axes */
+		hscale = false;
+		vscale = false;
+	} else {
+		if (xres > native_mode->panel_xres)	/* clamp the requested size to the panel size */
+			xres = native_mode->panel_xres;
+		if (yres > native_mode->panel_yres)
+			yres = native_mode->panel_yres;
+
+		if (xres == native_mode->panel_xres)	/* already at panel size on this axis: nothing to stretch */
+			hscale = false;
+		if (yres == native_mode->panel_yres)
+			vscale = false;
+	}
+
+	switch (radeon_crtc->rmx_type) {
+	case RMX_FULL:
+	case RMX_ASPECT:	/* shares the RMX_FULL code path here */
+		if (!hscale)
+			fp_horz_stretch |= ((xres/8-1) << 16);	/* size field only, no stretch enable bits */
+		else {
+			inc = (fp_horz_stretch & RADEON_HORZ_AUTO_RATIO_INC) ? 1 : 0;	/* taken from the bit preserved from the register read above */
+			scale = ((xres + inc) * RADEON_HORZ_STRETCH_RATIO_MAX)
+				/ native_mode->panel_xres + 1;
+			fp_horz_stretch |= (((scale) & RADEON_HORZ_STRETCH_RATIO_MASK) |
+					RADEON_HORZ_STRETCH_BLEND |
+					RADEON_HORZ_STRETCH_ENABLE |
+					((native_mode->panel_xres/8-1) << 16));	/* size field uses the panel width in this branch */
+		}
+
+		if (!vscale)
+			fp_vert_stretch |= ((yres-1) << 12);	/* size field only, no stretch enable bits */
+		else {
+			inc = (fp_vert_stretch & RADEON_VERT_AUTO_RATIO_INC) ? 1 : 0;
+			scale = ((yres + inc) * RADEON_VERT_STRETCH_RATIO_MAX)
+				/ native_mode->panel_yres + 1;
+			fp_vert_stretch |= (((scale) & RADEON_VERT_STRETCH_RATIO_MASK) |
+					RADEON_VERT_STRETCH_ENABLE |
+					RADEON_VERT_STRETCH_BLEND |
+					((native_mode->panel_yres-1) << 12));	/* size field uses the panel height in this branch */
+		}
+		break;
+	case RMX_CENTER:
+		fp_horz_stretch |= ((xres/8-1) << 16);	/* no stretch bits; centering is requested via crtc_more_cntl */
+		fp_vert_stretch |= ((yres-1) << 12);
+
+		crtc_more_cntl |= (RADEON_CRTC_AUTO_HORZ_CENTER_EN |
+				RADEON_CRTC_AUTO_VERT_CENTER_EN);
+
+		blank_width = (mode->crtc_hblank_end - mode->crtc_hblank_start) / 8;
+		if (blank_width > 110)	/* the /8 blank width is capped at 110 */
+			blank_width = 110;
+
+		fp_crtc_h_total_disp = (((blank_width) & 0x3ff)	/* replaces the default computed above */
+				| ((((mode->crtc_hdisplay / 8) - 1) & 0x1ff) << 16));
+
+		hsync_wid = (mode->crtc_hsync_end - mode->crtc_hsync_start) / 8;
+		if (!hsync_wid)
+			hsync_wid = 1;
+
+		fp_h_sync_strt_wid = ((((mode->crtc_hsync_start - mode->crtc_hblank_start) / 8) & 0x1fff)	/* sync start is taken relative to blank start here */
+				| ((hsync_wid & 0x3f) << 16)
+				| ((mode->flags & DRM_MODE_FLAG_NHSYNC)
+					? RADEON_CRTC_H_SYNC_POL
+					: 0));
+
+		fp_crtc_v_total_disp = (((mode->crtc_vblank_end - mode->crtc_vblank_start) & 0xffff)
+				| ((mode->crtc_vdisplay - 1) << 16));
+
+		vsync_wid = mode->crtc_vsync_end - mode->crtc_vsync_start;
+		if (!vsync_wid)
+			vsync_wid = 1;
+
+		fp_v_sync_strt_wid = ((((mode->crtc_vsync_start - mode->crtc_vblank_start) & 0xfff)	/* sync start is taken relative to blank start here */
+					| ((vsync_wid & 0x1f) << 16)
+					| ((mode->flags & DRM_MODE_FLAG_NVSYNC)
+						? RADEON_CRTC_V_SYNC_POL
+						: 0)));
+
+		fp_horz_vert_active = (((native_mode->panel_yres) & 0xfff) |
+				(((native_mode->panel_xres / 8) & 0x1ff) << 16));
+		break;
+	case RMX_OFF:
+	default:	/* unknown rmx_type values are treated like RMX_OFF */
+		fp_horz_stretch |= ((xres/8-1) << 16);
+		fp_vert_stretch |= ((yres-1) << 12);
+		break;
+	}
+
+	WREG32(RADEON_FP_HORZ_STRETCH,      fp_horz_stretch);	/* all eight registers are written on every call, whatever rmx_type is */
+	WREG32(RADEON_FP_VERT_STRETCH,      fp_vert_stretch);
+	WREG32(RADEON_CRTC_MORE_CNTL,       crtc_more_cntl);
+	WREG32(RADEON_FP_HORZ_VERT_ACTIVE,  fp_horz_vert_active);
+	WREG32(RADEON_FP_H_SYNC_STRT_WID,   fp_h_sync_strt_wid);
+	WREG32(RADEON_FP_V_SYNC_STRT_WID,   fp_v_sync_strt_wid);
+	WREG32(RADEON_FP_CRTC_H_TOTAL_DISP, fp_crtc_h_total_disp);
+	WREG32(RADEON_FP_CRTC_V_TOTAL_DISP, fp_crtc_v_total_disp);
+}
+
 void radeon_restore_common_regs(struct drm_device *dev)
 {
 	/* don't need this yet */
@@ -235,6 +400,7 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 	uint64_t base;
 	uint32_t crtc_offset, crtc_offset_cntl, crtc_tile_x0_y0 = 0;
 	uint32_t crtc_pitch, pitch_pixels;
+	uint32_t tiling_flags;
 
 	DRM_DEBUG("\n");
 
@@ -244,7 +410,12 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 	if (radeon_gem_object_pin(obj, RADEON_GEM_DOMAIN_VRAM, &base)) {
 		return -EINVAL;
 	}
-	crtc_offset = (u32)base;
+	/* if scanout was in GTT this really wouldn't work */
+	/* crtc offset is from display base addr not FB location */
+	radeon_crtc->legacy_display_base_addr = rdev->mc.vram_location;
+
+	base -= radeon_crtc->legacy_display_base_addr;
+
 	crtc_offset_cntl = 0;
 
 	pitch_pixels = crtc->fb->pitch / (crtc->fb->bits_per_pixel / 8);
@@ -253,8 +424,12 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 		       (crtc->fb->bits_per_pixel * 8));
 	crtc_pitch |= crtc_pitch << 16;
 
-	/* TODO tiling */
-	if (0) {
+	radeon_object_get_tiling_flags(obj->driver_private,
+				       &tiling_flags, NULL);
+	if (tiling_flags & RADEON_TILING_MICRO)
+		DRM_ERROR("trying to scanout microtiled buffer\n");
+
+	if (tiling_flags & RADEON_TILING_MACRO) {
 		if (ASIC_IS_R300(rdev))
 			crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN |
 					     R300_CRTC_MICRO_TILE_BUFFER_DIS |
@@ -270,15 +445,13 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 			crtc_offset_cntl &= ~RADEON_CRTC_TILE_EN;
 	}
 
-
-	/* TODO more tiling */
-	if (0) {
+	if (tiling_flags & RADEON_TILING_MACRO) {
 		if (ASIC_IS_R300(rdev)) {
 			crtc_tile_x0_y0 = x | (y << 16);
 			base &= ~0x7ff;
 		} else {
 			int byteshift = crtc->fb->bits_per_pixel >> 4;
-			int tile_addr = (((y >> 3) * crtc->fb->width + x) >> (8 - byteshift)) << 11;
+			int tile_addr = (((y >> 3) * pitch_pixels +  x) >> (8 - byteshift)) << 11;
 			base += tile_addr + ((x << byteshift) % 256) + ((y % 8) << 8);
 			crtc_offset_cntl |= (y % 16);
 		}
@@ -303,11 +476,9 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 
 	base &= ~7;
 
-	/* update sarea TODO */
-
 	crtc_offset = (u32)base;
 
-	WREG32(RADEON_DISPLAY_BASE_ADDR + radeon_crtc->crtc_offset, rdev->mc.vram_location);
+	WREG32(RADEON_DISPLAY_BASE_ADDR + radeon_crtc->crtc_offset, radeon_crtc->legacy_display_base_addr);
 
 	if (ASIC_IS_R300(rdev)) {
 		if (radeon_crtc->crtc_id)
@@ -751,6 +922,8 @@ static bool radeon_crtc_mode_fixup(struct drm_crtc *crtc,
 				   struct drm_display_mode *mode,
 				   struct drm_display_mode *adjusted_mode)
 {
+	if (!radeon_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+		return false;
 	return true;
 }
 
@@ -759,16 +932,25 @@ static int radeon_crtc_mode_set(struct drm_crtc *crtc,
 				 struct drm_display_mode *adjusted_mode,
 				 int x, int y, struct drm_framebuffer *old_fb)
 {
-
-	DRM_DEBUG("\n");
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
 
 	/* TODO TV */
-
 	radeon_crtc_set_base(crtc, x, y, old_fb);
 	radeon_set_crtc_timing(crtc, adjusted_mode);
 	radeon_set_pll(crtc, adjusted_mode);
-	radeon_init_disp_bandwidth(crtc->dev);
-
+	radeon_bandwidth_update(rdev);
+	if (radeon_crtc->crtc_id == 0) {
+		radeon_legacy_rmx_mode_set(crtc, mode, adjusted_mode);
+	} else {
+		if (radeon_crtc->rmx_type != RMX_OFF) {
+			/* FIXME: only first crtc has rmx what should we
+			 * do ?
+			 */
+			DRM_ERROR("Mode need scaling but only first crtc can do that.\n");
+		}
+	}
 	return 0;
 }
 
@@ -799,478 +981,3 @@ void radeon_legacy_init_crtc(struct drm_device *dev,
 		radeon_crtc->crtc_offset = RADEON_CRTC2_H_TOTAL_DISP - RADEON_CRTC_H_TOTAL_DISP;
 	drm_crtc_helper_add(&radeon_crtc->base, &legacy_helper_funcs);
 }
-
-void radeon_init_disp_bw_legacy(struct drm_device *dev,
-				struct drm_display_mode *mode1,
-				uint32_t pixel_bytes1,
-				struct drm_display_mode *mode2,
-				uint32_t pixel_bytes2)
-{
-	struct radeon_device *rdev = dev->dev_private;
-	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
-	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
-	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
-	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
-	fixed20_12 memtcas_ff[8] = {
-		fixed_init(1),
-		fixed_init(2),
-		fixed_init(3),
-		fixed_init(0),
-		fixed_init_half(1),
-		fixed_init_half(2),
-		fixed_init(0),
-	};
-	fixed20_12 memtcas_rs480_ff[8] = {
-		fixed_init(0),
-		fixed_init(1),
-		fixed_init(2),
-		fixed_init(3),
-		fixed_init(0),
-		fixed_init_half(1),
-		fixed_init_half(2),
-		fixed_init_half(3),
-	};
-	fixed20_12 memtcas2_ff[8] = {
-		fixed_init(0),
-		fixed_init(1),
-		fixed_init(2),
-		fixed_init(3),
-		fixed_init(4),
-		fixed_init(5),
-		fixed_init(6),
-		fixed_init(7),
-	};
-	fixed20_12 memtrbs[8] = {
-		fixed_init(1),
-		fixed_init_half(1),
-		fixed_init(2),
-		fixed_init_half(2),
-		fixed_init(3),
-		fixed_init_half(3),
-		fixed_init(4),
-		fixed_init_half(4)
-	};
-	fixed20_12 memtrbs_r4xx[8] = {
-		fixed_init(4),
-		fixed_init(5),
-		fixed_init(6),
-		fixed_init(7),
-		fixed_init(8),
-		fixed_init(9),
-		fixed_init(10),
-		fixed_init(11)
-	};
-	fixed20_12 min_mem_eff;
-	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
-	fixed20_12 cur_latency_mclk, cur_latency_sclk;
-	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
-		disp_drain_rate2, read_return_rate;
-	fixed20_12 time_disp1_drop_priority;
-	int c;
-	int cur_size = 16;       /* in octawords */
-	int critical_point = 0, critical_point2;
-/* 	uint32_t read_return_rate, time_disp1_drop_priority; */
-	int stop_req, max_stop_req;
-
-	min_mem_eff.full = rfixed_const_8(0);
-	/* get modes */
-	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
-		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
-		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
-		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
-		/* check crtc enables */
-		if (mode2)
-			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
-		if (mode1)
-			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
-		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
-	}
-
-	/*
-	 * determine is there is enough bw for current mode
-	 */
-	mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
-	temp_ff.full = rfixed_const(100);
-	mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
-	sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
-	sclk_ff.full = rfixed_div(sclk_ff, temp_ff);
-
-	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
-	temp_ff.full = rfixed_const(temp);
-	mem_bw.full = rfixed_mul(mclk_ff, temp_ff);
-
-	pix_clk.full = 0;
-	pix_clk2.full = 0;
-	peak_disp_bw.full = 0;
-	if (mode1) {
-		temp_ff.full = rfixed_const(1000);
-		pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */
-		pix_clk.full = rfixed_div(pix_clk, temp_ff);
-		temp_ff.full = rfixed_const(pixel_bytes1);
-		peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
-	}
-	if (mode2) {
-		temp_ff.full = rfixed_const(1000);
-		pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */
-		pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
-		temp_ff.full = rfixed_const(pixel_bytes2);
-		peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
-	}
-
-	mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
-	if (peak_disp_bw.full >= mem_bw.full) {
-		DRM_ERROR("You may not have enough display bandwidth for current mode\n"
-			  "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
-	}
-
-	/*  Get values from the EXT_MEM_CNTL register...converting its contents. */
-	temp = RREG32(RADEON_MEM_TIMING_CNTL);
-	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
-		mem_trcd = ((temp >> 2) & 0x3) + 1;
-		mem_trp  = ((temp & 0x3)) + 1;
-		mem_tras = ((temp & 0x70) >> 4) + 1;
-	} else if (rdev->family == CHIP_R300 ||
-		   rdev->family == CHIP_R350) { /* r300, r350 */
-		mem_trcd = (temp & 0x7) + 1;
-		mem_trp = ((temp >> 8) & 0x7) + 1;
-		mem_tras = ((temp >> 11) & 0xf) + 4;
-	} else if (rdev->family == CHIP_RV350 ||
-		   rdev->family <= CHIP_RV380) {
-		/* rv3x0 */
-		mem_trcd = (temp & 0x7) + 3;
-		mem_trp = ((temp >> 8) & 0x7) + 3;
-		mem_tras = ((temp >> 11) & 0xf) + 6;
-	} else if (rdev->family == CHIP_R420 ||
-		   rdev->family == CHIP_R423 ||
-		   rdev->family == CHIP_RV410) {
-		/* r4xx */
-		mem_trcd = (temp & 0xf) + 3;
-		if (mem_trcd > 15)
-			mem_trcd = 15;
-		mem_trp = ((temp >> 8) & 0xf) + 3;
-		if (mem_trp > 15)
-			mem_trp = 15;
-		mem_tras = ((temp >> 12) & 0x1f) + 6;
-		if (mem_tras > 31)
-			mem_tras = 31;
-	} else { /* RV200, R200 */
-		mem_trcd = (temp & 0x7) + 1;
-		mem_trp = ((temp >> 8) & 0x7) + 1;
-		mem_tras = ((temp >> 12) & 0xf) + 4;
-	}
-	/* convert to FF */
-	trcd_ff.full = rfixed_const(mem_trcd);
-	trp_ff.full = rfixed_const(mem_trp);
-	tras_ff.full = rfixed_const(mem_tras);
-
-	/* Get values from the MEM_SDRAM_MODE_REG register...converting its */
-	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
-	data = (temp & (7 << 20)) >> 20;
-	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
-		if (rdev->family == CHIP_RS480) /* don't think rs400 */
-			tcas_ff = memtcas_rs480_ff[data];
-		else
-			tcas_ff = memtcas_ff[data];
-	} else
-		tcas_ff = memtcas2_ff[data];
-
-	if (rdev->family == CHIP_RS400 ||
-	    rdev->family == CHIP_RS480) {
-		/* extra cas latency stored in bits 23-25 0-4 clocks */
-		data = (temp >> 23) & 0x7;
-		if (data < 5)
-			tcas_ff.full += rfixed_const(data);
-	}
-
-	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
-		/* on the R300, Tcas is included in Trbs.
-		 */
-		temp = RREG32(RADEON_MEM_CNTL);
-		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
-		if (data == 1) {
-			if (R300_MEM_USE_CD_CH_ONLY & temp) {
-				temp = RREG32(R300_MC_IND_INDEX);
-				temp &= ~R300_MC_IND_ADDR_MASK;
-				temp |= R300_MC_READ_CNTL_CD_mcind;
-				WREG32(R300_MC_IND_INDEX, temp);
-				temp = RREG32(R300_MC_IND_DATA);
-				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
-			} else {
-				temp = RREG32(R300_MC_READ_CNTL_AB);
-				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
-			}
-		} else {
-			temp = RREG32(R300_MC_READ_CNTL_AB);
-			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
-		}
-		if (rdev->family == CHIP_RV410 ||
-		    rdev->family == CHIP_R420 ||
-		    rdev->family == CHIP_R423)
-			trbs_ff = memtrbs_r4xx[data];
-		else
-			trbs_ff = memtrbs[data];
-		tcas_ff.full += trbs_ff.full;
-	}
-
-	sclk_eff_ff.full = sclk_ff.full;
-
-	if (rdev->flags & RADEON_IS_AGP) {
-		fixed20_12 agpmode_ff;
-		agpmode_ff.full = rfixed_const(radeon_agpmode);
-		temp_ff.full = rfixed_const_666(16);
-		sclk_eff_ff.full -= rfixed_mul(agpmode_ff, temp_ff);
-	}
-	/* TODO PCIE lanes may affect this - agpmode == 16?? */
-
-	if (ASIC_IS_R300(rdev)) {
-		sclk_delay_ff.full = rfixed_const(250);
-	} else {
-		if ((rdev->family == CHIP_RV100) ||
-		    rdev->flags & RADEON_IS_IGP) {
-			if (rdev->mc.vram_is_ddr)
-				sclk_delay_ff.full = rfixed_const(41);
-			else
-				sclk_delay_ff.full = rfixed_const(33);
-		} else {
-			if (rdev->mc.vram_width == 128)
-				sclk_delay_ff.full = rfixed_const(57);
-			else
-				sclk_delay_ff.full = rfixed_const(41);
-		}
-	}
-
-	mc_latency_sclk.full = rfixed_div(sclk_delay_ff, sclk_eff_ff);
-
-	if (rdev->mc.vram_is_ddr) {
-		if (rdev->mc.vram_width == 32) {
-			k1.full = rfixed_const(40);
-			c  = 3;
-		} else {
-			k1.full = rfixed_const(20);
-			c  = 1;
-		}
-	} else {
-		k1.full = rfixed_const(40);
-		c  = 3;
-	}
-
-	temp_ff.full = rfixed_const(2);
-	mc_latency_mclk.full = rfixed_mul(trcd_ff, temp_ff);
-	temp_ff.full = rfixed_const(c);
-	mc_latency_mclk.full += rfixed_mul(tcas_ff, temp_ff);
-	temp_ff.full = rfixed_const(4);
-	mc_latency_mclk.full += rfixed_mul(tras_ff, temp_ff);
-	mc_latency_mclk.full += rfixed_mul(trp_ff, temp_ff);
-	mc_latency_mclk.full += k1.full;
-
-	mc_latency_mclk.full = rfixed_div(mc_latency_mclk, mclk_ff);
-	mc_latency_mclk.full += rfixed_div(temp_ff, sclk_eff_ff);
-
-	/*
-	  HW cursor time assuming worst case of full size colour cursor.
-	*/
-	temp_ff.full = rfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
-	temp_ff.full += trcd_ff.full;
-	if (temp_ff.full < tras_ff.full)
-		temp_ff.full = tras_ff.full;
-	cur_latency_mclk.full = rfixed_div(temp_ff, mclk_ff);
-
-	temp_ff.full = rfixed_const(cur_size);
-	cur_latency_sclk.full = rfixed_div(temp_ff, sclk_eff_ff);
-	/*
-	  Find the total latency for the display data.
-	*/
-	disp_latency_overhead.full = rfixed_const(80);
-	disp_latency_overhead.full = rfixed_div(disp_latency_overhead, sclk_ff);
-	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
-	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
-
-	if (mc_latency_mclk.full > mc_latency_sclk.full)
-		disp_latency.full = mc_latency_mclk.full;
-	else
-		disp_latency.full = mc_latency_sclk.full;
-
-	/* setup Max GRPH_STOP_REQ default value */
-	if (ASIC_IS_RV100(rdev))
-		max_stop_req = 0x5c;
-	else
-		max_stop_req = 0x7c;
-
-	if (mode1) {
-		/*  CRTC1
-		    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
-		    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
-		*/
-		stop_req = mode1->hdisplay * pixel_bytes1 / 16;
-
-		if (stop_req > max_stop_req)
-			stop_req = max_stop_req;
-
-		/*
-		  Find the drain rate of the display buffer.
-		*/
-		temp_ff.full = rfixed_const((16/pixel_bytes1));
-		disp_drain_rate.full = rfixed_div(pix_clk, temp_ff);
-
-		/*
-		  Find the critical point of the display buffer.
-		*/
-		crit_point_ff.full = rfixed_mul(disp_drain_rate, disp_latency);
-		crit_point_ff.full += rfixed_const_half(0);
-
-		critical_point = rfixed_trunc(crit_point_ff);
-
-		if (rdev->disp_priority == 2) {
-			critical_point = 0;
-		}
-
-		/*
-		  The critical point should never be above max_stop_req-4.  Setting
-		  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
-		*/
-		if (max_stop_req - critical_point < 4)
-			critical_point = 0;
-
-		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
-			/* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
-			critical_point = 0x10;
-		}
-
-		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
-		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
-		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
-		temp &= ~(RADEON_GRPH_START_REQ_MASK);
-		if ((rdev->family == CHIP_R350) &&
-		    (stop_req > 0x15)) {
-			stop_req -= 0x10;
-		}
-		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
-		temp |= RADEON_GRPH_BUFFER_SIZE;
-		temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
-			  RADEON_GRPH_CRITICAL_AT_SOF |
-			  RADEON_GRPH_STOP_CNTL);
-		/*
-		  Write the result into the register.
-		*/
-		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
-						       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
-
-#if 0
-		if ((rdev->family == CHIP_RS400) ||
-		    (rdev->family == CHIP_RS480)) {
-			/* attempt to program RS400 disp regs correctly ??? */
-			temp = RREG32(RS400_DISP1_REG_CNTL);
-			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
-				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
-			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
-						       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
-						       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
-			temp = RREG32(RS400_DMIF_MEM_CNTL1);
-			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
-				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
-			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
-						      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
-						      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
-		}
-#endif
-
-		DRM_DEBUG("GRPH_BUFFER_CNTL from to %x\n",
-			  /* 	  (unsigned int)info->SavedReg->grph_buffer_cntl, */
-			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
-	}
-
-	if (mode2) {
-		u32 grph2_cntl;
-		stop_req = mode2->hdisplay * pixel_bytes2 / 16;
-
-		if (stop_req > max_stop_req)
-			stop_req = max_stop_req;
-
-		/*
-		  Find the drain rate of the display buffer.
-		*/
-		temp_ff.full = rfixed_const((16/pixel_bytes2));
-		disp_drain_rate2.full = rfixed_div(pix_clk2, temp_ff);
-
-		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
-		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
-		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
-		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
-		if ((rdev->family == CHIP_R350) &&
-		    (stop_req > 0x15)) {
-			stop_req -= 0x10;
-		}
-		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
-		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
-		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
-			  RADEON_GRPH_CRITICAL_AT_SOF |
-			  RADEON_GRPH_STOP_CNTL);
-
-		if ((rdev->family == CHIP_RS100) ||
-		    (rdev->family == CHIP_RS200))
-			critical_point2 = 0;
-		else {
-			temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
-			temp_ff.full = rfixed_const(temp);
-			temp_ff.full = rfixed_mul(mclk_ff, temp_ff);
-			if (sclk_ff.full < temp_ff.full)
-				temp_ff.full = sclk_ff.full;
-
-			read_return_rate.full = temp_ff.full;
-
-			if (mode1) {
-				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
-				time_disp1_drop_priority.full = rfixed_div(crit_point_ff, temp_ff);
-			} else {
-				time_disp1_drop_priority.full = 0;
-			}
-			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
-			crit_point_ff.full = rfixed_mul(crit_point_ff, disp_drain_rate2);
-			crit_point_ff.full += rfixed_const_half(0);
-
-			critical_point2 = rfixed_trunc(crit_point_ff);
-
-			if (rdev->disp_priority == 2) {
-				critical_point2 = 0;
-			}
-
-			if (max_stop_req - critical_point2 < 4)
-				critical_point2 = 0;
-
-		}
-
-		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
-			/* some R300 cards have problem with this set to 0 */
-			critical_point2 = 0x10;
-		}
-
-		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
-						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
-
-		if ((rdev->family == CHIP_RS400) ||
-		    (rdev->family == CHIP_RS480)) {
-#if 0
-			/* attempt to program RS400 disp2 regs correctly ??? */
-			temp = RREG32(RS400_DISP2_REQ_CNTL1);
-			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
-				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
-			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
-						       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
-						       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
-			temp = RREG32(RS400_DISP2_REQ_CNTL2);
-			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
-				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
-			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
-						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
-						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
-#endif
-			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
-			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
-			WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
-			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
-		}
-
-		DRM_DEBUG("GRPH2_BUFFER_CNTL from to %x\n",
-			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
-	}
-}

+ 4 - 184
drivers/gpu/drm/radeon/radeon_legacy_encoders.c

@@ -30,170 +30,6 @@
 #include "atom.h"
 
 
-static void radeon_legacy_rmx_mode_set(struct drm_encoder *encoder,
-				       struct drm_display_mode *mode,
-				       struct drm_display_mode *adjusted_mode)
-{
-	struct drm_device *dev = encoder->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
-	int    xres = mode->hdisplay;
-	int    yres = mode->vdisplay;
-	bool   hscale = true, vscale = true;
-	int    hsync_wid;
-	int    vsync_wid;
-	int    hsync_start;
-	uint32_t scale, inc;
-	uint32_t fp_horz_stretch, fp_vert_stretch, crtc_more_cntl, fp_horz_vert_active;
-	uint32_t fp_h_sync_strt_wid, fp_v_sync_strt_wid, fp_crtc_h_total_disp, fp_crtc_v_total_disp;
-	struct radeon_native_mode *native_mode = &radeon_encoder->native_mode;
-
-	DRM_DEBUG("\n");
-
-	fp_vert_stretch = RREG32(RADEON_FP_VERT_STRETCH) &
-		(RADEON_VERT_STRETCH_RESERVED |
-		 RADEON_VERT_AUTO_RATIO_INC);
-	fp_horz_stretch = RREG32(RADEON_FP_HORZ_STRETCH) &
-		(RADEON_HORZ_FP_LOOP_STRETCH |
-		 RADEON_HORZ_AUTO_RATIO_INC);
-
-	crtc_more_cntl = 0;
-	if ((rdev->family == CHIP_RS100) ||
-	    (rdev->family == CHIP_RS200)) {
-		/* This is to workaround the asic bug for RMX, some versions
-		   of BIOS dosen't have this register initialized correctly. */
-		crtc_more_cntl |= RADEON_CRTC_H_CUTOFF_ACTIVE_EN;
-	}
-
-
-	fp_crtc_h_total_disp = ((((mode->crtc_htotal / 8) - 1) & 0x3ff)
-				| ((((mode->crtc_hdisplay / 8) - 1) & 0x1ff) << 16));
-
-	hsync_wid = (mode->crtc_hsync_end - mode->crtc_hsync_start) / 8;
-	if (!hsync_wid)
-		hsync_wid = 1;
-	hsync_start = mode->crtc_hsync_start - 8;
-
-	fp_h_sync_strt_wid = ((hsync_start & 0x1fff)
-			      | ((hsync_wid & 0x3f) << 16)
-			      | ((mode->flags & DRM_MODE_FLAG_NHSYNC)
-				 ? RADEON_CRTC_H_SYNC_POL
-				 : 0));
-
-	fp_crtc_v_total_disp = (((mode->crtc_vtotal - 1) & 0xffff)
-				| ((mode->crtc_vdisplay - 1) << 16));
-
-	vsync_wid = mode->crtc_vsync_end - mode->crtc_vsync_start;
-	if (!vsync_wid)
-		vsync_wid = 1;
-
-	fp_v_sync_strt_wid = (((mode->crtc_vsync_start - 1) & 0xfff)
-			      | ((vsync_wid & 0x1f) << 16)
-			      | ((mode->flags & DRM_MODE_FLAG_NVSYNC)
-				 ? RADEON_CRTC_V_SYNC_POL
-				 : 0));
-
-	fp_horz_vert_active = 0;
-
-	if (native_mode->panel_xres == 0 ||
-	    native_mode->panel_yres == 0) {
-		hscale = false;
-		vscale = false;
-	} else {
-		if (xres > native_mode->panel_xres)
-			xres = native_mode->panel_xres;
-		if (yres > native_mode->panel_yres)
-			yres = native_mode->panel_yres;
-
-		if (xres == native_mode->panel_xres)
-			hscale = false;
-		if (yres == native_mode->panel_yres)
-			vscale = false;
-	}
-
-	if (radeon_encoder->flags & RADEON_USE_RMX) {
-		if (radeon_encoder->rmx_type != RMX_CENTER) {
-			if (!hscale)
-				fp_horz_stretch |= ((xres/8-1) << 16);
-			else {
-				inc = (fp_horz_stretch & RADEON_HORZ_AUTO_RATIO_INC) ? 1 : 0;
-				scale = ((xres + inc) * RADEON_HORZ_STRETCH_RATIO_MAX)
-					/ native_mode->panel_xres + 1;
-				fp_horz_stretch |= (((scale) & RADEON_HORZ_STRETCH_RATIO_MASK) |
-						    RADEON_HORZ_STRETCH_BLEND |
-						    RADEON_HORZ_STRETCH_ENABLE |
-						    ((native_mode->panel_xres/8-1) << 16));
-			}
-
-			if (!vscale)
-				fp_vert_stretch |= ((yres-1) << 12);
-			else {
-				inc = (fp_vert_stretch & RADEON_VERT_AUTO_RATIO_INC) ? 1 : 0;
-				scale = ((yres + inc) * RADEON_VERT_STRETCH_RATIO_MAX)
-					/ native_mode->panel_yres + 1;
-				fp_vert_stretch |= (((scale) & RADEON_VERT_STRETCH_RATIO_MASK) |
-						    RADEON_VERT_STRETCH_ENABLE |
-						    RADEON_VERT_STRETCH_BLEND |
-						    ((native_mode->panel_yres-1) << 12));
-			}
-		} else if (radeon_encoder->rmx_type == RMX_CENTER) {
-			int    blank_width;
-
-			fp_horz_stretch |= ((xres/8-1) << 16);
-			fp_vert_stretch |= ((yres-1) << 12);
-
-			crtc_more_cntl |= (RADEON_CRTC_AUTO_HORZ_CENTER_EN |
-					   RADEON_CRTC_AUTO_VERT_CENTER_EN);
-
-			blank_width = (mode->crtc_hblank_end - mode->crtc_hblank_start) / 8;
-			if (blank_width > 110)
-				blank_width = 110;
-
-			fp_crtc_h_total_disp = (((blank_width) & 0x3ff)
-						| ((((mode->crtc_hdisplay / 8) - 1) & 0x1ff) << 16));
-
-			hsync_wid = (mode->crtc_hsync_end - mode->crtc_hsync_start) / 8;
-			if (!hsync_wid)
-				hsync_wid = 1;
-
-			fp_h_sync_strt_wid = ((((mode->crtc_hsync_start - mode->crtc_hblank_start) / 8) & 0x1fff)
-					      | ((hsync_wid & 0x3f) << 16)
-					      | ((mode->flags & DRM_MODE_FLAG_NHSYNC)
-						 ? RADEON_CRTC_H_SYNC_POL
-						 : 0));
-
-			fp_crtc_v_total_disp = (((mode->crtc_vblank_end - mode->crtc_vblank_start) & 0xffff)
-						| ((mode->crtc_vdisplay - 1) << 16));
-
-			vsync_wid = mode->crtc_vsync_end - mode->crtc_vsync_start;
-			if (!vsync_wid)
-				vsync_wid = 1;
-
-			fp_v_sync_strt_wid = ((((mode->crtc_vsync_start - mode->crtc_vblank_start) & 0xfff)
-					       | ((vsync_wid & 0x1f) << 16)
-					       | ((mode->flags & DRM_MODE_FLAG_NVSYNC)
-						  ? RADEON_CRTC_V_SYNC_POL
-						  : 0)));
-
-			fp_horz_vert_active = (((native_mode->panel_yres) & 0xfff) |
-					       (((native_mode->panel_xres / 8) & 0x1ff) << 16));
-		}
-	} else {
-		fp_horz_stretch |= ((xres/8-1) << 16);
-		fp_vert_stretch |= ((yres-1) << 12);
-	}
-
-	WREG32(RADEON_FP_HORZ_STRETCH,      fp_horz_stretch);
-	WREG32(RADEON_FP_VERT_STRETCH,      fp_vert_stretch);
-	WREG32(RADEON_CRTC_MORE_CNTL,       crtc_more_cntl);
-	WREG32(RADEON_FP_HORZ_VERT_ACTIVE,  fp_horz_vert_active);
-	WREG32(RADEON_FP_H_SYNC_STRT_WID,   fp_h_sync_strt_wid);
-	WREG32(RADEON_FP_V_SYNC_STRT_WID,   fp_v_sync_strt_wid);
-	WREG32(RADEON_FP_CRTC_H_TOTAL_DISP, fp_crtc_h_total_disp);
-	WREG32(RADEON_FP_CRTC_V_TOTAL_DISP, fp_crtc_v_total_disp);
-
-}
-
 static void radeon_legacy_lvds_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct drm_device *dev = encoder->dev;
@@ -287,9 +123,6 @@ static void radeon_legacy_lvds_mode_set(struct drm_encoder *encoder,
 
 	DRM_DEBUG("\n");
 
-	if (radeon_crtc->crtc_id == 0)
-		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
-
 	lvds_pll_cntl = RREG32(RADEON_LVDS_PLL_CNTL);
 	lvds_pll_cntl &= ~RADEON_LVDS_PLL_EN;
 
@@ -318,7 +151,7 @@ static void radeon_legacy_lvds_mode_set(struct drm_encoder *encoder,
 
 	if (radeon_crtc->crtc_id == 0) {
 		if (ASIC_IS_R300(rdev)) {
-			if (radeon_encoder->flags & RADEON_USE_RMX)
+			if (radeon_encoder->rmx_type != RMX_OFF)
 				lvds_pll_cntl |= R300_LVDS_SRC_SEL_RMX;
 		} else
 			lvds_gen_cntl &= ~RADEON_LVDS_SEL_CRTC2;
@@ -350,8 +183,6 @@ static bool radeon_legacy_lvds_mode_fixup(struct drm_encoder *encoder,
 
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
 
-	radeon_encoder->flags &= ~RADEON_USE_RMX;
-
 	if (radeon_encoder->rmx_type != RMX_OFF)
 		radeon_rmx_mode_fixup(encoder, mode, adjusted_mode);
 
@@ -455,9 +286,6 @@ static void radeon_legacy_primary_dac_mode_set(struct drm_encoder *encoder,
 
 	DRM_DEBUG("\n");
 
-	if (radeon_crtc->crtc_id == 0)
-		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
-
 	if (radeon_crtc->crtc_id == 0) {
 		if (rdev->family == CHIP_R200 || ASIC_IS_R300(rdev)) {
 			disp_output_cntl = RREG32(RADEON_DISP_OUTPUT_CNTL) &
@@ -653,9 +481,6 @@ static void radeon_legacy_tmds_int_mode_set(struct drm_encoder *encoder,
 
 	DRM_DEBUG("\n");
 
-	if (radeon_crtc->crtc_id == 0)
-		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
-
 	tmp = tmds_pll_cntl = RREG32(RADEON_TMDS_PLL_CNTL);
 	tmp &= 0xfffff;
 	if (rdev->family == CHIP_RV280) {
@@ -711,7 +536,7 @@ static void radeon_legacy_tmds_int_mode_set(struct drm_encoder *encoder,
     if (radeon_crtc->crtc_id == 0) {
 	    if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
 		    fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
-		    if (radeon_encoder->flags & RADEON_USE_RMX)
+		    if (radeon_encoder->rmx_type != RMX_OFF)
 			    fp_gen_cntl |= R200_FP_SOURCE_SEL_RMX;
 		    else
 			    fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC1;
@@ -820,9 +645,6 @@ static void radeon_legacy_tmds_ext_mode_set(struct drm_encoder *encoder,
 
 	DRM_DEBUG("\n");
 
-	if (radeon_crtc->crtc_id == 0)
-		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
-
 	if (rdev->is_atom_bios) {
 		radeon_encoder->pixel_clock = adjusted_mode->clock;
 		atombios_external_tmds_setup(encoder, ATOM_ENABLE);
@@ -856,7 +678,7 @@ static void radeon_legacy_tmds_ext_mode_set(struct drm_encoder *encoder,
 	if (radeon_crtc->crtc_id == 0) {
 		if ((rdev->family == CHIP_R200) || ASIC_IS_R300(rdev)) {
 			fp2_gen_cntl &= ~R200_FP2_SOURCE_SEL_MASK;
-			if (radeon_encoder->flags & RADEON_USE_RMX)
+			if (radeon_encoder->rmx_type != RMX_OFF)
 				fp2_gen_cntl |= R200_FP2_SOURCE_SEL_RMX;
 			else
 				fp2_gen_cntl |= R200_FP2_SOURCE_SEL_CRTC1;
@@ -1014,9 +836,6 @@ static void radeon_legacy_tv_dac_mode_set(struct drm_encoder *encoder,
 
 	DRM_DEBUG("\n");
 
-	if (radeon_crtc->crtc_id == 0)
-		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
-
 	if (rdev->family != CHIP_R200) {
 		tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
 		if (rdev->family == CHIP_R420 ||
@@ -1243,6 +1062,7 @@ radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_id, uint32_t
 
 	radeon_encoder->encoder_id = encoder_id;
 	radeon_encoder->devices = supported_device;
+	radeon_encoder->rmx_type = RMX_OFF;
 
 	switch (radeon_encoder->encoder_id) {
 	case ENCODER_OBJECT_ID_INTERNAL_LVDS:

+ 27 - 24
drivers/gpu/drm/radeon/radeon_mode.h

@@ -36,6 +36,9 @@
 #include <linux/i2c.h>
 #include <linux/i2c-id.h>
 #include <linux/i2c-algo-bit.h>
+#include "radeon_fixed.h"
+
+struct radeon_device;
 
 #define to_radeon_crtc(x) container_of(x, struct radeon_crtc, base)
 #define to_radeon_connector(x) container_of(x, struct radeon_connector, base)
@@ -124,6 +127,7 @@ struct radeon_tmds_pll {
 #define RADEON_PLL_PREFER_LOW_POST_DIV  (1 << 8)
 #define RADEON_PLL_PREFER_HIGH_POST_DIV (1 << 9)
 #define RADEON_PLL_USE_FRAC_FB_DIV      (1 << 10)
+#define RADEON_PLL_PREFER_CLOSEST_LOWER (1 << 11)
 
 struct radeon_pll {
 	uint16_t reference_freq;
@@ -170,6 +174,18 @@ struct radeon_mode_info {
 	struct atom_context *atom_context;
 	enum radeon_connector_table connector_table;
 	bool mode_config_initialized;
+	struct radeon_crtc *crtcs[2];
+};
+
+struct radeon_native_mode {
+	/* preferred mode */
+	uint32_t panel_xres, panel_yres;
+	uint32_t hoverplus, hsync_width;
+	uint32_t hblank;
+	uint32_t voverplus, vsync_width;
+	uint32_t vblank;
+	uint32_t dotclock;
+	uint32_t flags;
 };
 
 struct radeon_crtc {
@@ -185,19 +201,13 @@ struct radeon_crtc {
 	uint64_t cursor_addr;
 	int cursor_width;
 	int cursor_height;
-};
-
-#define RADEON_USE_RMX 1
-
-struct radeon_native_mode {
-	/* preferred mode */
-	uint32_t panel_xres, panel_yres;
-	uint32_t hoverplus, hsync_width;
-	uint32_t hblank;
-	uint32_t voverplus, vsync_width;
-	uint32_t vblank;
-	uint32_t dotclock;
-	uint32_t flags;
+	uint32_t legacy_display_base_addr;
+	uint32_t legacy_cursor_offset;
+	enum radeon_rmx_type rmx_type;
+	uint32_t devices;
+	fixed20_12 vsc;
+	fixed20_12 hsc;
+	struct radeon_native_mode native_mode;
 };
 
 struct radeon_encoder_primary_dac {
@@ -383,16 +393,9 @@ void radeon_enc_destroy(struct drm_encoder *encoder);
 void radeon_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
 void radeon_combios_asic_init(struct drm_device *dev);
 extern int radeon_static_clocks_init(struct drm_device *dev);
-void radeon_init_disp_bw_legacy(struct drm_device *dev,
-				struct drm_display_mode *mode1,
-				uint32_t pixel_bytes1,
-				struct drm_display_mode *mode2,
-				uint32_t pixel_bytes2);
-void radeon_init_disp_bw_avivo(struct drm_device *dev,
-			       struct drm_display_mode *mode1,
-			       uint32_t pixel_bytes1,
-			       struct drm_display_mode *mode2,
-			       uint32_t pixel_bytes2);
-void radeon_init_disp_bandwidth(struct drm_device *dev);
+bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+					struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode);
+void atom_rv515_force_tv_scaler(struct radeon_device *rdev);
 
 #endif

+ 142 - 16
drivers/gpu/drm/radeon/radeon_object.c

@@ -44,6 +44,9 @@ struct radeon_object {
 	uint64_t			gpu_addr;
 	void				*kptr;
 	bool				is_iomem;
+	uint32_t			tiling_flags;
+	uint32_t			pitch;
+	int				surface_reg;
 };
 
 int radeon_ttm_init(struct radeon_device *rdev);
@@ -70,6 +73,7 @@ static void radeon_ttm_object_object_destroy(struct ttm_buffer_object *tobj)
 
 	robj = container_of(tobj, struct radeon_object, tobj);
 	list_del_init(&robj->list);
+	radeon_object_clear_surface_reg(robj);
 	kfree(robj);
 }
 
@@ -99,16 +103,16 @@ static inline uint32_t radeon_object_flags_from_domain(uint32_t domain)
 {
 	uint32_t flags = 0;
 	if (domain & RADEON_GEM_DOMAIN_VRAM) {
-		flags |= TTM_PL_FLAG_VRAM;
+		flags |= TTM_PL_FLAG_VRAM | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED;
 	}
 	if (domain & RADEON_GEM_DOMAIN_GTT) {
-		flags |= TTM_PL_FLAG_TT;
+		flags |= TTM_PL_FLAG_TT | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED;
 	}
 	if (domain & RADEON_GEM_DOMAIN_CPU) {
-		flags |= TTM_PL_FLAG_SYSTEM;
+		flags |= TTM_PL_FLAG_SYSTEM | TTM_PL_MASK_CACHING;
 	}
 	if (!flags) {
-		flags |= TTM_PL_FLAG_SYSTEM;
+		flags |= TTM_PL_FLAG_SYSTEM | TTM_PL_MASK_CACHING;
 	}
 	return flags;
 }
@@ -141,6 +145,7 @@ int radeon_object_create(struct radeon_device *rdev,
 	}
 	robj->rdev = rdev;
 	robj->gobj = gobj;
+	robj->surface_reg = -1;
 	INIT_LIST_HEAD(&robj->list);
 
 	flags = radeon_object_flags_from_domain(domain);
@@ -304,7 +309,7 @@ int radeon_object_wait(struct radeon_object *robj)
 	}
 	spin_lock(&robj->tobj.lock);
 	if (robj->tobj.sync_obj) {
-		r = ttm_bo_wait(&robj->tobj, true, false, false);
+		r = ttm_bo_wait(&robj->tobj, true, true, false);
 	}
 	spin_unlock(&robj->tobj.lock);
 	radeon_object_unreserve(robj);
@@ -403,7 +408,6 @@ int radeon_object_list_validate(struct list_head *head, void *fence)
 	struct radeon_object *robj;
 	struct radeon_fence *old_fence = NULL;
 	struct list_head *i;
-	uint32_t flags;
 	int r;
 
 	r = radeon_object_list_reserve(head);
@@ -414,27 +418,25 @@ int radeon_object_list_validate(struct list_head *head, void *fence)
 	list_for_each(i, head) {
 		lobj = list_entry(i, struct radeon_object_list, list);
 		robj = lobj->robj;
-		if (lobj->wdomain) {
-			flags = radeon_object_flags_from_domain(lobj->wdomain);
-			flags |= TTM_PL_FLAG_TT;
-		} else {
-			flags = radeon_object_flags_from_domain(lobj->rdomain);
-			flags |= TTM_PL_FLAG_TT;
-			flags |= TTM_PL_FLAG_VRAM;
-		}
 		if (!robj->pin_count) {
-			robj->tobj.proposed_placement = flags | TTM_PL_MASK_CACHING;
+			if (lobj->wdomain) {
+				robj->tobj.proposed_placement =
+					radeon_object_flags_from_domain(lobj->wdomain);
+			} else {
+				robj->tobj.proposed_placement =
+					radeon_object_flags_from_domain(lobj->rdomain);
+			}
 			r = ttm_buffer_object_validate(&robj->tobj,
 						       robj->tobj.proposed_placement,
 						       true, false);
 			if (unlikely(r)) {
-				radeon_object_list_unreserve(head);
 				DRM_ERROR("radeon: failed to validate.\n");
 				return r;
 			}
 			radeon_object_gpu_addr(robj);
 		}
 		lobj->gpu_offset = robj->gpu_addr;
+		lobj->tiling_flags = robj->tiling_flags;
 		if (fence) {
 			old_fence = (struct radeon_fence *)robj->tobj.sync_obj;
 			robj->tobj.sync_obj = radeon_fence_ref(fence);
@@ -479,3 +481,127 @@ unsigned long radeon_object_size(struct radeon_object *robj)
 {
 	return robj->tobj.num_pages << PAGE_SHIFT;
 }
+
+int radeon_object_get_surface_reg(struct radeon_object *robj)
+{
+	struct radeon_device *rdev = robj->rdev;
+	struct radeon_surface_reg *reg;
+	struct radeon_object *old_object;
+	int steal;
+	int i;
+
+	if (!robj->tiling_flags)
+		return 0;
+
+	if (robj->surface_reg >= 0) {
+		reg = &rdev->surface_regs[robj->surface_reg];
+		i = robj->surface_reg;
+		goto out;
+	}
+
+	steal = -1;
+	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {
+
+		reg = &rdev->surface_regs[i];
+		if (!reg->robj)
+			break;
+
+		old_object = reg->robj;
+		if (old_object->pin_count == 0)
+			steal = i;
+	}
+
+	/* if we are all out */
+	if (i == RADEON_GEM_MAX_SURFACES) {
+		if (steal == -1)
+			return -ENOMEM;
+		/* find someone with a surface reg and nuke their BO */
+		reg = &rdev->surface_regs[steal];
+		old_object = reg->robj;
+		/* blow away the mapping */
+		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
+		ttm_bo_unmap_virtual(&old_object->tobj);
+		old_object->surface_reg = -1;
+		i = steal;
+	}
+
+	robj->surface_reg = i;
+	reg->robj = robj;
+
+out:
+	radeon_set_surface_reg(rdev, i, robj->tiling_flags, robj->pitch,
+			       robj->tobj.mem.mm_node->start << PAGE_SHIFT,
+			       robj->tobj.num_pages << PAGE_SHIFT);
+	return 0;
+}
+
+void radeon_object_clear_surface_reg(struct radeon_object *robj)
+{
+	struct radeon_device *rdev = robj->rdev;
+	struct radeon_surface_reg *reg;
+
+	if (robj->surface_reg == -1)
+		return;
+
+	reg = &rdev->surface_regs[robj->surface_reg];
+	radeon_clear_surface_reg(rdev, robj->surface_reg);
+
+	reg->robj = NULL;
+	robj->surface_reg = -1;
+}
+
+void radeon_object_set_tiling_flags(struct radeon_object *robj,
+				    uint32_t tiling_flags, uint32_t pitch)
+{
+	robj->tiling_flags = tiling_flags;
+	robj->pitch = pitch;
+}
+
+void radeon_object_get_tiling_flags(struct radeon_object *robj,
+				    uint32_t *tiling_flags,
+				    uint32_t *pitch)
+{
+	if (tiling_flags)
+		*tiling_flags = robj->tiling_flags;
+	if (pitch)
+		*pitch = robj->pitch;
+}
+
+int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved,
+			       bool force_drop)
+{
+	if (!(robj->tiling_flags & RADEON_TILING_SURFACE))
+		return 0;
+
+	if (force_drop) {
+		radeon_object_clear_surface_reg(robj);
+		return 0;
+	}
+
+	if (robj->tobj.mem.mem_type != TTM_PL_VRAM) {
+		if (!has_moved)
+			return 0;
+
+		if (robj->surface_reg >= 0)
+			radeon_object_clear_surface_reg(robj);
+		return 0;
+	}
+
+	if ((robj->surface_reg >= 0) && !has_moved)
+		return 0;
+
+	return radeon_object_get_surface_reg(robj);
+}
+
+void radeon_bo_move_notify(struct ttm_buffer_object *bo,
+			  struct ttm_mem_reg *mem)
+{
+	struct radeon_object *robj = container_of(bo, struct radeon_object, tobj);
+	radeon_object_check_tiling(robj, 0, 1);
+}
+
+void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+	struct radeon_object *robj = container_of(bo, struct radeon_object, tobj);
+	radeon_object_check_tiling(robj, 0, 0);
+}

+ 1 - 14
drivers/gpu/drm/radeon/radeon_ring.c

@@ -126,32 +126,19 @@ static void radeon_ib_align(struct radeon_device *rdev, struct radeon_ib *ib)
 	}
 }
 
-static void radeon_ib_cpu_flush(struct radeon_device *rdev,
-				struct radeon_ib *ib)
-{
-	unsigned long tmp;
-	unsigned i;
-
-	/* To force CPU cache flush ugly but seems reliable */
-	for (i = 0; i < ib->length_dw; i += (rdev->cp.align_mask + 1)) {
-		tmp = readl(&ib->ptr[i]);
-	}
-}
-
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	int r = 0;
 
 	mutex_lock(&rdev->ib_pool.mutex);
 	radeon_ib_align(rdev, ib);
-	radeon_ib_cpu_flush(rdev, ib);
 	if (!ib->length_dw || !rdev->cp.ready) {
 		/* TODO: Nothings in the ib we should report. */
 		mutex_unlock(&rdev->ib_pool.mutex);
 		DRM_ERROR("radeon: couldn't schedule IB(%lu).\n", ib->idx);
 		return -EINVAL;
 	}
-	/* 64 dwords should be enought for fence too */
+	/* 64 dwords should be enough for fence too */
 	r = radeon_ring_lock(rdev, 64);
 	if (r) {
 		DRM_ERROR("radeon: scheduling IB failled (%d).\n", r);

+ 39 - 0
drivers/gpu/drm/radeon/radeon_share.h

@@ -0,0 +1,39 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __RADEON_SHARE_H__
+#define __RADEON_SHARE_H__
+
+void r100_vram_init_sizes(struct radeon_device *rdev);
+
+void rs690_line_buffer_adjust(struct radeon_device *rdev,
+			      struct drm_display_mode *mode1,
+			      struct drm_display_mode *mode2);
+
+void rv515_bandwidth_avivo_update(struct radeon_device *rdev);
+
+#endif

+ 209 - 0
drivers/gpu/drm/radeon/radeon_test.c

@@ -0,0 +1,209 @@
+/*
+ * Copyright 2009 VMware, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Michel Dänzer
+ */
+#include <drm/drmP.h>
+#include <drm/radeon_drm.h>
+#include "radeon_reg.h"
+#include "radeon.h"
+
+
+/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
+void radeon_test_moves(struct radeon_device *rdev)
+{
+	struct radeon_object *vram_obj = NULL;
+	struct radeon_object **gtt_obj = NULL;
+	struct radeon_fence *fence = NULL;
+	uint64_t gtt_addr, vram_addr;
+	unsigned i, n, size;
+	int r;
+
+	size = 1024 * 1024;
+
+	/* Number of tests =
+	 * (Total GTT - IB pool - writeback page - ring buffer) / test size
+	 */
+	n = (rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - 4096 -
+	     rdev->cp.ring_size) / size;
+
+	gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
+	if (!gtt_obj) {
+		DRM_ERROR("Failed to allocate %d pointers\n", n);
+		r = 1;
+		goto out_cleanup;
+	}
+
+	r = radeon_object_create(rdev, NULL, size, true, RADEON_GEM_DOMAIN_VRAM,
+				 false, &vram_obj);
+	if (r) {
+		DRM_ERROR("Failed to create VRAM object\n");
+		goto out_cleanup;
+	}
+
+	r = radeon_object_pin(vram_obj, RADEON_GEM_DOMAIN_VRAM, &vram_addr);
+	if (r) {
+		DRM_ERROR("Failed to pin VRAM object\n");
+		goto out_cleanup;
+	}
+
+	for (i = 0; i < n; i++) {
+		void *gtt_map, *vram_map;
+		void **gtt_start, **gtt_end;
+		void **vram_start, **vram_end;
+
+		r = radeon_object_create(rdev, NULL, size, true,
+					 RADEON_GEM_DOMAIN_GTT, false, gtt_obj + i);
+		if (r) {
+			DRM_ERROR("Failed to create GTT object %d\n", i);
+			goto out_cleanup;
+		}
+
+		r = radeon_object_pin(gtt_obj[i], RADEON_GEM_DOMAIN_GTT, &gtt_addr);
+		if (r) {
+			DRM_ERROR("Failed to pin GTT object %d\n", i);
+			goto out_cleanup;
+		}
+
+		r = radeon_object_kmap(gtt_obj[i], &gtt_map);
+		if (r) {
+			DRM_ERROR("Failed to map GTT object %d\n", i);
+			goto out_cleanup;
+		}
+
+		for (gtt_start = gtt_map, gtt_end = gtt_map + size;
+		     gtt_start < gtt_end;
+		     gtt_start++)
+			*gtt_start = gtt_start;
+
+		radeon_object_kunmap(gtt_obj[i]);
+
+		r = radeon_fence_create(rdev, &fence);
+		if (r) {
+			DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i);
+			goto out_cleanup;
+		}
+
+		r = radeon_copy(rdev, gtt_addr, vram_addr, size / 4096, fence);
+		if (r) {
+			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
+			goto out_cleanup;
+		}
+
+		r = radeon_fence_wait(fence, false);
+		if (r) {
+			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
+			goto out_cleanup;
+		}
+
+		radeon_fence_unref(&fence);
+
+		r = radeon_object_kmap(vram_obj, &vram_map);
+		if (r) {
+			DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
+			goto out_cleanup;
+		}
+
+		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
+		     vram_start = vram_map, vram_end = vram_map + size;
+		     vram_start < vram_end;
+		     gtt_start++, vram_start++) {
+			if (*vram_start != gtt_start) {
+				DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
+					  "expected 0x%p (GTT map 0x%p-0x%p)\n",
+					  i, *vram_start, gtt_start, gtt_map,
+					  gtt_end);
+				radeon_object_kunmap(vram_obj);
+				goto out_cleanup;
+			}
+			*vram_start = vram_start;
+		}
+
+		radeon_object_kunmap(vram_obj);
+
+		r = radeon_fence_create(rdev, &fence);
+		if (r) {
+			DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i);
+			goto out_cleanup;
+		}
+
+		r = radeon_copy(rdev, vram_addr, gtt_addr, size / 4096, fence);
+		if (r) {
+			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
+			goto out_cleanup;
+		}
+
+		r = radeon_fence_wait(fence, false);
+		if (r) {
+			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
+			goto out_cleanup;
+		}
+
+		radeon_fence_unref(&fence);
+
+		r = radeon_object_kmap(gtt_obj[i], &gtt_map);
+		if (r) {
+			DRM_ERROR("Failed to map GTT object after copy %d\n", i);
+			goto out_cleanup;
+		}
+
+		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
+		     vram_start = vram_map, vram_end = vram_map + size;
+		     gtt_start < gtt_end;
+		     gtt_start++, vram_start++) {
+			if (*gtt_start != vram_start) {
+				DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
+					  "expected 0x%p (VRAM map 0x%p-0x%p)\n",
+					  i, *gtt_start, vram_start, vram_map,
+					  vram_end);
+				radeon_object_kunmap(gtt_obj[i]);
+				goto out_cleanup;
+			}
+		}
+
+		radeon_object_kunmap(gtt_obj[i]);
+
+		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
+			 gtt_addr - rdev->mc.gtt_location);
+	}
+
+out_cleanup:
+	if (vram_obj) {
+		radeon_object_unpin(vram_obj);
+		radeon_object_unref(&vram_obj);
+	}
+	if (gtt_obj) {
+		for (i = 0; i < n; i++) {
+			if (gtt_obj[i]) {
+				radeon_object_unpin(gtt_obj[i]);
+				radeon_object_unref(&gtt_obj[i]);
+			}
+		}
+		kfree(gtt_obj);
+	}
+	if (fence) {
+		radeon_fence_unref(&fence);
+	}
+	if (r) {
+		printk(KERN_WARNING "Error while testing BO move.\n");
+	}
+}
+

+ 15 - 9
drivers/gpu/drm/radeon/radeon_ttm.c

@@ -355,23 +355,26 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 	if (!rdev->cp.ready) {
 		/* use memcpy */
 		DRM_ERROR("CP is not ready use memcpy.\n");
-		return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+		goto memcpy;
 	}
 
 	if (old_mem->mem_type == TTM_PL_VRAM &&
 	    new_mem->mem_type == TTM_PL_SYSTEM) {
-		return radeon_move_vram_ram(bo, evict, interruptible,
+		r = radeon_move_vram_ram(bo, evict, interruptible,
 					    no_wait, new_mem);
 	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
 		   new_mem->mem_type == TTM_PL_VRAM) {
-		return radeon_move_ram_vram(bo, evict, interruptible,
+		r = radeon_move_ram_vram(bo, evict, interruptible,
 					    no_wait, new_mem);
 	} else {
 		r = radeon_move_blit(bo, evict, no_wait, new_mem, old_mem);
-		if (unlikely(r)) {
-			return r;
-		}
 	}
+
+	if (r) {
+memcpy:
+		r = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+	}
+
 	return r;
 }
 
@@ -429,6 +432,8 @@ static struct ttm_bo_driver radeon_bo_driver = {
 	.sync_obj_flush = &radeon_sync_obj_flush,
 	.sync_obj_unref = &radeon_sync_obj_unref,
 	.sync_obj_ref = &radeon_sync_obj_ref,
+	.move_notify = &radeon_bo_move_notify,
+	.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
 };
 
 int radeon_ttm_init(struct radeon_device *rdev)
@@ -442,13 +447,14 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	/* No others user of address space so set it to 0 */
 	r = ttm_bo_device_init(&rdev->mman.bdev,
 			       rdev->mman.mem_global_ref.object,
-			       &radeon_bo_driver, DRM_FILE_PAGE_OFFSET);
+			       &radeon_bo_driver, DRM_FILE_PAGE_OFFSET,
+			       rdev->need_dma32);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
 	}
 	r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_VRAM, 0,
-			   ((rdev->mc.aper_size) >> PAGE_SHIFT));
+			   ((rdev->mc.real_vram_size) >> PAGE_SHIFT));
 	if (r) {
 		DRM_ERROR("Failed initializing VRAM heap.\n");
 		return r;
@@ -465,7 +471,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
 		return r;
 	}
 	DRM_INFO("radeon: %uM of VRAM memory ready\n",
-		 rdev->mc.vram_size / (1024 * 1024));
+		 rdev->mc.real_vram_size / (1024 * 1024));
 	r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0,
 			   ((rdev->mc.gtt_size) >> PAGE_SHIFT));
 	if (r) {

+ 15 - 15
drivers/gpu/drm/radeon/rs400.c

@@ -29,6 +29,7 @@
 #include <drm/drmP.h>
 #include "radeon_reg.h"
 #include "radeon.h"
+#include "radeon_share.h"
 
 /* rs400,rs480 depends on : */
 void r100_hdp_reset(struct radeon_device *rdev);
@@ -164,7 +165,9 @@ int rs400_gart_enable(struct radeon_device *rdev)
 		WREG32(RADEON_BUS_CNTL, tmp);
 	}
 	/* Table should be in 32bits address space so ignore bits above. */
-	tmp = rdev->gart.table_addr & 0xfffff000;
+	tmp = (u32)rdev->gart.table_addr & 0xfffff000;
+	tmp |= (upper_32_bits(rdev->gart.table_addr) & 0xff) << 4;
+
 	WREG32_MC(RS480_GART_BASE, tmp);
 	/* TODO: more tweaking here */
 	WREG32_MC(RS480_GART_FEATURE_ID,
@@ -201,10 +204,17 @@ void rs400_gart_disable(struct radeon_device *rdev)
 
 int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
 {
+	uint32_t entry;
+
 	if (i < 0 || i > rdev->gart.num_gpu_pages) {
 		return -EINVAL;
 	}
-	rdev->gart.table.ram.ptr[i] = cpu_to_le32(((uint32_t)addr) | 0xC);
+
+	entry = (lower_32_bits(addr) & PAGE_MASK) |
+		((upper_32_bits(addr) & 0xff) << 4) |
+		0xc;
+	entry = cpu_to_le32(entry);
+	rdev->gart.table.ram.ptr[i] = entry;
 	return 0;
 }
 
@@ -223,10 +233,9 @@ int rs400_mc_init(struct radeon_device *rdev)
 
 	rs400_gpu_init(rdev);
 	rs400_gart_disable(rdev);
-	rdev->mc.gtt_location = rdev->mc.vram_size;
+	rdev->mc.gtt_location = rdev->mc.mc_vram_size;
 	rdev->mc.gtt_location += (rdev->mc.gtt_size - 1);
 	rdev->mc.gtt_location &= ~(rdev->mc.gtt_size - 1);
-	rdev->mc.vram_location = 0xFFFFFFFFUL;
 	r = radeon_mc_setup(rdev);
 	if (r) {
 		return r;
@@ -238,7 +247,7 @@ int rs400_mc_init(struct radeon_device *rdev)
 		       "programming pipes. Bad things might happen.\n");
 	}
 
-	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
 	tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
 	tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
 	WREG32(RADEON_MC_FB_LOCATION, tmp);
@@ -284,21 +293,12 @@ void rs400_gpu_init(struct radeon_device *rdev)
  */
 void rs400_vram_info(struct radeon_device *rdev)
 {
-	uint32_t tom;
-
 	rs400_gart_adjust_size(rdev);
 	/* DDR for all card after R300 & IGP */
 	rdev->mc.vram_is_ddr = true;
 	rdev->mc.vram_width = 128;
 
-	/* read NB_TOM to get the amount of ram stolen for the GPU */
-	tom = RREG32(RADEON_NB_TOM);
-	rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
-	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
-
-	/* Could aper size report 0 ? */
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	r100_vram_init_sizes(rdev);
 }
 
 

+ 6 - 1
drivers/gpu/drm/radeon/rs600.c

@@ -223,7 +223,7 @@ int rs600_mc_init(struct radeon_device *rdev)
 		printk(KERN_WARNING "Failed to wait MC idle while "
 		       "programming pipes. Bad things might happen.\n");
 	}
-	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
 	tmp = REG_SET(RS600_MC_FB_TOP, tmp >> 16);
 	tmp |= REG_SET(RS600_MC_FB_START, rdev->mc.vram_location >> 16);
 	WREG32_MC(RS600_MC_FB_LOCATION, tmp);
@@ -301,6 +301,11 @@ void rs600_vram_info(struct radeon_device *rdev)
 	rdev->mc.vram_width = 128;
 }
 
+void rs600_bandwidth_update(struct radeon_device *rdev)
+{
+	/* FIXME: implement; for now this is a no-op that never touches rdev.
+	 * Open question: should this be like rs690 ?
+	 */
+}
+
 
 /*
  * Indirect registers accessor

+ 476 - 3
drivers/gpu/drm/radeon/rs690.c

@@ -28,6 +28,9 @@
 #include "drmP.h"
 #include "radeon_reg.h"
 #include "radeon.h"
+#include "rs690r.h"
+#include "atom.h"
+#include "atom-bits.h"
 
 /* rs690,rs740 depends on : */
 void r100_hdp_reset(struct radeon_device *rdev);
@@ -64,7 +67,7 @@ int rs690_mc_init(struct radeon_device *rdev)
 	rs400_gart_disable(rdev);
 
 	/* Setup GPU memory space */
-	rdev->mc.gtt_location = rdev->mc.vram_size;
+	rdev->mc.gtt_location = rdev->mc.mc_vram_size;
 	rdev->mc.gtt_location += (rdev->mc.gtt_size - 1);
 	rdev->mc.gtt_location &= ~(rdev->mc.gtt_size - 1);
 	rdev->mc.vram_location = 0xFFFFFFFFUL;
@@ -79,7 +82,7 @@ int rs690_mc_init(struct radeon_device *rdev)
 		printk(KERN_WARNING "Failed to wait MC idle while "
 		       "programming pipes. Bad things might happen.\n");
 	}
-	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
 	tmp = REG_SET(RS690_MC_FB_TOP, tmp >> 16);
 	tmp |= REG_SET(RS690_MC_FB_START, rdev->mc.vram_location >> 16);
 	WREG32_MC(RS690_MCCFG_FB_LOCATION, tmp);
@@ -138,9 +141,82 @@ void rs690_gpu_init(struct radeon_device *rdev)
 /*
  * VRAM info.
  */
+/*
+ * rs690_pm_info - gather IGP clocks from the BIOS and derive bandwidths.
+ *
+ * Parses the header of the IntegratedSystemInfo ATOM data table and, based
+ * on its content revision (crev), fills rdev->pm with the side port memory
+ * clock, the system memory clock, the HT link clock and the HT link width.
+ * Any other revision falls back to hard-coded worst case values and logs
+ * an error.  The K8, HT and side port bandwidths are then computed from
+ * those values.  Everything is stored as fixed20_12 fixed point.
+ */
+void rs690_pm_info(struct radeon_device *rdev)
+{
+	int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+	struct _ATOM_INTEGRATED_SYSTEM_INFO *info;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 *info_v2;
+	void *ptr;
+	uint16_t data_offset;
+	uint8_t frev, crev;
+	fixed20_12 tmp;
+
+	atom_parse_data_header(rdev->mode_info.atom_context, index, NULL,
+			       &frev, &crev, &data_offset);
+	/* Both layouts alias the same table; crev selects which one is valid */
+	ptr = rdev->mode_info.atom_context->bios + data_offset;
+	info = (struct _ATOM_INTEGRATED_SYSTEM_INFO *)ptr;
+	info_v2 = (struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 *)ptr;
+	/* Get various system information from the BIOS.
+	 * Multi-byte BIOS fields are little endian, so 32 bit fields go
+	 * through le32_to_cpu() just like 16 bit ones use le16_to_cpu().
+	 * The 32 bit clocks are divided by 100 (presumably 10 kHz units
+	 * turned into MHz - confirm against atombios.h).
+	 */
+	switch (crev) {
+	case 1:
+		tmp.full = rfixed_const(100);
+		rdev->pm.igp_sideport_mclk.full = rfixed_const(le32_to_cpu(info->ulBootUpMemoryClock));
+		rdev->pm.igp_sideport_mclk.full = rfixed_div(rdev->pm.igp_sideport_mclk, tmp);
+		rdev->pm.igp_system_mclk.full = rfixed_const(le16_to_cpu(info->usK8MemoryClock));
+		rdev->pm.igp_ht_link_clk.full = rfixed_const(le16_to_cpu(info->usFSBClock));
+		rdev->pm.igp_ht_link_width.full = rfixed_const(info->ucHTLinkWidth);
+		break;
+	case 2:
+		tmp.full = rfixed_const(100);
+		rdev->pm.igp_sideport_mclk.full = rfixed_const(le32_to_cpu(info_v2->ulBootUpSidePortClock));
+		rdev->pm.igp_sideport_mclk.full = rfixed_div(rdev->pm.igp_sideport_mclk, tmp);
+		rdev->pm.igp_system_mclk.full = rfixed_const(le32_to_cpu(info_v2->ulBootUpUMAClock));
+		rdev->pm.igp_system_mclk.full = rfixed_div(rdev->pm.igp_system_mclk, tmp);
+		rdev->pm.igp_ht_link_clk.full = rfixed_const(le32_to_cpu(info_v2->ulHTLinkFreq));
+		rdev->pm.igp_ht_link_clk.full = rfixed_div(rdev->pm.igp_ht_link_clk, tmp);
+		rdev->pm.igp_ht_link_width.full = rfixed_const(le16_to_cpu(info_v2->usMinHTLinkWidth));
+		break;
+	default:
+		tmp.full = rfixed_const(100);
+		/* We assume the slowest possible clock, i.e. the worst case */
+		/* DDR 333Mhz */
+		rdev->pm.igp_sideport_mclk.full = rfixed_const(333);
+		/* FIXME: system clock ? */
+		rdev->pm.igp_system_mclk.full = rfixed_const(100);
+		rdev->pm.igp_system_mclk.full = rfixed_div(rdev->pm.igp_system_mclk, tmp);
+		rdev->pm.igp_ht_link_clk.full = rfixed_const(200);
+		rdev->pm.igp_ht_link_width.full = rfixed_const(8);
+		DRM_ERROR("No integrated system info for your GPU, using safe default\n");
+		break;
+	}
+	/* Compute the various bandwidths */
+	/* k8_bandwidth = (memory_clk / 2) * 2 * 8 * 0.5 = memory_clk * 4  */
+	tmp.full = rfixed_const(4);
+	rdev->pm.k8_bandwidth.full = rfixed_mul(rdev->pm.igp_system_mclk, tmp);
+	/* ht_bandwidth = ht_clk * 2 * ht_width / 8 * 0.8
+	 *              = ht_clk * ht_width / 5
+	 */
+	tmp.full = rfixed_const(5);
+	rdev->pm.ht_bandwidth.full = rfixed_mul(rdev->pm.igp_ht_link_clk,
+						rdev->pm.igp_ht_link_width);
+	rdev->pm.ht_bandwidth.full = rfixed_div(rdev->pm.ht_bandwidth, tmp);
+	/* Compare the HT bandwidth itself: tmp only holds the divisor (5)
+	 * at this point, so testing tmp would clamp against a constant.
+	 */
+	if (rdev->pm.ht_bandwidth.full < rdev->pm.max_bandwidth.full) {
+		/* HT link is a limiting factor */
+		rdev->pm.max_bandwidth.full = rdev->pm.ht_bandwidth.full;
+	}
+	/* sideport_bandwidth = (sideport_clk / 2) * 2 * 2 * 0.7
+	 *                    = (sideport_clk * 14) / 10
+	 */
+	tmp.full = rfixed_const(14);
+	rdev->pm.sideport_bandwidth.full = rfixed_mul(rdev->pm.igp_sideport_mclk, tmp);
+	tmp.full = rfixed_const(10);
+	rdev->pm.sideport_bandwidth.full = rfixed_div(rdev->pm.sideport_bandwidth, tmp);
+}
+
 void rs690_vram_info(struct radeon_device *rdev)
 {
 	uint32_t tmp;
+	fixed20_12 a;
 
 	rs400_gart_adjust_size(rdev);
 	/* DDR for all card after R300 & IGP */
@@ -152,12 +228,409 @@ void rs690_vram_info(struct radeon_device *rdev)
 	} else {
 		rdev->mc.vram_width = 64;
 	}
-	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
+	rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
+	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
 
 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	rs690_pm_info(rdev);
+	/* FIXME: we should enforce default clock in case GPU is not in
+	 * default setup
+	 */
+	a.full = rfixed_const(100);
+	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
+	a.full = rfixed_const(16);
+	/* core_bandwidth = sclk(Mhz) * 16 */
+	rdev->pm.core_bandwidth.full = rfixed_div(rdev->pm.sclk, a);
+}
+
+/*
+ * rs690_line_buffer_adjust - split the shared line buffer between D1 and D2.
+ * @rdev:  radeon device
+ * @mode1: mode of the first display controller, NULL when it is not in use
+ * @mode2: mode of the second display controller, NULL when it is not in use
+ *
+ * Rewrites the preset field (bits 1:0) of DC_LB_MEMORY_SPLIT and clears the
+ * manual allocation bit.  When both modes are NULL the field is left at 0,
+ * i.e. the "divided in half" preset.
+ */
+void rs690_line_buffer_adjust(struct radeon_device *rdev,
+			      struct drm_display_mode *mode1,
+			      struct drm_display_mode *mode2)
+{
+	u32 tmp;
+
+	/*
+	 * Line Buffer Setup
+	 * There is a single line buffer shared by both display controllers.
+	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
+	 * the display controllers.  The partitioning can either be done
+	 * manually or via one of four preset allocations specified in bits 1:0:
+	 *  0 - line buffer is divided in half and shared between crtc
+	 *  1 - D1 gets 3/4 of the line buffer, D2 gets 1/4
+	 *  2 - D1 gets the whole buffer
+	 *  3 - D1 gets 1/4 of the line buffer, D2 gets 3/4
+	 * Setting bit 2 of DC_LB_MEMORY_SPLIT switches to manual
+	 * allocation mode. In manual allocation mode, D1 always starts at 0,
+	 * D1 end/2 is specified in bits 14:4; D2 allocation follows D1.
+	 */
+	tmp = RREG32(DC_LB_MEMORY_SPLIT) & ~DC_LB_MEMORY_SPLIT_MASK;
+	/* auto: make sure manual allocation mode is off */
+	tmp &= ~DC_LB_MEMORY_SPLIT_SHIFT_MODE;
+	if (mode1 && mode2) {
+		/* Only a strictly wider head beyond 2560 pixels gets the
+		 * 3/4 share; every other dual head setup is split in half.
+		 */
+		if (mode1->hdisplay > mode2->hdisplay &&
+		    mode1->hdisplay > 2560)
+			tmp |= DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q;
+		else if (mode2->hdisplay > mode1->hdisplay &&
+			 mode2->hdisplay > 2560)
+			tmp |= DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
+		else
+			tmp |= DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
+	} else if (mode1) {
+		tmp |= DC_LB_MEMORY_SPLIT_D1_ONLY;
+	} else if (mode2) {
+		/* None of the presets listed above gives D2 the whole
+		 * buffer, 3/4 is the largest share available to it.
+		 */
+		tmp |= DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
+	}
+	WREG32(DC_LB_MEMORY_SPLIT, tmp);
 }
 
+struct rs690_watermark {	/* per CRTC values filled by rs690_crtc_bandwidth_compute() */
+	u32        lb_request_fifo_depth;	/* chunk requests the LB can queue for this display */
+	fixed20_12 num_line_pair;	/* line pairs to request (1=2 lines, 2=4 lines) */
+	fixed20_12 estimated_width;	/* NOTE(review): no writer of this field is visible here */
+	fixed20_12 worst_case_latency;	/* worst case time from urgent until the MC returns data */
+	fixed20_12 consumption_rate;	/* 1 / consumption time */
+	fixed20_12 active_time;	/* time of the active region within one line */
+	fixed20_12 dbpp;	/* bits per pixel, worst case 32 is assumed */
+	fixed20_12 priority_mark_max;	/* crtc_hdisplay / 16 */
+	fixed20_12 priority_mark;	/* mark derived from the estimated width */
+	fixed20_12 sclk;	/* system clock term used for the fill rate in rs690_bandwidth_update() */
+};
+
+/*
+ * rs690_crtc_bandwidth_compute - compute the display watermark of one CRTC.
+ * @rdev: radeon device; rdev->pm.max_bandwidth and rdev->pm.sclk are
+ *        overwritten as a side effect
+ * @crtc: CRTC whose current mode (crtc->base.mode) is analysed
+ * @wm:   watermark to fill in
+ *
+ * For a disabled CRTC only wm->lb_request_fifo_depth is set (to 4) and all
+ * other fields of @wm are left untouched.  For an enabled CRTC the request
+ * fifo depth, consumption rate, active time, worst case latency, priority
+ * marks and the system clock term are computed from the mode timings and
+ * from the bandwidths stored in rdev->pm.  All arithmetic is done on
+ * fixed20_12 fixed point values.
+ */
+void rs690_crtc_bandwidth_compute(struct radeon_device *rdev,
+				  struct radeon_crtc *crtc,
+				  struct rs690_watermark *wm)
+{
+	struct drm_display_mode *mode = &crtc->base.mode;
+	fixed20_12 a, b, c;
+	fixed20_12 pclk, request_fifo_depth, tolerable_latency, estimated_width;
+	fixed20_12 consumption_time, line_time, chunk_time, read_delay_latency;
+	/* FIXME: detect IGP with sideport memory, i don't think there is any
+	 * such product available
+	 */
+	bool sideport = false;
+
+	if (!crtc->base.enabled) {
+		/* FIXME: wouldn't it better to set priority mark to maximum */
+		wm->lb_request_fifo_depth = 4;
+		return;
+	}
+
+	if (crtc->vsc.full > rfixed_const(2))
+		wm->num_line_pair.full = rfixed_const(2);
+	else
+		wm->num_line_pair.full = rfixed_const(1);
+
+	b.full = rfixed_const(mode->crtc_hdisplay);
+	c.full = rfixed_const(256);
+	a.full = rfixed_mul(wm->num_line_pair, b);
+	request_fifo_depth.full = rfixed_div(a, c);
+	/* NOTE(review): this tests the undivided product (a) against 4, not
+	 * request_fifo_depth; a minimum depth of 4 looks like the intent.
+	 * Left unchanged - confirm against hardware documentation.
+	 */
+	if (a.full < rfixed_const(4)) {
+		wm->lb_request_fifo_depth = 4;
+	} else {
+		wm->lb_request_fifo_depth = rfixed_trunc(request_fifo_depth);
+	}
+
+	/* Determine consumption rate
+	 *  pclk = pixel clock period(ns) = 1000 / (mode.clock / 1000)
+	 *  vtaps = number of vertical taps,
+	 *  vsc = vertical scaling ratio, defined as source/destination
+	 *  hsc = horizontal scaling ratio, defined as source/destination
+	 */
+	a.full = rfixed_const(mode->clock);
+	b.full = rfixed_const(1000);
+	a.full = rfixed_div(a, b);
+	pclk.full = rfixed_div(b, a);
+	if (crtc->rmx_type != RMX_OFF) {
+		/* consumption_time = pclk / (max(vsc, 2) * hsc / 2) */
+		b.full = rfixed_const(2);
+		if (crtc->vsc.full > b.full)
+			b.full = crtc->vsc.full;
+		b.full = rfixed_mul(b, crtc->hsc);
+		c.full = rfixed_const(2);
+		b.full = rfixed_div(b, c);
+		consumption_time.full = rfixed_div(pclk, b);
+	} else {
+		consumption_time.full = pclk.full;
+	}
+	a.full = rfixed_const(1);
+	wm->consumption_rate.full = rfixed_div(a, consumption_time);
+
+
+	/* Determine line time
+	 *  LineTime = total time for one line of display
+	 *  htotal = total number of horizontal pixels
+	 *  pclk = pixel clock period(ns)
+	 */
+	a.full = rfixed_const(crtc->base.mode.crtc_htotal);
+	line_time.full = rfixed_mul(a, pclk);
+
+	/* Determine active time
+	 *  ActiveTime = time of active region of display within one line,
+	 *  hactive = total number of horizontal active pixels
+	 *  htotal = total number of horizontal pixels
+	 */
+	a.full = rfixed_const(crtc->base.mode.crtc_htotal);
+	b.full = rfixed_const(crtc->base.mode.crtc_hdisplay);
+	wm->active_time.full = rfixed_mul(line_time, b);
+	wm->active_time.full = rfixed_div(wm->active_time, a);
+
+	/* Maximum bandwidth is the minimum bandwidth of all components
+	 * (components reporting a bandwidth of 0 are ignored)
+	 */
+	rdev->pm.max_bandwidth = rdev->pm.core_bandwidth;
+	if (sideport) {
+		if (rdev->pm.max_bandwidth.full > rdev->pm.sideport_bandwidth.full &&
+			rdev->pm.sideport_bandwidth.full)
+			rdev->pm.max_bandwidth = rdev->pm.sideport_bandwidth;
+		/* NOTE(review): currently unreachable (sideport is false);
+		 * check that this constant fits a fixed20_12 before use.
+		 */
+		read_delay_latency.full = rfixed_const(370 * 800 * 1000);
+		read_delay_latency.full = rfixed_div(read_delay_latency,
+			rdev->pm.igp_sideport_mclk);
+	} else {
+		if (rdev->pm.max_bandwidth.full > rdev->pm.k8_bandwidth.full &&
+			rdev->pm.k8_bandwidth.full)
+			rdev->pm.max_bandwidth = rdev->pm.k8_bandwidth;
+		if (rdev->pm.max_bandwidth.full > rdev->pm.ht_bandwidth.full &&
+			rdev->pm.ht_bandwidth.full)
+			rdev->pm.max_bandwidth = rdev->pm.ht_bandwidth;
+		read_delay_latency.full = rfixed_const(5000);
+	}
+
+	/* sclk = system clocks(ns) = 1000 / max_bandwidth / 16 */
+	a.full = rfixed_const(16);
+	rdev->pm.sclk.full = rfixed_mul(rdev->pm.max_bandwidth, a);
+	a.full = rfixed_const(1000);
+	rdev->pm.sclk.full = rfixed_div(a, rdev->pm.sclk);
+	/* Hand the system clock to the caller: rs690_bandwidth_update()
+	 * reads wm->sclk, which would otherwise hold stack garbage.
+	 */
+	wm->sclk.full = rdev->pm.sclk.full;
+	/* Determine chunk time
+	 * ChunkTime = the time it takes the DCP to send one chunk of data
+	 * to the LB which consists of pipeline delay and inter chunk gap
+	 * sclk = system clock(ns)
+	 */
+	a.full = rfixed_const(256 * 13);
+	chunk_time.full = rfixed_mul(rdev->pm.sclk, a);
+	a.full = rfixed_const(10);
+	chunk_time.full = rfixed_div(chunk_time, a);
+
+	/* Determine the worst case latency
+	 * NumLinePair = Number of line pairs to request(1=2 lines, 2=4 lines)
+	 * WorstCaseLatency = worst case time from urgent to when the MC starts
+	 *                    to return data
+	 * READ_DELAY_IDLE_MAX = constant of 1us
+	 * ChunkTime = time it takes the DCP to send one chunk of data to the LB
+	 *             which consists of pipeline delay and inter chunk gap
+	 */
+	if (rfixed_trunc(wm->num_line_pair) > 1) {
+		a.full = rfixed_const(3);
+		wm->worst_case_latency.full = rfixed_mul(a, chunk_time);
+		wm->worst_case_latency.full += read_delay_latency.full;
+	} else {
+		a.full = rfixed_const(2);
+		wm->worst_case_latency.full = rfixed_mul(a, chunk_time);
+		wm->worst_case_latency.full += read_delay_latency.full;
+	}
+
+	/* Determine the tolerable latency
+	 * TolerableLatency = Any given request has only 1 line time
+	 *                    for the data to be returned
+	 * LBRequestFifoDepth = Number of chunk requests the LB can
+	 *                      put into the request FIFO for a display
+	 *  LineTime = total time for one line of display
+	 *  ChunkTime = the time it takes the DCP to send one chunk
+	 *              of data to the LB which consists of
+	 *  pipeline delay and inter chunk gap
+	 */
+	if ((2+wm->lb_request_fifo_depth) >= rfixed_trunc(request_fifo_depth)) {
+		tolerable_latency.full = line_time.full;
+	} else {
+		tolerable_latency.full = rfixed_const(wm->lb_request_fifo_depth - 2);
+		tolerable_latency.full = request_fifo_depth.full - tolerable_latency.full;
+		tolerable_latency.full = rfixed_mul(tolerable_latency, chunk_time);
+		tolerable_latency.full = line_time.full - tolerable_latency.full;
+	}
+	/* We assume worst case 32bits (4 bytes) */
+	wm->dbpp.full = rfixed_const(4 * 8);
+
+	/* Determine the maximum priority mark
+	 *  width = viewport width in pixels
+	 */
+	a.full = rfixed_const(16);
+	wm->priority_mark_max.full = rfixed_const(crtc->base.mode.crtc_hdisplay);
+	wm->priority_mark_max.full = rfixed_div(wm->priority_mark_max, a);
+
+	/* Determine estimated width */
+	estimated_width.full = tolerable_latency.full - wm->worst_case_latency.full;
+	estimated_width.full = rfixed_div(estimated_width, consumption_time);
+	if (rfixed_trunc(estimated_width) > crtc->base.mode.crtc_hdisplay) {
+		wm->priority_mark.full = rfixed_const(10);
+	} else {
+		a.full = rfixed_const(16);
+		wm->priority_mark.full = rfixed_div(estimated_width, a);
+		wm->priority_mark.full = wm->priority_mark_max.full - wm->priority_mark.full;
+	}
+}
+
+/* Weight of one CRTC in the fill rate divisor: dbpp * num_line_pair when
+ * dbpp is above 64, num_line_pair alone otherwise.
+ */
+static fixed20_12 rs690_fill_weight(struct rs690_watermark *wm)
+{
+	fixed20_12 weight;
+
+	if (rfixed_trunc(wm->dbpp) > 64)
+		weight.full = rfixed_mul(wm->dbpp, wm->num_line_pair);
+	else
+		weight.full = wm->num_line_pair.full;
+	return weight;
+}
+
+/* Priority mark of one CRTC for a given line buffer fill rate, raised to
+ * at least wm->priority_mark and wm->priority_mark_max.
+ */
+static fixed20_12 rs690_priority_mark(struct rs690_watermark *wm,
+				      fixed20_12 fill_rate)
+{
+	fixed20_12 a, b, mark;
+
+	/* (worst_case_latency * consumption_rate) / 16000, plus the deficit
+	 * accumulated over the active time when the display consumes data
+	 * faster than the line buffer is filled.
+	 */
+	a.full = rfixed_mul(wm->worst_case_latency, wm->consumption_rate);
+	if (wm->consumption_rate.full > fill_rate.full) {
+		b.full = wm->consumption_rate.full - fill_rate.full;
+		b.full = rfixed_mul(b, wm->active_time);
+		a.full = a.full + b.full;
+	}
+	b.full = rfixed_const(16 * 1000);
+	mark.full = rfixed_div(a, b);
+
+	if (wm->priority_mark.full > mark.full)
+		mark.full = wm->priority_mark.full;
+	if (rfixed_trunc(mark) < 0)
+		mark.full = 0;
+	if (wm->priority_mark_max.full > mark.full)
+		mark.full = wm->priority_mark_max.full;
+	return mark;
+}
+
+/*
+ * rs690_bandwidth_update - reprogram display bandwidth related registers.
+ * @rdev: radeon device
+ *
+ * Looks at which of the two CRTCs are enabled, optionally raises the
+ * display priority in the memory controller (disp_priority == 2), splits
+ * the line buffer, programs LB_MAX_REQ_OUTSTANDING from the per CRTC
+ * request fifo depths and finally writes the D1/D2 priority counters.
+ * A disabled display gets MODE_PRIORITY_OFF when the other one is enabled;
+ * when no CRTC is enabled the priority counters are left untouched.
+ */
+void rs690_bandwidth_update(struct radeon_device *rdev)
+{
+	struct drm_display_mode *mode0 = NULL;
+	struct drm_display_mode *mode1 = NULL;
+	struct rs690_watermark wm0;
+	struct rs690_watermark wm1;
+	u32 tmp;
+	fixed20_12 priority_mark02, priority_mark12, fill_rate;
+	fixed20_12 a, b;
+
+	if (rdev->mode_info.crtcs[0]->base.enabled)
+		mode0 = &rdev->mode_info.crtcs[0]->base.mode;
+	if (rdev->mode_info.crtcs[1]->base.enabled)
+		mode1 = &rdev->mode_info.crtcs[1]->base.mode;
+	/*
+	 * Set display0/1 priority up in the memory controller for
+	 * modes if the user specifies HIGH for displaypriority
+	 * option.
+	 */
+	if (rdev->disp_priority == 2) {
+		tmp = RREG32_MC(MC_INIT_MISC_LAT_TIMER);
+		tmp &= ~MC_DISP1R_INIT_LAT_MASK;
+		tmp &= ~MC_DISP0R_INIT_LAT_MASK;
+		if (mode1)
+			tmp |= (1 << MC_DISP1R_INIT_LAT_SHIFT);
+		if (mode0)
+			tmp |= (1 << MC_DISP0R_INIT_LAT_SHIFT);
+		WREG32_MC(MC_INIT_MISC_LAT_TIMER, tmp);
+	}
+	rs690_line_buffer_adjust(rdev, mode0, mode1);
+
+	if ((rdev->family == CHIP_RS690) || (rdev->family == CHIP_RS740))
+		WREG32(DCP_CONTROL, 0);
+	if ((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+		WREG32(DCP_CONTROL, 2);
+
+	rs690_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[0], &wm0);
+	rs690_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[1], &wm1);
+
+	/* The register takes depth - 1: D1 in the low bits, D2 from bit 16 */
+	tmp = (wm0.lb_request_fifo_depth - 1);
+	tmp |= (wm1.lb_request_fifo_depth - 1) << 16;
+	WREG32(LB_MAX_REQ_OUTSTANDING, tmp);
+
+	if (mode0 && mode1) {
+		/* Both displays share one fill rate */
+		a = rs690_fill_weight(&wm0);
+		b = rs690_fill_weight(&wm1);
+		a.full += b.full;
+		fill_rate.full = rfixed_div(wm0.sclk, a);
+		priority_mark02 = rs690_priority_mark(&wm0, fill_rate);
+		priority_mark12 = rs690_priority_mark(&wm1, fill_rate);
+		WREG32(D1MODE_PRIORITY_A_CNT, rfixed_trunc(priority_mark02));
+		WREG32(D1MODE_PRIORITY_B_CNT, rfixed_trunc(priority_mark02));
+		WREG32(D2MODE_PRIORITY_A_CNT, rfixed_trunc(priority_mark12));
+		WREG32(D2MODE_PRIORITY_B_CNT, rfixed_trunc(priority_mark12));
+	} else if (mode0) {
+		a = rs690_fill_weight(&wm0);
+		fill_rate.full = rfixed_div(wm0.sclk, a);
+		priority_mark02 = rs690_priority_mark(&wm0, fill_rate);
+		WREG32(D1MODE_PRIORITY_A_CNT, rfixed_trunc(priority_mark02));
+		WREG32(D1MODE_PRIORITY_B_CNT, rfixed_trunc(priority_mark02));
+		WREG32(D2MODE_PRIORITY_A_CNT, MODE_PRIORITY_OFF);
+		WREG32(D2MODE_PRIORITY_B_CNT, MODE_PRIORITY_OFF);
+	} else if (mode1) {
+		/* Explicit test instead of a bare else: for a disabled CRTC
+		 * rs690_crtc_bandwidth_compute() only fills in
+		 * lb_request_fifo_depth, so with no CRTC enabled wm1 must not
+		 * be used (it would even serve as a divisor).
+		 */
+		a = rs690_fill_weight(&wm1);
+		fill_rate.full = rfixed_div(wm1.sclk, a);
+		priority_mark12 = rs690_priority_mark(&wm1, fill_rate);
+		WREG32(D1MODE_PRIORITY_A_CNT, MODE_PRIORITY_OFF);
+		WREG32(D1MODE_PRIORITY_B_CNT, MODE_PRIORITY_OFF);
+		WREG32(D2MODE_PRIORITY_A_CNT, rfixed_trunc(priority_mark12));
+		WREG32(D2MODE_PRIORITY_B_CNT, rfixed_trunc(priority_mark12));
+	}
+}
 
 /*
  * Indirect registers accessor

+ 99 - 0
drivers/gpu/drm/radeon/rs690r.h

@@ -0,0 +1,99 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef RS690R_H
+#define RS690R_H
+
+/* RS690/RS740 registers (offsets used with RREG32()/WREG32() in rs690.c) */
+#define MC_INDEX			0x0078
+#	define MC_INDEX_MASK			0x1FF
+#	define MC_INDEX_WR_EN			(1 << 9)
+#	define MC_INDEX_WR_ACK			0x7F
+#define MC_DATA				0x007C
+#define HDP_FB_LOCATION			0x0134
+#define DC_LB_MEMORY_SPLIT		0x6520	/* line buffer sharing between D1 and D2 */
+#define		DC_LB_MEMORY_SPLIT_MASK			0x00000003	/* preset field, bits 1:0 */
+#define		DC_LB_MEMORY_SPLIT_SHIFT		0
+#define		DC_LB_MEMORY_SPLIT_D1HALF_D2HALF	0
+#define		DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q		1
+#define		DC_LB_MEMORY_SPLIT_D1_ONLY		2
+#define		DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q		3
+#define		DC_LB_MEMORY_SPLIT_SHIFT_MODE		(1 << 2)	/* set = manual allocation */
+#define		DC_LB_DISP1_END_ADR_SHIFT		4
+#define		DC_LB_DISP1_END_ADR_MASK		0x00007FF0
+#define D1MODE_PRIORITY_A_CNT		0x6548
+#define		MODE_PRIORITY_MARK_MASK			0x00007FFF
+#define		MODE_PRIORITY_OFF			(1 << 16)	/* written for a display that is not enabled */
+#define		MODE_PRIORITY_ALWAYS_ON			(1 << 20)
+#define		MODE_PRIORITY_FORCE_MASK		(1 << 24)
+#define D1MODE_PRIORITY_B_CNT		0x654C
+#define LB_MAX_REQ_OUTSTANDING		0x6D58	/* programmed with request fifo depth - 1 per display */
+#define		LB_D1_MAX_REQ_OUTSTANDING_MASK		0x0000000F
+#define		LB_D1_MAX_REQ_OUTSTANDING_SHIFT		0
+#define		LB_D2_MAX_REQ_OUTSTANDING_MASK		0x000F0000
+#define		LB_D2_MAX_REQ_OUTSTANDING_SHIFT		16
+#define DCP_CONTROL			0x6C9C
+#define D2MODE_PRIORITY_A_CNT		0x6D48
+#define D2MODE_PRIORITY_B_CNT		0x6D4C
+
+/* MC indirect registers (e.g. MC_INIT_MISC_LAT_TIMER goes through RREG32_MC()/WREG32_MC()) */
+#define MC_STATUS_IDLE				(1 << 0)	/* presumably a bit of MC_STATUS below - confirm */
+#define MC_MISC_CNTL			0x18
+#define		DISABLE_GTW			(1 << 1)
+#define		GART_INDEX_REG_EN		(1 << 12)
+#define		BLOCK_GFX_D3_EN			(1 << 14)
+#define GART_FEATURE_ID			0x2B
+#define		HANG_EN				(1 << 11)
+#define		TLB_ENABLE			(1 << 18)
+#define		P2P_ENABLE			(1 << 19)
+#define		GTW_LAC_EN			(1 << 25)
+#define		LEVEL2_GART			(0 << 30)
+#define		LEVEL1_GART			(1 << 30)
+#define		PDC_EN				(1 << 31)
+#define GART_BASE			0x2C
+#define GART_CACHE_CNTRL		0x2E
+#	define GART_CACHE_INVALIDATE		(1 << 0)
+#define MC_STATUS			0x90
+#define MCCFG_FB_LOCATION		0x100
+#define		MC_FB_START_MASK		0x0000FFFF
+#define		MC_FB_START_SHIFT		0
+#define		MC_FB_TOP_MASK			0xFFFF0000
+#define		MC_FB_TOP_SHIFT			16
+#define MCCFG_AGP_LOCATION		0x101
+#define		MC_AGP_START_MASK		0x0000FFFF
+#define		MC_AGP_START_SHIFT		0
+#define		MC_AGP_TOP_MASK			0xFFFF0000
+#define		MC_AGP_TOP_SHIFT		16
+#define MCCFG_AGP_BASE			0x102
+#define MCCFG_AGP_BASE_2		0x103
+#define MC_INIT_MISC_LAT_TIMER		0x104	/* display read latency fields, raised for high display priority */
+#define		MC_DISP0R_INIT_LAT_SHIFT	8
+#define		MC_DISP0R_INIT_LAT_MASK		0x00000F00
+#define		MC_DISP1R_INIT_LAT_SHIFT	12
+#define		MC_DISP1R_INIT_LAT_MASK		0x0000F000
+
+#endif

+ 668 - 130
drivers/gpu/drm/radeon/rv515.c

@@ -27,8 +27,9 @@
  */
 #include <linux/seq_file.h>
 #include "drmP.h"
-#include "radeon_reg.h"
+#include "rv515r.h"
 #include "radeon.h"
+#include "radeon_share.h"
 
 /* rv515 depends on : */
 void r100_hdp_reset(struct radeon_device *rdev);
@@ -99,26 +100,26 @@ int rv515_mc_init(struct radeon_device *rdev)
 		       "programming pipes. Bad things might happen.\n");
 	}
 	/* Write VRAM size in case we are limiting it */
-	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
-	tmp = REG_SET(RV515_MC_FB_START, rdev->mc.vram_location >> 16);
+	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
+	tmp = REG_SET(MC_FB_START, rdev->mc.vram_location >> 16);
 	WREG32(0x134, tmp);
-	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
-	tmp = REG_SET(RV515_MC_FB_TOP, tmp >> 16);
-	tmp |= REG_SET(RV515_MC_FB_START, rdev->mc.vram_location >> 16);
-	WREG32_MC(RV515_MC_FB_LOCATION, tmp);
-	WREG32(RS690_HDP_FB_LOCATION, rdev->mc.vram_location >> 16);
+	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
+	tmp = REG_SET(MC_FB_TOP, tmp >> 16);
+	tmp |= REG_SET(MC_FB_START, rdev->mc.vram_location >> 16);
+	WREG32_MC(MC_FB_LOCATION, tmp);
+	WREG32(HDP_FB_LOCATION, rdev->mc.vram_location >> 16);
 	WREG32(0x310, rdev->mc.vram_location);
 	if (rdev->flags & RADEON_IS_AGP) {
 		tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
-		tmp = REG_SET(RV515_MC_AGP_TOP, tmp >> 16);
-		tmp |= REG_SET(RV515_MC_AGP_START, rdev->mc.gtt_location >> 16);
-		WREG32_MC(RV515_MC_AGP_LOCATION, tmp);
-		WREG32_MC(RV515_MC_AGP_BASE, rdev->mc.agp_base);
-		WREG32_MC(RV515_MC_AGP_BASE_2, 0);
+		tmp = REG_SET(MC_AGP_TOP, tmp >> 16);
+		tmp |= REG_SET(MC_AGP_START, rdev->mc.gtt_location >> 16);
+		WREG32_MC(MC_AGP_LOCATION, tmp);
+		WREG32_MC(MC_AGP_BASE, rdev->mc.agp_base);
+		WREG32_MC(MC_AGP_BASE_2, 0);
 	} else {
-		WREG32_MC(RV515_MC_AGP_LOCATION, 0x0FFFFFFF);
-		WREG32_MC(RV515_MC_AGP_BASE, 0);
-		WREG32_MC(RV515_MC_AGP_BASE_2, 0);
+		WREG32_MC(MC_AGP_LOCATION, 0x0FFFFFFF);
+		WREG32_MC(MC_AGP_BASE, 0);
+		WREG32_MC(MC_AGP_BASE_2, 0);
 	}
 	return 0;
 }
@@ -136,95 +137,67 @@ void rv515_mc_fini(struct radeon_device *rdev)
  */
 void rv515_ring_start(struct radeon_device *rdev)
 {
-	unsigned gb_tile_config;
 	int r;
 
-	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
-	gb_tile_config = R300_ENABLE_TILING | R300_TILE_SIZE_16;
-	switch (rdev->num_gb_pipes) {
-	case 2:
-		gb_tile_config |= R300_PIPE_COUNT_R300;
-		break;
-	case 3:
-		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
-		break;
-	case 4:
-		gb_tile_config |= R300_PIPE_COUNT_R420;
-		break;
-	case 1:
-	default:
-		gb_tile_config |= R300_PIPE_COUNT_RV350;
-		break;
-	}
-
 	r = radeon_ring_lock(rdev, 64);
 	if (r) {
 		return;
 	}
-	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
-	radeon_ring_write(rdev,
-			  RADEON_ISYNC_ANY2D_IDLE3D |
-			  RADEON_ISYNC_ANY3D_IDLE2D |
-			  RADEON_ISYNC_WAIT_IDLEGUI |
-			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
-	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
-	radeon_ring_write(rdev, gb_tile_config);
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, PACKET0(ISYNC_CNTL, 0));
 	radeon_ring_write(rdev,
-			  RADEON_WAIT_2D_IDLECLEAN |
-			  RADEON_WAIT_3D_IDLECLEAN);
+			  ISYNC_ANY2D_IDLE3D |
+			  ISYNC_ANY3D_IDLE2D |
+			  ISYNC_WAIT_IDLEGUI |
+			  ISYNC_CPSCRATCH_IDLEGUI);
+	radeon_ring_write(rdev, PACKET0(WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN);
 	radeon_ring_write(rdev, PACKET0(0x170C, 0));
 	radeon_ring_write(rdev, 1 << 31);
-	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
+	radeon_ring_write(rdev, PACKET0(GB_SELECT, 0));
 	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
+	radeon_ring_write(rdev, PACKET0(GB_ENABLE, 0));
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, PACKET0(0x42C8, 0));
 	radeon_ring_write(rdev, (1 << rdev->num_gb_pipes) - 1);
-	radeon_ring_write(rdev, PACKET0(R500_VAP_INDEX_OFFSET, 0));
+	radeon_ring_write(rdev, PACKET0(VAP_INDEX_OFFSET, 0));
 	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev,
-			  RADEON_WAIT_2D_IDLECLEAN |
-			  RADEON_WAIT_3D_IDLECLEAN);
-	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
+	radeon_ring_write(rdev, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, RB3D_DC_FLUSH | RB3D_DC_FREE);
+	radeon_ring_write(rdev, PACKET0(ZB_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, ZC_FLUSH | ZC_FREE);
+	radeon_ring_write(rdev, PACKET0(WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, WAIT_2D_IDLECLEAN | WAIT_3D_IDLECLEAN);
+	radeon_ring_write(rdev, PACKET0(GB_AA_CONFIG, 0));
 	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
-	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
-	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
-	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
-	radeon_ring_write(rdev,
-			  ((6 << R300_MS_X0_SHIFT) |
-			   (6 << R300_MS_Y0_SHIFT) |
-			   (6 << R300_MS_X1_SHIFT) |
-			   (6 << R300_MS_Y1_SHIFT) |
-			   (6 << R300_MS_X2_SHIFT) |
-			   (6 << R300_MS_Y2_SHIFT) |
-			   (6 << R300_MSBD0_Y_SHIFT) |
-			   (6 << R300_MSBD0_X_SHIFT)));
-	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
+	radeon_ring_write(rdev, PACKET0(RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, RB3D_DC_FLUSH | RB3D_DC_FREE);
+	radeon_ring_write(rdev, PACKET0(ZB_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, ZC_FLUSH | ZC_FREE);
+	radeon_ring_write(rdev, PACKET0(GB_MSPOS0, 0));
 	radeon_ring_write(rdev,
-			  ((6 << R300_MS_X3_SHIFT) |
-			   (6 << R300_MS_Y3_SHIFT) |
-			   (6 << R300_MS_X4_SHIFT) |
-			   (6 << R300_MS_Y4_SHIFT) |
-			   (6 << R300_MS_X5_SHIFT) |
-			   (6 << R300_MS_Y5_SHIFT) |
-			   (6 << R300_MSBD1_SHIFT)));
-	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
-	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
-	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
+			  ((6 << MS_X0_SHIFT) |
+			   (6 << MS_Y0_SHIFT) |
+			   (6 << MS_X1_SHIFT) |
+			   (6 << MS_Y1_SHIFT) |
+			   (6 << MS_X2_SHIFT) |
+			   (6 << MS_Y2_SHIFT) |
+			   (6 << MSBD0_Y_SHIFT) |
+			   (6 << MSBD0_X_SHIFT)));
+	radeon_ring_write(rdev, PACKET0(GB_MSPOS1, 0));
 	radeon_ring_write(rdev,
-			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
-	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
-	radeon_ring_write(rdev,
-			  R300_GEOMETRY_ROUND_NEAREST |
-			  R300_COLOR_ROUND_NEAREST);
+			  ((6 << MS_X3_SHIFT) |
+			   (6 << MS_Y3_SHIFT) |
+			   (6 << MS_X4_SHIFT) |
+			   (6 << MS_Y4_SHIFT) |
+			   (6 << MS_X5_SHIFT) |
+			   (6 << MS_Y5_SHIFT) |
+			   (6 << MSBD1_SHIFT)));
+	radeon_ring_write(rdev, PACKET0(GA_ENHANCE, 0));
+	radeon_ring_write(rdev, GA_DEADLOCK_CNTL | GA_FASTSYNC_CNTL);
+	radeon_ring_write(rdev, PACKET0(GA_POLY_MODE, 0));
+	radeon_ring_write(rdev, FRONT_PTYPE_TRIANGE | BACK_PTYPE_TRIANGE);
+	radeon_ring_write(rdev, PACKET0(GA_ROUND_MODE, 0));
+	radeon_ring_write(rdev, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST);
 	radeon_ring_write(rdev, PACKET0(0x20C8, 0));
 	radeon_ring_write(rdev, 0);
 	radeon_ring_unlock_commit(rdev);
@@ -242,8 +215,8 @@ int rv515_mc_wait_for_idle(struct radeon_device *rdev)
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		/* read MC_STATUS */
-		tmp = RREG32_MC(RV515_MC_STATUS);
-		if (tmp & RV515_MC_STATUS_IDLE) {
+		tmp = RREG32_MC(MC_STATUS);
+		if (tmp & MC_STATUS_IDLE) {
 			return 0;
 		}
 		DRM_UDELAY(1);
@@ -291,33 +264,33 @@ int rv515_ga_reset(struct radeon_device *rdev)
 	reinit_cp = rdev->cp.ready;
 	rdev->cp.ready = false;
 	for (i = 0; i < rdev->usec_timeout; i++) {
-		WREG32(RADEON_CP_CSQ_MODE, 0);
-		WREG32(RADEON_CP_CSQ_CNTL, 0);
-		WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
-		(void)RREG32(RADEON_RBBM_SOFT_RESET);
+		WREG32(CP_CSQ_MODE, 0);
+		WREG32(CP_CSQ_CNTL, 0);
+		WREG32(RBBM_SOFT_RESET, 0x32005);
+		(void)RREG32(RBBM_SOFT_RESET);
 		udelay(200);
-		WREG32(RADEON_RBBM_SOFT_RESET, 0);
+		WREG32(RBBM_SOFT_RESET, 0);
 		/* Wait to prevent race in RBBM_STATUS */
 		mdelay(1);
-		tmp = RREG32(RADEON_RBBM_STATUS);
+		tmp = RREG32(RBBM_STATUS);
 		if (tmp & ((1 << 20) | (1 << 26))) {
 			DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)\n", tmp);
 			/* GA still busy soft reset it */
 			WREG32(0x429C, 0x200);
-			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+			WREG32(VAP_PVS_STATE_FLUSH_REG, 0);
 			WREG32(0x43E0, 0);
 			WREG32(0x43E4, 0);
 			WREG32(0x24AC, 0);
 		}
 		/* Wait to prevent race in RBBM_STATUS */
 		mdelay(1);
-		tmp = RREG32(RADEON_RBBM_STATUS);
+		tmp = RREG32(RBBM_STATUS);
 		if (!(tmp & ((1 << 20) | (1 << 26)))) {
 			break;
 		}
 	}
 	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = RREG32(RADEON_RBBM_STATUS);
+		tmp = RREG32(RBBM_STATUS);
 		if (!(tmp & ((1 << 20) | (1 << 26)))) {
 			DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
 				 tmp);
@@ -331,7 +304,7 @@ int rv515_ga_reset(struct radeon_device *rdev)
 		}
 		DRM_UDELAY(1);
 	}
-	tmp = RREG32(RADEON_RBBM_STATUS);
+	tmp = RREG32(RBBM_STATUS);
 	DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
 	return -1;
 }
@@ -341,7 +314,7 @@ int rv515_gpu_reset(struct radeon_device *rdev)
 	uint32_t status;
 
 	/* reset order likely matter */
-	status = RREG32(RADEON_RBBM_STATUS);
+	status = RREG32(RBBM_STATUS);
 	/* reset HDP */
 	r100_hdp_reset(rdev);
 	/* reset rb2d */
@@ -353,12 +326,12 @@ int rv515_gpu_reset(struct radeon_device *rdev)
 		rv515_ga_reset(rdev);
 	}
 	/* reset CP */
-	status = RREG32(RADEON_RBBM_STATUS);
+	status = RREG32(RBBM_STATUS);
 	if (status & (1 << 16)) {
 		r100_cp_reset(rdev);
 	}
 	/* Check if GPU is idle */
-	status = RREG32(RADEON_RBBM_STATUS);
+	status = RREG32(RBBM_STATUS);
 	if (status & (1 << 31)) {
 		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
 		return -1;
@@ -377,8 +350,7 @@ static void rv515_vram_get_type(struct radeon_device *rdev)
 
 	rdev->mc.vram_width = 128;
 	rdev->mc.vram_is_ddr = true;
-	tmp = RREG32_MC(RV515_MC_CNTL);
-	tmp &= RV515_MEM_NUM_CHANNELS_MASK;
+	tmp = RREG32_MC(RV515_MC_CNTL) & MEM_NUM_CHANNELS_MASK;
 	switch (tmp) {
 	case 0:
 		rdev->mc.vram_width = 64;
@@ -394,11 +366,16 @@ static void rv515_vram_get_type(struct radeon_device *rdev)
 
 void rv515_vram_info(struct radeon_device *rdev)
 {
+	fixed20_12 a;
+
 	rv515_vram_get_type(rdev);
-	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
 
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	/* FIXME: we should enforce default clock in case GPU is not in
+	 * default setup
+	 */
+	a.full = rfixed_const(100);
+	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
 }
 
 
@@ -409,35 +386,35 @@ uint32_t rv515_mc_rreg(struct radeon_device *rdev, uint32_t reg)
 {
 	uint32_t r;
 
-	WREG32(R520_MC_IND_INDEX, 0x7f0000 | (reg & 0xffff));
-	r = RREG32(R520_MC_IND_DATA);
-	WREG32(R520_MC_IND_INDEX, 0);
+	WREG32(MC_IND_INDEX, 0x7f0000 | (reg & 0xffff));
+	r = RREG32(MC_IND_DATA);
+	WREG32(MC_IND_INDEX, 0);
 	return r;
 }
 
 void rv515_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
 {
-	WREG32(R520_MC_IND_INDEX, 0xff0000 | ((reg) & 0xffff));
-	WREG32(R520_MC_IND_DATA, (v));
-	WREG32(R520_MC_IND_INDEX, 0);
+	WREG32(MC_IND_INDEX, 0xff0000 | ((reg) & 0xffff));
+	WREG32(MC_IND_DATA, (v));
+	WREG32(MC_IND_INDEX, 0);
 }
 
 uint32_t rv515_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
 {
 	uint32_t r;
 
-	WREG32(RADEON_PCIE_INDEX, ((reg) & 0x7ff));
-	(void)RREG32(RADEON_PCIE_INDEX);
-	r = RREG32(RADEON_PCIE_DATA);
+	WREG32(PCIE_INDEX, ((reg) & 0x7ff));
+	(void)RREG32(PCIE_INDEX);
+	r = RREG32(PCIE_DATA);
 	return r;
 }
 
 void rv515_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
 {
-	WREG32(RADEON_PCIE_INDEX, ((reg) & 0x7ff));
-	(void)RREG32(RADEON_PCIE_INDEX);
-	WREG32(RADEON_PCIE_DATA, (v));
-	(void)RREG32(RADEON_PCIE_DATA);
+	WREG32(PCIE_INDEX, ((reg) & 0x7ff));
+	(void)RREG32(PCIE_INDEX);
+	WREG32(PCIE_DATA, (v));
+	(void)RREG32(PCIE_DATA);
 }
 
 
@@ -452,13 +429,13 @@ static int rv515_debugfs_pipes_info(struct seq_file *m, void *data)
 	struct radeon_device *rdev = dev->dev_private;
 	uint32_t tmp;
 
-	tmp = RREG32(R400_GB_PIPE_SELECT);
+	tmp = RREG32(GB_PIPE_SELECT);
 	seq_printf(m, "GB_PIPE_SELECT 0x%08x\n", tmp);
-	tmp = RREG32(R500_SU_REG_DEST);
+	tmp = RREG32(SU_REG_DEST);
 	seq_printf(m, "SU_REG_DEST 0x%08x\n", tmp);
-	tmp = RREG32(R300_GB_TILE_CONFIG);
+	tmp = RREG32(GB_TILE_CONFIG);
 	seq_printf(m, "GB_TILE_CONFIG 0x%08x\n", tmp);
-	tmp = RREG32(R300_DST_PIPE_CONFIG);
+	tmp = RREG32(DST_PIPE_CONFIG);
 	seq_printf(m, "DST_PIPE_CONFIG 0x%08x\n", tmp);
 	return 0;
 }
@@ -509,9 +486,9 @@ int rv515_debugfs_ga_info_init(struct radeon_device *rdev)
 /*
  * Asic initialization
  */
-static const unsigned r500_reg_safe_bm[159] = {
+static const unsigned r500_reg_safe_bm[219] = {
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
-	0xFFFFFFBF, 0xFFFFFFFF, 0xFFFFFFBF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
@@ -549,14 +526,575 @@ static const unsigned r500_reg_safe_bm[159] = {
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF80FFFF,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x0003FC01, 0x3FFFFCF8, 0xFE800B19,
+	0x0003FC01, 0x3FFFFCF8, 0xFE800B19, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
 };
 
-
-
 int rv515_init(struct radeon_device *rdev)
 {
 	rdev->config.r300.reg_safe_bm = r500_reg_safe_bm;
 	rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r500_reg_safe_bm);
 	return 0;
 }
+/*
+ * atom_rv515_force_tv_scaler - program a fixed set of display register values
+ *
+ * Writes hard-coded constants to registers in the 0x6578-0x65D8 range, in a
+ * fixed order.  After seven stand-alone writes, the rest of the function is
+ * a run of 0x6578/0x657C pairs: 0x6578 receives an index-like value and
+ * 0x657C receives a data word.
+ *
+ * NOTE(review): this looks like an index/data port being used to load TV
+ * scaler coefficients, as the function name suggests.  The registers are not
+ * named here -- confirm against the register documentation.
+ */
+void atom_rv515_force_tv_scaler(struct radeon_device *rdev)
+{
+	/* Seven stand-alone register writes. */
+	WREG32(0x659C, 0x0);
+	WREG32(0x6594, 0x705);
+	WREG32(0x65A4, 0x10001);
+	WREG32(0x65D8, 0x0);
+	WREG32(0x65B0, 0x0);
+	WREG32(0x65C0, 0x0);
+	WREG32(0x65D4, 0x0);
+	WREG32(0x6578, 0x0);	/* from here on: 0x6578 value, then 0x657C word */
+	WREG32(0x657C, 0x841880A8);
+	WREG32(0x6578, 0x1);
+	WREG32(0x657C, 0x84208680);
+	WREG32(0x6578, 0x2);
+	WREG32(0x657C, 0xBFF880B0);
+	WREG32(0x6578, 0x100);
+	WREG32(0x657C, 0x83D88088);
+	WREG32(0x6578, 0x101);
+	WREG32(0x657C, 0x84608680);
+	WREG32(0x6578, 0x102);
+	WREG32(0x657C, 0xBFF080D0);
+	WREG32(0x6578, 0x200);
+	WREG32(0x657C, 0x83988068);
+	WREG32(0x6578, 0x201);
+	WREG32(0x657C, 0x84A08680);
+	WREG32(0x6578, 0x202);
+	WREG32(0x657C, 0xBFF080F8);
+	WREG32(0x6578, 0x300);
+	WREG32(0x657C, 0x83588058);
+	WREG32(0x6578, 0x301);
+	WREG32(0x657C, 0x84E08660);
+	WREG32(0x6578, 0x302);
+	WREG32(0x657C, 0xBFF88120);
+	WREG32(0x6578, 0x400);
+	WREG32(0x657C, 0x83188040);
+	WREG32(0x6578, 0x401);
+	WREG32(0x657C, 0x85008660);
+	WREG32(0x6578, 0x402);
+	WREG32(0x657C, 0xBFF88150);
+	WREG32(0x6578, 0x500);
+	WREG32(0x657C, 0x82D88030);
+	WREG32(0x6578, 0x501);
+	WREG32(0x657C, 0x85408640);
+	WREG32(0x6578, 0x502);
+	WREG32(0x657C, 0xBFF88180);
+	WREG32(0x6578, 0x600);
+	WREG32(0x657C, 0x82A08018);
+	WREG32(0x6578, 0x601);
+	WREG32(0x657C, 0x85808620);
+	WREG32(0x6578, 0x602);
+	WREG32(0x657C, 0xBFF081B8);
+	WREG32(0x6578, 0x700);
+	WREG32(0x657C, 0x82608010);
+	WREG32(0x6578, 0x701);
+	WREG32(0x657C, 0x85A08600);
+	WREG32(0x6578, 0x702);
+	WREG32(0x657C, 0x800081F0);
+	WREG32(0x6578, 0x800);
+	WREG32(0x657C, 0x8228BFF8);
+	WREG32(0x6578, 0x801);
+	WREG32(0x657C, 0x85E085E0);
+	WREG32(0x6578, 0x802);
+	WREG32(0x657C, 0xBFF88228);
+	WREG32(0x6578, 0x10000);
+	WREG32(0x657C, 0x82A8BF00);
+	WREG32(0x6578, 0x10001);
+	WREG32(0x657C, 0x82A08CC0);
+	WREG32(0x6578, 0x10002);
+	WREG32(0x657C, 0x8008BEF8);
+	WREG32(0x6578, 0x10100);
+	WREG32(0x657C, 0x81F0BF28);
+	WREG32(0x6578, 0x10101);
+	WREG32(0x657C, 0x83608CA0);
+	WREG32(0x6578, 0x10102);
+	WREG32(0x657C, 0x8018BED0);
+	WREG32(0x6578, 0x10200);
+	WREG32(0x657C, 0x8148BF38);
+	WREG32(0x6578, 0x10201);
+	WREG32(0x657C, 0x84408C80);
+	WREG32(0x6578, 0x10202);
+	WREG32(0x657C, 0x8008BEB8);
+	WREG32(0x6578, 0x10300);
+	WREG32(0x657C, 0x80B0BF78);
+	WREG32(0x6578, 0x10301);
+	WREG32(0x657C, 0x85008C20);
+	WREG32(0x6578, 0x10302);
+	WREG32(0x657C, 0x8020BEA0);
+	WREG32(0x6578, 0x10400);
+	WREG32(0x657C, 0x8028BF90);
+	WREG32(0x6578, 0x10401);
+	WREG32(0x657C, 0x85E08BC0);
+	WREG32(0x6578, 0x10402);
+	WREG32(0x657C, 0x8018BE90);
+	WREG32(0x6578, 0x10500);
+	WREG32(0x657C, 0xBFB8BFB0);
+	WREG32(0x6578, 0x10501);
+	WREG32(0x657C, 0x86C08B40);
+	WREG32(0x6578, 0x10502);
+	WREG32(0x657C, 0x8010BE90);
+	WREG32(0x6578, 0x10600);
+	WREG32(0x657C, 0xBF58BFC8);
+	WREG32(0x6578, 0x10601);
+	WREG32(0x657C, 0x87A08AA0);
+	WREG32(0x6578, 0x10602);
+	WREG32(0x657C, 0x8010BE98);
+	WREG32(0x6578, 0x10700);
+	WREG32(0x657C, 0xBF10BFF0);
+	WREG32(0x6578, 0x10701);
+	WREG32(0x657C, 0x886089E0);
+	WREG32(0x6578, 0x10702);
+	WREG32(0x657C, 0x8018BEB0);
+	WREG32(0x6578, 0x10800);
+	WREG32(0x657C, 0xBED8BFE8);
+	WREG32(0x6578, 0x10801);
+	WREG32(0x657C, 0x89408940);
+	WREG32(0x6578, 0x10802);
+	WREG32(0x657C, 0xBFE8BED8);
+	WREG32(0x6578, 0x20000);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20001);
+	WREG32(0x657C, 0x90008000);
+	WREG32(0x6578, 0x20002);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20003);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20100);
+	WREG32(0x657C, 0x80108000);
+	WREG32(0x6578, 0x20101);
+	WREG32(0x657C, 0x8FE0BF70);
+	WREG32(0x6578, 0x20102);
+	WREG32(0x657C, 0xBFE880C0);
+	WREG32(0x6578, 0x20103);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20200);
+	WREG32(0x657C, 0x8018BFF8);
+	WREG32(0x6578, 0x20201);
+	WREG32(0x657C, 0x8F80BF08);
+	WREG32(0x6578, 0x20202);
+	WREG32(0x657C, 0xBFD081A0);
+	WREG32(0x6578, 0x20203);
+	WREG32(0x657C, 0xBFF88000);
+	WREG32(0x6578, 0x20300);
+	WREG32(0x657C, 0x80188000);
+	WREG32(0x6578, 0x20301);
+	WREG32(0x657C, 0x8EE0BEC0);
+	WREG32(0x6578, 0x20302);
+	WREG32(0x657C, 0xBFB082A0);
+	WREG32(0x6578, 0x20303);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20400);
+	WREG32(0x657C, 0x80188000);
+	WREG32(0x6578, 0x20401);
+	WREG32(0x657C, 0x8E00BEA0);
+	WREG32(0x6578, 0x20402);
+	WREG32(0x657C, 0xBF8883C0);
+	WREG32(0x6578, 0x20403);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20500);
+	WREG32(0x657C, 0x80188000);
+	WREG32(0x6578, 0x20501);
+	WREG32(0x657C, 0x8D00BE90);
+	WREG32(0x6578, 0x20502);
+	WREG32(0x657C, 0xBF588500);
+	WREG32(0x6578, 0x20503);
+	WREG32(0x657C, 0x80008008);
+	WREG32(0x6578, 0x20600);
+	WREG32(0x657C, 0x80188000);
+	WREG32(0x6578, 0x20601);
+	WREG32(0x657C, 0x8BC0BE98);
+	WREG32(0x6578, 0x20602);
+	WREG32(0x657C, 0xBF308660);
+	WREG32(0x6578, 0x20603);
+	WREG32(0x657C, 0x80008008);
+	WREG32(0x6578, 0x20700);
+	WREG32(0x657C, 0x80108000);
+	WREG32(0x6578, 0x20701);
+	WREG32(0x657C, 0x8A80BEB0);
+	WREG32(0x6578, 0x20702);
+	WREG32(0x657C, 0xBF0087C0);
+	WREG32(0x6578, 0x20703);
+	WREG32(0x657C, 0x80008008);
+	WREG32(0x6578, 0x20800);
+	WREG32(0x657C, 0x80108000);
+	WREG32(0x6578, 0x20801);
+	WREG32(0x657C, 0x8920BED0);
+	WREG32(0x6578, 0x20802);
+	WREG32(0x657C, 0xBED08920);
+	WREG32(0x6578, 0x20803);
+	WREG32(0x657C, 0x80008010);
+	WREG32(0x6578, 0x30000);
+	WREG32(0x657C, 0x90008000);
+	WREG32(0x6578, 0x30001);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x30100);
+	WREG32(0x657C, 0x8FE0BF90);
+	WREG32(0x6578, 0x30101);
+	WREG32(0x657C, 0xBFF880A0);
+	WREG32(0x6578, 0x30200);
+	WREG32(0x657C, 0x8F60BF40);
+	WREG32(0x6578, 0x30201);
+	WREG32(0x657C, 0xBFE88180);
+	WREG32(0x6578, 0x30300);
+	WREG32(0x657C, 0x8EC0BF00);
+	WREG32(0x6578, 0x30301);
+	WREG32(0x657C, 0xBFC88280);
+	WREG32(0x6578, 0x30400);
+	WREG32(0x657C, 0x8DE0BEE0);
+	WREG32(0x6578, 0x30401);
+	WREG32(0x657C, 0xBFA083A0);
+	WREG32(0x6578, 0x30500);
+	WREG32(0x657C, 0x8CE0BED0);
+	WREG32(0x6578, 0x30501);
+	WREG32(0x657C, 0xBF7884E0);
+	WREG32(0x6578, 0x30600);
+	WREG32(0x657C, 0x8BA0BED8);
+	WREG32(0x6578, 0x30601);
+	WREG32(0x657C, 0xBF508640);
+	WREG32(0x6578, 0x30700);
+	WREG32(0x657C, 0x8A60BEE8);
+	WREG32(0x6578, 0x30701);
+	WREG32(0x657C, 0xBF2087A0);
+	WREG32(0x6578, 0x30800);
+	WREG32(0x657C, 0x8900BF00);
+	WREG32(0x6578, 0x30801);
+	WREG32(0x657C, 0xBF008900);
+}
+/*
+ * Per-CRTC watermark values: filled in by rv515_crtc_bandwidth_compute()
+ * and read back by rv515_bandwidth_avivo_update().
+ */
+struct rv515_watermark {
+	u32        lb_request_fifo_depth;	/* request FIFO depth; packed into LB_MAX_REQ_OUTSTANDING */
+	fixed20_12 num_line_pair;	/* 1 or 2, chosen from the vertical scaling ratio */
+	fixed20_12 estimated_width;	/* not written by the compute routine (it uses a local of the same name) */
+	fixed20_12 worst_case_latency;	/* chunk time (x3 for two line pairs) + read delay */
+	fixed20_12 consumption_rate;	/* 1 / consumption time */
+	fixed20_12 active_time;	/* line time * hdisplay / htotal */
+	fixed20_12 dbpp;	/* always set to 32 (worst case assumed) */
+	fixed20_12 priority_mark_max;	/* hdisplay / 16 */
+	fixed20_12 priority_mark;	/* mark derived from the estimated width */
+	fixed20_12 sclk;	/* read for the fill rate; NOTE(review): never assigned by the compute routine -- confirm */
+};
+/*
+ * rv515_crtc_bandwidth_compute - derive the watermark values for one CRTC
+ *
+ * @rdev: device; only rdev->pm.sclk is read here
+ * @crtc: CRTC whose current mode, rmx_type and scaling ratios (vsc/hsc)
+ *        drive the computation
+ * @wm:   watermark structure to fill in
+ *
+ * For a disabled CRTC only wm->lb_request_fifo_depth is set (to 4) and all
+ * other fields of @wm are left untouched.  All arithmetic is done in
+ * fixed20_12 fixed point through the rfixed_*() helpers.
+ *
+ * NOTE(review): wm->sclk and wm->estimated_width are never assigned by this
+ * function -- confirm whether callers rely on them.
+ */
+void rv515_crtc_bandwidth_compute(struct radeon_device *rdev,
+				  struct radeon_crtc *crtc,
+				  struct rv515_watermark *wm)
+{
+	struct drm_display_mode *mode = &crtc->base.mode;
+	fixed20_12 a, b, c;
+	fixed20_12 pclk, request_fifo_depth, tolerable_latency, estimated_width;
+	fixed20_12 consumption_time, line_time, chunk_time, read_delay_latency;
+
+	if (!crtc->base.enabled) {
+		/* FIXME: wouldn't it be better to set priority mark to maximum */
+		wm->lb_request_fifo_depth = 4;
+		return;
+	}
+
+	if (crtc->vsc.full > rfixed_const(2))
+		wm->num_line_pair.full = rfixed_const(2);
+	else
+		wm->num_line_pair.full = rfixed_const(1);
+
+	b.full = rfixed_const(mode->crtc_hdisplay);
+	c.full = rfixed_const(256);
+	a.full = rfixed_mul(wm->num_line_pair, b);
+	request_fifo_depth.full = rfixed_div(a, c);
+	if (a.full < rfixed_const(4)) {	/* NOTE(review): tests the pixel count in 'a', not request_fifo_depth -- confirm */
+		wm->lb_request_fifo_depth = 4;
+	} else {
+		wm->lb_request_fifo_depth = rfixed_trunc(request_fifo_depth);
+	}
+
+	/* Determine consumption rate
+	 *  pclk = pixel clock period(ns) = 1000 / (mode.clock / 1000)
+	 *  vtaps = number of vertical taps,
+	 *  vsc = vertical scaling ratio, defined as source/destination
+	 *  hsc = horizontal scaling ratio, defined as source/destination
+	 *
+	 *  With scaling enabled (rmx_type != RMX_OFF) the consumption time is
+	 *  pclk / (max(2, vsc) * hsc / 2); otherwise it is simply pclk.
+	 */
+	a.full = rfixed_const(mode->clock);
+	b.full = rfixed_const(1000);
+	a.full = rfixed_div(a, b);
+	pclk.full = rfixed_div(b, a);
+	if (crtc->rmx_type != RMX_OFF) {
+		b.full = rfixed_const(2);
+		if (crtc->vsc.full > b.full)
+			b.full = crtc->vsc.full;
+		b.full = rfixed_mul(b, crtc->hsc);
+		c.full = rfixed_const(2);
+		b.full = rfixed_div(b, c);
+		consumption_time.full = rfixed_div(pclk, b);
+	} else {
+		consumption_time.full = pclk.full;
+	}
+	a.full = rfixed_const(1);
+	wm->consumption_rate.full = rfixed_div(a, consumption_time);
+
+
+	/* Determine line time
+	 *  LineTime = total time for one line of display = htotal * pclk
+	 *  htotal = total number of horizontal pixels
+	 *  pclk = pixel clock period(ns)
+	 */
+	a.full = rfixed_const(crtc->base.mode.crtc_htotal);
+	line_time.full = rfixed_mul(a, pclk);
+
+	/* Determine active time
+	 *  ActiveTime = time of active region of display within one line
+	 *             = LineTime * hactive / htotal
+	 *  hactive = total number of horizontal active pixels
+	 *  htotal = total number of horizontal pixels
+	 */
+	a.full = rfixed_const(crtc->base.mode.crtc_htotal);
+	b.full = rfixed_const(crtc->base.mode.crtc_hdisplay);
+	wm->active_time.full = rfixed_mul(line_time, b);
+	wm->active_time.full = rfixed_div(wm->active_time, a);
+
+	/* Determine chunk time
+	 * ChunkTime = the time it takes the DCP to send one chunk of data
+	 * to the LB which consists of pipeline delay and inter chunk gap
+	 * sclk = system clock(MHz)
+	 */
+	a.full = rfixed_const(600 * 1000);
+	chunk_time.full = rfixed_div(a, rdev->pm.sclk);
+	read_delay_latency.full = rfixed_const(1000);	/* READ_DELAY_IDLE_MAX, see below */
+
+	/* Determine the worst case latency
+	 * NumLinePair = Number of line pairs to request(1=2 lines, 2=4 lines)
+	 * WorstCaseLatency = worst case time from urgent to when the MC starts
+	 *                    to return data
+	 * READ_DELAY_IDLE_MAX = constant of 1us
+	 * ChunkTime = time it takes the DCP to send one chunk of data to the LB
+	 *             which consists of pipeline delay and inter chunk gap
+	 */
+	if (rfixed_trunc(wm->num_line_pair) > 1) {
+		a.full = rfixed_const(3);
+		wm->worst_case_latency.full = rfixed_mul(a, chunk_time);
+		wm->worst_case_latency.full += read_delay_latency.full;
+	} else {
+		wm->worst_case_latency.full = chunk_time.full + read_delay_latency.full;
+	}
+
+	/* Determine the tolerable latency
+	 * TolerableLatency = Any given request has only 1 line time
+	 *                    for the data to be returned
+	 * LBRequestFifoDepth = Number of chunk requests the LB can
+	 *                      put into the request FIFO for a display
+	 *  LineTime = total time for one line of display
+	 *  ChunkTime = the time it takes the DCP to send one chunk
+	 *              of data to the LB which consists of
+	 *  pipeline delay and inter chunk gap
+	 */
+	if ((2+wm->lb_request_fifo_depth) >= rfixed_trunc(request_fifo_depth)) {
+		tolerable_latency.full = line_time.full;
+	} else {
+		tolerable_latency.full = rfixed_const(wm->lb_request_fifo_depth - 2);
+		tolerable_latency.full = request_fifo_depth.full - tolerable_latency.full;
+		tolerable_latency.full = rfixed_mul(tolerable_latency, chunk_time);
+		tolerable_latency.full = line_time.full - tolerable_latency.full;
+	}
+	/* We assume worst case 32bits (4 bytes) */
+	wm->dbpp.full = rfixed_const(2 * 16);
+
+	/* Determine the maximum priority mark
+	 *  width = viewport width in pixels
+	 *  priority_mark_max = width / 16
+	 */
+	a.full = rfixed_const(16);
+	wm->priority_mark_max.full = rfixed_const(crtc->base.mode.crtc_hdisplay);
+	wm->priority_mark_max.full = rfixed_div(wm->priority_mark_max, a);
+
+	/* Determine estimated width:
+	 *  (tolerable latency - worst case latency) / consumption time.
+	 * If it exceeds the display width the priority mark is fixed at 10,
+	 * otherwise it is priority_mark_max - estimated_width / 16.
+	 */
+	estimated_width.full = tolerable_latency.full - wm->worst_case_latency.full;
+	estimated_width.full = rfixed_div(estimated_width, consumption_time);
+	if (rfixed_trunc(estimated_width) > crtc->base.mode.crtc_hdisplay) {
+		wm->priority_mark.full = rfixed_const(10);
+	} else {
+		a.full = rfixed_const(16);
+		wm->priority_mark.full = rfixed_div(estimated_width, a);
+		wm->priority_mark.full = wm->priority_mark_max.full - wm->priority_mark.full;
+	}
+}
+
+/*
+ * Contribution of one head to the fill-rate divisor: dbpp / num_line_pair
+ * when dbpp truncates to more than 64, num_line_pair otherwise.
+ */
+static fixed20_12 rv515_wm_fill_divisor(const struct rv515_watermark *wm)
+{
+	fixed20_12 div;
+
+	if (rfixed_trunc(wm->dbpp) > 64)
+		div.full = rfixed_div(wm->dbpp, wm->num_line_pair);
+	else
+		div.full = wm->num_line_pair.full;
+	return div;
+}
+
+/*
+ * Priority mark register value for one head, given the fill rate available
+ * to it.  This is the single copy of the formula used for every head
+ * configuration, so all configurations scale the result the same way.
+ */
+static u32 rv515_wm_priority_mark(const struct rv515_watermark *wm,
+				  fixed20_12 fill_rate)
+{
+	fixed20_12 a, b, mark;
+
+	if (wm->consumption_rate.full > fill_rate.full) {
+		/* The head drains faster than it fills: add the deficit
+		 * accumulated over the active part of a line (in units of 16).
+		 */
+		b.full = wm->consumption_rate.full - fill_rate.full;
+		b.full = rfixed_mul(b, wm->active_time);
+		a.full = rfixed_const(16);
+		b.full = rfixed_div(b, a);
+		a.full = rfixed_mul(wm->worst_case_latency,
+				    wm->consumption_rate);
+		mark.full = a.full + b.full;
+	} else {
+		a.full = rfixed_mul(wm->worst_case_latency,
+				    wm->consumption_rate);
+		b.full = rfixed_const(16 * 1000);
+		mark.full = rfixed_div(a, b);
+	}
+	if (wm->priority_mark.full > mark.full)
+		mark.full = wm->priority_mark.full;
+	/* NOTE(review): if rfixed_trunc() yields an unsigned value this test
+	 * can never fire; kept as in the original -- confirm.
+	 */
+	if (rfixed_trunc(mark) < 0)
+		mark.full = 0;
+	if (wm->priority_mark_max.full > mark.full)
+		mark.full = wm->priority_mark_max.full;
+	return rfixed_trunc(mark);
+}
+
+/*
+ * rv515_bandwidth_avivo_update - program line buffer and priority marks
+ *
+ * @rdev: device whose CRTC 0 and CRTC 1 state is inspected
+ *
+ * Adjusts the line buffer split, computes a watermark for both CRTCs, writes
+ * the request FIFO depths to LB_MAX_REQ_OUTSTANDING and then programs the
+ * D1/D2 MODE_PRIORITY A and B counters.  A head that is not enabled gets
+ * MODE_PRIORITY_OFF; with both heads enabled the fill rate is shared between
+ * them.  When neither head is enabled both are set to MODE_PRIORITY_OFF and
+ * no watermark field other than the FIFO depth is read.
+ *
+ * NOTE(review): the fill rate is derived from the watermark's sclk field,
+ * which rv515_crtc_bandwidth_compute() does not appear to assign -- confirm.
+ */
+void rv515_bandwidth_avivo_update(struct radeon_device *rdev)
+{
+	struct drm_display_mode *mode0 = NULL;
+	struct drm_display_mode *mode1 = NULL;
+	struct rv515_watermark wm0;
+	struct rv515_watermark wm1;
+	u32 tmp;
+	u32 d1_priority = MODE_PRIORITY_OFF;
+	u32 d2_priority = MODE_PRIORITY_OFF;
+	fixed20_12 fill_rate;
+	fixed20_12 a, b;
+
+	if (rdev->mode_info.crtcs[0]->base.enabled)
+		mode0 = &rdev->mode_info.crtcs[0]->base.mode;
+	if (rdev->mode_info.crtcs[1]->base.enabled)
+		mode1 = &rdev->mode_info.crtcs[1]->base.mode;
+	rs690_line_buffer_adjust(rdev, mode0, mode1);
+
+	rv515_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[0], &wm0);
+	rv515_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[1], &wm1);
+
+	/* D1 depth in the low half-word, D2 depth shifted up by 16 */
+	tmp = wm0.lb_request_fifo_depth;
+	tmp |= wm1.lb_request_fifo_depth << 16;
+	WREG32(LB_MAX_REQ_OUTSTANDING, tmp);
+
+	if (mode0 && mode1) {
+		/* Both heads share one fill rate */
+		a = rv515_wm_fill_divisor(&wm0);
+		b = rv515_wm_fill_divisor(&wm1);
+		a.full += b.full;
+		fill_rate.full = rfixed_div(wm0.sclk, a);
+		d1_priority = rv515_wm_priority_mark(&wm0, fill_rate);
+		d2_priority = rv515_wm_priority_mark(&wm1, fill_rate);
+	} else if (mode0) {
+		a = rv515_wm_fill_divisor(&wm0);
+		fill_rate.full = rfixed_div(wm0.sclk, a);
+		d1_priority = rv515_wm_priority_mark(&wm0, fill_rate);
+	} else if (mode1) {
+		a = rv515_wm_fill_divisor(&wm1);
+		fill_rate.full = rfixed_div(wm1.sclk, a);
+		d2_priority = rv515_wm_priority_mark(&wm1, fill_rate);
+	}
+
+	WREG32(D1MODE_PRIORITY_A_CNT, d1_priority);
+	WREG32(D1MODE_PRIORITY_B_CNT, d1_priority);
+	WREG32(D2MODE_PRIORITY_A_CNT, d2_priority);
+	WREG32(D2MODE_PRIORITY_B_CNT, d2_priority);
+}
+
+/*
+ * rv515_bandwidth_update - refresh memory controller and display priorities
+ *
+ * @rdev: device to update
+ *
+ * When rdev->disp_priority is 2, the DISP0R/DISP1R initial latency fields of
+ * MC_MISC_LAT_TIMER are rewritten: 1 for an enabled CRTC, 0 for a disabled
+ * one.  rv515_bandwidth_avivo_update() is then run in every case.
+ */
+void rv515_bandwidth_update(struct radeon_device *rdev)
+{
+	const bool crtc0_on = rdev->mode_info.crtcs[0]->base.enabled;
+	const bool crtc1_on = rdev->mode_info.crtcs[1]->base.enabled;
+
+	/* disp_priority == 2: the user asked for HIGH display priority, so
+	 * raise display0/1 priority in the memory controller.
+	 */
+	if (rdev->disp_priority == 2) {
+		uint32_t lat_timer = RREG32_MC(MC_MISC_LAT_TIMER);
+
+		lat_timer &= ~(MC_DISP0R_INIT_LAT_MASK | MC_DISP1R_INIT_LAT_MASK);
+		if (crtc0_on)
+			lat_timer |= (1 << MC_DISP0R_INIT_LAT_SHIFT);
+		if (crtc1_on)
+			lat_timer |= (1 << MC_DISP1R_INIT_LAT_SHIFT);
+		WREG32_MC(MC_MISC_LAT_TIMER, lat_timer);
+	}
+
+	rv515_bandwidth_avivo_update(rdev);
+}

+ 170 - 0
drivers/gpu/drm/radeon/rv515r.h

@@ -0,0 +1,170 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef RV515R_H
+#define RV515R_H
+
+/* RV515 registers */
+#define PCIE_INDEX			0x0030
+#define PCIE_DATA			0x0034
+#define	MC_IND_INDEX			0x0070
+#define		MC_IND_WR_EN				(1 << 24)
+#define	MC_IND_DATA			0x0074
+#define	RBBM_SOFT_RESET			0x00F0
+#define	CONFIG_MEMSIZE			0x00F8
+#define HDP_FB_LOCATION			0x0134
+#define	CP_CSQ_CNTL			0x0740
+#define	CP_CSQ_MODE			0x0744
+#define	CP_CSQ_ADDR			0x07F0
+#define	CP_CSQ_DATA			0x07F4
+#define	CP_CSQ_STAT			0x07F8
+#define	CP_CSQ2_STAT			0x07FC
+#define	RBBM_STATUS			0x0E40
+#define	DST_PIPE_CONFIG			0x170C
+#define	WAIT_UNTIL			0x1720
+#define		WAIT_2D_IDLE				(1 << 14)
+#define		WAIT_3D_IDLE				(1 << 15)
+#define		WAIT_2D_IDLECLEAN			(1 << 16)
+#define		WAIT_3D_IDLECLEAN			(1 << 17)
+#define	ISYNC_CNTL			0x1724
+#define		ISYNC_ANY2D_IDLE3D			(1 << 0)
+#define		ISYNC_ANY3D_IDLE2D			(1 << 1)
+#define		ISYNC_TRIG2D_IDLE3D			(1 << 2)
+#define		ISYNC_TRIG3D_IDLE2D			(1 << 3)
+#define		ISYNC_WAIT_IDLEGUI			(1 << 4)
+#define		ISYNC_CPSCRATCH_IDLEGUI			(1 << 5)
+#define	VAP_INDEX_OFFSET		0x208C
+#define	VAP_PVS_STATE_FLUSH_REG		0x2284
+#define	GB_ENABLE			0x4008
+#define	GB_MSPOS0			0x4010
+#define		MS_X0_SHIFT				0
+#define		MS_Y0_SHIFT				4
+#define		MS_X1_SHIFT				8
+#define		MS_Y1_SHIFT				12
+#define		MS_X2_SHIFT				16
+#define		MS_Y2_SHIFT				20
+#define		MSBD0_Y_SHIFT				24
+#define		MSBD0_X_SHIFT				28
+#define	GB_MSPOS1			0x4014
+#define		MS_X3_SHIFT				0
+#define		MS_Y3_SHIFT				4
+#define		MS_X4_SHIFT				8
+#define		MS_Y4_SHIFT				12
+#define		MS_X5_SHIFT				16
+#define		MS_Y5_SHIFT				20
+#define		MSBD1_SHIFT				24
+#define GB_TILE_CONFIG			0x4018
+#define		ENABLE_TILING				(1 << 0)
+#define		PIPE_COUNT_MASK				0x0000000E
+#define		PIPE_COUNT_SHIFT			1
+#define		TILE_SIZE_8				(0 << 4)
+#define		TILE_SIZE_16				(1 << 4)
+#define		TILE_SIZE_32				(2 << 4)
+#define		SUBPIXEL_1_12				(0 << 16)
+#define		SUBPIXEL_1_16				(1 << 16)
+#define	GB_SELECT			0x401C
+#define	GB_AA_CONFIG			0x4020
+#define	GB_PIPE_SELECT			0x402C
+#define	GA_ENHANCE			0x4274
+#define		GA_DEADLOCK_CNTL			(1 << 0)
+#define		GA_FASTSYNC_CNTL			(1 << 1)
+#define	GA_POLY_MODE			0x4288
+#define		FRONT_PTYPE_POINT			(0 << 4)
+#define		FRONT_PTYPE_LINE			(1 << 4)
+#define		FRONT_PTYPE_TRIANGE			(2 << 4)
+#define		BACK_PTYPE_POINT			(0 << 7)
+#define		BACK_PTYPE_LINE				(1 << 7)
+#define		BACK_PTYPE_TRIANGE			(2 << 7)
+#define	GA_ROUND_MODE			0x428C
+#define		GEOMETRY_ROUND_TRUNC			(0 << 0)
+#define		GEOMETRY_ROUND_NEAREST			(1 << 0)
+#define		COLOR_ROUND_TRUNC			(0 << 2)
+#define		COLOR_ROUND_NEAREST			(1 << 2)
+#define	SU_REG_DEST			0x42C8
+#define	RB3D_DSTCACHE_CTLSTAT		0x4E4C
+#define		RB3D_DC_FLUSH				(2 << 0)
+#define		RB3D_DC_FREE				(2 << 2)
+#define		RB3D_DC_FINISH				(1 << 4)
+#define ZB_ZCACHE_CTLSTAT		0x4F18
+#define		ZC_FLUSH				(1 << 0)
+#define		ZC_FREE					(1 << 1)
+#define DC_LB_MEMORY_SPLIT		0x6520
+#define		DC_LB_MEMORY_SPLIT_MASK			0x00000003
+#define		DC_LB_MEMORY_SPLIT_SHIFT		0
+#define		DC_LB_MEMORY_SPLIT_D1HALF_D2HALF	0
+#define		DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q		1
+#define		DC_LB_MEMORY_SPLIT_D1_ONLY		2
+#define		DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q		3
+#define		DC_LB_MEMORY_SPLIT_SHIFT_MODE		(1 << 2)
+#define		DC_LB_DISP1_END_ADR_SHIFT		4
+#define		DC_LB_DISP1_END_ADR_MASK		0x00007FF0
+#define D1MODE_PRIORITY_A_CNT		0x6548
+#define		MODE_PRIORITY_MARK_MASK			0x00007FFF
+#define		MODE_PRIORITY_OFF			(1 << 16)
+#define		MODE_PRIORITY_ALWAYS_ON			(1 << 20)
+#define		MODE_PRIORITY_FORCE_MASK		(1 << 24)
+#define D1MODE_PRIORITY_B_CNT		0x654C
+#define LB_MAX_REQ_OUTSTANDING		0x6D58
+#define		LB_D1_MAX_REQ_OUTSTANDING_MASK		0x0000000F
+#define		LB_D1_MAX_REQ_OUTSTANDING_SHIFT		0
+#define		LB_D2_MAX_REQ_OUTSTANDING_MASK		0x000F0000
+#define		LB_D2_MAX_REQ_OUTSTANDING_SHIFT		16
+#define D2MODE_PRIORITY_A_CNT		0x6D48
+#define D2MODE_PRIORITY_B_CNT		0x6D4C
+
+/* Indirect MC registers: offsets passed to RREG32_MC()/WREG32_MC() */
+#define MC_FB_LOCATION			0x01
+#define		MC_FB_START_MASK			0x0000FFFF
+#define		MC_FB_START_SHIFT			0
+#define		MC_FB_TOP_MASK				0xFFFF0000
+#define		MC_FB_TOP_SHIFT				16
+#define MC_AGP_LOCATION			0x02
+#define		MC_AGP_START_MASK			0x0000FFFF
+#define		MC_AGP_START_SHIFT			0
+#define		MC_AGP_TOP_MASK				0xFFFF0000
+#define		MC_AGP_TOP_SHIFT			16
+#define MC_AGP_BASE			0x03
+#define MC_AGP_BASE_2			0x04
+#define	MC_CNTL				0x5
+#define		MEM_NUM_CHANNELS_MASK			0x00000003
+#define	MC_STATUS			0x08
+#define		MC_STATUS_IDLE				(1 << 4)
+#define	MC_MISC_LAT_TIMER		0x09
+#define		MC_CPR_INIT_LAT_MASK			0x0000000F
+#define		MC_VF_INIT_LAT_MASK			0x000000F0
+#define		MC_DISP0R_INIT_LAT_MASK			0x00000F00
+#define		MC_DISP0R_INIT_LAT_SHIFT		8
+#define		MC_DISP1R_INIT_LAT_MASK			0x0000F000
+#define		MC_DISP1R_INIT_LAT_SHIFT		12
+#define		MC_FIXED_INIT_LAT_MASK			0x000F0000
+#define		MC_E2R_INIT_LAT_MASK			0x00F00000
+#define		SAME_PAGE_PRIO_MASK			0x0F000000
+#define		MC_GLOBW_INIT_LAT_MASK			0xF0000000
+
+
+#endif
+

+ 1 - 1
drivers/gpu/drm/radeon/rv770.c

@@ -67,7 +67,7 @@ int rv770_mc_init(struct radeon_device *rdev)
 		       "programming pipes. Bad things might happen.\n");
 	}
 
-	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
 	tmp = REG_SET(R700_MC_FB_TOP, tmp >> 24);
 	tmp |= REG_SET(R700_MC_FB_BASE, rdev->mc.vram_location >> 24);
 	WREG32(R700_MC_VM_FB_LOCATION, tmp);

+ 49 - 14
drivers/gpu/drm/ttm/ttm_bo.c

@@ -43,7 +43,6 @@
 #define TTM_BO_HASH_ORDER 13
 
 static int ttm_bo_setup_vm(struct ttm_buffer_object *bo);
-static void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
 static int ttm_bo_swapout(struct ttm_mem_shrink *shrink);
 
 static inline uint32_t ttm_bo_type_flags(unsigned type)
@@ -224,6 +223,9 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
 	TTM_ASSERT_LOCKED(&bo->mutex);
 	bo->ttm = NULL;
 
+	if (bdev->need_dma32)
+		page_flags |= TTM_PAGE_FLAG_DMA32;
+
 	switch (bo->type) {
 	case ttm_bo_type_device:
 		if (zero_alloc)
@@ -304,6 +306,9 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 
 	}
 
+	if (bdev->driver->move_notify)
+		bdev->driver->move_notify(bo, mem);
+
 	if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
 	    !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
 		ret = ttm_bo_move_ttm(bo, evict, no_wait, mem);
@@ -655,31 +660,52 @@ static int ttm_bo_mem_force_space(struct ttm_bo_device *bdev,
 	return 0;
 }
 
+static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man,
+				      uint32_t cur_placement,
+				      uint32_t proposed_placement)
+{
+	uint32_t caching = proposed_placement & TTM_PL_MASK_CACHING;
+	uint32_t result = proposed_placement & ~TTM_PL_MASK_CACHING;
+
+	/**
+	 * Keep current caching if possible.
+	 */
+
+	if ((cur_placement & caching) != 0)
+		result |= (cur_placement & caching);
+	else if ((man->default_caching & caching) != 0)
+		result |= man->default_caching;
+	else if ((TTM_PL_FLAG_CACHED & caching) != 0)
+		result |= TTM_PL_FLAG_CACHED;
+	else if ((TTM_PL_FLAG_WC & caching) != 0)
+		result |= TTM_PL_FLAG_WC;
+	else if ((TTM_PL_FLAG_UNCACHED & caching) != 0)
+		result |= TTM_PL_FLAG_UNCACHED;
+
+	return result;
+}
+
+
 static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man,
 				 bool disallow_fixed,
 				 uint32_t mem_type,
-				 uint32_t mask, uint32_t *res_mask)
+				 uint32_t proposed_placement,
+				 uint32_t *masked_placement)
 {
 	uint32_t cur_flags = ttm_bo_type_flags(mem_type);
 
 	if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && disallow_fixed)
 		return false;
 
-	if ((cur_flags & mask & TTM_PL_MASK_MEM) == 0)
+	if ((cur_flags & proposed_placement & TTM_PL_MASK_MEM) == 0)
 		return false;
 
-	if ((mask & man->available_caching) == 0)
+	if ((proposed_placement & man->available_caching) == 0)
 		return false;
-	if (mask & man->default_caching)
-		cur_flags |= man->default_caching;
-	else if (mask & TTM_PL_FLAG_CACHED)
-		cur_flags |= TTM_PL_FLAG_CACHED;
-	else if (mask & TTM_PL_FLAG_WC)
-		cur_flags |= TTM_PL_FLAG_WC;
-	else
-		cur_flags |= TTM_PL_FLAG_UNCACHED;
 
-	*res_mask = cur_flags;
+	cur_flags |= (proposed_placement & man->available_caching);
+
+	*masked_placement = cur_flags;
 	return true;
 }
 
@@ -723,6 +749,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		if (!type_ok)
 			continue;
 
+		cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
+						  cur_flags);
+
 		if (mem_type == TTM_PL_SYSTEM)
 			break;
 
@@ -779,6 +808,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 					  proposed_placement, &cur_flags))
 			continue;
 
+		cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
+						  cur_flags);
+
 		ret = ttm_bo_mem_force_space(bdev, mem, mem_type,
 					     interruptible, no_wait);
 
@@ -1305,7 +1337,8 @@ EXPORT_SYMBOL(ttm_bo_device_release);
 
 int ttm_bo_device_init(struct ttm_bo_device *bdev,
 		       struct ttm_mem_global *mem_glob,
-		       struct ttm_bo_driver *driver, uint64_t file_page_offset)
+		       struct ttm_bo_driver *driver, uint64_t file_page_offset,
+		       bool need_dma32)
 {
 	int ret = -EINVAL;
 
@@ -1342,6 +1375,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	INIT_LIST_HEAD(&bdev->swap_lru);
 	bdev->dev_mapping = NULL;
+	bdev->need_dma32 = need_dma32;
 	ttm_mem_init_shrink(&bdev->shrink, ttm_bo_swapout);
 	ret = ttm_mem_register_shrink(mem_glob, &bdev->shrink);
 	if (unlikely(ret != 0)) {
@@ -1419,6 +1453,7 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 
 	unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1);
 }
+EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 
 static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo)
 {

+ 56 - 13
drivers/gpu/drm/ttm/ttm_bo_util.c

@@ -136,7 +136,8 @@ static int ttm_copy_io_page(void *dst, void *src, unsigned long page)
 }
 
 static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src,
-				unsigned long page)
+				unsigned long page,
+				pgprot_t prot)
 {
 	struct page *d = ttm_tt_get_page(ttm, page);
 	void *dst;
@@ -145,17 +146,35 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src,
 		return -ENOMEM;
 
 	src = (void *)((unsigned long)src + (page << PAGE_SHIFT));
-	dst = kmap(d);
+
+#ifdef CONFIG_X86
+	dst = kmap_atomic_prot(d, KM_USER0, prot);
+#else
+	if (prot != PAGE_KERNEL)
+		dst = vmap(&d, 1, 0, prot);
+	else
+		dst = kmap(d);
+#endif
 	if (!dst)
 		return -ENOMEM;
 
 	memcpy_fromio(dst, src, PAGE_SIZE);
-	kunmap(d);
+
+#ifdef CONFIG_X86
+	kunmap_atomic(dst, KM_USER0);
+#else
+	if (prot != PAGE_KERNEL)
+		vunmap(dst);
+	else
+		kunmap(d);
+#endif
+
 	return 0;
 }
 
 static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
-				unsigned long page)
+				unsigned long page,
+				pgprot_t prot)
 {
 	struct page *s = ttm_tt_get_page(ttm, page);
 	void *src;
@@ -164,12 +183,28 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
 		return -ENOMEM;
 
 	dst = (void *)((unsigned long)dst + (page << PAGE_SHIFT));
-	src = kmap(s);
+#ifdef CONFIG_X86
+	src = kmap_atomic_prot(s, KM_USER0, prot);
+#else
+	if (prot != PAGE_KERNEL)
+		src = vmap(&s, 1, 0, prot);
+	else
+		src = kmap(s);
+#endif
 	if (!src)
 		return -ENOMEM;
 
 	memcpy_toio(dst, src, PAGE_SIZE);
-	kunmap(s);
+
+#ifdef CONFIG_X86
+	kunmap_atomic(src, KM_USER0);
+#else
+	if (prot != PAGE_KERNEL)
+		vunmap(src);
+	else
+		kunmap(s);
+#endif
+
 	return 0;
 }
 
@@ -214,11 +249,17 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 
 	for (i = 0; i < new_mem->num_pages; ++i) {
 		page = i * dir + add;
-		if (old_iomap == NULL)
-			ret = ttm_copy_ttm_io_page(ttm, new_iomap, page);
-		else if (new_iomap == NULL)
-			ret = ttm_copy_io_ttm_page(ttm, old_iomap, page);
-		else
+		if (old_iomap == NULL) {
+			pgprot_t prot = ttm_io_prot(old_mem->placement,
+						    PAGE_KERNEL);
+			ret = ttm_copy_ttm_io_page(ttm, new_iomap, page,
+						   prot);
+		} else if (new_iomap == NULL) {
+			pgprot_t prot = ttm_io_prot(new_mem->placement,
+						    PAGE_KERNEL);
+			ret = ttm_copy_io_ttm_page(ttm, old_iomap, page,
+						   prot);
+		} else
 			ret = ttm_copy_io_page(new_iomap, old_iomap, page);
 		if (ret)
 			goto out1;
@@ -509,8 +550,8 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	if (evict) {
 		ret = ttm_bo_wait(bo, false, false, false);
 		spin_unlock(&bo->lock);
-		driver->sync_obj_unref(&bo->sync_obj);
-
+		if (tmp_obj)
+			driver->sync_obj_unref(&tmp_obj);
 		if (ret)
 			return ret;
 
@@ -532,6 +573,8 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 
 		set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
 		spin_unlock(&bo->lock);
+		if (tmp_obj)
+			driver->sync_obj_unref(&tmp_obj);
 
 		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
 		if (ret)

+ 3 - 0
drivers/gpu/drm/ttm/ttm_bo_vm.c

@@ -101,6 +101,9 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_NOPAGE;
 	}
 
+	if (bdev->driver->fault_reserve_notify)
+		bdev->driver->fault_reserve_notify(bo);
+
 	/*
 	 * Wait for buffer data in transit, due to a pipelined
 	 * move.

+ 19 - 6
drivers/gpu/drm/ttm/ttm_tt.c

@@ -86,10 +86,16 @@ void ttm_tt_cache_flush(struct page *pages[], unsigned long num_pages)
 	unsigned long i;
 
 	for (i = 0; i < num_pages; ++i) {
-		if (pages[i]) {
-			unsigned long start = (unsigned long)page_address(pages[i]);
-			flush_dcache_range(start, start + PAGE_SIZE);
-		}
+		struct page *page = pages[i];
+		void *page_virtual;
+
+		if (unlikely(page == NULL))
+			continue;
+
+		page_virtual = kmap_atomic(page, KM_USER0);
+		flush_dcache_range((unsigned long) page_virtual,
+				   (unsigned long) page_virtual + PAGE_SIZE);
+		kunmap_atomic(page_virtual, KM_USER0);
 	}
 #else
 	if (on_each_cpu(ttm_tt_ipi_handler, NULL, 1) != 0)
@@ -131,10 +137,17 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
 
 static struct page *ttm_tt_alloc_page(unsigned page_flags)
 {
+	gfp_t gfp_flags = GFP_USER;
+
 	if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
-		return alloc_page(GFP_HIGHUSER | __GFP_ZERO);
+		gfp_flags |= __GFP_ZERO;
+
+	if (page_flags & TTM_PAGE_FLAG_DMA32)
+		gfp_flags |= __GFP_DMA32;
+	else
+		gfp_flags |= __GFP_HIGHMEM;
 
-	return alloc_page(GFP_HIGHUSER);
+	return alloc_page(gfp_flags);
 }
 
 static void ttm_tt_free_user_pages(struct ttm_tt *ttm)

+ 22 - 1
include/drm/radeon_drm.h

@@ -506,6 +506,8 @@ typedef struct {
 #define DRM_RADEON_GEM_WAIT_IDLE	0x24
 #define DRM_RADEON_CS			0x26
 #define DRM_RADEON_INFO			0x27
+#define DRM_RADEON_GEM_SET_TILING	0x28
+#define DRM_RADEON_GEM_GET_TILING	0x29
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -544,7 +546,8 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_WAIT_IDLE	DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle)
 #define DRM_IOCTL_RADEON_CS		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs)
 #define DRM_IOCTL_RADEON_INFO		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info)
-
+#define DRM_IOCTL_RADEON_SET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
+#define DRM_IOCTL_RADEON_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -796,6 +799,24 @@ struct drm_radeon_gem_create {
 	uint32_t	flags;
 };
 
+#define RADEON_TILING_MACRO 0x1
+#define RADEON_TILING_MICRO 0x2
+#define RADEON_TILING_SWAP  0x4
+#define RADEON_TILING_SURFACE  0x8 /* this object requires a surface
+				    * when mapped - i.e. front buffer */
+
+struct drm_radeon_gem_set_tiling {
+	uint32_t	handle;
+	uint32_t	tiling_flags;
+	uint32_t	pitch;
+};
+
+struct drm_radeon_gem_get_tiling {
+	uint32_t	handle;
+	uint32_t	tiling_flags;
+	uint32_t	pitch;
+};
+
 struct drm_radeon_gem_mmap {
 	uint32_t	handle;
 	uint32_t	pad;

+ 19 - 1
include/drm/ttm/ttm_bo_driver.h

@@ -121,6 +121,7 @@ struct ttm_backend {
 #define TTM_PAGE_FLAG_SWAPPED         (1 << 4)
 #define TTM_PAGE_FLAG_PERSISTANT_SWAP (1 << 5)
 #define TTM_PAGE_FLAG_ZERO_ALLOC      (1 << 6)
+#define TTM_PAGE_FLAG_DMA32           (1 << 7)
 
 enum ttm_caching_state {
 	tt_uncached,
@@ -353,6 +354,14 @@ struct ttm_bo_driver {
 	int (*sync_obj_flush) (void *sync_obj, void *sync_arg);
 	void (*sync_obj_unref) (void **sync_obj);
 	void *(*sync_obj_ref) (void *sync_obj);
+
+	/* hook to notify driver about a driver move so it
+	 * can do tiling things */
+	void (*move_notify)(struct ttm_buffer_object *bo,
+			    struct ttm_mem_reg *new_mem);
+	/* notify the driver we are taking a fault on this BO
+	 * and have reserved it */
+	void (*fault_reserve_notify)(struct ttm_buffer_object *bo);
 };
 
 #define TTM_NUM_MEM_TYPES 8
@@ -429,6 +438,8 @@ struct ttm_bo_device {
 	 */
 
 	struct delayed_work wq;
+
+	bool need_dma32;
 };
 
 /**
@@ -648,7 +659,14 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
 extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
 			      struct ttm_mem_global *mem_glob,
 			      struct ttm_bo_driver *driver,
-			      uint64_t file_page_offset);
+			      uint64_t file_page_offset, bool need_dma32);
+
+/**
+ * ttm_bo_unmap_virtual
+ *
+ * @bo: tear down the virtual mappings for this BO
+ */
+extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
 
 /**
  * ttm_bo_reserve:

+ 1 - 1
include/drm/ttm/ttm_module.h

@@ -33,7 +33,7 @@
 
 #include <linux/kernel.h>
 
-#define TTM_PFX "[TTM]"
+#define TTM_PFX "[TTM] "
 
 enum ttm_global_types {
 	TTM_GLOBAL_TTM_MEM = 0,