
Merge branches 'sh/hwblk', 'sh/cpuidle' and 'sh/stable-updates'

Paul Mundt 16 years ago
parent
commit
1ecc6ab669

+ 38 - 0
arch/sh/boards/mach-se/7724/setup.c

@@ -19,6 +19,7 @@
 #include <linux/smc91x.h>
 #include <linux/gpio.h>
 #include <linux/input.h>
+#include <linux/usb/r8a66597.h>
 #include <video/sh_mobile_lcdc.h>
 #include <media/sh_mobile_ceu.h>
 #include <asm/io.h>
@@ -302,6 +303,34 @@ static struct platform_device sh_eth_device = {
 	.resource = sh_eth_resources,
 };

+static struct r8a66597_platdata sh7724_usb0_host_data = {
+};
+
+static struct resource sh7724_usb0_host_resources[] = {
+	[0] = {
+		.start	= 0xa4d80000,
+		.end	= 0xa4d800ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 65,
+		.end	= 65,
+		.flags	= IORESOURCE_IRQ | IRQF_TRIGGER_LOW,
+	},
+};
+
+static struct platform_device sh7724_usb0_host_device = {
+	.name		= "r8a66597_hcd",
+	.id		= 0,
+	.dev = {
+		.dma_mask		= NULL,         /*  not use dma */
+		.coherent_dma_mask	= 0xffffffff,
+		.platform_data		= &sh7724_usb0_host_data,
+	},
+	.num_resources	= ARRAY_SIZE(sh7724_usb0_host_resources),
+	.resource	= sh7724_usb0_host_resources,
+};
+
 static struct platform_device *ms7724se_devices[] __initdata = {
 	&heartbeat_device,
 	&smc91x_eth_device,
@@ -311,6 +340,7 @@ static struct platform_device *ms7724se_devices[] __initdata = {
 	&ceu1_device,
 	&keysc_device,
 	&sh_eth_device,
+	&sh7724_usb0_host_device,
 };

 #define EEPROM_OP   0xBA206000
@@ -364,6 +394,7 @@ static void __init sh_eth_init(void)
 #define SW4140    0xBA201000
 #define FPGA_OUT  0xBA200400
 #define PORT_HIZA 0xA4050158
+#define PORT_MSELCRB 0xA4050182

 #define SW41_A    0x0100
 #define SW41_B    0x0200
@@ -373,6 +404,7 @@ static void __init sh_eth_init(void)
 #define SW41_F    0x2000
 #define SW41_G    0x4000
 #define SW41_H    0x8000
+
 static int __init devices_setup(void)
 {
 	u16 sw = ctrl_inw(SW4140); /* select camera, monitor */
@@ -385,6 +417,12 @@ static int __init devices_setup(void)
 		    (1 << 14)), /* RMII */
 		  FPGA_OUT);

+	/* turn on USB clocks, use external clock */
+	ctrl_outw((ctrl_inw(PORT_MSELCRB) & ~0xc000) | 0x8000, PORT_MSELCRB);
+
+	/* enable USB0 port */
+	ctrl_outw(0x0600, 0xa40501d4);
+
 	/* enable IRQ 0,1,2 */
 	gpio_request(GPIO_FN_INTC_IRQ0, NULL);
 	gpio_request(GPIO_FN_INTC_IRQ1, NULL);

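Note: the MSELCRB write in devices_setup() above is a plain read-modify-write: bits 15:14 are cleared and then set to 10, which per the comment in the patch selects the external USB clock, while all other bits are preserved. A minimal standalone C sketch of just that bit manipulation (test values are made up, not board state):

#include <assert.h>
#include <stdint.h>

/* models: ctrl_outw((ctrl_inw(PORT_MSELCRB) & ~0xc000) | 0x8000, PORT_MSELCRB); */
static uint16_t mselcrb_usb_extal(uint16_t v)
{
	return (v & ~0xc000) | 0x8000;	/* bits 15:14 := 0b10 */
}

int main(void)
{
	assert(mselcrb_usb_extal(0x0000) == 0x8000);
	assert(mselcrb_usb_extal(0xffff) == 0xbfff);	/* low bits preserved */
	return 0;
}
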
+ 61 - 0
arch/sh/include/asm/hwblk.h

@@ -0,0 +1,61 @@
+#ifndef __ASM_SH_HWBLK_H
+#define __ASM_SH_HWBLK_H
+
+#include <asm/clock.h>
+#include <asm/io.h>
+
+#define HWBLK_AREA_FLAG_PARENT (1 << 0) /* valid parent */
+
+#define HWBLK_AREA(_flags, _parent)		\
+{						\
+	.flags = _flags,			\
+	.parent = _parent,			\
+}
+
+struct hwblk_area {
+	unsigned long cnt;
+	unsigned char parent;
+	unsigned char flags;
+};
+
+#define HWBLK(_mstp, _bit, _area)		\
+{						\
+	.mstp = (void __iomem *)_mstp,		\
+	.bit = _bit,				\
+	.area = _area,				\
+}
+
+struct hwblk {
+	void __iomem *mstp;
+	unsigned char bit;
+	unsigned char area;
+	unsigned long cnt;
+};
+
+struct hwblk_info {
+	struct hwblk_area *areas;
+	int nr_areas;
+	struct hwblk *hwblks;
+	int nr_hwblks;
+};
+
+/* Should be defined by processor-specific code */
+int arch_hwblk_init(void);
+int arch_hwblk_sleep_mode(void);
+
+int hwblk_register(struct hwblk_info *info);
+int hwblk_init(void);
+
+/* allow clocks to enable and disable hardware blocks */
+#define SH_HWBLK_CLK(_name, _id, _parent, _hwblk, _flags)	\
+{							\
+	.name		= _name,			\
+	.id		= _id,				\
+	.parent		= _parent,			\
+	.arch_flags	= _hwblk,			\
+	.flags		= _flags,			\
+}
+
+int sh_hwblk_clk_register(struct clk *clks, int nr);
+
+#endif /* __ASM_SH_HWBLK_H */

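Note: the HWBLK_AREA_FLAG_PARENT flag above lets a power area pin its parent area while it has users; the usage count only propagates upward on the 0->1 and 1->0 transitions. A standalone userspace model of that refcount chain, mirroring the hwblk_area_inc()/hwblk_area_dec() logic added in hwblk.c below (simplified types, no locking):

#include <assert.h>

#define AREA_FLAG_PARENT 1	/* stands in for HWBLK_AREA_FLAG_PARENT */

struct area { unsigned long cnt; unsigned char parent, flags; };

static void area_inc(struct area *a, int i)
{
	if (++a[i].cnt == 1 && (a[i].flags & AREA_FLAG_PARENT))
		area_inc(a, a[i].parent);
}

static void area_dec(struct area *a, int i)
{
	if (a[i].cnt-- == 1 && (a[i].flags & AREA_FLAG_PARENT))
		area_dec(a, a[i].parent);
}

int main(void)
{
	/* area 1 is a child of area 0, as CORE_AREA_BM is of CORE_AREA on SH7722 */
	struct area a[2] = { { 0, 0, 0 }, { 0, 0, AREA_FLAG_PARENT } };

	area_inc(a, 1);
	assert(a[0].cnt == 1 && a[1].cnt == 1);	/* parent pinned on first user */
	area_inc(a, 1);
	assert(a[0].cnt == 1 && a[1].cnt == 2);	/* parent counted only once */
	area_dec(a, 1);
	area_dec(a, 1);
	assert(a[0].cnt == 0 && a[1].cnt == 0);	/* released on last user */
	return 0;
}
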
+ 9 - 0
arch/sh/include/asm/suspend.h

@@ -10,6 +10,15 @@ struct swsusp_arch_regs {
 	struct pt_regs user_regs;
 	unsigned long bank1_regs[8];
 };
+
+void sh_mobile_call_standby(unsigned long mode);
+
+#ifdef CONFIG_CPU_IDLE
+void sh_mobile_setup_cpuidle(void);
+#else
+static inline void sh_mobile_setup_cpuidle(void) {}
+#endif
+
 #endif

 /* flags passed to assembly suspend code */

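Note: the CONFIG_CPU_IDLE stub above uses the standard kernel idiom of providing an empty static inline when a feature is compiled out, so callers such as sh_pm_init() need no #ifdef of their own. A standalone illustration of the pattern (feature name is hypothetical):

#include <stdio.h>

#define CONFIG_FEATURE 0	/* flip to 1 to build the real version */

#if CONFIG_FEATURE
static void feature_setup(void) { printf("feature enabled\n"); }
#else
static inline void feature_setup(void) { }	/* optimizes away entirely */
#endif

int main(void)
{
	feature_setup();	/* caller stays unconditional */
	return 0;
}
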
+ 14 - 0
arch/sh/include/cpu-sh4/cpu/sh7722.h

@@ -221,4 +221,18 @@ enum {
 	GPIO_FN_KEYOUT3, GPIO_FN_KEYOUT4_IN6, GPIO_FN_KEYOUT5_IN5,
 };

+enum {
+	HWBLK_UNKNOWN = 0,
+	HWBLK_TLB, HWBLK_IC, HWBLK_OC, HWBLK_URAM, HWBLK_XYMEM,
+	HWBLK_INTC, HWBLK_DMAC, HWBLK_SHYWAY, HWBLK_HUDI,
+	HWBLK_UBC, HWBLK_TMU, HWBLK_CMT, HWBLK_RWDT, HWBLK_FLCTL,
+	HWBLK_SCIF0, HWBLK_SCIF1, HWBLK_SCIF2, HWBLK_SIO,
+	HWBLK_SIOF0, HWBLK_SIOF1, HWBLK_IIC, HWBLK_RTC,
+	HWBLK_TPU, HWBLK_IRDA, HWBLK_SDHI, HWBLK_SIM, HWBLK_KEYSC,
+	HWBLK_TSIF, HWBLK_USBF, HWBLK_2DG, HWBLK_SIU, HWBLK_VOU,
+	HWBLK_JPU, HWBLK_BEU, HWBLK_CEU, HWBLK_VEU, HWBLK_VPU,
+	HWBLK_LCDC,
+	HWBLK_NR,
+};
+
 #endif /* __ASM_SH7722_H__ */

+ 1 - 1
arch/sh/kernel/cpu/Makefile

@@ -19,4 +19,4 @@ obj-$(CONFIG_UBC_WAKEUP)	+= ubc.o
 obj-$(CONFIG_SH_ADC)		+= adc.o
 obj-$(CONFIG_SH_CLK_CPG)	+= clock-cpg.o

-obj-y	+= irq/ init.o clock.o
+obj-y	+= irq/ init.o clock.o hwblk.o

+ 130 - 0
arch/sh/kernel/cpu/hwblk.c

@@ -0,0 +1,130 @@
+#include <linux/clk.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <asm/suspend.h>
+#include <asm/hwblk.h>
+#include <asm/clock.h>
+
+static DEFINE_SPINLOCK(hwblk_lock);
+
+static void hwblk_area_inc(struct hwblk_info *info, int area)
+{
+	struct hwblk_area *hap = info->areas + area;
+
+	hap->cnt++;
+	if (hap->cnt == 1)
+		if (hap->flags & HWBLK_AREA_FLAG_PARENT)
+			hwblk_area_inc(info, hap->parent);
+}
+
+static void hwblk_area_dec(struct hwblk_info *info, int area)
+{
+	struct hwblk_area *hap = info->areas + area;
+
+	if (hap->cnt == 1)
+		if (hap->flags & HWBLK_AREA_FLAG_PARENT)
+			hwblk_area_dec(info, hap->parent);
+	hap->cnt--;
+}
+
+static void hwblk_enable(struct hwblk_info *info, int hwblk)
+{
+	struct hwblk *hp = info->hwblks + hwblk;
+	unsigned long tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&hwblk_lock, flags);
+
+	hp->cnt++;
+	if (hp->cnt == 1) {
+		hwblk_area_inc(info, hp->area);
+
+		tmp = __raw_readl(hp->mstp);
+		tmp &= ~(1 << hp->bit);
+		__raw_writel(tmp, hp->mstp);
+	}
+
+	spin_unlock_irqrestore(&hwblk_lock, flags);
+}
+
+static void hwblk_disable(struct hwblk_info *info, int hwblk)
+{
+	struct hwblk *hp = info->hwblks + hwblk;
+	unsigned long tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&hwblk_lock, flags);
+
+	if (hp->cnt == 1) {
+		hwblk_area_dec(info, hp->area);
+
+		tmp = __raw_readl(hp->mstp);
+		tmp |= 1 << hp->bit;
+		__raw_writel(tmp, hp->mstp);
+	}
+	hp->cnt--;
+
+	spin_unlock_irqrestore(&hwblk_lock, flags);
+}
+
+static struct hwblk_info *hwblk_info;
+
+int __init hwblk_register(struct hwblk_info *info)
+{
+	hwblk_info = info;
+	return 0;
+}
+
+int __init __weak arch_hwblk_init(void)
+{
+	return 0;
+}
+
+int __weak arch_hwblk_sleep_mode(void)
+{
+	return SUSP_SH_SLEEP;
+}
+
+int __init hwblk_init(void)
+{
+	return arch_hwblk_init();
+}
+
+/* allow clocks to enable and disable hardware blocks */
+static int sh_hwblk_clk_enable(struct clk *clk)
+{
+	if (!hwblk_info)
+		return -ENOENT;
+
+	hwblk_enable(hwblk_info, clk->arch_flags);
+	return 0;
+}
+
+static void sh_hwblk_clk_disable(struct clk *clk)
+{
+	if (hwblk_info)
+		hwblk_disable(hwblk_info, clk->arch_flags);
+}
+
+static struct clk_ops sh_hwblk_clk_ops = {
+	.enable		= sh_hwblk_clk_enable,
+	.disable	= sh_hwblk_clk_disable,
+	.recalc		= followparent_recalc,
+};
+
+int __init sh_hwblk_clk_register(struct clk *clks, int nr)
+{
+	struct clk *clkp;
+	int ret = 0;
+	int k;
+
+	for (k = 0; !ret && (k < nr); k++) {
+		clkp = clks + k;
+		clkp->ops = &sh_hwblk_clk_ops;
+		ret |= clk_register(clkp);
+	}
+
+	return ret;
+}

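Note: hwblk_enable()/hwblk_disable() above refcount each block and only write the MSTP (module stop) register on the 0->1 and 1->0 transitions; the register sense is inverted, so clearing a bit starts the clock. A standalone model of that behaviour (single register, no locking or power areas; the bit number is illustrative):

#include <assert.h>
#include <stdint.h>

static uint32_t mstp = 0xffffffff;	/* all modules stopped */

struct blk { int bit; unsigned long cnt; };

static void blk_enable(struct blk *b)
{
	if (++b->cnt == 1)
		mstp &= ~(1u << b->bit);	/* clear stop bit: clock on */
}

static void blk_disable(struct blk *b)
{
	if (b->cnt-- == 1)
		mstp |= 1u << b->bit;		/* set stop bit: clock off */
}

int main(void)
{
	struct blk sdhi = { 18, 0 };	/* cf. HWBLK_SDHI -> MSTPCR2 bit 18 */

	blk_enable(&sdhi);
	blk_enable(&sdhi);			/* second user: no register write */
	assert((mstp & (1u << 18)) == 0);
	blk_disable(&sdhi);
	assert((mstp & (1u << 18)) == 0);	/* still held by first user */
	blk_disable(&sdhi);
	assert(mstp == 0xffffffff);		/* stopped again */
	return 0;
}
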
+ 1 - 1
arch/sh/kernel/cpu/sh4a/Makefile

@@ -25,7 +25,7 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7780)	:= clock-sh7780.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7785)	:= clock-sh7785.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7786)	:= clock-sh7786.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7343)	:= clock-sh7343.o
-clock-$(CONFIG_CPU_SUBTYPE_SH7722)	:= clock-sh7722.o
+clock-$(CONFIG_CPU_SUBTYPE_SH7722)	:= clock-sh7722.o hwblk-sh7722.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7723)	:= clock-sh7723.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7724)	:= clock-sh7724.o
 clock-$(CONFIG_CPU_SUBTYPE_SH7366)	:= clock-sh7366.o

+ 32 - 28
arch/sh/kernel/cpu/sh4a/clock-sh7722.c

@@ -22,6 +22,8 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <asm/clock.h>
+#include <asm/hwblk.h>
+#include <cpu/sh7722.h>

 /* SH7722 registers */
 #define FRQCR		0xa4150000
@@ -140,35 +142,37 @@ struct clk div6_clks[] = {
 	SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0),
 };

-#define MSTP(_str, _parent, _reg, _bit, _flags) \
-  SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _flags)
+#define R_CLK &r_clk
+#define P_CLK &div4_clks[DIV4_P]
+#define B_CLK &div4_clks[DIV4_B]
+#define U_CLK &div4_clks[DIV4_U]

 static struct clk mstp_clks[] = {
-	MSTP("uram0", &div4_clks[DIV4_U], MSTPCR0, 28, CLK_ENABLE_ON_INIT),
-	MSTP("xymem0", &div4_clks[DIV4_B], MSTPCR0, 26, CLK_ENABLE_ON_INIT),
-	MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0),
-	MSTP("cmt0", &r_clk, MSTPCR0, 14, 0),
-	MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0),
-	MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0),
-	MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 7, 0),
-	MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 6, 0),
-	MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 5, 0),
-
-	MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0),
-	MSTP("rtc0", &r_clk, MSTPCR1, 8, 0),
-
-	MSTP("sdhi0", &div4_clks[DIV4_P], MSTPCR2, 18, 0),
-	MSTP("keysc0", &r_clk, MSTPCR2, 14, 0),
-	MSTP("usbf0", &div4_clks[DIV4_P], MSTPCR2, 11, 0),
-	MSTP("2dg0", &div4_clks[DIV4_B], MSTPCR2, 9, 0),
-	MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 8, 0),
-	MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0),
-	MSTP("jpu0", &div4_clks[DIV4_B], MSTPCR2, 6, CLK_ENABLE_ON_INIT),
-	MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 0),
-	MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0),
-	MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, CLK_ENABLE_ON_INIT),
-	MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, CLK_ENABLE_ON_INIT),
-	MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0),
+	SH_HWBLK_CLK("uram0", -1, U_CLK, HWBLK_URAM, CLK_ENABLE_ON_INIT),
+	SH_HWBLK_CLK("xymem0", -1, B_CLK, HWBLK_XYMEM, CLK_ENABLE_ON_INIT),
+	SH_HWBLK_CLK("tmu0", -1, P_CLK, HWBLK_TMU, 0),
+	SH_HWBLK_CLK("cmt0", -1, R_CLK, HWBLK_CMT, 0),
+	SH_HWBLK_CLK("rwdt0", -1, R_CLK, HWBLK_RWDT, 0),
+	SH_HWBLK_CLK("flctl0", -1, P_CLK, HWBLK_FLCTL, 0),
+	SH_HWBLK_CLK("scif0", -1, P_CLK, HWBLK_SCIF0, 0),
+	SH_HWBLK_CLK("scif1", -1, P_CLK, HWBLK_SCIF1, 0),
+	SH_HWBLK_CLK("scif2", -1, P_CLK, HWBLK_SCIF2, 0),
+
+	SH_HWBLK_CLK("i2c0", -1, P_CLK, HWBLK_IIC, 0),
+	SH_HWBLK_CLK("rtc0", -1, R_CLK, HWBLK_RTC, 0),
+
+	SH_HWBLK_CLK("sdhi0", -1, P_CLK, HWBLK_SDHI, 0),
+	SH_HWBLK_CLK("keysc0", -1, R_CLK, HWBLK_KEYSC, 0),
+	SH_HWBLK_CLK("usbf0", -1, P_CLK, HWBLK_USBF, 0),
+	SH_HWBLK_CLK("2dg0", -1, B_CLK, HWBLK_2DG, 0),
+	SH_HWBLK_CLK("siu0", -1, B_CLK, HWBLK_SIU, 0),
+	SH_HWBLK_CLK("vou0", -1, B_CLK, HWBLK_VOU, 0),
+	SH_HWBLK_CLK("jpu0", -1, B_CLK, HWBLK_JPU, CLK_ENABLE_ON_INIT),
+	SH_HWBLK_CLK("beu0", -1, B_CLK, HWBLK_BEU, 0),
+	SH_HWBLK_CLK("ceu0", -1, B_CLK, HWBLK_CEU, 0),
+	SH_HWBLK_CLK("veu0", -1, B_CLK, HWBLK_VEU, CLK_ENABLE_ON_INIT),
+	SH_HWBLK_CLK("vpu0", -1, B_CLK, HWBLK_VPU, CLK_ENABLE_ON_INIT),
+	SH_HWBLK_CLK("lcdc0", -1, P_CLK, HWBLK_LCDC, 0),
 };

 int __init arch_clk_init(void)
@@ -191,7 +195,7 @@ int __init arch_clk_init(void)
 		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));

 	if (!ret)
-		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
+		ret = sh_hwblk_clk_register(mstp_clks, ARRAY_SIZE(mstp_clks));

 	return ret;
 }

+ 106 - 0
arch/sh/kernel/cpu/sh4a/hwblk-sh7722.c

@@ -0,0 +1,106 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/hwblk-sh7722.c
+ *
+ * SH7722 hardware block support
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/suspend.h>
+#include <asm/hwblk.h>
+#include <cpu/sh7722.h>
+
+/* SH7722 registers */
+#define MSTPCR0		0xa4150030
+#define MSTPCR1		0xa4150034
+#define MSTPCR2		0xa4150038
+
+/* SH7722 Power Domains */
+enum { CORE_AREA, SUB_AREA, CORE_AREA_BM };
+static struct hwblk_area sh7722_hwblk_area[] = {
+	[CORE_AREA] = HWBLK_AREA(0, 0),
+	[CORE_AREA_BM] = HWBLK_AREA(HWBLK_AREA_FLAG_PARENT, CORE_AREA),
+	[SUB_AREA] = HWBLK_AREA(0, 0),
+};
+
+/* Table mapping HWBLK to Module Stop Bit and Power Domain */
+static struct hwblk sh7722_hwblk[HWBLK_NR] = {
+	[HWBLK_TLB] = HWBLK(MSTPCR0, 31, CORE_AREA),
+	[HWBLK_IC] = HWBLK(MSTPCR0, 30, CORE_AREA),
+	[HWBLK_OC] = HWBLK(MSTPCR0, 29, CORE_AREA),
+	[HWBLK_URAM] = HWBLK(MSTPCR0, 28, CORE_AREA),
+	[HWBLK_XYMEM] = HWBLK(MSTPCR0, 26, CORE_AREA),
+	[HWBLK_INTC] = HWBLK(MSTPCR0, 22, CORE_AREA),
+	[HWBLK_DMAC] = HWBLK(MSTPCR0, 21, CORE_AREA_BM),
+	[HWBLK_SHYWAY] = HWBLK(MSTPCR0, 20, CORE_AREA),
+	[HWBLK_HUDI] = HWBLK(MSTPCR0, 19, CORE_AREA),
+	[HWBLK_UBC] = HWBLK(MSTPCR0, 17, CORE_AREA),
+	[HWBLK_TMU] = HWBLK(MSTPCR0, 15, CORE_AREA),
+	[HWBLK_CMT] = HWBLK(MSTPCR0, 14, SUB_AREA),
+	[HWBLK_RWDT] = HWBLK(MSTPCR0, 13, SUB_AREA),
+	[HWBLK_FLCTL] = HWBLK(MSTPCR0, 10, CORE_AREA),
+	[HWBLK_SCIF0] = HWBLK(MSTPCR0, 7, CORE_AREA),
+	[HWBLK_SCIF1] = HWBLK(MSTPCR0, 6, CORE_AREA),
+	[HWBLK_SCIF2] = HWBLK(MSTPCR0, 5, CORE_AREA),
+	[HWBLK_SIO] = HWBLK(MSTPCR0, 3, CORE_AREA),
+	[HWBLK_SIOF0] = HWBLK(MSTPCR0, 2, CORE_AREA),
+	[HWBLK_SIOF1] = HWBLK(MSTPCR0, 1, CORE_AREA),
+
+	[HWBLK_IIC] = HWBLK(MSTPCR1, 9, CORE_AREA),
+	[HWBLK_RTC] = HWBLK(MSTPCR1, 8, SUB_AREA),
+
+	[HWBLK_TPU] = HWBLK(MSTPCR2, 25, CORE_AREA),
+	[HWBLK_IRDA] = HWBLK(MSTPCR2, 24, CORE_AREA),
+	[HWBLK_SDHI] = HWBLK(MSTPCR2, 18, CORE_AREA),
+	[HWBLK_SIM] = HWBLK(MSTPCR2, 16, CORE_AREA),
+	[HWBLK_KEYSC] = HWBLK(MSTPCR2, 14, SUB_AREA),
+	[HWBLK_TSIF] = HWBLK(MSTPCR2, 13, SUB_AREA),
+	[HWBLK_USBF] = HWBLK(MSTPCR2, 11, CORE_AREA),
+	[HWBLK_2DG] = HWBLK(MSTPCR2, 9, CORE_AREA_BM),
+	[HWBLK_SIU] = HWBLK(MSTPCR2, 8, CORE_AREA),
+	[HWBLK_JPU] = HWBLK(MSTPCR2, 6, CORE_AREA_BM),
+	[HWBLK_VOU] = HWBLK(MSTPCR2, 5, CORE_AREA_BM),
+	[HWBLK_BEU] = HWBLK(MSTPCR2, 4, CORE_AREA_BM),
+	[HWBLK_CEU] = HWBLK(MSTPCR2, 3, CORE_AREA_BM),
+	[HWBLK_VEU] = HWBLK(MSTPCR2, 2, CORE_AREA_BM),
+	[HWBLK_VPU] = HWBLK(MSTPCR2, 1, CORE_AREA_BM),
+	[HWBLK_LCDC] = HWBLK(MSTPCR2, 0, CORE_AREA_BM),
+};
+
+static struct hwblk_info sh7722_hwblk_info = {
+	.areas = sh7722_hwblk_area,
+	.nr_areas = ARRAY_SIZE(sh7722_hwblk_area),
+	.hwblks = sh7722_hwblk,
+	.nr_hwblks = ARRAY_SIZE(sh7722_hwblk),
+};
+
+int arch_hwblk_sleep_mode(void)
+{
+	if (!sh7722_hwblk_area[CORE_AREA].cnt)
+		return SUSP_SH_STANDBY | SUSP_SH_SF;
+
+	if (!sh7722_hwblk_area[CORE_AREA_BM].cnt)
+		return SUSP_SH_SLEEP | SUSP_SH_SF;
+
+	return SUSP_SH_SLEEP;
+}
+
+int __init arch_hwblk_init(void)
+{
+	return hwblk_register(&sh7722_hwblk_info);
+}

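Note: arch_hwblk_sleep_mode() above picks the deepest sleep state the active hardware blocks permit: full standby with self-refresh when nothing in CORE_AREA holds a clock, sleep plus self-refresh when only the bus-master area is idle, plain sleep otherwise. A standalone model of the decision (flag values are illustrative, not the real SUSP_SH_* encoding):

#include <assert.h>

#define SLEEP	1
#define STANDBY	2
#define SF	4

static int sleep_mode(unsigned long core_cnt, unsigned long core_bm_cnt)
{
	if (!core_cnt)
		return STANDBY | SF;	/* whole core area idle */
	if (!core_bm_cnt)
		return SLEEP | SF;	/* only bus masters idle */
	return SLEEP;			/* something still needs clocks */
}

int main(void)
{
	assert(sleep_mode(0, 0) == (STANDBY | SF));
	assert(sleep_mode(2, 0) == (SLEEP | SF));
	assert(sleep_mode(2, 1) == SLEEP);
	return 0;
}
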
+ 1 - 0
arch/sh/kernel/cpu/shmobile/Makefile

@@ -4,3 +4,4 @@

 # Power Management & Sleep mode
 obj-$(CONFIG_PM)	+= pm.o sleep.o
+obj-$(CONFIG_CPU_IDLE)	+= cpuidle.o

+ 102 - 0
arch/sh/kernel/cpu/shmobile/cpuidle.c

@@ -0,0 +1,102 @@
+/*
+ * arch/sh/kernel/cpu/shmobile/cpuidle.c
+ *
+ * Cpuidle support code for SuperH Mobile
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/suspend.h>
+#include <linux/cpuidle.h>
+#include <asm/suspend.h>
+#include <asm/uaccess.h>
+#include <asm/hwblk.h>
+
+static unsigned long cpuidle_mode[] = {
+	SUSP_SH_SLEEP, /* regular sleep mode */
+	SUSP_SH_SLEEP | SUSP_SH_SF, /* sleep mode + self refresh */
+};
+
+static int cpuidle_sleep_enter(struct cpuidle_device *dev,
+			       struct cpuidle_state *state)
+{
+	unsigned long allowed_mode = arch_hwblk_sleep_mode();
+	ktime_t before, after;
+	int requested_state = state - &dev->states[0];
+	int allowed_state;
+	int k;
+
+	/* convert allowed mode to allowed state */
+	for (k = ARRAY_SIZE(cpuidle_mode) - 1; k > 0; k--)
+		if (cpuidle_mode[k] == allowed_mode)
+			break;
+
+	allowed_state = k;
+
+	/* take the following into account for sleep mode selection:
+	 * - allowed_state: best mode allowed by hardware (clock deps)
+	 * - requested_state: best mode allowed by software (latencies)
+	 */
+	k = min_t(int, allowed_state, requested_state);
+
+	dev->last_state = &dev->states[k];
+	before = ktime_get();
+	sh_mobile_call_standby(cpuidle_mode[k]);
+	after = ktime_get();
+	return ktime_to_ns(ktime_sub(after, before)) >> 10;
+}
+
+static struct cpuidle_device cpuidle_dev;
+static struct cpuidle_driver cpuidle_driver = {
+	.name =		"sh_idle",
+	.owner =	THIS_MODULE,
+};
+
+void sh_mobile_setup_cpuidle(void)
+{
+	struct cpuidle_device *dev = &cpuidle_dev;
+	struct cpuidle_state *state;
+	int i;
+
+	cpuidle_register_driver(&cpuidle_driver);
+
+	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
+		dev->states[i].name[0] = '\0';
+		dev->states[i].desc[0] = '\0';
+	}
+
+	i = CPUIDLE_DRIVER_STATE_START;
+
+	state = &dev->states[i++];
+	snprintf(state->name, CPUIDLE_NAME_LEN, "C0");
+	strncpy(state->desc, "SuperH Sleep Mode", CPUIDLE_DESC_LEN);
+	state->exit_latency = 1;
+	state->target_residency = 1 * 2;
+	state->power_usage = 3;
+	state->flags = 0;
+	state->flags |= CPUIDLE_FLAG_SHALLOW;
+	state->flags |= CPUIDLE_FLAG_TIME_VALID;
+	state->enter = cpuidle_sleep_enter;
+
+	dev->safe_state = state;
+
+	state = &dev->states[i++];
+	snprintf(state->name, CPUIDLE_NAME_LEN, "C1");
+	strncpy(state->desc, "SuperH Sleep Mode [SF]", CPUIDLE_DESC_LEN);
+	state->exit_latency = 100;
+	state->target_residency = 1 * 2;
+	state->power_usage = 1;
+	state->flags = 0;
+	state->flags |= CPUIDLE_FLAG_TIME_VALID;
+	state->enter = cpuidle_sleep_enter;
+
+	dev->state_count = i;
+
+	cpuidle_register_device(dev);
+}

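Note: cpuidle_sleep_enter() above reconciles two limits: the governor's requested state (driven by latency) and the deepest state the hardware-block layer currently allows, recovered by scanning cpuidle_mode[] backwards; the state actually entered is the smaller of the two indices. A standalone model of that selection:

#include <assert.h>

#define SLEEP		1	/* illustrative mode encodings */
#define SLEEP_SF	3

static const unsigned long cpuidle_mode[] = { SLEEP, SLEEP_SF };
#define NR_MODES (sizeof(cpuidle_mode) / sizeof(cpuidle_mode[0]))

static int pick_state(unsigned long allowed_mode, int requested_state)
{
	int k;

	/* convert allowed mode to allowed state; falls back to state 0 */
	for (k = (int)NR_MODES - 1; k > 0; k--)
		if (cpuidle_mode[k] == allowed_mode)
			break;

	return requested_state < k ? requested_state : k;	/* min() */
}

int main(void)
{
	assert(pick_state(SLEEP_SF, 1) == 1);	/* both sides allow the deep state */
	assert(pick_state(SLEEP, 1) == 0);	/* hardware vetoes self-refresh */
	assert(pick_state(SLEEP_SF, 0) == 0);	/* governor wants the shallow one */
	return 0;
}
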
+ 13 - 13
arch/sh/kernel/cpu/shmobile/pm.c

@@ -1,5 +1,5 @@
 /*
- * arch/sh/kernel/cpu/sh4a/pm-sh_mobile.c
+ * arch/sh/kernel/cpu/shmobile/pm.c
  *
  * Power management support code for SuperH Mobile
  *
@@ -32,20 +32,17 @@
  *
  * R-standby mode is unsupported, but will be added in the future
  * U-standby mode is low priority since it needs bootloader hacks
- *
- * All modes should be tied in with cpuidle. But before that can
- * happen we need to keep track of enabled hardware blocks so we
- * can avoid entering sleep modes that stop clocks to hardware
- * blocks that are in use even though the cpu core is idle.
  */

+#define ILRAM_BASE 0xe5200000
+
 extern const unsigned char sh_mobile_standby[];
 extern const unsigned int sh_mobile_standby_size;

-static void sh_mobile_call_standby(unsigned long mode)
+void sh_mobile_call_standby(unsigned long mode)
 {
 	extern void *vbr_base;
-	void *onchip_mem = (void *)0xe5200000; /* ILRAM */
+	void *onchip_mem = (void *)ILRAM_BASE;
 	void (*standby_onchip_mem)(unsigned long) = onchip_mem;

 	/* Note: Wake up from sleep may generate exceptions!
@@ -55,11 +52,6 @@ static void sh_mobile_call_standby(unsigned long mode)
 	if (mode & SUSP_SH_SF)
 		asm volatile("ldc %0, vbr" : : "r" (onchip_mem) : "memory");

-	/* Copy the assembly snippet to the otherwise ununsed ILRAM */
-	memcpy(onchip_mem, sh_mobile_standby, sh_mobile_standby_size);
-	wmb();
-	ctrl_barrier();
-
 	/* Let assembly snippet in on-chip memory handle the rest */
 	standby_onchip_mem(mode);

@@ -85,7 +77,15 @@ static struct platform_suspend_ops sh_pm_ops = {

 static int __init sh_pm_init(void)
 {
+	void *onchip_mem = (void *)ILRAM_BASE;
+
+	/* Copy the assembly snippet to the otherwise ununsed ILRAM */
+	memcpy(onchip_mem, sh_mobile_standby, sh_mobile_standby_size);
+	wmb();
+	ctrl_barrier();
+
 	suspend_set_ops(&sh_pm_ops);
+	sh_mobile_setup_cpuidle();
 	return 0;
 }


+ 2 - 0
arch/sh/kernel/time.c

@@ -21,6 +21,7 @@
 #include <linux/smp.h>
 #include <linux/rtc.h>
 #include <asm/clock.h>
+#include <asm/hwblk.h>
 #include <asm/rtc.h>

 /* Dummy RTC ops */
@@ -96,6 +97,7 @@ void __init time_init(void)
 	if (board_time_init)
 		board_time_init();

+	hwblk_init();
 	clk_init();

 	rtc_sh_get_time(&xtime);

+ 2 - 13
arch/x86/Kconfig

@@ -1913,25 +1913,14 @@ config DMAR_DEFAULT_ON
 	  recommended you say N here while the DMAR code remains
 	  experimental.

-config DMAR_GFX_WA
-	def_bool y
-	prompt "Support for Graphics workaround"
-	depends on DMAR
-	---help---
-	  Current Graphics drivers tend to use physical address
-	  for DMA and avoid using DMA APIs. Setting this config
-	  option permits the IOMMU driver to set a unity map for
-	  all the OS-visible memory. Hence the driver can continue
-	  to use physical addresses for DMA.
-
 config DMAR_FLOPPY_WA
 	def_bool y
 	depends on DMAR
 	---help---
-	  Floppy disk drivers are know to bypass DMA API calls
+	  Floppy disk drivers are known to bypass DMA API calls
 	  thereby failing to work when IOMMU is enabled. This
 	  workaround will setup a 1:1 mapping for the first
-	  16M to make floppy (an ISA device) work.
+	  16MiB to make floppy (an ISA device) work.

 config INTR_REMAP
 	bool "Support for Interrupt Remapping (EXPERIMENTAL)"

+ 1 - 1
arch/x86/kernel/pci-dma.c

@@ -211,11 +211,11 @@ static __init int iommu_setup(char *p)
 #ifdef CONFIG_SWIOTLB
 		if (!strncmp(p, "soft", 4))
 			swiotlb = 1;
+#endif
 		if (!strncmp(p, "pt", 2)) {
 			iommu_pass_through = 1;
 			return 1;
 		}
-#endif

 		gart_parse_options(p);


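Note: the intel-iommu.c rework below switches the page-table code from byte addresses to page frame numbers. Two pieces are easy to model in isolation: the mm<->VT-d PFN conversions are shifts by (PAGE_SHIFT - VTD_PAGE_SHIFT), which is zero when both page sizes are 4KiB, and the new aligned_nrpages() counts how many VT-d pages a byte range touches. A standalone check assuming 4KiB pages:

#include <assert.h>

#define PAGE_SHIFT	12
#define VTD_PAGE_SHIFT	12
#define PAGE_SIZE	(1ul << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);	/* identity for 4KiB/4KiB */
}

static unsigned long aligned_nrpages(unsigned long host_addr, unsigned long size)
{
	host_addr &= ~PAGE_MASK;		/* offset into the first page */
	host_addr += size + PAGE_SIZE - 1;
	return host_addr >> VTD_PAGE_SHIFT;
}

int main(void)
{
	assert(mm_to_dma_pfn(0x1234) == 0x1234);
	assert(aligned_nrpages(0x1000, 0x1000) == 1);	/* aligned, exactly one page */
	assert(aligned_nrpages(0x1fff, 2) == 2);	/* two bytes straddle a boundary */
	assert(aligned_nrpages(0x1001, 0x1000) == 2);	/* unaligned 4KiB spans two */
	return 0;
}
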
+ 350 - 345
drivers/pci/intel-iommu.c

@@ -56,14 +56,32 @@
 #define MAX_AGAW_WIDTH 64

 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
+#define DOMAIN_MAX_PFN(gaw)  ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)

 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

-#ifndef PHYSICAL_PAGE_MASK
-#define PHYSICAL_PAGE_MASK PAGE_MASK
-#endif
+
+/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
+   are never going to work. */
+static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
+{
+	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
+}
+
+static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
+{
+	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
+}
+static inline unsigned long page_to_dma_pfn(struct page *pg)
+{
+	return mm_to_dma_pfn(page_to_pfn(pg));
+}
+static inline unsigned long virt_to_dma_pfn(void *p)
+{
+	return page_to_dma_pfn(virt_to_page(p));
+}

 /* global iommu list, set NULL for ignored DMAR units */
 static struct intel_iommu **g_iommus;
@@ -204,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)

 static inline u64 dma_pte_addr(struct dma_pte *pte)
 {
-	return (pte->val & VTD_PAGE_MASK);
+#ifdef CONFIG_64BIT
+	return pte->val & VTD_PAGE_MASK;
+#else
+	/* Must have a full atomic 64-bit read */
+	return  __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
+#endif
 }

-static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
+static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
 {
-	pte->val |= (addr & VTD_PAGE_MASK);
+	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
 }

 static inline bool dma_pte_present(struct dma_pte *pte)
@@ -217,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 	return (pte->val & 3) != 0;
 }

+static inline int first_pte_in_page(struct dma_pte *pte)
+{
+	return !((unsigned long)pte & ~VTD_PAGE_MASK);
+}
+
 /*
  * This domain is a statically identity mapping domain.
  *	1. This domain creats a static 1:1 mapping to all usable memory.
@@ -244,7 +272,6 @@ struct dmar_domain {
 	struct iova_domain iovad;	/* iova's that belong to this domain */

 	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
 	int		gaw;		/* max guest address width */

 	/* adjusted guest address width, 0 is level 2 30-bit */
@@ -648,80 +675,78 @@ static inline int width_to_agaw(int width)

 static inline unsigned int level_to_offset_bits(int level)
 {
-	return (12 + (level - 1) * LEVEL_STRIDE);
+	return (level - 1) * LEVEL_STRIDE;
 }

-static inline int address_level_offset(u64 addr, int level)
+static inline int pfn_level_offset(unsigned long pfn, int level)
 {
-	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
+	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 }

-static inline u64 level_mask(int level)
+static inline unsigned long level_mask(int level)
 {
-	return ((u64)-1 << level_to_offset_bits(level));
+	return -1UL << level_to_offset_bits(level);
 }

-static inline u64 level_size(int level)
+static inline unsigned long level_size(int level)
 {
-	return ((u64)1 << level_to_offset_bits(level));
+	return 1UL << level_to_offset_bits(level);
 }

-static inline u64 align_to_level(u64 addr, int level)
+static inline unsigned long align_to_level(unsigned long pfn, int level)
 {
-	return ((addr + level_size(level) - 1) & level_mask(level));
+	return (pfn + level_size(level) - 1) & level_mask(level);
 }

-static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
+static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
+				      unsigned long pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 	struct dma_pte *parent, *pte = NULL;
 	int level = agaw_to_level(domain->agaw);
 	int offset;
-	unsigned long flags;

 	BUG_ON(!domain->pgd);
-
-	addr &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
 	parent = domain->pgd;

-	spin_lock_irqsave(&domain->mapping_lock, flags);
 	while (level > 0) {
 		void *tmp_page;

-		offset = address_level_offset(addr, level);
+		offset = pfn_level_offset(pfn, level);
 		pte = &parent[offset];
 		if (level == 1)
 			break;

 		if (!dma_pte_present(pte)) {
+			uint64_t pteval;
+
 			tmp_page = alloc_pgtable_page();

-			if (!tmp_page) {
-				spin_unlock_irqrestore(&domain->mapping_lock,
-					flags);
+			if (!tmp_page)
 				return NULL;
+
+			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
+			pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
+				/* Someone else set it while we were thinking; use theirs. */
+				free_pgtable_page(tmp_page);
+			} else {
+				dma_pte_addr(pte);
+				domain_flush_cache(domain, pte, sizeof(*pte));
 			}
 			}
-			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
-			/*
-			 * high level table always sets r/w, last level page
-			 * table control read/write
-			 */
-			dma_set_pte_readable(pte);
-			dma_set_pte_writable(pte);
-			domain_flush_cache(domain, pte, sizeof(*pte));
 		}
 		}
 		parent = phys_to_virt(dma_pte_addr(pte));
 		level--;
 	}

 	return pte;
 	return pte;
 }

 /* return address's pte at specific level */
-		int level)
+static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
+					 unsigned long pfn,
+					 int level)
 {
 	struct dma_pte *parent, *pte = NULL;
 	int total = agaw_to_level(domain->agaw);
@@ -729,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,

 	parent = domain->pgd;
 	while (level <= total) {
-		offset = address_level_offset(addr, total);
+		offset = pfn_level_offset(pfn, total);
 		pte = &parent[offset];
 		if (level == total)
 			return pte;
@@ -742,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 	return NULL;
 }

-/* clear one page's page table */
-static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
-{
-	struct dma_pte *pte = NULL;
-
-	/* get last level pte */
-	pte = dma_addr_level_pte(domain, addr, 1);
-
-	if (pte) {
-		dma_clear_pte(pte);
-		domain_flush_cache(domain, pte, sizeof(*pte));
-	}
-}
-
 /* clear last level pte, a tlb flush should be followed */
-static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
+static void dma_pte_clear_range(struct dmar_domain *domain,
+				unsigned long start_pfn,
+				unsigned long last_pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
-	int npages;
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	struct dma_pte *first_pte, *pte;
+
+	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);

-	start &= (((u64)1) << addr_width) - 1;
-	end &= (((u64)1) << addr_width) - 1;
-	/* in case it's partial page */
-	start &= PAGE_MASK;
-	end = PAGE_ALIGN(end);
-	npages = (end - start) / VTD_PAGE_SIZE;
+	/* we don't need lock here; nobody else touches the iova range */
+	while (start_pfn <= last_pfn) {
+		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+		if (!pte) {
+			start_pfn = align_to_level(start_pfn + 1, 2);
+			continue;
+		}
+		do { 
+			dma_clear_pte(pte);
+			start_pfn++;
+			pte++;
+		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

-	/* we don't need lock here, nobody else touches the iova range */
-	while (npages--) {
-		dma_pte_clear_one(domain, start);
-		start += VTD_PAGE_SIZE;
+		domain_flush_cache(domain, first_pte,
+				   (void *)pte - (void *)first_pte);
 	}
 }

 /* free page table pages. last level pte should already be cleared */
 static void dma_pte_free_pagetable(struct dmar_domain *domain,
-	u64 start, u64 end)
+				   unsigned long start_pfn,
+				   unsigned long last_pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
-	struct dma_pte *pte;
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	struct dma_pte *first_pte, *pte;
 	int total = agaw_to_level(domain->agaw);
 	int level;
-	u64 tmp;
+	unsigned long tmp;

-	start &= (((u64)1) << addr_width) - 1;
-	end &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);

-	/* we don't need lock here, nobody else touches the iova range */
+	/* We don't need lock here; nobody else touches the iova range */
 	level = 2;
 	while (level <= total) {
-		tmp = align_to_level(start, level);
-		if (tmp >= end || (tmp + level_size(level) > end))
+		tmp = align_to_level(start_pfn, level);
+
+		/* If we can't even clear one PTE at this level, we're done */
+		if (tmp + level_size(level) - 1 > last_pfn)
 			return;

-		while (tmp < end) {
-			pte = dma_addr_level_pte(domain, tmp, level);
-			if (pte) {
-				free_pgtable_page(
-					phys_to_virt(dma_pte_addr(pte)));
-				dma_clear_pte(pte);
-				domain_flush_cache(domain, pte, sizeof(*pte));
+		while (tmp + level_size(level) - 1 <= last_pfn) {
+			first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+			if (!pte) {
+				tmp = align_to_level(tmp + 1, level + 1);
+				continue;
 			}
-			tmp += level_size(level);
+			do {
+				if (dma_pte_present(pte)) {
+					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
+					dma_clear_pte(pte);
+				}
+				pte++;
+				tmp += level_size(level);
+			} while (!first_pte_in_page(pte) &&
+				 tmp + level_size(level) - 1 <= last_pfn);
+
+			domain_flush_cache(domain, first_pte,
+					   (void *)pte - (void *)first_pte);
+			
 		}
 		level++;
 	}
 	/* free pgd */
-	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
+	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 		free_pgtable_page(domain->pgd);
 		domain->pgd = NULL;
 	}
@@ -1035,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 }

 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
-				  u64 addr, unsigned int pages)
+				  unsigned long pfn, unsigned int pages)
 {
 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
+	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;

-	BUG_ON(addr & (~VTD_PAGE_MASK));
 	BUG_ON(pages == 0);

 	/*
@@ -1054,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
 	else
 		iommu->flush.flush_iotlb(iommu, did, addr, mask,
 						DMA_TLB_PSI_FLUSH);
-	if (did)
+
+	/*
+	 * In caching mode, domain ID 0 is reserved for non-present to present
+	 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
+	 */
+	if (!cap_caching_mode(iommu->cap) || did)
 		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
 }

@@ -1279,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
 	struct pci_dev *pdev = NULL;
 	struct iova *iova;
 	int i;
-	u64 addr, size;

 	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

@@ -1302,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
 			r = &pdev->resource[i];
 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
 				continue;
-			addr = r->start;
-			addr &= PHYSICAL_PAGE_MASK;
-			size = r->end - addr;
-			size = PAGE_ALIGN(size);
-			iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
-				IOVA_PFN(size + addr) - 1);
+			iova = reserve_iova(&reserved_iova_list,
+					    IOVA_PFN(r->start),
+					    IOVA_PFN(r->end));
 			if (!iova)
 				printk(KERN_ERR "Reserve iova failed\n");
 		}
@@ -1341,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	unsigned long sagaw;

 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
-	spin_lock_init(&domain->mapping_lock);
 	spin_lock_init(&domain->iommu_lock);

 	domain_reserve_special_ranges(domain);
@@ -1388,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu;
-	u64 end;

 	/* Domain 0 is reserved, so dont process it */
 	if (!domain)
@@ -1397,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain)
 	domain_remove_dev_info(domain);
 	/* destroy iovas */
 	put_iova_domain(&domain->iovad);
-	end = DOMAIN_MAX_ADDR(domain->gaw);
-	end = end & (~PAGE_MASK);

 	/* clear ptes */
-	dma_pte_clear_range(domain, 0, end);
+	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

 	/* free page tables */
-	dma_pte_free_pagetable(domain, 0, end);
+	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

 	for_each_active_iommu(iommu, drhd)
 		if (test_bit(iommu->seq_id, &domain->iommu_bmp))
@@ -1618,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
 					     tmp->devfn);
 }

-static int
-domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
-			u64 hpa, size_t size, int prot)
+static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+			    struct scatterlist *sg, unsigned long phys_pfn,
+			    unsigned long nr_pages, int prot)
 {
-	u64 start_pfn, end_pfn;
-	struct dma_pte *pte;
-	int index;
-	int addr_width = agaw_to_width(domain->agaw);
+	struct dma_pte *first_pte = NULL, *pte = NULL;
+	phys_addr_t uninitialized_var(pteval);
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	unsigned long sg_res;

-	hpa &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);

 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
 		return -EINVAL;
-	iova &= PAGE_MASK;
-	start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
-	end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
-	index = 0;
-	while (start_pfn < end_pfn) {
-		pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
-		if (!pte)
-			return -ENOMEM;
+
+	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+
+	if (sg)
+		sg_res = 0;
+	else {
+		sg_res = nr_pages + 1;
+		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
+	}
+
+	while (nr_pages--) {
+		uint64_t tmp;
+
+		if (!sg_res) {
+			sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
+			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
+			sg->dma_length = sg->length;
+			pteval = page_to_phys(sg_page(sg)) | prot;
+		}
+		if (!pte) {
+			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
+			if (!pte)
+				return -ENOMEM;
+		}
 		/* We don't need lock here, nobody else
 		 * touches the iova range
 		 */
-		BUG_ON(dma_pte_addr(pte));
-		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
-		dma_set_pte_prot(pte, prot);
-		if (prot & DMA_PTE_SNP)
-			dma_set_pte_snp(pte);
-		domain_flush_cache(domain, pte, sizeof(*pte));
-		start_pfn++;
-		index++;
+		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
+		if (tmp) {
+			static int dumps = 5;
+			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
+			       iov_pfn, tmp, (unsigned long long)pteval);
+			if (dumps) {
+				dumps--;
+				debug_dma_dump_mappings(NULL);
+			}
+			WARN_ON(1);
+		}
+		pte++;
+		if (!nr_pages || first_pte_in_page(pte)) {
+			domain_flush_cache(domain, first_pte,
+					   (void *)pte - (void *)first_pte);
+			pte = NULL;
+		}
+		iov_pfn++;
+		pteval += VTD_PAGE_SIZE;
+		sg_res--;
+		if (!sg_res)
+			sg = sg_next(sg);
 	}
 	return 0;
 }

+static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+				    struct scatterlist *sg, unsigned long nr_pages,
+				    int prot)
+{
+	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
+}
+
+static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
+				     unsigned long phys_pfn, unsigned long nr_pages,
+				     int prot)
+{
+	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
+}
+
 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
 	if (!iommu)
@@ -1844,58 +1918,61 @@ error:

 static int iommu_identity_mapping;

+static int iommu_domain_identity_map(struct dmar_domain *domain,
+				     unsigned long long start,
+				     unsigned long long end)
+{
+	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
+	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
+
+	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
+			  dma_to_mm_pfn(last_vpfn))) {
+		printk(KERN_ERR "IOMMU: reserve iova failed\n");
+		return -ENOMEM;
+	}
+
+	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
+		 start, end, domain->id);
+	/*
+	 * RMRR range might have overlap with physical memory range,
+	 * clear it first
+	 */
+	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
+
+	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
+				  last_vpfn - first_vpfn + 1,
+				  DMA_PTE_READ|DMA_PTE_WRITE);
+}
+
 static int iommu_prepare_identity_map(struct pci_dev *pdev,
 				      unsigned long long start,
 				      unsigned long long end)
 {
 	struct dmar_domain *domain;
-	unsigned long size;
-	unsigned long long base;
 	int ret;

 	printk(KERN_INFO
-		"IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
-		pci_name(pdev), start, end);
-	if (iommu_identity_mapping)
-		domain = si_domain;
-	else
-		/* page table init */
-		domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
+	       pci_name(pdev), start, end);
+
+	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 	if (!domain)
 		return -ENOMEM;

-	/* The address might not be aligned */
-	base = start & PAGE_MASK;
-	size = end - base;
-	size = PAGE_ALIGN(size);
-	if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
-			IOVA_PFN(base + size) - 1)) {
-		printk(KERN_ERR "IOMMU: reserve iova failed\n");
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	pr_debug("Mapping reserved region %lx@%llx for %s\n",
-		size, base, pci_name(pdev));
-	/*
-	 * RMRR range might have overlap with physical memory range,
-	 * clear it first
-	 */
-	dma_pte_clear_range(domain, base, base + size);
-
-	ret = domain_page_mapping(domain, base, base, size,
-		DMA_PTE_READ|DMA_PTE_WRITE);
+	ret = iommu_domain_identity_map(domain, start, end);
 	if (ret)
 		goto error;

 	/* context entry init */
 	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
-	if (!ret)
-		return 0;
-error:
+	if (ret)
+		goto error;
+
+	return 0;
+
+ error:
 	domain_exit(domain);
 	return ret;
-
 }

 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
@@ -1907,64 +1984,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
 		rmrr->end_address + 1);
 }

-struct iommu_prepare_data {
-	struct pci_dev *pdev;
-	int ret;
-};
-
-static int __init iommu_prepare_work_fn(unsigned long start_pfn,
-					 unsigned long end_pfn, void *datax)
-{
-	struct iommu_prepare_data *data;
-
-	data = (struct iommu_prepare_data *)datax;
-
-	data->ret = iommu_prepare_identity_map(data->pdev,
-				start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
-	return data->ret;
-
-}
-
-static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
-{
-	int nid;
-	struct iommu_prepare_data data;
-
-	data.pdev = pdev;
-	data.ret = 0;
-
-	for_each_online_node(nid) {
-		work_with_active_regions(nid, iommu_prepare_work_fn, &data);
-		if (data.ret)
-			return data.ret;
-	}
-	return data.ret;
-}
-
-#ifdef CONFIG_DMAR_GFX_WA
-static void __init iommu_prepare_gfx_mapping(void)
-{
-	struct pci_dev *pdev = NULL;
-	int ret;
-
-	for_each_pci_dev(pdev) {
-		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
-				!IS_GFX_DEVICE(pdev))
-			continue;
-		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
-			pci_name(pdev));
-		ret = iommu_prepare_with_active_regions(pdev);
-		if (ret)
-			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
-	}
-}
-#else /* !CONFIG_DMAR_GFX_WA */
-static inline void iommu_prepare_gfx_mapping(void)
-{
-	return;
-}
-#endif
-
 #ifdef CONFIG_DMAR_FLOPPY_WA
 static inline void iommu_prepare_isa(void)
 {
@@ -1975,12 +1994,12 @@ static inline void iommu_prepare_isa(void)
 	if (!pdev)
 		return;

-	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
+	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
 	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

 	if (ret)
-		printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, "
-			"floppy might not work\n");
+		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
+		       "floppy might not work\n");

 }
 #else
@@ -2008,16 +2027,30 @@ static int __init init_context_pass_through(void)
 }

 static int md_domain_init(struct dmar_domain *domain, int guest_width);
+
+static int __init si_domain_work_fn(unsigned long start_pfn,
+				    unsigned long end_pfn, void *datax)
+{
+	int *ret = datax;
+
+	*ret = iommu_domain_identity_map(si_domain,
+					 (uint64_t)start_pfn << PAGE_SHIFT,
+					 (uint64_t)end_pfn << PAGE_SHIFT);
+	return *ret;
+
+}
+
 static int si_domain_init(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu;
-	int ret = 0;
+	int nid, ret = 0;

 	si_domain = alloc_domain();
 	if (!si_domain)
 		return -EFAULT;

+	pr_debug("Identity mapping domain is domain %d\n", si_domain->id);

 	for_each_active_iommu(iommu, drhd) {
 		ret = iommu_attach_domain(si_domain, iommu);
@@ -2034,6 +2067,12 @@ static int si_domain_init(void)

 	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;

+	for_each_online_node(nid) {
+		work_with_active_regions(nid, si_domain_work_fn, &ret);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }

@@ -2087,13 +2126,14 @@ static int iommu_prepare_static_identity_mapping(void)
 	if (ret)
 		return -EFAULT;

-	printk(KERN_INFO "IOMMU: Setting identity map:\n");
 	for_each_pci_dev(pdev) {
-		ret = iommu_prepare_with_active_regions(pdev);
-		if (ret) {
-			printk(KERN_INFO "1:1 mapping to one domain failed.\n");
-			return -EFAULT;
-		}
+		printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
+		       pci_name(pdev));
+
+		ret = domain_context_mapping(si_domain, pdev,
+					     CONTEXT_TT_MULTI_LEVEL);
+		if (ret)
+			return ret;
 		ret = domain_add_dev_info(si_domain, pdev);
 		if (ret)
 			return ret;
@@ -2284,8 +2324,6 @@ int __init init_dmars(void)
 			}
 		}

-		iommu_prepare_gfx_mapping();
-
 		iommu_prepare_isa();
 	}

@@ -2330,50 +2368,40 @@ error:
 	return ret;
 }

-static inline u64 aligned_size(u64 host_addr, size_t size)
-{
-	u64 addr;
-	addr = (host_addr & (~PAGE_MASK)) + size;
-	return PAGE_ALIGN(addr);
-}
-
-struct iova *
-iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
+static inline unsigned long aligned_nrpages(unsigned long host_addr,
+					    size_t size)
 {
-	struct iova *piova;
+	host_addr &= ~PAGE_MASK;
+	host_addr += size + PAGE_SIZE - 1;

-	/* Make sure it's in range */
-	end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
-	if (!size || (IOVA_START_ADDR + size > end))
-		return NULL;
-
-	piova = alloc_iova(&domain->iovad,
-			size >> PAGE_SHIFT, IOVA_PFN(end), 1);
-	return piova;
+	return host_addr >> VTD_PAGE_SHIFT;
 }

-static struct iova *
-__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
-		   size_t size, u64 dma_mask)
+static struct iova *intel_alloc_iova(struct device *dev,
+				     struct dmar_domain *domain,
+				     unsigned long nrpages, uint64_t dma_mask)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct iova *iova = NULL;

-	if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac)
-		iova = iommu_alloc_iova(domain, size, dma_mask);
-	else {
+	/* Restrict dma_mask to the width that the iommu can handle */
+	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
+
+	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
 		/*
 		 * First try to allocate an io virtual address in
 		 * DMA_BIT_MASK(32) and if that fails then try allocating
 		 * from higher range
 		 */
-		iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32));
-		if (!iova)
-			iova = iommu_alloc_iova(domain, size, dma_mask);
-	}
-
-	if (!iova) {
-		printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
+		iova = alloc_iova(&domain->iovad, nrpages,
+				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
+		if (iova)
+			return iova;
+	}
+	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
+	if (unlikely(!iova)) {
+		printk(KERN_ERR "Allocating %ld-page iova for %s failed",
+		       nrpages, pci_name(pdev));
 		return NULL;
 	}

@@ -2476,14 +2504,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 		return 0;

 	iommu = domain_get_iommu(domain);
-	size = aligned_size((u64)paddr, size);
+	size = aligned_nrpages(paddr, size);

-	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
+	iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
 	if (!iova)
 		goto error;

-	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
-
 	/*
 	 * Check if DMAR supports zero-length reads on write only
 	 * mappings..
@@ -2499,20 +2525,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	 * might have two guest_addr mapping to the same host paddr, but this
 	 * is not a big problem
 	 */
-	ret = domain_page_mapping(domain, start_paddr,
-				  ((u64)paddr) & PHYSICAL_PAGE_MASK,
-				  size, prot);
+	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
+				 paddr >> VTD_PAGE_SHIFT, size, prot);
 	if (ret)
 		goto error;

 	/* it's a non-present to present mapping. Only flush if caching mode */
 	if (cap_caching_mode(iommu->cap))
-		iommu_flush_iotlb_psi(iommu, 0, start_paddr,
-				      size >> VTD_PAGE_SHIFT);
+		iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
 	else
 		iommu_flush_write_buffer(iommu);

-	return start_paddr + ((u64)paddr & (~PAGE_MASK));
+	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
+	start_paddr += paddr & ~PAGE_MASK;
+	return start_paddr;
 
 
 error:
 error:
 	if (iova)
 	if (iova)
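aligned_nrpages() replaces the old byte-based aligned_size(): instead of a rounded byte count, the callers above now pass a count of VT-d pages. A hypothetical stand-in with the semantics the call sites assume (size > 0):

	static unsigned long nrpages_spanned(u64 addr, size_t size)
	{
		u64 first = addr >> VTD_PAGE_SHIFT;
		u64 last = (addr + size - 1) >> VTD_PAGE_SHIFT;

		/* partial first and last pages count as whole pages */
		return (unsigned long)(last - first + 1);
	}

So a 2-byte buffer that straddles a page boundary counts as two pages, just as the old aligned_size() would have rounded it to two pages' worth of bytes.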
@@ -2605,7 +2631,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
 {
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct dmar_domain *domain;
 	struct dmar_domain *domain;
-	unsigned long start_addr;
+	unsigned long start_pfn, last_pfn;
 	struct iova *iova;
 	struct iova *iova;
 	struct intel_iommu *iommu;
 	struct intel_iommu *iommu;
 
 
@@ -2618,22 +2644,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
 	iommu = domain_get_iommu(domain);
 	iommu = domain_get_iommu(domain);
 
 
 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
-	if (!iova)
+	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
+		      (unsigned long long)dev_addr))
 		return;
 		return;
 
 
-	start_addr = iova->pfn_lo << PAGE_SHIFT;
-	size = aligned_size((u64)dev_addr, size);
+	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
+	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
 
 
-	pr_debug("Device %s unmapping: %zx@%llx\n",
-		pci_name(pdev), size, (unsigned long long)start_addr);
+	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
+		 pci_name(pdev), start_pfn, last_pfn);
 
 
 	/*  clear the whole page */
 	/*  clear the whole page */
-	dma_pte_clear_range(domain, start_addr, start_addr + size);
+	dma_pte_clear_range(domain, start_pfn, last_pfn);
+
 	/* free page tables */
 	/* free page tables */
-	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
+	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
+
 	if (intel_iommu_strict) {
 	if (intel_iommu_strict) {
-		iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
-				      size >> VTD_PAGE_SHIFT);
+		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
+				      last_pfn - start_pfn + 1);
 		/* free iova */
 		/* free iova */
 		__free_iova(&domain->iovad, iova);
 		__free_iova(&domain->iovad, iova);
 	} else {
 	} else {
@@ -2691,14 +2720,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 			   int nelems, enum dma_data_direction dir,
 			   int nelems, enum dma_data_direction dir,
 			   struct dma_attrs *attrs)
 			   struct dma_attrs *attrs)
 {
 {
-	int i;
 	struct pci_dev *pdev = to_pci_dev(hwdev);
 	struct pci_dev *pdev = to_pci_dev(hwdev);
 	struct dmar_domain *domain;
 	struct dmar_domain *domain;
-	unsigned long start_addr;
+	unsigned long start_pfn, last_pfn;
 	struct iova *iova;
 	struct iova *iova;
-	size_t size = 0;
-	phys_addr_t addr;
-	struct scatterlist *sg;
 	struct intel_iommu *iommu;
 	struct intel_iommu *iommu;
 
 
 	if (iommu_no_mapping(pdev))
 	if (iommu_no_mapping(pdev))
@@ -2710,22 +2735,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	iommu = domain_get_iommu(domain);
 	iommu = domain_get_iommu(domain);
 
 
 	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
 	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
-	if (!iova)
+	if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
+		      (unsigned long long)sglist[0].dma_address))
 		return;
 		return;
-	for_each_sg(sglist, sg, nelems, i) {
-		addr = page_to_phys(sg_page(sg)) + sg->offset;
-		size += aligned_size((u64)addr, sg->length);
-	}
 
 
-	start_addr = iova->pfn_lo << PAGE_SHIFT;
+	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
+	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
 
 
 	/*  clear the whole page */
 	/*  clear the whole page */
-	dma_pte_clear_range(domain, start_addr, start_addr + size);
+	dma_pte_clear_range(domain, start_pfn, last_pfn);
+
 	/* free page tables */
 	/* free page tables */
-	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
+	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
 
 
-	iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
-			      size >> VTD_PAGE_SHIFT);
+	iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
+			      (last_pfn - start_pfn + 1));
 
 
 	/* free iova */
 	/* free iova */
 	__free_iova(&domain->iovad, iova);
 	__free_iova(&domain->iovad, iova);
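The unmap paths no longer re-derive the mapped size from the scatterlist; the iova node itself records the extent, and the range is expressed as inclusive PFNs. With concrete (hypothetical) numbers, and noting that with 4KiB pages mm_to_dma_pfn() is the identity:

	unsigned long pfn_lo = 0x100, pfn_hi = 0x103;	 /* from the iova   */
	unsigned long start_pfn = pfn_lo;		 /* mm_to_dma_pfn() */
	unsigned long last_pfn = (pfn_hi + 1) - 1;	 /* == pfn_hi here  */
	unsigned long npages = last_pfn - start_pfn + 1; /* 4 pages flushed */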
@@ -2748,17 +2772,16 @@ static int intel_nontranslate_map_sg(struct device *hddev,
 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 			enum dma_data_direction dir, struct dma_attrs *attrs)
 			enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 {
-	phys_addr_t addr;
 	int i;
 	int i;
 	struct pci_dev *pdev = to_pci_dev(hwdev);
 	struct pci_dev *pdev = to_pci_dev(hwdev);
 	struct dmar_domain *domain;
 	struct dmar_domain *domain;
 	size_t size = 0;
 	size_t size = 0;
 	int prot = 0;
 	int prot = 0;
-	size_t offset = 0;
+	size_t offset_pfn = 0;
 	struct iova *iova = NULL;
 	struct iova *iova = NULL;
 	int ret;
 	int ret;
 	struct scatterlist *sg;
 	struct scatterlist *sg;
-	unsigned long start_addr;
+	unsigned long start_vpfn;
 	struct intel_iommu *iommu;
 	struct intel_iommu *iommu;
 
 
 	BUG_ON(dir == DMA_NONE);
 	BUG_ON(dir == DMA_NONE);
@@ -2771,12 +2794,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
 
 
 	iommu = domain_get_iommu(domain);
 	iommu = domain_get_iommu(domain);
 
 
-	for_each_sg(sglist, sg, nelems, i) {
-		addr = page_to_phys(sg_page(sg)) + sg->offset;
-		size += aligned_size((u64)addr, sg->length);
-	}
+	for_each_sg(sglist, sg, nelems, i)
+		size += aligned_nrpages(sg->offset, sg->length);
 
 
-	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
+	iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
 	if (!iova) {
 	if (!iova) {
 		sglist->dma_length = 0;
 		sglist->dma_length = 0;
 		return 0;
 		return 0;
@@ -2792,35 +2813,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
 		prot |= DMA_PTE_WRITE;
 		prot |= DMA_PTE_WRITE;
 
 
-	start_addr = iova->pfn_lo << PAGE_SHIFT;
-	offset = 0;
-	for_each_sg(sglist, sg, nelems, i) {
-		addr = page_to_phys(sg_page(sg)) + sg->offset;
-		size = aligned_size((u64)addr, sg->length);
-		ret = domain_page_mapping(domain, start_addr + offset,
-					  ((u64)addr) & PHYSICAL_PAGE_MASK,
-					  size, prot);
-		if (ret) {
-			/*  clear the page */
-			dma_pte_clear_range(domain, start_addr,
-				  start_addr + offset);
-			/* free page tables */
-			dma_pte_free_pagetable(domain, start_addr,
-				  start_addr + offset);
-			/* free iova */
-			__free_iova(&domain->iovad, iova);
-			return 0;
-		}
-		sg->dma_address = start_addr + offset +
-				((u64)addr & (~PAGE_MASK));
-		sg->dma_length = sg->length;
-		offset += size;
+	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
+
+	ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot);
+	if (unlikely(ret)) {
+		/*  clear the page */
+		dma_pte_clear_range(domain, start_vpfn,
+				    start_vpfn + size - 1);
+		/* free page tables */
+		dma_pte_free_pagetable(domain, start_vpfn,
+				       start_vpfn + size - 1);
+		/* free iova */
+		__free_iova(&domain->iovad, iova);
+		return 0;
 	}
 	}
 
 
 	/* it's a non-present to present mapping. Only flush if caching mode */
 	/* it's a non-present to present mapping. Only flush if caching mode */
 	if (cap_caching_mode(iommu->cap))
 	if (cap_caching_mode(iommu->cap))
-		iommu_flush_iotlb_psi(iommu, 0, start_addr,
-				      offset >> VTD_PAGE_SHIFT);
+		iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
 	else
 	else
 		iommu_flush_write_buffer(iommu);
 		iommu_flush_write_buffer(iommu);
 
 
@@ -3325,7 +3335,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	int adjust_width;
 	int adjust_width;
 
 
 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
-	spin_lock_init(&domain->mapping_lock);
 	spin_lock_init(&domain->iommu_lock);
 	spin_lock_init(&domain->iommu_lock);
 
 
 	domain_reserve_special_ranges(domain);
 	domain_reserve_special_ranges(domain);
@@ -3379,8 +3388,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
 
 
 static void vm_domain_exit(struct dmar_domain *domain)
 static void vm_domain_exit(struct dmar_domain *domain)
 {
 {
-	u64 end;
-
 	/* Domain 0 is reserved, so don't process it */
 	/* Domain 0 is reserved, so don't process it */
 	if (!domain)
 	if (!domain)
 		return;
 		return;
@@ -3388,14 +3395,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
 	vm_domain_remove_all_dev_info(domain);
 	vm_domain_remove_all_dev_info(domain);
 	/* destroy iovas */
 	/* destroy iovas */
 	put_iova_domain(&domain->iovad);
 	put_iova_domain(&domain->iovad);
-	end = DOMAIN_MAX_ADDR(domain->gaw);
-	end = end & (~VTD_PAGE_MASK);
 
 
 	/* clear ptes */
 	/* clear ptes */
-	dma_pte_clear_range(domain, 0, end);
+	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
 
 
 	/* free page tables */
 	/* free page tables */
-	dma_pte_free_pagetable(domain, 0, end);
+	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
 
 
 	iommu_free_vm_domain(domain);
 	iommu_free_vm_domain(domain);
 	free_domain_mem(domain);
 	free_domain_mem(domain);
@@ -3504,7 +3509,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
 		prot |= DMA_PTE_SNP;
 		prot |= DMA_PTE_SNP;
 
 
-	max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
+	max_addr = iova + size;
 	if (dmar_domain->max_addr < max_addr) {
 	if (dmar_domain->max_addr < max_addr) {
 		int min_agaw;
 		int min_agaw;
 		u64 end;
 		u64 end;
@@ -3522,8 +3527,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
 		}
 		}
 		dmar_domain->max_addr = max_addr;
 		dmar_domain->max_addr = max_addr;
 	}
 	}
-
-	ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
+	/* Round up size to next multiple of PAGE_SIZE, if it and
+	   the low bits of hpa would take us onto the next page */
+	size = aligned_nrpages(hpa, size);
+	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
+				 hpa >> VTD_PAGE_SHIFT, size, prot);
 	return ret;
 	return ret;
 }
 }
 
 
@@ -3531,15 +3539,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
 				    unsigned long iova, size_t size)
 				    unsigned long iova, size_t size)
 {
 {
 	struct dmar_domain *dmar_domain = domain->priv;
 	struct dmar_domain *dmar_domain = domain->priv;
-	dma_addr_t base;
 
 
-	/* The address might not be aligned */
-	base = iova & VTD_PAGE_MASK;
-	size = VTD_PAGE_ALIGN(size);
-	dma_pte_clear_range(dmar_domain, base, base + size);
+	dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
+			    (iova + size - 1) >> VTD_PAGE_SHIFT);
 
 
-	if (dmar_domain->max_addr == base + size)
-		dmar_domain->max_addr = base;
+	if (dmar_domain->max_addr == iova + size)
+		dmar_domain->max_addr = iova;
 }
 }
 
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3549,7 +3554,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 	struct dma_pte *pte;
 	struct dma_pte *pte;
 	u64 phys = 0;
 	u64 phys = 0;
 
 
-	pte = addr_to_dma_pte(dmar_domain, iova);
+	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
 	if (pte)
 	if (pte)
 		phys = dma_pte_addr(pte);
 		phys = dma_pte_addr(pte);
 
 

+ 4 - 0
drivers/scsi/cxgb3i/cxgb3i_iscsi.c

@@ -13,6 +13,7 @@
 
 
 #include <linux/inet.h>
 #include <linux/inet.h>
 #include <linux/crypto.h>
 #include <linux/crypto.h>
+#include <linux/if_vlan.h>
 #include <net/dst.h>
 #include <net/dst.h>
 #include <net/tcp.h>
 #include <net/tcp.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_cmnd.h>
@@ -184,6 +185,9 @@ static struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
 	struct cxgb3i_adapter *snic;
 	struct cxgb3i_adapter *snic;
 	int i;
 	int i;
 
 
+	if (ndev->priv_flags & IFF_802_1Q_VLAN)
+		ndev = vlan_dev_real_dev(ndev);
+
 	read_lock(&cxgb3i_snic_rwlock);
 	read_lock(&cxgb3i_snic_rwlock);
 	list_for_each_entry(snic, &cxgb3i_snic_list, list_head) {
 	list_for_each_entry(snic, &cxgb3i_snic_list, list_head) {
 		for (i = 0; i < snic->hba_cnt; i++) {
 		for (i = 0; i < snic->hba_cnt; i++) {

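The guard added above is the standard way to cope with an iSCSI session configured on a VLAN interface: resolve the 802.1q netdev to the underlying physical device before matching it against the adapter's ports. The idiom in isolation (helper name hypothetical):

	#include <linux/netdevice.h>
	#include <linux/if_vlan.h>

	static struct net_device *real_netdev(struct net_device *ndev)
	{
		/* 802.1q devices are stacked on a real device; unwrap it */
		if (ndev->priv_flags & IFF_802_1Q_VLAN)
			return vlan_dev_real_dev(ndev);
		return ndev;
	}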
+ 4 - 4
drivers/scsi/fnic/fnic_main.c

@@ -473,16 +473,16 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
 	 * limitation for the device.  Try 40-bit first, and
 	 * limitation for the device.  Try 40-bit first, and
 	 * fall back to 32-bit.
 	 * fall back to 32-bit.
 	 */
 	 */
-	err = pci_set_dma_mask(pdev, DMA_40BIT_MASK);
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
 	if (err) {
 	if (err) {
-		err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
 		if (err) {
 			shost_printk(KERN_ERR, fnic->lport->host,
 			shost_printk(KERN_ERR, fnic->lport->host,
 				     "No usable DMA configuration "
 				     "No usable DMA configuration "
 				     "aborting\n");
 				     "aborting\n");
 			goto err_out_release_regions;
 			goto err_out_release_regions;
 		}
 		}
-		err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
 		if (err) {
 			shost_printk(KERN_ERR, fnic->lport->host,
 			shost_printk(KERN_ERR, fnic->lport->host,
 				     "Unable to obtain 32-bit DMA "
 				     "Unable to obtain 32-bit DMA "
@@ -490,7 +490,7 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
 			goto err_out_release_regions;
 			goto err_out_release_regions;
 		}
 		}
 	} else {
 	} else {
-		err = pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK);
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
 		if (err) {
 		if (err) {
 			shost_printk(KERN_ERR, fnic->lport->host,
 			shost_printk(KERN_ERR, fnic->lport->host,
 				     "Unable to obtain 40-bit DMA "
 				     "Unable to obtain 40-bit DMA "

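Besides switching to the DMA_BIT_MASK() macro, the hunk shows the usual probe-time negotiation: request the widest mask the hardware supports, and fall back to 32-bit for both streaming and coherent mappings if the platform refuses. Condensed into a sketch (hypothetical helper, same calls as above):

	#include <linux/pci.h>
	#include <linux/dma-mapping.h>

	static int fnic_style_set_masks(struct pci_dev *pdev)
	{
		int err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));

		if (err) {
			/* 40-bit refused: drop to 32-bit across the board */
			err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
			if (err)
				return err;
			return pci_set_consistent_dma_mask(pdev,
							   DMA_BIT_MASK(32));
		}
		return pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
	}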
+ 2 - 5
drivers/scsi/fnic/fnic_scsi.c

@@ -245,7 +245,7 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
 					  struct vnic_wq_copy *wq,
 					  struct vnic_wq_copy *wq,
 					  struct fnic_io_req *io_req,
 					  struct fnic_io_req *io_req,
 					  struct scsi_cmnd *sc,
 					  struct scsi_cmnd *sc,
-					  u32 sg_count)
+					  int sg_count)
 {
 {
 	struct scatterlist *sg;
 	struct scatterlist *sg;
 	struct fc_rport *rport = starget_to_rport(scsi_target(sc->device));
 	struct fc_rport *rport = starget_to_rport(scsi_target(sc->device));
@@ -260,9 +260,6 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
 	char msg[2];
 	char msg[2];
 
 
 	if (sg_count) {
 	if (sg_count) {
-		BUG_ON(sg_count < 0);
-		BUG_ON(sg_count > FNIC_MAX_SG_DESC_CNT);
-
 		/* For each SGE, create a device desc entry */
 		/* For each SGE, create a device desc entry */
 		desc = io_req->sgl_list;
 		desc = io_req->sgl_list;
 		for_each_sg(scsi_sglist(sc), sg, sg_count, i) {
 		for_each_sg(scsi_sglist(sc), sg, sg_count, i) {
@@ -344,7 +341,7 @@ int fnic_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 	struct fnic *fnic;
 	struct fnic *fnic;
 	struct vnic_wq_copy *wq;
 	struct vnic_wq_copy *wq;
 	int ret;
 	int ret;
-	u32 sg_count;
+	int sg_count;
 	unsigned long flags;
 	unsigned long flags;
 	unsigned long ptr;
 	unsigned long ptr;
 
 

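The u32 to int change matters because sg_count holds the result of a mapping call that can fail with a negative errno; stored unsigned, an error becomes a huge positive count, which is why the removed BUG_ON(sg_count < 0) could never fire. Shape of the hazard, assuming sg_count comes from scsi_dma_map() (which returns a negative value on mapping failure):

	int sg_count = scsi_dma_map(sc);	/* may be negative      */

	if (sg_count < 0)			/* only works if signed */
		return SCSI_MLQUEUE_HOST_BUSY;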
+ 6 - 1
drivers/scsi/ibmvscsi/ibmvscsi.c

@@ -1095,9 +1095,14 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct)
 				MAX_INDIRECT_BUFS);
 				MAX_INDIRECT_BUFS);
 			hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
 			hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
 		}
 		}
+
+		if (hostdata->madapter_info.os_type == 3) {
+			enable_fast_fail(hostdata);
+			return;
+		}
 	}
 	}
 
 
-	enable_fast_fail(hostdata);
+	send_srp_login(hostdata);
 }
 }
 
 
 /**
 /**

+ 3 - 2
drivers/scsi/scsi_transport_fc.c

@@ -3670,13 +3670,14 @@ static void
 fc_bsg_goose_queue(struct fc_rport *rport)
 fc_bsg_goose_queue(struct fc_rport *rport)
 {
 {
 	int flagset;
 	int flagset;
+	unsigned long flags;
 
 
 	if (!rport->rqst_q)
 	if (!rport->rqst_q)
 		return;
 		return;
 
 
 	get_device(&rport->dev);
 	get_device(&rport->dev);
 
 
-	spin_lock(rport->rqst_q->queue_lock);
+	spin_lock_irqsave(rport->rqst_q->queue_lock, flags);
 	flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) &&
 	flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) &&
 		  !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
 		  !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
 	if (flagset)
 	if (flagset)
@@ -3684,7 +3685,7 @@ fc_bsg_goose_queue(struct fc_rport *rport)
 	__blk_run_queue(rport->rqst_q);
 	__blk_run_queue(rport->rqst_q);
 	if (flagset)
 	if (flagset)
 		queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
 		queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
-	spin_unlock(rport->rqst_q->queue_lock);
+	spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
 
 
 	put_device(&rport->dev);
 	put_device(&rport->dev);
 }
 }

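Plain spin_lock() is unsafe on a request-queue lock because queue locks are also taken from interrupt context; an interrupt arriving on the same CPU while the lock is held would deadlock. The irqsave variant saves and disables the local interrupt state for the critical section. Generic shape of the idiom (demo lock, not the rport queue):

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);

	static void touch_shared_state(void)
	{
		unsigned long flags;

		spin_lock_irqsave(&demo_lock, flags);
		/* ... state also touched by an interrupt handler ... */
		spin_unlock_irqrestore(&demo_lock, flags);
	}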
+ 1 - 1
drivers/scsi/zalon.c

@@ -134,7 +134,7 @@ zalon_probe(struct parisc_device *dev)
 
 
 	host = ncr_attach(&zalon7xx_template, unit, &device);
 	host = ncr_attach(&zalon7xx_template, unit, &device);
 	if (!host)
 	if (!host)
-		goto fail;
+		return -ENODEV;
 
 
 	if (request_irq(dev->irq, ncr53c8xx_intr, IRQF_SHARED, "zalon", host)) {
 	if (request_irq(dev->irq, ncr53c8xx_intr, IRQF_SHARED, "zalon", host)) {
 	  dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n ",
 	  dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n ",

+ 4 - 3
drivers/serial/sh-sci.c

@@ -707,12 +707,13 @@ static irqreturn_t sci_br_interrupt(int irq, void *ptr)
 
 
 static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 {
 {
-	unsigned short ssr_status, scr_status;
+	unsigned short ssr_status, scr_status, err_enabled;
 	struct uart_port *port = ptr;
 	struct uart_port *port = ptr;
 	irqreturn_t ret = IRQ_NONE;
 	irqreturn_t ret = IRQ_NONE;
 
 
 	ssr_status = sci_in(port, SCxSR);
 	ssr_status = sci_in(port, SCxSR);
 	scr_status = sci_in(port, SCSCR);
 	scr_status = sci_in(port, SCSCR);
+	err_enabled = scr_status & (SCI_CTRL_FLAGS_REIE | SCI_CTRL_FLAGS_RIE);
 
 
 	/* Tx Interrupt */
 	/* Tx Interrupt */
 	if ((ssr_status & 0x0020) && (scr_status & SCI_CTRL_FLAGS_TIE))
 	if ((ssr_status & 0x0020) && (scr_status & SCI_CTRL_FLAGS_TIE))
@@ -721,10 +722,10 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 	if ((ssr_status & 0x0002) && (scr_status & SCI_CTRL_FLAGS_RIE))
 	if ((ssr_status & 0x0002) && (scr_status & SCI_CTRL_FLAGS_RIE))
 		ret = sci_rx_interrupt(irq, ptr);
 		ret = sci_rx_interrupt(irq, ptr);
 	/* Error Interrupt */
 	/* Error Interrupt */
-	if ((ssr_status & 0x0080) && (scr_status & SCI_CTRL_FLAGS_REIE))
+	if ((ssr_status & 0x0080) && err_enabled)
 		ret = sci_er_interrupt(irq, ptr);
 		ret = sci_er_interrupt(irq, ptr);
 	/* Break Interrupt */
 	/* Break Interrupt */
-	if ((ssr_status & 0x0010) && (scr_status & SCI_CTRL_FLAGS_REIE))
+	if ((ssr_status & 0x0010) && err_enabled)
 		ret = sci_br_interrupt(irq, ptr);
 		ret = sci_br_interrupt(irq, ptr);
 
 
 	return ret;
 	return ret;

+ 2 - 2
drivers/usb/host/Kconfig

@@ -337,10 +337,10 @@ config USB_R8A66597_HCD
 
 
 config SUPERH_ON_CHIP_R8A66597
 config SUPERH_ON_CHIP_R8A66597
 	boolean "Enable SuperH on-chip R8A66597 USB"
 	boolean "Enable SuperH on-chip R8A66597 USB"
-	depends on USB_R8A66597_HCD && (CPU_SUBTYPE_SH7366 || CPU_SUBTYPE_SH7723)
+	depends on USB_R8A66597_HCD && (CPU_SUBTYPE_SH7366 || CPU_SUBTYPE_SH7723 || CPU_SUBTYPE_SH7724)
 	help
 	help
 	   This driver enables support for the on-chip R8A66597 in the
 	   This driver enables support for the on-chip R8A66597 in the
-	   SH7366 and SH7723 processors.
+	   SH7366, SH7723 and SH7724 processors.
 
 
 config USB_WHCI_HCD
 config USB_WHCI_HCD
 	tristate "Wireless USB Host Controller Interface (WHCI) driver (EXPERIMENTAL)"
 	tristate "Wireless USB Host Controller Interface (WHCI) driver (EXPERIMENTAL)"

+ 1 - 1
fs/btrfs/async-thread.c

@@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
 					   "btrfs-%s-%d", workers->name,
 					   "btrfs-%s-%d", workers->name,
 					   workers->num_workers + i);
 					   workers->num_workers + i);
 		if (IS_ERR(worker->task)) {
 		if (IS_ERR(worker->task)) {
-			kfree(worker);
 			ret = PTR_ERR(worker->task);
 			ret = PTR_ERR(worker->task);
+			kfree(worker);
 			goto fail;
 			goto fail;
 		}
 		}
 
 

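The one-line swap above fixes a use-after-free: PTR_ERR(worker->task) has to read worker->task, so it must run while worker is still allocated, not after kfree(worker) has released it. The general ordering rule, sketched:

	if (IS_ERR(worker->task)) {
		ret = PTR_ERR(worker->task);	/* read while 'worker' lives */
		kfree(worker);			/* ... then free it          */
		goto fail;
	}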
+ 1 - 2
fs/btrfs/ctree.h

@@ -2074,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
-			*root);
+int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct btrfs_root *root,
 			struct extent_buffer *node,
 			struct extent_buffer *node,

+ 391 - 175
fs/btrfs/extent-tree.c

@@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
 	return type;
 	return type;
 }
 }
 
 
-static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
+static int find_next_key(struct btrfs_path *path, int level,
+			 struct btrfs_key *key)
 
 
 {
 {
-	int level;
-	BUG_ON(!path->keep_locks);
-	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+	for (; level < BTRFS_MAX_LEVEL; level++) {
 		if (!path->nodes[level])
 		if (!path->nodes[level])
 			break;
 			break;
-		btrfs_assert_tree_locked(path->nodes[level]);
 		if (path->slots[level] + 1 >=
 		if (path->slots[level] + 1 >=
 		    btrfs_header_nritems(path->nodes[level]))
 		    btrfs_header_nritems(path->nodes[level]))
 			continue;
 			continue;
@@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		 * For simplicity, we just do not add new inline back
 		 * For simplicity, we just do not add new inline back
 		 * ref if there is any kind of item for this block
 		 * ref if there is any kind of item for this block
 		 */
 		 */
-		if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
+		if (find_next_key(path, 0, &key) == 0 &&
+		    key.objectid == bytenr &&
 		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
 		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
 			err = -EAGAIN;
 			err = -EAGAIN;
 			goto out;
 			goto out;
@@ -2697,7 +2696,7 @@ again:
 
 
 		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
 		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
 		       ", %llu bytes_used, %llu bytes_reserved, "
 		       ", %llu bytes_used, %llu bytes_reserved, "
-		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
+		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
 		       "%llu total\n", (unsigned long long)bytes,
 		       "%llu total\n", (unsigned long long)bytes,
 		       (unsigned long long)data_sinfo->bytes_delalloc,
 		       (unsigned long long)data_sinfo->bytes_delalloc,
 		       (unsigned long long)data_sinfo->bytes_used,
 		       (unsigned long long)data_sinfo->bytes_used,
@@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 	return buf;
 	return buf;
 }
 }
 
 
+#if 0
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf)
 			struct btrfs_root *root, struct extent_buffer *leaf)
 {
 {
@@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 	return 0;
 	return 0;
 }
 }
 
 
-#if 0
-
 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
 					struct btrfs_root *root,
 					struct btrfs_leaf_ref *ref)
 					struct btrfs_leaf_ref *ref)
@@ -4553,262 +4551,471 @@ out:
 }
 }
 #endif
 #endif
 
 
+struct walk_control {
+	u64 refs[BTRFS_MAX_LEVEL];
+	u64 flags[BTRFS_MAX_LEVEL];
+	struct btrfs_key update_progress;
+	int stage;
+	int level;
+	int shared_level;
+	int update_ref;
+	int keep_locks;
+};
+
+#define DROP_REFERENCE	1
+#define UPDATE_BACKREF	2
+
 /*
 /*
- * helper function for drop_subtree, this function is similar to
- * walk_down_tree. The main difference is that it checks reference
- * counts while tree blocks are locked.
+ * helper to process tree block while walking down the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function checks
+ * the reference count of the block. if the block is shared and
+ * we need to update back refs for the subtree rooted at the
+ * block, this function changes wc->stage to UPDATE_BACKREF
+ *
+ * when wc->stage == UPDATE_BACKREF, this function updates
+ * back refs for pointers in the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
  */
  */
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct btrfs_root *root,
-				   struct btrfs_path *path, int *level)
+				   struct btrfs_path *path,
+				   struct walk_control *wc)
 {
 {
-	struct extent_buffer *next;
-	struct extent_buffer *cur;
-	struct extent_buffer *parent;
-	u64 bytenr;
-	u64 ptr_gen;
-	u64 refs;
-	u64 flags;
-	u32 blocksize;
+	int level = wc->level;
+	struct extent_buffer *eb = path->nodes[level];
+	struct btrfs_key key;
+	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
 	int ret;
 	int ret;
 
 
-	cur = path->nodes[*level];
-	ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
-				       &refs, &flags);
-	BUG_ON(ret);
-	if (refs > 1)
-		goto out;
+	if (wc->stage == UPDATE_BACKREF &&
+	    btrfs_header_owner(eb) != root->root_key.objectid)
+		return 1;
 
 
-	BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+	/*
+	 * when reference count of tree block is 1, it won't increase
+	 * again. once full backref flag is set, we never clear it.
+	 */
+	if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+	    (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+		BUG_ON(!path->locks[level]);
+		ret = btrfs_lookup_extent_info(trans, root,
+					       eb->start, eb->len,
+					       &wc->refs[level],
+					       &wc->flags[level]);
+		BUG_ON(ret);
+		BUG_ON(wc->refs[level] == 0);
+	}
 
 
-	while (*level >= 0) {
-		cur = path->nodes[*level];
-		if (*level == 0) {
-			ret = btrfs_drop_leaf_ref(trans, root, cur);
-			BUG_ON(ret);
-			clean_tree_block(trans, root, cur);
-			break;
-		}
-		if (path->slots[*level] >= btrfs_header_nritems(cur)) {
-			clean_tree_block(trans, root, cur);
-			break;
+	if (wc->stage == DROP_REFERENCE &&
+	    wc->update_ref && wc->refs[level] > 1) {
+		BUG_ON(eb == root->node);
+		BUG_ON(path->slots[level] > 0);
+		if (level == 0)
+			btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
+		else
+			btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
+		if (btrfs_header_owner(eb) == root->root_key.objectid &&
+		    btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
+			wc->stage = UPDATE_BACKREF;
+			wc->shared_level = level;
 		}
 		}
+	}
 
 
-		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
-		blocksize = btrfs_level_size(root, *level - 1);
-		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+	if (wc->stage == DROP_REFERENCE) {
+		if (wc->refs[level] > 1)
+			return 1;
 
 
-		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
-		btrfs_tree_lock(next);
-		btrfs_set_lock_blocking(next);
+		if (path->locks[level] && !wc->keep_locks) {
+			btrfs_tree_unlock(eb);
+			path->locks[level] = 0;
+		}
+		return 0;
+	}
 
 
-		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-					       &refs, &flags);
+	/* wc->stage == UPDATE_BACKREF */
+	if (!(wc->flags[level] & flag)) {
+		BUG_ON(!path->locks[level]);
+		ret = btrfs_inc_ref(trans, root, eb, 1);
 		BUG_ON(ret);
 		BUG_ON(ret);
-		if (refs > 1) {
-			parent = path->nodes[*level];
-			ret = btrfs_free_extent(trans, root, bytenr,
-						blocksize, parent->start,
-						btrfs_header_owner(parent),
-						*level - 1, 0);
+		ret = btrfs_dec_ref(trans, root, eb, 0);
+		BUG_ON(ret);
+		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
+						  eb->len, flag, 0);
+		BUG_ON(ret);
+		wc->flags[level] |= flag;
+	}
+
+	/*
+	 * the block is shared by multiple trees, so it's not good to
+	 * keep the tree lock
+	 */
+	if (path->locks[level] && level > 0) {
+		btrfs_tree_unlock(eb);
+		path->locks[level] = 0;
+	}
+	return 0;
+}
+
+/*
+ * helper to process tree block while walking up the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function drops
+ * reference count on the block.
+ *
+ * when wc->stage == UPDATE_BACKREF, this function changes
+ * wc->stage back to DROP_REFERENCE if we changed wc->stage
+ * to UPDATE_BACKREF previously while processing the block.
+ *
+ * NOTE: return value 1 means we should stop walking up.
+ */
+static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 struct walk_control *wc)
+{
+	int ret = 0;
+	int level = wc->level;
+	struct extent_buffer *eb = path->nodes[level];
+	u64 parent = 0;
+
+	if (wc->stage == UPDATE_BACKREF) {
+		BUG_ON(wc->shared_level < level);
+		if (level < wc->shared_level)
+			goto out;
+
+		BUG_ON(wc->refs[level] <= 1);
+		ret = find_next_key(path, level + 1, &wc->update_progress);
+		if (ret > 0)
+			wc->update_ref = 0;
+
+		wc->stage = DROP_REFERENCE;
+		wc->shared_level = -1;
+		path->slots[level] = 0;
+
+		/*
+		 * check reference count again if the block isn't locked.
+		 * we should start walking down the tree again if reference
+		 * count is one.
+		 */
+		if (!path->locks[level]) {
+			BUG_ON(level == 0);
+			btrfs_tree_lock(eb);
+			btrfs_set_lock_blocking(eb);
+			path->locks[level] = 1;
+
+			ret = btrfs_lookup_extent_info(trans, root,
+						       eb->start, eb->len,
+						       &wc->refs[level],
+						       &wc->flags[level]);
 			BUG_ON(ret);
 			BUG_ON(ret);
-			path->slots[*level]++;
-			btrfs_tree_unlock(next);
-			free_extent_buffer(next);
-			continue;
+			BUG_ON(wc->refs[level] == 0);
+			if (wc->refs[level] == 1) {
+				btrfs_tree_unlock(eb);
+				path->locks[level] = 0;
+				return 1;
+			}
+		} else {
+			BUG_ON(level != 0);
 		}
 		}
+	}
 
 
-		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+	/* wc->stage == DROP_REFERENCE */
+	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
 
 
-		*level = btrfs_header_level(next);
-		path->nodes[*level] = next;
-		path->slots[*level] = 0;
-		path->locks[*level] = 1;
-		cond_resched();
+	if (wc->refs[level] == 1) {
+		if (level == 0) {
+			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+				ret = btrfs_dec_ref(trans, root, eb, 1);
+			else
+				ret = btrfs_dec_ref(trans, root, eb, 0);
+			BUG_ON(ret);
+		}
+		/* make block locked assertion in clean_tree_block happy */
+		if (!path->locks[level] &&
+		    btrfs_header_generation(eb) == trans->transid) {
+			btrfs_tree_lock(eb);
+			btrfs_set_lock_blocking(eb);
+			path->locks[level] = 1;
+		}
+		clean_tree_block(trans, root, eb);
+	}
+
+	if (eb == root->node) {
+		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+			parent = eb->start;
+		else
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(eb));
+	} else {
+		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+			parent = path->nodes[level + 1]->start;
+		else
+			BUG_ON(root->root_key.objectid !=
+			       btrfs_header_owner(path->nodes[level + 1]));
 	}
 	}
-out:
-	if (path->nodes[*level] == root->node)
-		parent = path->nodes[*level];
-	else
-		parent = path->nodes[*level + 1];
-	bytenr = path->nodes[*level]->start;
-	blocksize = path->nodes[*level]->len;
 
 
-	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
-				btrfs_header_owner(parent), *level, 0);
+	ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
+				root->root_key.objectid, level, 0);
 	BUG_ON(ret);
 	BUG_ON(ret);
+out:
+	wc->refs[level] = 0;
+	wc->flags[level] = 0;
+	return ret;
+}
+
+static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   struct walk_control *wc)
+{
+	struct extent_buffer *next;
+	struct extent_buffer *cur;
+	u64 bytenr;
+	u64 ptr_gen;
+	u32 blocksize;
+	int level = wc->level;
+	int ret;
+
+	while (level >= 0) {
+		cur = path->nodes[level];
+		BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
 
 
-	if (path->locks[*level]) {
-		btrfs_tree_unlock(path->nodes[*level]);
-		path->locks[*level] = 0;
+		ret = walk_down_proc(trans, root, path, wc);
+		if (ret > 0)
+			break;
+
+		if (level == 0)
+			break;
+
+		bytenr = btrfs_node_blockptr(cur, path->slots[level]);
+		blocksize = btrfs_level_size(root, level - 1);
+		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
+
+		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
+		btrfs_tree_lock(next);
+		btrfs_set_lock_blocking(next);
+
+		level--;
+		BUG_ON(level != btrfs_header_level(next));
+		path->nodes[level] = next;
+		path->slots[level] = 0;
+		path->locks[level] = 1;
+		wc->level = level;
 	}
 	}
-	free_extent_buffer(path->nodes[*level]);
-	path->nodes[*level] = NULL;
-	*level += 1;
-	cond_resched();
 	return 0;
 	return 0;
 }
 }
 
 
-/*
- * helper for dropping snapshots.  This walks back up the tree in the path
- * to find the first node higher up where we haven't yet gone through
- * all the slots
- */
 static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct btrfs_root *root,
 				 struct btrfs_path *path,
 				 struct btrfs_path *path,
-				 int *level, int max_level)
+				 struct walk_control *wc, int max_level)
 {
 {
-	struct btrfs_root_item *root_item = &root->root_item;
-	int i;
-	int slot;
+	int level = wc->level;
 	int ret;
 	int ret;
 
 
-	for (i = *level; i < max_level && path->nodes[i]; i++) {
-		slot = path->slots[i];
-		if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
-			/*
-			 * there is more work to do in this level.
-			 * Update the drop_progress marker to reflect
-			 * the work we've done so far, and then bump
-			 * the slot number
-			 */
-			path->slots[i]++;
-			WARN_ON(*level == 0);
-			if (max_level == BTRFS_MAX_LEVEL) {
-				btrfs_node_key(path->nodes[i],
-					       &root_item->drop_progress,
-					       path->slots[i]);
-				root_item->drop_level = i;
-			}
-			*level = i;
+	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
+	while (level < max_level && path->nodes[level]) {
+		wc->level = level;
+		if (path->slots[level] + 1 <
+		    btrfs_header_nritems(path->nodes[level])) {
+			path->slots[level]++;
 			return 0;
 			return 0;
 		} else {
 		} else {
-			struct extent_buffer *parent;
-
-			/*
-			 * this whole node is done, free our reference
-			 * on it and go up one level
-			 */
-			if (path->nodes[*level] == root->node)
-				parent = path->nodes[*level];
-			else
-				parent = path->nodes[*level + 1];
+			ret = walk_up_proc(trans, root, path, wc);
+			if (ret > 0)
+				return 0;
 
 
-			clean_tree_block(trans, root, path->nodes[i]);
-			ret = btrfs_free_extent(trans, root,
-						path->nodes[i]->start,
-						path->nodes[i]->len,
-						parent->start,
-						btrfs_header_owner(parent),
-						*level, 0);
-			BUG_ON(ret);
-			if (path->locks[*level]) {
-				btrfs_tree_unlock(path->nodes[i]);
-				path->locks[i] = 0;
+			if (path->locks[level]) {
+				btrfs_tree_unlock(path->nodes[level]);
+				path->locks[level] = 0;
 			}
 			}
-			free_extent_buffer(path->nodes[i]);
-			path->nodes[i] = NULL;
-			*level = i + 1;
+			free_extent_buffer(path->nodes[level]);
+			path->nodes[level] = NULL;
+			level++;
 		}
 		}
 	}
 	}
 	return 1;
 	return 1;
 }
 }
 
 
 /*
 /*
- * drop the reference count on the tree rooted at 'snap'.  This traverses
- * the tree freeing any blocks that have a ref count of zero after being
- * decremented.
+ * drop a subvolume tree.
+ *
+ * this function traverses the tree freeing any blocks that are only
+ * referenced by the tree.
+ *
+ * when a shared tree block is found, this function decreases its
+ * reference count by one. if update_ref is true, this function
+ * also makes sure backrefs for the shared block and all lower level
+ * blocks are properly updated.
  */
  */
-int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
-			*root)
+int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
 {
 {
-	int ret = 0;
-	int wret;
-	int level;
 	struct btrfs_path *path;
 	struct btrfs_path *path;
-	int update_count;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	struct btrfs_root_item *root_item = &root->root_item;
 	struct btrfs_root_item *root_item = &root->root_item;
+	struct walk_control *wc;
+	struct btrfs_key key;
+	int err = 0;
+	int ret;
+	int level;
 
 
 	path = btrfs_alloc_path();
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 	BUG_ON(!path);
 
 
-	level = btrfs_header_level(root->node);
+	wc = kzalloc(sizeof(*wc), GFP_NOFS);
+	BUG_ON(!wc);
+
+	trans = btrfs_start_transaction(tree_root, 1);
+
 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+		level = btrfs_header_level(root->node);
 		path->nodes[level] = btrfs_lock_root_node(root);
 		path->nodes[level] = btrfs_lock_root_node(root);
 		btrfs_set_lock_blocking(path->nodes[level]);
 		btrfs_set_lock_blocking(path->nodes[level]);
 		path->slots[level] = 0;
 		path->slots[level] = 0;
 		path->locks[level] = 1;
 		path->locks[level] = 1;
+		memset(&wc->update_progress, 0,
+		       sizeof(wc->update_progress));
 	} else {
 	} else {
-		struct btrfs_key key;
-		struct btrfs_disk_key found_key;
-		struct extent_buffer *node;
-
 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+		memcpy(&wc->update_progress, &key,
+		       sizeof(wc->update_progress));
+
 		level = root_item->drop_level;
 		level = root_item->drop_level;
+		BUG_ON(level == 0);
 		path->lowest_level = level;
 		path->lowest_level = level;
-		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-		if (wret < 0) {
-			ret = wret;
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		path->lowest_level = 0;
+		if (ret < 0) {
+			err = ret;
 			goto out;
 			goto out;
 		}
 		}
-		node = path->nodes[level];
-		btrfs_node_key(node, &found_key, path->slots[level]);
-		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
-			       sizeof(found_key)));
+		btrfs_node_key_to_cpu(path->nodes[level], &key,
+				      path->slots[level]);
+		WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+
 		/*
 		/*
 		 * unlock our path, this is safe because only this
 		 * unlock our path, this is safe because only this
 		 * function is allowed to delete this snapshot
 		 * function is allowed to delete this snapshot
 		 */
 		 */
 		btrfs_unlock_up_safe(path, 0);
 		btrfs_unlock_up_safe(path, 0);
+
+		level = btrfs_header_level(root->node);
+		while (1) {
+			btrfs_tree_lock(path->nodes[level]);
+			btrfs_set_lock_blocking(path->nodes[level]);
+
+			ret = btrfs_lookup_extent_info(trans, root,
+						path->nodes[level]->start,
+						path->nodes[level]->len,
+						&wc->refs[level],
+						&wc->flags[level]);
+			BUG_ON(ret);
+			BUG_ON(wc->refs[level] == 0);
+
+			if (level == root_item->drop_level)
+				break;
+
+			btrfs_tree_unlock(path->nodes[level]);
+			WARN_ON(wc->refs[level] != 1);
+			level--;
+		}
 	}
 	}
+
+	wc->level = level;
+	wc->shared_level = -1;
+	wc->stage = DROP_REFERENCE;
+	wc->update_ref = update_ref;
+	wc->keep_locks = 0;
+
 	while (1) {
 	while (1) {
-		unsigned long update;
-		wret = walk_down_tree(trans, root, path, &level);
-		if (wret > 0)
+		ret = walk_down_tree(trans, root, path, wc);
+		if (ret < 0) {
+			err = ret;
 			break;
 			break;
-		if (wret < 0)
-			ret = wret;
+		}
 
 
-		wret = walk_up_tree(trans, root, path, &level,
-				    BTRFS_MAX_LEVEL);
-		if (wret > 0)
+		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
+		if (ret < 0) {
+			err = ret;
 			break;
 			break;
-		if (wret < 0)
-			ret = wret;
-		if (trans->transaction->in_commit ||
-		    trans->transaction->delayed_refs.flushing) {
-			ret = -EAGAIN;
+		}
+
+		if (ret > 0) {
+			BUG_ON(wc->stage != DROP_REFERENCE);
 			break;
 			break;
 		}
 		}
-		for (update_count = 0; update_count < 16; update_count++) {
+
+		if (wc->stage == DROP_REFERENCE) {
+			level = wc->level;
+			btrfs_node_key(path->nodes[level],
+				       &root_item->drop_progress,
+				       path->slots[level]);
+			root_item->drop_level = level;
+		}
+
+		BUG_ON(wc->level == 0);
+		if (trans->transaction->in_commit ||
+		    trans->transaction->delayed_refs.flushing) {
+			ret = btrfs_update_root(trans, tree_root,
+						&root->root_key,
+						root_item);
+			BUG_ON(ret);
+
+			btrfs_end_transaction(trans, tree_root);
+			trans = btrfs_start_transaction(tree_root, 1);
+		} else {
+			unsigned long update;
 			update = trans->delayed_ref_updates;
 			update = trans->delayed_ref_updates;
 			trans->delayed_ref_updates = 0;
 			trans->delayed_ref_updates = 0;
 			if (update)
 			if (update)
-				btrfs_run_delayed_refs(trans, root, update);
-			else
-				break;
+				btrfs_run_delayed_refs(trans, tree_root,
+						       update);
 		}
 		}
 	}
 	}
+	btrfs_release_path(root, path);
+	BUG_ON(err);
+
+	ret = btrfs_del_root(trans, tree_root, &root->root_key);
+	BUG_ON(ret);
+
+	free_extent_buffer(root->node);
+	free_extent_buffer(root->commit_root);
+	kfree(root);
 out:
 out:
+	btrfs_end_transaction(trans, tree_root);
+	kfree(wc);
 	btrfs_free_path(path);
 	btrfs_free_path(path);
-	return ret;
+	return err;
 }
 }
 
 
+/*
+ * drop subtree rooted at tree block 'node'.
+ *
+ * NOTE: this function will unlock and release tree block 'node'
+ */
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct btrfs_root *root,
 			struct extent_buffer *node,
 			struct extent_buffer *node,
 			struct extent_buffer *parent)
 			struct extent_buffer *parent)
 {
 {
 	struct btrfs_path *path;
 	struct btrfs_path *path;
+	struct walk_control *wc;
 	int level;
 	int level;
 	int parent_level;
 	int parent_level;
 	int ret = 0;
 	int ret = 0;
 	int wret;
 	int wret;
 
 
+	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
+
 	path = btrfs_alloc_path();
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 	BUG_ON(!path);
 
 
+	wc = kzalloc(sizeof(*wc), GFP_NOFS);
+	BUG_ON(!wc);
+
 	btrfs_assert_tree_locked(parent);
 	btrfs_assert_tree_locked(parent);
 	parent_level = btrfs_header_level(parent);
 	parent_level = btrfs_header_level(parent);
 	extent_buffer_get(parent);
 	extent_buffer_get(parent);
@@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 
 
 	btrfs_assert_tree_locked(node);
 	btrfs_assert_tree_locked(node);
 	level = btrfs_header_level(node);
 	level = btrfs_header_level(node);
-	extent_buffer_get(node);
 	path->nodes[level] = node;
 	path->nodes[level] = node;
 	path->slots[level] = 0;
 	path->slots[level] = 0;
+	path->locks[level] = 1;
+
+	wc->refs[parent_level] = 1;
+	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+	wc->level = level;
+	wc->shared_level = -1;
+	wc->stage = DROP_REFERENCE;
+	wc->update_ref = 0;
+	wc->keep_locks = 1;
 
 
 	while (1) {
 	while (1) {
-		wret = walk_down_tree(trans, root, path, &level);
-		if (wret < 0)
+		wret = walk_down_tree(trans, root, path, wc);
+		if (wret < 0) {
 			ret = wret;
 			ret = wret;
-		if (wret != 0)
 			break;
 			break;
+		}
 
 
-		wret = walk_up_tree(trans, root, path, &level, parent_level);
+		wret = walk_up_tree(trans, root, path, wc, parent_level);
 		if (wret < 0)
 		if (wret < 0)
 			ret = wret;
 			ret = wret;
 		if (wret != 0)
 		if (wret != 0)
 			break;
 			break;
 	}
 	}
 
 
+	kfree(wc);
 	btrfs_free_path(path);
 	btrfs_free_path(path);
 	return ret;
 	return ret;
 }
 }

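The rewrite above replaces the old recursive-style level juggling with an explicit state machine: walk_control carries per-level refcounts and flags plus a stage (DROP_REFERENCE or UPDATE_BACKREF), and btrfs_drop_snapshot() alternates the two walkers while checkpointing drop_progress so a drop can resume across transaction commits. The driving loop reduces to this shape (condensed from the hunk, not new logic):

	while (1) {
		ret = walk_down_tree(trans, root, path, wc);
		if (ret < 0)
			break;			/* error           */
		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
		if (ret < 0)
			break;			/* error           */
		if (ret > 0)
			break;			/* whole tree done */
		/* record drop_progress; end/restart the transaction if
		 * a commit or delayed-ref flush is in progress */
	}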
+ 4 - 1
fs/btrfs/file.c

@@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 	}
 	}
 	if (end_pos > isize) {
 	if (end_pos > isize) {
 		i_size_write(inode, end_pos);
 		i_size_write(inode, end_pos);
-		btrfs_update_inode(trans, root, inode);
+		/* we've only changed i_size in ram, and we haven't updated
+		 * the disk i_size.  There is no need to log the inode
+		 * at this time.
+		 */
 	}
 	}
 	err = btrfs_end_transaction(trans, root);
 	err = btrfs_end_transaction(trans, root);
 out_unlock:
 out_unlock:

+ 18 - 7
fs/btrfs/inode.c

@@ -3580,12 +3580,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 		owner = 1;
 		owner = 1;
 	BTRFS_I(inode)->block_group =
 	BTRFS_I(inode)->block_group =
 			btrfs_find_block_group(root, 0, alloc_hint, owner);
 			btrfs_find_block_group(root, 0, alloc_hint, owner);
-	if ((mode & S_IFREG)) {
-		if (btrfs_test_opt(root, NODATASUM))
-			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
-		if (btrfs_test_opt(root, NODATACOW))
-			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
-	}
 
 
 	key[0].objectid = objectid;
 	key[0].objectid = objectid;
 	btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
 	btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -3640,6 +3634,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 
 
 	btrfs_inherit_iflags(inode, dir);
 	btrfs_inherit_iflags(inode, dir);
 
 
+	if ((mode & S_IFREG)) {
+		if (btrfs_test_opt(root, NODATASUM))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
+		if (btrfs_test_opt(root, NODATACOW))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
+	}
+
 	insert_inode_hash(inode);
 	insert_inode_hash(inode);
 	inode_tree_add(inode);
 	inode_tree_add(inode);
 	return inode;
 	return inode;
@@ -5082,6 +5083,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 	struct extent_map *em;
 	struct extent_map *em;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root;
 	int ret;
 	int ret;
 
 
 	alloc_start = offset & ~mask;
 	alloc_start = offset & ~mask;
@@ -5100,6 +5102,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 			goto out;
 			goto out;
 	}
 	}
 
 
+	root = BTRFS_I(inode)->root;
+
+	ret = btrfs_check_data_free_space(root, inode,
+					  alloc_end - alloc_start);
+	if (ret)
+		goto out;
+
 	locked_end = alloc_end - 1;
 	locked_end = alloc_end - 1;
 	while (1) {
 	while (1) {
 		struct btrfs_ordered_extent *ordered;
 		struct btrfs_ordered_extent *ordered;
@@ -5107,7 +5116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
 		trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
 		if (!trans) {
 		if (!trans) {
 			ret = -EIO;
 			ret = -EIO;
-			goto out;
+			goto out_free;
 		}
 		}
 
 
 		/* the extent lock is ordered inside the running
 		/* the extent lock is ordered inside the running
@@ -5168,6 +5177,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		      GFP_NOFS);
 		      GFP_NOFS);
 
 
 	btrfs_end_transaction(trans, BTRFS_I(inode)->root);
 	btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+out_free:
+	btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
 out:
 out:
 	mutex_unlock(&inode->i_mutex);
 	mutex_unlock(&inode->i_mutex);
 	return ret;
 	return ret;

+ 4 - 2
fs/btrfs/ioctl.c

@@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 						struct btrfs_file_extent_item);
 						struct btrfs_file_extent_item);
 			comp = btrfs_file_extent_compression(leaf, extent);
 			comp = btrfs_file_extent_compression(leaf, extent);
 			type = btrfs_file_extent_type(leaf, extent);
 			type = btrfs_file_extent_type(leaf, extent);
-			if (type == BTRFS_FILE_EXTENT_REG) {
+			if (type == BTRFS_FILE_EXTENT_REG ||
+			    type == BTRFS_FILE_EXTENT_PREALLOC) {
 				disko = btrfs_file_extent_disk_bytenr(leaf,
 				disko = btrfs_file_extent_disk_bytenr(leaf,
 								      extent);
 								      extent);
 				diskl = btrfs_file_extent_disk_num_bytes(leaf,
 				diskl = btrfs_file_extent_disk_num_bytes(leaf,
@@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 			new_key.objectid = inode->i_ino;
 			new_key.objectid = inode->i_ino;
 			new_key.offset = key.offset + destoff - off;
 			new_key.offset = key.offset + destoff - off;
 
 
-			if (type == BTRFS_FILE_EXTENT_REG) {
+			if (type == BTRFS_FILE_EXTENT_REG ||
+			    type == BTRFS_FILE_EXTENT_PREALLOC) {
 				ret = btrfs_insert_empty_item(trans, root, path,
 				ret = btrfs_insert_empty_item(trans, root, path,
 							      &new_key, size);
 							      &new_key, size);
 				if (ret)
 				if (ret)

+ 1 - 4
fs/btrfs/relocation.c

@@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work)
 		btrfs_end_transaction(trans, root);
 		btrfs_end_transaction(trans, root);
 	}
 	}
 
 
-	btrfs_drop_dead_root(reloc_root);
+	btrfs_drop_snapshot(reloc_root, 0);
 
 
 	if (atomic_dec_and_test(async->num_pending))
 	if (atomic_dec_and_test(async->num_pending))
 		complete(async->done);
 		complete(async->done);
@@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 
 
 			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
 			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
 			BUG_ON(ret);
 			BUG_ON(ret);
-
-			btrfs_tree_unlock(eb);
-			free_extent_buffer(eb);
 		}
 		}
 		if (!lowest) {
 		if (!lowest) {
 			btrfs_tree_unlock(upper->eb);
 			btrfs_tree_unlock(upper->eb);

+ 3 - 1
fs/btrfs/transaction.c

@@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
 	return 0;
 	return 0;
 }
 }
 
 
+#if 0
 /*
 /*
  * when dropping snapshots, we generate a ton of delayed refs, and it makes
  * when dropping snapshots, we generate a ton of delayed refs, and it makes
  * sense not to join the transaction while it is trying to flush the current
  * sense not to join the transaction while it is trying to flush the current
@@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root)
 	btrfs_btree_balance_dirty(tree_root, nr);
 	btrfs_btree_balance_dirty(tree_root, nr);
 	return ret;
 	return ret;
 }
 }
+#endif
 
 
 /*
 /*
  * new snapshots need to be created at a very specific time in the
  * new snapshots need to be created at a very specific time in the
@@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
 	while (!list_empty(&list)) {
 	while (!list_empty(&list)) {
 		root = list_entry(list.next, struct btrfs_root, root_list);
 		root = list_entry(list.next, struct btrfs_root, root_list);
 		list_del_init(&root->root_list);
 		list_del_init(&root->root_list);
-		btrfs_drop_dead_root(root);
+		btrfs_drop_snapshot(root, 0);
 	}
 	}
 	return 0;
 	return 0;
 }
 }

+ 3 - 0
fs/notify/inotify/inotify_user.c

@@ -296,12 +296,15 @@ static int inotify_fasync(int fd, struct file *file, int on)
 static int inotify_release(struct inode *ignored, struct file *file)
 static int inotify_release(struct inode *ignored, struct file *file)
 {
 {
 	struct fsnotify_group *group = file->private_data;
 	struct fsnotify_group *group = file->private_data;
+	struct user_struct *user = group->inotify_data.user;
 
 
 	fsnotify_clear_marks_by_group(group);
 	fsnotify_clear_marks_by_group(group);
 
 
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
 	fsnotify_put_group(group);
 	fsnotify_put_group(group);
 
 
+	atomic_dec(&user->inotify_devs);
+
 	return 0;
 	return 0;
 }
 }
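The added decrement restores the per-user inotify instance count on close, and it follows a common refcounting rule: cache any field you still need before dropping the reference that may free its container. In general form (sketch of the ordering above):

	struct user_struct *user = group->inotify_data.user;

	fsnotify_put_group(group);		/* may free 'group'       */
	atomic_dec(&user->inotify_devs);	/* safe: pointer cached   */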