7 years ago · 06ae48269d
--- a/include/linux/reciprocal_div.h
+++ b/include/linux/reciprocal_div.h
@@ -25,6 +25,9 @@ struct reciprocal_value {
 
				 	u8 sh1, sh2;
			
 
				 };
			
 
				 
			
 
				+/* "reciprocal_value" and "reciprocal_divide" together implement the basic
			
 
				+ * version of the algorithm described in Figure 4.1 of the paper.
			
 
				+ */
			
 
				 struct reciprocal_value reciprocal_value(u32 d);
			
 
				 
			
 
				 static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
			
@@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
 
				 	return (t + ((a - t) >> R.sh1)) >> R.sh2;
			
 
				 }
			
 
				 
			
 
				+struct reciprocal_value_adv {
			
 
				+	u32 m;
			
 
				+	u8 sh, exp;
			
 
				+	bool is_wide_m;
			
 
				+};
			
 
				+
			
 
				+/* "reciprocal_value_adv" implements the advanced version of the algorithm
			
 
				+ * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose
			
 
				+ * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The
			
 
				+ * exception case could be easily handled before calling "reciprocal_value_adv".
			
 
				+ *
			
 
				+ * The advanced version requires more complex calculation to get the reciprocal
			
 
				+ * multiplier and other control variables, but then could reduce the required
			
 
				+ * emulation operations.
			
 
				+ *
			
 
				+ * It makes no sense to use this advanced version for host divide emulation,
			
 
				+ * those extra complexities for calculating multiplier etc could completely
			
 
				+ * waive our saving on emulation operations.
			
 
				+ *
			
 
				+ * However, it makes sense to use it for JIT divide code generation for which
			
 
				+ * we are willing to trade performance of JITed code with that of host. As shown
			
 
				+ * by the following pseudo code, the required emulation operations could go down
			
 
				+ * from 6 (the basic version) to 3 or 4.
			
 
				+ *
			
 
				+ * To use the result of "reciprocal_value_adv", suppose we want to calculate
			
 
				+ * n/d, the pseudo C code will be:
			
 
				+ *
			
 
				+ *   struct reciprocal_value_adv rvalue;
			
 
				+ *   u8 pre_shift, exp;
			
 
				+ *
			
 
				+ *   // handle exception case.
			
 
				+ *   if (d >= (1U << 31)) {
			
 
				+ *     result = n >= d;
			
 
				+ *     return;
			
 
				+ *   }
			
 
				+ *
			
 
				+ *   rvalue = reciprocal_value_adv(d, 32)
			
 
				+ *   exp = rvalue.exp;
			
 
				+ *   if (rvalue.is_wide_m && !(d & 1)) {
			
 
				+ *     // floor(log2(d & (2^32 -d)))
			
 
				+ *     pre_shift = fls(d & -d) - 1;
			
 
				+ *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
			
 
				+ *   } else {
			
 
				+ *     pre_shift = 0;
			
 
				+ *   }
			
 
				+ *
			
 
				+ *   // code generation starts.
			
 
				+ *   if (imm == 1U << exp) {
			
 
				+ *     result = n >> exp;
			
 
				+ *   } else if (rvalue.is_wide_m) {
			
 
				+ *     // pre_shift must be zero when reached here.
			
 
				+ *     t = (n * rvalue.m) >> 32;
			
 
				+ *     result = n - t;
			
 
				+ *     result >>= 1;
			
 
				+ *     result += t;
			
 
				+ *     result >>= rvalue.sh - 1;
			
 
				+ *   } else {
			
 
				+ *     if (pre_shift)
			
 
				+ *       result = n >> pre_shift;
			
 
				+ *     result = ((u64)result * rvalue.m) >> 32;
			
 
				+ *     result >>= rvalue.sh;
			
 
				+ *   }
			
 
				+ */
			
 
				+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
			
 
				+
			
 
				 #endif /* _LINUX_RECIPROCAL_DIV_H */
			
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,4 +1,5 @@
 
				 // SPDX-License-Identifier: GPL-2.0
			
 
				+#include <linux/bug.h>
			
 
				 #include <linux/kernel.h>
			
 
				 #include <asm/div64.h>
			
 
				 #include <linux/reciprocal_div.h>
			
@@ -26,3 +27,43 @@ struct reciprocal_value reciprocal_value(u32 d)
 
				 	return R;
			
 
				 }
			
 
				 EXPORT_SYMBOL(reciprocal_value);
			
 
				+
			
 
				+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec)
			
 
				+{
			
 
				+	struct reciprocal_value_adv R;
			
 
				+	u32 l, post_shift;
			
 
				+	u64 mhigh, mlow;
			
 
				+
			
 
				+	/* ceil(log2(d)) */
			
 
				+	l = fls(d - 1);
			
 
				+	/* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to
			
 
				+	 * be handled before calling "reciprocal_value_adv", please see the
			
 
				+	 * comment at include/linux/reciprocal_div.h.
			
 
				+	 */
			
 
				+	WARN(l == 32,
			
 
				+	     "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor",
			
 
				+	     d, __func__);
			
 
				+	post_shift = l;
			
 
				+	mlow = 1ULL << (32 + l);
			
 
				+	do_div(mlow, d);
			
 
				+	mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec));
			
 
				+	do_div(mhigh, d);
			
 
				+
			
 
				+	for (; post_shift > 0; post_shift--) {
			
 
				+		u64 lo = mlow >> 1, hi = mhigh >> 1;
			
 
				+
			
 
				+		if (lo >= hi)
			
 
				+			break;
			
 
				+
			
 
				+		mlow = lo;
			
 
				+		mhigh = hi;
			
 
				+	}
			
 
				+
			
 
				+	R.m = (u32)mhigh;
			
 
				+	R.sh = post_shift;
			
 
				+	R.exp = l;
			
 
				+	R.is_wide_m = mhigh > U32_MAX;
			
 
				+
			
 
				+	return R;
			
 
				+}
			
 
				+EXPORT_SYMBOL(reciprocal_value_adv);