|
@@ -28,6 +28,11 @@ static inline bool is_simm5(unsigned int value)
|
|
|
return value + 0x10 < 0x20;
|
|
|
}
|
|
|
|
|
|
+static inline bool is_sethi(unsigned int value)
|
|
|
+{
|
|
|
+ return (value & ~0x3fffff) == 0;
|
|
|
+}
|
|
|
+
|
|
|
static void bpf_flush_icache(void *start_, void *end_)
|
|
|
{
|
|
|
/* Cheetah's I-cache is fully coherent. */
|
|
@@ -367,16 +372,252 @@ static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
|
|
|
+ int *hbsp, int *lbsp, int *abbasp)
|
|
|
+{
|
|
|
+ int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ lowest_bit_set = highest_bit_set = -1;
|
|
|
+ i = 0;
|
|
|
+ do {
|
|
|
+ if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
|
|
|
+ lowest_bit_set = i;
|
|
|
+ if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
|
|
|
+ highest_bit_set = (64 - i - 1);
|
|
|
+ } while (++i < 32 && (highest_bit_set == -1 ||
|
|
|
+ lowest_bit_set == -1));
|
|
|
+ if (i == 32) {
|
|
|
+ i = 0;
|
|
|
+ do {
|
|
|
+ if (lowest_bit_set == -1 && ((high_bits >> i) & 1))
|
|
|
+ lowest_bit_set = i + 32;
|
|
|
+ if (highest_bit_set == -1 &&
|
|
|
+ ((low_bits >> (32 - i - 1)) & 1))
|
|
|
+ highest_bit_set = 32 - i - 1;
|
|
|
+ } while (++i < 32 && (highest_bit_set == -1 ||
|
|
|
+ lowest_bit_set == -1));
|
|
|
+ }
|
|
|
+
|
|
|
+ all_bits_between_are_set = 1;
|
|
|
+ for (i = lowest_bit_set; i <= highest_bit_set; i++) {
|
|
|
+ if (i < 32) {
|
|
|
+ if ((low_bits & (1 << i)) != 0)
|
|
|
+ continue;
|
|
|
+ } else {
|
|
|
+ if ((high_bits & (1 << (i - 32))) != 0)
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ all_bits_between_are_set = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ *hbsp = highest_bit_set;
|
|
|
+ *lbsp = lowest_bit_set;
|
|
|
+ *abbasp = all_bits_between_are_set;
|
|
|
+}
|
|
|
+
|
|
|
+static unsigned long create_simple_focus_bits(unsigned long high_bits,
|
|
|
+ unsigned long low_bits,
|
|
|
+ int lowest_bit_set, int shift)
|
|
|
+{
|
|
|
+ long hi, lo;
|
|
|
+
|
|
|
+ if (lowest_bit_set < 32) {
|
|
|
+ lo = (low_bits >> lowest_bit_set) << shift;
|
|
|
+ hi = ((high_bits << (32 - lowest_bit_set)) << shift);
|
|
|
+ } else {
|
|
|
+ lo = 0;
|
|
|
+ hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
|
|
|
+ }
|
|
|
+ return hi | lo;
|
|
|
+}
|
|
|
+
|
|
|
+static bool const64_is_2insns(unsigned long high_bits,
|
|
|
+ unsigned long low_bits)
|
|
|
+{
|
|
|
+ int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
|
|
|
+
|
|
|
+ if (high_bits == 0 || high_bits == 0xffffffff)
|
|
|
+ return true;
|
|
|
+
|
|
|
+ analyze_64bit_constant(high_bits, low_bits,
|
|
|
+ &highest_bit_set, &lowest_bit_set,
|
|
|
+ &all_bits_between_are_set);
|
|
|
+
|
|
|
+ if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
|
|
|
+ all_bits_between_are_set != 0)
|
|
|
+ return true;
|
|
|
+
|
|
|
+ if (highest_bit_set - lowest_bit_set < 21)
|
|
|
+ return true;
|
|
|
+
|
|
|
+ return false;
|
|
|
+}
|
|
|
+
|
|
|
+static void sparc_emit_set_const64_quick2(unsigned long high_bits,
|
|
|
+ unsigned long low_imm,
|
|
|
+ unsigned int dest,
|
|
|
+ int shift_count, struct jit_ctx *ctx)
|
|
|
+{
|
|
|
+ emit_loadimm32(high_bits, dest, ctx);
|
|
|
+
|
|
|
+ /* Now shift it up into place. */
|
|
|
+ emit_alu_K(SLLX, dest, shift_count, ctx);
|
|
|
+
|
|
|
+ /* If there is a low immediate part piece, finish up by
|
|
|
+ * putting that in as well.
|
|
|
+ */
|
|
|
+ if (low_imm != 0)
|
|
|
+ emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
|
|
|
+}
|
|
|
+
|
|
|
static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
|
|
|
{
|
|
|
+ int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
|
|
|
unsigned int tmp = bpf2sparc[TMP_REG_1];
|
|
|
- u32 high_part = (K >> 32);
|
|
|
- u32 low_part = (K & 0xffffffff);
|
|
|
+ u32 low_bits = (K & 0xffffffff);
|
|
|
+ u32 high_bits = (K >> 32);
|
|
|
+
|
|
|
+ /* These two tests also take care of all of the one
|
|
|
+ * instruction cases.
|
|
|
+ */
|
|
|
+ if (high_bits == 0xffffffff && (low_bits & 0x80000000))
|
|
|
+ return emit_loadimm_sext(K, dest, ctx);
|
|
|
+ if (high_bits == 0x00000000)
|
|
|
+ return emit_loadimm32(K, dest, ctx);
|
|
|
+
|
|
|
+ analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
|
|
|
+ &lowest_bit_set, &all_bits_between_are_set);
|
|
|
+
|
|
|
+ /* 1) mov -1, %reg
|
|
|
+ * sllx %reg, shift, %reg
|
|
|
+ * 2) mov -1, %reg
|
|
|
+ * srlx %reg, shift, %reg
|
|
|
+ * 3) mov some_small_const, %reg
|
|
|
+ * sllx %reg, shift, %reg
|
|
|
+ */
|
|
|
+ if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
|
|
|
+ all_bits_between_are_set != 0) ||
|
|
|
+ ((highest_bit_set - lowest_bit_set) < 12)) {
|
|
|
+ int shift = lowest_bit_set;
|
|
|
+ long the_const = -1;
|
|
|
+
|
|
|
+ if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
|
|
|
+ all_bits_between_are_set == 0) {
|
|
|
+ the_const =
|
|
|
+ create_simple_focus_bits(high_bits, low_bits,
|
|
|
+ lowest_bit_set, 0);
|
|
|
+ } else if (lowest_bit_set == 0)
|
|
|
+ shift = -(63 - highest_bit_set);
|
|
|
+
|
|
|
+ emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
|
|
|
+ if (shift > 0)
|
|
|
+ emit_alu_K(SLLX, dest, shift, ctx);
|
|
|
+ else if (shift < 0)
|
|
|
+ emit_alu_K(SRLX, dest, -shift, ctx);
|
|
|
+
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Now a range of 22 or less bits set somewhere.
|
|
|
+ * 1) sethi %hi(focus_bits), %reg
|
|
|
+ * sllx %reg, shift, %reg
|
|
|
+ * 2) sethi %hi(focus_bits), %reg
|
|
|
+ * srlx %reg, shift, %reg
|
|
|
+ */
|
|
|
+ if ((highest_bit_set - lowest_bit_set) < 21) {
|
|
|
+ unsigned long focus_bits =
|
|
|
+ create_simple_focus_bits(high_bits, low_bits,
|
|
|
+ lowest_bit_set, 10);
|
|
|
+
|
|
|
+ emit(SETHI(focus_bits, dest), ctx);
|
|
|
+
|
|
|
+ /* If lowest_bit_set == 10 then a sethi alone could
|
|
|
+ * have done it.
|
|
|
+ */
|
|
|
+ if (lowest_bit_set < 10)
|
|
|
+ emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
|
|
|
+ else if (lowest_bit_set > 10)
|
|
|
+ emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Ok, now 3 instruction sequences. */
|
|
|
+ if (low_bits == 0) {
|
|
|
+ emit_loadimm32(high_bits, dest, ctx);
|
|
|
+ emit_alu_K(SLLX, dest, 32, ctx);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* We may be able to do something quick
|
|
|
+ * when the constant is negated, so try that.
|
|
|
+ */
|
|
|
+ if (const64_is_2insns((~high_bits) & 0xffffffff,
|
|
|
+ (~low_bits) & 0xfffffc00)) {
|
|
|
+ /* NOTE: The trailing bits get XOR'd so we need the
|
|
|
+ * non-negated bits, not the negated ones.
|
|
|
+ */
|
|
|
+ unsigned long trailing_bits = low_bits & 0x3ff;
|
|
|
+
|
|
|
+ if ((((~high_bits) & 0xffffffff) == 0 &&
|
|
|
+ ((~low_bits) & 0x80000000) == 0) ||
|
|
|
+ (((~high_bits) & 0xffffffff) == 0xffffffff &&
|
|
|
+ ((~low_bits) & 0x80000000) != 0)) {
|
|
|
+ unsigned long fast_int = (~low_bits & 0xffffffff);
|
|
|
+
|
|
|
+ if ((is_sethi(fast_int) &&
|
|
|
+ (~high_bits & 0xffffffff) == 0)) {
|
|
|
+ emit(SETHI(fast_int, dest), ctx);
|
|
|
+ } else if (is_simm13(fast_int)) {
|
|
|
+ emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
|
|
|
+ } else {
|
|
|
+ emit_loadimm64(fast_int, dest, ctx);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ u64 n = ((~low_bits) & 0xfffffc00) |
|
|
|
+ (((unsigned long)((~high_bits) & 0xffffffff))<<32);
|
|
|
+ emit_loadimm64(n, dest, ctx);
|
|
|
+ }
|
|
|
+
|
|
|
+ low_bits = -0x400 | trailing_bits;
|
|
|
+
|
|
|
+ emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* 1) sethi %hi(xxx), %reg
|
|
|
+ * or %reg, %lo(xxx), %reg
|
|
|
+ * sllx %reg, yyy, %reg
|
|
|
+ */
|
|
|
+ if ((highest_bit_set - lowest_bit_set) < 32) {
|
|
|
+ unsigned long focus_bits =
|
|
|
+ create_simple_focus_bits(high_bits, low_bits,
|
|
|
+ lowest_bit_set, 0);
|
|
|
+
|
|
|
+ /* So what we know is that the set bits straddle the
|
|
|
+ * middle of the 64-bit word.
|
|
|
+ */
|
|
|
+ sparc_emit_set_const64_quick2(focus_bits, 0, dest,
|
|
|
+ lowest_bit_set, ctx);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* 1) sethi %hi(high_bits), %reg
|
|
|
+ * or %reg, %lo(high_bits), %reg
|
|
|
+ * sllx %reg, 32, %reg
|
|
|
+ * or %reg, low_bits, %reg
|
|
|
+ */
|
|
|
+ if (is_simm13(low_bits) && ((int)low_bits > 0)) {
|
|
|
+ sparc_emit_set_const64_quick2(high_bits, low_bits,
|
|
|
+ dest, 32, ctx);
|
|
|
+ return;
|
|
|
+ }
|
|
|
|
|
|
+ /* Oh well, we tried... Do a full 64-bit decomposition. */
|
|
|
ctx->tmp_1_used = true;
|
|
|
|
|
|
- emit_set_const(high_part, tmp, ctx);
|
|
|
- emit_set_const(low_part, dest, ctx);
|
|
|
+ emit_loadimm32(high_bits, tmp, ctx);
|
|
|
+ emit_loadimm32(low_bits, dest, ctx);
|
|
|
emit_alu_K(SLLX, tmp, 32, ctx);
|
|
|
emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
|
|
|
}
|