From a079836005fb92eb1fee19e59654b337a41fd0ff Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 24 Apr 2025 18:23:36 +0000 Subject: [PATCH 001/161] tcg/loongarch64: Fix vec_val computation in tcg_target_const_match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only use vece for a vector constant. This avoids an assertion failure in sextract64 when vece contains garbage. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index cbd7642b58..740b7c264d 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -211,12 +211,14 @@ static bool tcg_target_const_match(int64_t val, int ct, if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { return true; } - int64_t vec_val = sextract64(val, 0, 8 << vece); - if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) { - return true; - } - if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) { - return true; + if (ct & (TCG_CT_CONST_VCMP | TCG_CT_CONST_VADD)) { + int64_t vec_val = sextract64(val, 0, 8 << vece); + if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) { + return true; + } + if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) { + return true; + } } return false; } From 911f7328e9e9de80bf6b1a4697ecf83cc3661f5f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 24 Apr 2025 18:45:28 +0000 Subject: [PATCH 002/161] tcg/loongarch64: Improve constraints for TCG_CT_CONST_VCMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the TCGCond given to tcg_target_const_match to exactly match the supported constant. Adjust the code generation to assume this has been done -- recall that encode_*_insn contain assertions that the constants are valid. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 38 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 740b7c264d..879f66f255 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -213,8 +213,18 @@ static bool tcg_target_const_match(int64_t val, int ct, } if (ct & (TCG_CT_CONST_VCMP | TCG_CT_CONST_VADD)) { int64_t vec_val = sextract64(val, 0, 8 << vece); - if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) { - return true; + if (ct & TCG_CT_CONST_VCMP) { + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_LE: + case TCG_COND_LT: + return -0x10 <= vec_val && vec_val <= 0x0f; + case TCG_COND_LEU: + case TCG_COND_LTU: + return 0x00 <= vec_val && vec_val <= 0x1f; + default: + return false; + } } if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) { return true; @@ -2029,28 +2039,22 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, * Try vseqi/vslei/vslti */ int64_t value = sextract64(a2, 0, 8 << vece); - if ((cond == TCG_COND_EQ || - cond == TCG_COND_LE || - cond == TCG_COND_LT) && - (-0x10 <= value && value <= 0x0f)) { + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_LE: + case TCG_COND_LT: insn = cmp_vec_imm_insn[cond][lasx][vece]; tcg_out32(s, encode_vdvjsk5_insn(insn, a0, a1, value)); break; - } else if ((cond == TCG_COND_LEU || - cond == TCG_COND_LTU) && - (0x00 <= value && value <= 0x1f)) { + case TCG_COND_LEU: + case TCG_COND_LTU: insn = cmp_vec_imm_insn[cond][lasx][vece]; tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value)); break; + default: + g_assert_not_reached(); } - - /* - * Fallback to: - * dupi_vec temp, a2 - * cmp_vec a0, a1, temp, cond - */ - tcg_out_dupi_vec(s, type, vece, TCG_VEC_TMP0, a2); - a2 = TCG_VEC_TMP0; + break; } insn = cmp_vec_insn[cond][lasx][vece]; From a3c1c579baf511a2b95f7b233187a981b6ef46ea Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 21 Apr 2025 11:05:29 -0700 Subject: [PATCH 003/161] tcg/optimize: Introduce opt_insert_{before,after} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate the places we call tcg_op_insert_{before,after} within the optimization pass. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/optimize.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index f922f86a1d..a4d4ad3005 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -344,6 +344,18 @@ static TCGArg arg_new_temp(OptContext *ctx) return temp_arg(ts); } +static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, + TCGOpcode opc, unsigned narg) +{ + return tcg_op_insert_after(ctx->tcg, op, opc, narg); +} + +static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, + TCGOpcode opc, unsigned narg) +{ + return tcg_op_insert_before(ctx->tcg, op, opc, narg); +} + static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) { TCGTemp *dst_ts = arg_temp(dst); @@ -808,7 +820,7 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, if (!TCG_TARGET_HAS_tst) { TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32 ? INDEX_op_and_i32 : INDEX_op_and_i64); - TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, and_opc, 3); + TCGOp *op2 = opt_insert_before(ctx, op, and_opc, 3); TCGArg tmp = arg_new_temp(ctx); op2->args[0] = tmp; @@ -901,8 +913,8 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) /* Expand to AND with a temporary if no backend support. */ if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) { - TCGOp *op1 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); - TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); + TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and_i32, 3); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and_i32, 3); TCGArg t1 = arg_new_temp(ctx); TCGArg t2 = arg_new_temp(ctx); @@ -1263,7 +1275,7 @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) rh = op->args[1]; /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); + op2 = opt_insert_before(ctx, op, 0, 2); tcg_opt_gen_movi(ctx, op, rl, al); tcg_opt_gen_movi(ctx, op2, rh, ah); @@ -2096,7 +2108,7 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) rh = op->args[1]; /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); + op2 = opt_insert_before(ctx, op, 0, 2); tcg_opt_gen_movi(ctx, op, rl, l); tcg_opt_gen_movi(ctx, op2, rh, h); @@ -2406,7 +2418,7 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) op->args[3] = 1; } else { if (sh) { - op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3); + op2 = opt_insert_before(ctx, op, shr_opc, 3); op2->args[0] = ret; op2->args[1] = src1; op2->args[2] = arg_new_constant(ctx, sh); @@ -2418,17 +2430,17 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } if (neg && inv) { - op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3); + op2 = opt_insert_after(ctx, op, sub_opc, 3); op2->args[0] = ret; op2->args[1] = ret; op2->args[2] = arg_new_constant(ctx, 1); } else if (inv) { - op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3); + op2 = opt_insert_after(ctx, op, xor_opc, 3); op2->args[0] = ret; op2->args[1] = ret; op2->args[2] = arg_new_constant(ctx, 1); } else if (neg) { - op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2); + op2 = opt_insert_after(ctx, op, neg_opc, 2); op2->args[0] = ret; op2->args[1] = ret; } From cf5c9f697f2025880e8555467ddb6debc6349cd8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 21 Jan 2025 20:34:41 -0800 Subject: [PATCH 004/161] tcg: Add TCGType to tcg_op_insert_{after,before} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We cannot rely on the value copied from TCGOP_TYPE(op), because the relevant op could be typeless, such as INDEX_op_call. Fixes: fb744ece3a78 ("tcg: Copy TCGOP_TYPE in tcg_op_insert_{after,before}") Suggested-by: Nicholas Piggin Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/optimize.c | 4 ++-- tcg/tcg-internal.h | 4 ++-- tcg/tcg.c | 17 ++++++++++------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index a4d4ad3005..3bd4ee4d58 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -347,13 +347,13 @@ static TCGArg arg_new_temp(OptContext *ctx) static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, TCGOpcode opc, unsigned narg) { - return tcg_op_insert_after(ctx->tcg, op, opc, narg); + return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg); } static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, TCGOpcode opc, unsigned narg) { - return tcg_op_insert_before(ctx->tcg, op, opc, narg); + return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg); } static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index ff85fb23fa..d6a12afe06 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -107,8 +107,8 @@ void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e); TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, - TCGOpcode opc, unsigned nargs); + TCGOpcode, TCGType, unsigned nargs); TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, - TCGOpcode opc, unsigned nargs); + TCGOpcode, TCGType, unsigned nargs); #endif /* TCG_INTERNAL_H */ diff --git a/tcg/tcg.c b/tcg/tcg.c index ec7f6743d7..198d6181d9 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3449,21 +3449,21 @@ TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) } TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, unsigned nargs) + TCGOpcode opc, TCGType type, unsigned nargs) { TCGOp *new_op = tcg_op_alloc(opc, nargs); - TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op); + TCGOP_TYPE(new_op) = type; QTAILQ_INSERT_BEFORE(old_op, new_op, link); return new_op; } TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, unsigned nargs) + TCGOpcode opc, TCGType type, unsigned nargs) { TCGOp *new_op = tcg_op_alloc(opc, nargs); - TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op); + TCGOP_TYPE(new_op) = type; QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); return new_op; } @@ -4214,7 +4214,8 @@ liveness_pass_2(TCGContext *s) TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_ld_i32 : INDEX_op_ld_i64); - TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); + TCGOp *lop = tcg_op_insert_before(s, op, lopc, + arg_ts->type, 3); lop->args[0] = temp_arg(dir_ts); lop->args[1] = temp_arg(arg_ts->mem_base); @@ -4277,7 +4278,8 @@ liveness_pass_2(TCGContext *s) TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_st_i32 : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, sopc, + arg_ts->type, 3); TCGTemp *out_ts = dir_ts; if (IS_DEAD_ARG(0)) { @@ -4313,7 +4315,8 @@ liveness_pass_2(TCGContext *s) TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_st_i32 : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, sopc, + arg_ts->type, 3); sop->args[0] = temp_arg(dir_ts); sop->args[1] = temp_arg(arg_ts->mem_base); From 5500bd9e2ec5ec85858fcca06c04a57337aa14c7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 3 Jan 2025 14:55:56 -0800 Subject: [PATCH 005/161] tcg: Add all_outop[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add infrastructure for more consolidated output of opcodes. The base structure allows for constraints to be either static or dynamic, and for the existence of those constraints to replace TCG_TARGET_HAS_* and the bulk of tcg_op_supported. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/tcg.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 8 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 198d6181d9..5090cdb3c6 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -861,6 +861,7 @@ static int tcg_out_pool_finalize(TCGContext *s) #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), typedef enum { + C_Dynamic = -2, C_NotImplemented = -1, #include "tcg-target-con-set.h" } TCGConstraintSetIndex; @@ -954,6 +955,29 @@ static const TCGConstraintSet constraint_sets[] = { #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4) +/* + * TCGOutOp is the base class for a set of structures that describe how + * to generate code for a given TCGOpcode. + * + * @static_constraint: + * C_NotImplemented: The TCGOpcode is not supported by the backend. + * C_Dynamic: Use @dynamic_constraint to select a constraint set + * based on any of @type, @flags, or host isa. + * Otherwise: The register allocation constrains for the TCGOpcode. + * + * Subclasses of TCGOutOp will define a set of output routines that may + * be used. Such routines will often be selected by the set of registers + * and constants that come out of register allocation. The set of + * routines that are provided will guide the set of constraints that are + * legal. In particular, assume that tcg_optimize() has done its job in + * swapping commutative operands and folding operations for which all + * operands are constant. + */ +typedef struct TCGOutOp { + TCGConstraintSetIndex static_constraint; + TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags); +} TCGOutOp; + #include "tcg-target.c.inc" #ifndef CONFIG_TCG_INTERPRETER @@ -963,6 +987,10 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - < MIN_TLB_MASK_TABLE_OFS); #endif +/* Register allocation descriptions for every TCGOpcode. */ +static const TCGOutOp * const all_outop[NB_OPS] = { +}; + /* * All TCG threads except the parent (i.e. the one that called tcg_context_init * and registered the target's TCG globals) must register with this function @@ -2416,8 +2444,32 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return has_type && TCG_TARGET_HAS_cmpsel_vec; default: - tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); + if (op < INDEX_op_last_generic) { + const TCGOutOp *outop; + TCGConstraintSetIndex con_set; + + if (!has_type) { + return false; + } + + outop = all_outop[op]; + tcg_debug_assert(outop != NULL); + + con_set = outop->static_constraint; + if (con_set == C_Dynamic) { + con_set = outop->dynamic_constraint(type, flags); + } + if (con_set >= 0) { + return true; + } + tcg_debug_assert(con_set == C_NotImplemented); + return false; + } + tcg_debug_assert(op < NB_OPS); return true; + + case INDEX_op_last_generic: + g_assert_not_reached(); } } @@ -3335,19 +3387,27 @@ static void process_constraint_sets(void) static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) { - const TCGOpDef *def = &tcg_op_defs[op->opc]; + TCGOpcode opc = op->opc; + TCGType type = TCGOP_TYPE(op); + unsigned flags = TCGOP_FLAGS(op); + const TCGOpDef *def = &tcg_op_defs[opc]; + const TCGOutOp *outop = all_outop[opc]; TCGConstraintSetIndex con_set; -#ifdef CONFIG_DEBUG_TCG - assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op))); -#endif - if (def->flags & TCG_OPF_NOT_PRESENT) { return empty_cts; } - con_set = tcg_target_op_def(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)); - tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets)); + if (outop) { + con_set = outop->static_constraint; + if (con_set == C_Dynamic) { + con_set = outop->dynamic_constraint(type, flags); + } + } else { + con_set = tcg_target_op_def(opc, type, flags); + } + tcg_debug_assert(con_set >= 0); + tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); /* The constraint arguments must match TCGOpcode arguments. */ tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); From 2225fa242c5d95c5cd83bb6f8566d43dd7a00211 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 22 Feb 2025 09:36:21 -0800 Subject: [PATCH 006/161] tcg: Use extract2 for cross-word 64-bit extract on 32-bit host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index fec6d678a2..f68c4f9702 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2804,9 +2804,18 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, tcg_gen_movi_i32(TCGV_HIGH(ret), 0); return; } - /* The field is split across two words. One double-word - shift is better than two double-word shifts. */ - goto do_shift_and; + + /* The field is split across two words. */ + tcg_gen_extract2_i32(TCGV_LOW(ret), TCGV_LOW(arg), + TCGV_HIGH(arg), ofs); + if (len <= 32) { + tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(ret), 0, len); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } else { + tcg_gen_extract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), + ofs, len - 32); + } + return; } if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) { @@ -2844,7 +2853,6 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, so that we get ext8u, ext16u, and ext32u. */ switch (len) { case 1 ... 8: case 16: case 32: - do_shift_and: tcg_gen_shri_i64(ret, arg, ofs); tcg_gen_andi_i64(ret, ret, (1ull << len) - 1); break; From 48e8de684aff7ad112aafcf74f776d2a66ef192e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 26 Dec 2024 12:01:57 -0800 Subject: [PATCH 007/161] tcg: Remove INDEX_op_ext{8,16,32}* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the fully general extract opcodes instead. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 14 -- include/tcg/tcg-opc.h | 10 - tcg/aarch64/tcg-target-has.h | 10 - tcg/aarch64/tcg-target.c.inc | 22 +- tcg/arm/tcg-target-has.h | 4 - tcg/arm/tcg-target.c.inc | 7 - tcg/i386/tcg-target-has.h | 10 - tcg/i386/tcg-target.c.inc | 24 +- tcg/loongarch64/tcg-target-has.h | 10 - tcg/loongarch64/tcg-target.c.inc | 22 +- tcg/mips/tcg-target-has.h | 13 - tcg/mips/tcg-target.c.inc | 20 +- tcg/optimize.c | 61 +---- tcg/ppc/tcg-target-has.h | 12 - tcg/ppc/tcg-target.c.inc | 17 +- tcg/riscv/tcg-target-has.h | 10 - tcg/riscv/tcg-target.c.inc | 22 +- tcg/s390x/tcg-target-has.h | 10 - tcg/s390x/tcg-target.c.inc | 22 +- tcg/sparc64/tcg-target-has.h | 10 - tcg/sparc64/tcg-target.c.inc | 14 +- tcg/tcg-has.h | 6 - tcg/tcg-op.c | 414 +++++++------------------------ tcg/tcg.c | 46 ---- tcg/tci.c | 36 --- tcg/tci/tcg-target-has.h | 10 - tcg/tci/tcg-target.c.inc | 102 +++----- 27 files changed, 135 insertions(+), 823 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 688984fd39..3db7b81637 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -396,20 +396,6 @@ Misc - | *t0* = *t1* | Move *t1* to *t0* (both operands must have the same type). - * - ext8s_i32/i64 *t0*, *t1* - - ext8u_i32/i64 *t0*, *t1* - - ext16s_i32/i64 *t0*, *t1* - - ext16u_i32/i64 *t0*, *t1* - - ext32s_i64 *t0*, *t1* - - ext32u_i64 *t0*, *t1* - - - | 8, 16 or 32 bit sign/zero extension (both operands must have the same type) - * - bswap16_i32/i64 *t0*, *t1*, *flags* - | 16 bit byte swap on the low bits of a 32/64 bit input. diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 5bf78b0764..c26cffaa3f 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -85,10 +85,6 @@ DEF(mulsh_i32, 1, 2, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) -DEF(ext8s_i32, 1, 1, 0, 0) -DEF(ext16s_i32, 1, 1, 0, 0) -DEF(ext8u_i32, 1, 1, 0, 0) -DEF(ext16u_i32, 1, 1, 0, 0) DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) DEF(not_i32, 1, 1, 0, 0) @@ -149,12 +145,6 @@ DEF(extrl_i64_i32, 1, 1, 0, 0) DEF(extrh_i64_i32, 1, 1, 0, 0) DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) -DEF(ext8s_i64, 1, 1, 0, 0) -DEF(ext16s_i64, 1, 1, 0, 0) -DEF(ext32s_i64, 1, 1, 0, 0) -DEF(ext8u_i64, 1, 1, 0, 0) -DEF(ext16u_i64, 1, 1, 0, 0) -DEF(ext32u_i64, 1, 1, 0, 0) DEF(bswap16_i64, 1, 1, 1, 0) DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 39f01c14cd..bfd587c0fc 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -15,10 +15,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 @@ -44,12 +40,6 @@ #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 4645242d85..b8b26c1c93 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2493,17 +2493,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: + case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -2979,16 +2969,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext16s_i32: - case INDEX_op_ext8u_i32: - case INDEX_op_ext16u_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extract_i32: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index e3510a8f7a..8398c80c8e 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -24,10 +24,6 @@ extern bool use_neon_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */ -#define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index cec3d761d4..0e48f790f9 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2113,10 +2113,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8u_i32: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16u_i32: default: g_assert_not_reached(); } @@ -2138,9 +2134,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_not_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: - case INDEX_op_ext8s_i32: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16u_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: return C_O1_I1(r, r); diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 63768ff058..bbf55c86b6 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -28,10 +28,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_div2_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 @@ -57,12 +53,6 @@ #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 33d303a123..02024018cb 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3016,17 +3016,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: + case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -3663,18 +3653,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrh_i64_i32: return C_O1_I1(r, 0); - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - return C_O1_I1(r, q); - - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 188b00799f..166c9d7e41 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -22,10 +22,6 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 @@ -47,12 +43,6 @@ #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 879f66f255..6e77d3e79b 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1707,17 +1707,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: + case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -2243,16 +2233,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_brcond_i64: return C_O0_I2(rz, rz); - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index df6960fe9a..fd96905484 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -80,8 +80,6 @@ extern bool use_mips32r2_instructions; /* optional instructions detected at runtime */ #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions -#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_ctz_i32 0 @@ -93,23 +91,12 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions -#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 #endif -/* optional instructions automatically implemented */ -#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */ -#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */ - -#if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */ -#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */ -#endif - #define TCG_TARGET_HAS_qemu_ldst_i128 0 #define TCG_TARGET_HAS_tst 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index f8c105ba37..f77159bdc7 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -647,7 +647,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { - tcg_debug_assert(TCG_TARGET_HAS_ext8s_i32); + tcg_debug_assert(use_mips32r2_instructions); tcg_out_opc_reg(s, OPC_SEB, rd, TCG_REG_ZERO, rs); } @@ -658,7 +658,7 @@ static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rs) static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { - tcg_debug_assert(TCG_TARGET_HAS_ext16s_i32); + tcg_debug_assert(use_mips32r2_instructions); tcg_out_opc_reg(s, OPC_SEH, rd, TCG_REG_ZERO, rs); } @@ -2106,15 +2106,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: + case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -2138,8 +2130,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_not_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: - case INDEX_op_ext8s_i32: - case INDEX_op_ext16s_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: case INDEX_op_ld8u_i64: @@ -2154,10 +2144,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: diff --git a/tcg/optimize.c b/tcg/optimize.c index 3bd4ee4d58..e9e654597d 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -513,18 +513,6 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_ctpop_i64: return ctpop64(x); - CASE_OP_32_64(ext8s): - return (int8_t)x; - - CASE_OP_32_64(ext16s): - return (int16_t)x; - - CASE_OP_32_64(ext8u): - return (uint8_t)x; - - CASE_OP_32_64(ext16u): - return (uint16_t)x; - CASE_OP_32_64(bswap16): x = bswap16(x); return y & TCG_BSWAP_OS ? (int16_t)x : x; @@ -537,12 +525,10 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) return bswap64(x); case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: return (int32_t)x; case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: - case INDEX_op_ext32u_i64: return (uint32_t)x; case INDEX_op_extrh_i64_i32: @@ -1869,8 +1855,7 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op) static bool fold_exts(OptContext *ctx, TCGOp *op) { - uint64_t s_mask_old, s_mask, z_mask; - bool type_change = false; + uint64_t s_mask, z_mask; TempOptInfo *t1; if (fold_const1(ctx, op)) { @@ -1880,72 +1865,38 @@ static bool fold_exts(OptContext *ctx, TCGOp *op) t1 = arg_info(op->args[1]); z_mask = t1->z_mask; s_mask = t1->s_mask; - s_mask_old = s_mask; switch (op->opc) { - CASE_OP_32_64(ext8s): - s_mask |= INT8_MIN; - z_mask = (int8_t)z_mask; - break; - CASE_OP_32_64(ext16s): - s_mask |= INT16_MIN; - z_mask = (int16_t)z_mask; - break; case INDEX_op_ext_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32s_i64: s_mask |= INT32_MIN; z_mask = (int32_t)z_mask; break; default: g_assert_not_reached(); } - - if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) { - return true; - } - return fold_masks_zs(ctx, op, z_mask, s_mask); } static bool fold_extu(OptContext *ctx, TCGOp *op) { - uint64_t z_mask_old, z_mask; - bool type_change = false; + uint64_t z_mask; if (fold_const1(ctx, op)) { return true; } - z_mask_old = z_mask = arg_info(op->args[1])->z_mask; - + z_mask = arg_info(op->args[1])->z_mask; switch (op->opc) { - CASE_OP_32_64(ext8u): - z_mask = (uint8_t)z_mask; - break; - CASE_OP_32_64(ext16u): - z_mask = (uint16_t)z_mask; - break; case INDEX_op_extrl_i64_i32: case INDEX_op_extu_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32u_i64: z_mask = (uint32_t)z_mask; break; case INDEX_op_extrh_i64_i32: - type_change = true; z_mask >>= 32; break; default: g_assert_not_reached(); } - - if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) { - return true; - } - return fold_masks_z(ctx, op, z_mask); } @@ -2948,15 +2899,9 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(extract2): done = fold_extract2(&ctx, op); break; - CASE_OP_32_64(ext8s): - CASE_OP_32_64(ext16s): - case INDEX_op_ext32s_i64: case INDEX_op_ext_i32_i64: done = fold_exts(&ctx, op); break; - CASE_OP_32_64(ext8u): - CASE_OP_32_64(ext16u): - case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 6db91f78ce..9acfc574c5 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -16,16 +16,10 @@ #define have_altivec (cpuinfo & CPUINFO_ALTIVEC) #define have_vsx (cpuinfo & CPUINFO_VSX) -/* optional instructions automatically implemented */ -#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */ -#define TCG_TARGET_HAS_ext16u_i32 0 - /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 have_isa_3_00 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 @@ -52,12 +46,6 @@ #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 have_isa_3_00 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 0 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 822925a19b..e10c1c5162 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3473,17 +3473,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: + case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -4109,8 +4099,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ctpop_i32: case INDEX_op_neg_i32: case INDEX_op_not_i32: - case INDEX_op_ext8s_i32: - case INDEX_op_ext16s_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: @@ -4125,9 +4113,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ctpop_i64: case INDEX_op_neg_i64: case INDEX_op_not_i64: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i64: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 98081084f2..fc62049c78 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -22,10 +22,6 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i32 1 @@ -46,12 +42,6 @@ #define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index f7e1ca5a56..d525df4e1d 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2385,17 +2385,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: + case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -2643,17 +2633,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_not_i64: case INDEX_op_neg_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ext_i32_i64: diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index e99e671642..aea805455f 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -31,10 +31,6 @@ extern uint64_t s390_facilities[3]; /* optional instructions */ #define TCG_TARGET_HAS_div2_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) @@ -59,12 +55,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index b2e1cd60ff..8421320928 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2781,17 +2781,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: + case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -3340,16 +3330,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_neg_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extract_i32: diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 2f46df8c61..ad6f35da17 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -17,10 +17,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_rot_i32 0 -#define TCG_TARGET_HAS_ext8s_i32 0 -#define TCG_TARGET_HAS_ext16s_i32 0 -#define TCG_TARGET_HAS_ext8u_i32 0 -#define TCG_TARGET_HAS_ext16u_i32 0 #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_not_i32 1 @@ -46,12 +42,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 -#define TCG_TARGET_HAS_ext8s_i64 0 -#define TCG_TARGET_HAS_ext16s_i64 0 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 7c722f59a8..787e0d896c 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1517,17 +1517,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: + case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: default: g_assert_not_reached(); @@ -1557,8 +1547,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_neg_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extract_i64: diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 418e4673eb..4ccdc6bbee 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -16,12 +16,6 @@ #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_div2_i64 0 #define TCG_TARGET_HAS_rot_i64 0 -#define TCG_TARGET_HAS_ext8s_i64 0 -#define TCG_TARGET_HAS_ext16s_i64 0 -#define TCG_TARGET_HAS_ext32s_i64 0 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 0 #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index f68c4f9702..48793ed439 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -414,17 +414,19 @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) case -1: tcg_gen_mov_i32(ret, arg1); return; - case 0xff: - /* Don't recurse with tcg_gen_ext8u_i32. */ - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1); - return; - } - break; - case 0xffff: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1); - return; + default: + /* + * Canonicalize on extract, if valid. This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + if (!(arg2 & (arg2 + 1))) { + unsigned len = ctz32(~arg2); + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, len)) { + tcg_gen_extract_i32(ret, arg1, 0, len); + return; + } } break; } @@ -955,40 +957,20 @@ void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg, TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len); } else { - /* To help two-operand hosts we prefer to zero-extend first, - which allows ARG to stay live. */ - switch (len) { - case 16: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_ext16u_i32(ret, arg); - tcg_gen_shli_i32(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_ext8u_i32(ret, arg); - tcg_gen_shli_i32(ret, ret, ofs); - return; - } - break; + /* + * To help two-operand hosts we prefer to zero-extend first, + * which allows ARG to stay live. + */ + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, len)) { + tcg_gen_extract_i32(ret, arg, 0, len); + tcg_gen_shli_i32(ret, ret, ofs); + return; } /* Otherwise prefer zero-extension over AND for code size. */ - switch (ofs + len) { - case 16: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_shli_i32(ret, arg, ofs); - tcg_gen_ext16u_i32(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_shli_i32(ret, arg, ofs); - tcg_gen_ext8u_i32(ret, ret); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, ofs + len)) { + tcg_gen_shli_i32(ret, arg, ofs); + tcg_gen_extract_i32(ret, ret, 0, ofs + len); + return; } tcg_gen_andi_i32(ret, arg, (1u << len) - 1); tcg_gen_shli_i32(ret, ret, ofs); @@ -1008,32 +990,21 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg, tcg_gen_shri_i32(ret, arg, 32 - len); return; } - if (ofs == 0) { - tcg_gen_andi_i32(ret, arg, (1u << len) - 1); - return; - } if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) { tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len); return; } + if (ofs == 0) { + tcg_gen_andi_i32(ret, arg, (1u << len) - 1); + return; + } /* Assume that zero-extension, if available, is cheaper than a shift. */ - switch (ofs + len) { - case 16: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_ext16u_i32(ret, arg); - tcg_gen_shri_i32(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_ext8u_i32(ret, arg); - tcg_gen_shri_i32(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, ofs + len)) { + tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, 0, ofs + len); + tcg_gen_shri_i32(ret, ret, ofs); + return; } /* ??? Ideally we'd know what values are available for immediate AND. @@ -1064,16 +1035,6 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, tcg_gen_sari_i32(ret, arg, 32 - len); return; } - if (ofs == 0) { - switch (len) { - case 16: - tcg_gen_ext16s_i32(ret, arg); - return; - case 8: - tcg_gen_ext8s_i32(ret, arg); - return; - } - } if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) { tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len); @@ -1081,37 +1042,15 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, } /* Assume that sign-extension, if available, is cheaper than a shift. */ - switch (ofs + len) { - case 16: - if (TCG_TARGET_HAS_ext16s_i32) { - tcg_gen_ext16s_i32(ret, arg); - tcg_gen_sari_i32(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i32) { - tcg_gen_ext8s_i32(ret, arg); - tcg_gen_sari_i32(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, 0, ofs + len)) { + tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, 0, ofs + len); + tcg_gen_sari_i32(ret, ret, ofs); + return; } - switch (len) { - case 16: - if (TCG_TARGET_HAS_ext16s_i32) { - tcg_gen_shri_i32(ret, arg, ofs); - tcg_gen_ext16s_i32(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i32) { - tcg_gen_shri_i32(ret, arg, ofs); - tcg_gen_ext8s_i32(ret, ret); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, 0, len)) { + tcg_gen_shri_i32(ret, arg, ofs); + tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, ret, 0, len); + return; } tcg_gen_shli_i32(ret, arg, 32 - len - ofs); @@ -1281,40 +1220,22 @@ void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext8s_i32) { - tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg); - } else { - tcg_gen_shli_i32(ret, arg, 24); - tcg_gen_sari_i32(ret, ret, 24); - } + tcg_gen_sextract_i32(ret, arg, 0, 8); } void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext16s_i32) { - tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg); - } else { - tcg_gen_shli_i32(ret, arg, 16); - tcg_gen_sari_i32(ret, ret, 16); - } + tcg_gen_sextract_i32(ret, arg, 0, 16); } void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg); - } else { - tcg_gen_andi_i32(ret, arg, 0xffu); - } + tcg_gen_extract_i32(ret, arg, 0, 8); } void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg); - } else { - tcg_gen_andi_i32(ret, arg, 0xffffu); - } + tcg_gen_extract_i32(ret, arg, 0, 16); } /* @@ -1794,23 +1715,19 @@ void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) case -1: tcg_gen_mov_i64(ret, arg1); return; - case 0xff: - /* Don't recurse with tcg_gen_ext8u_i64. */ - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1); - return; - } - break; - case 0xffff: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1); - return; - } - break; - case 0xffffffffu: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1); - return; + default: + /* + * Canonicalize on extract, if valid. This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + if (!(arg2 & (arg2 + 1))) { + unsigned len = ctz64(~arg2); + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, len)) { + tcg_gen_extract_i64(ret, arg1, 0, len); + return; + } } break; } @@ -2118,77 +2035,32 @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); - } else if (TCG_TARGET_HAS_ext8s_i64) { - tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg); - } else { - tcg_gen_shli_i64(ret, arg, 56); - tcg_gen_sari_i64(ret, ret, 56); - } + tcg_gen_sextract_i64(ret, arg, 0, 8); } void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); - } else if (TCG_TARGET_HAS_ext16s_i64) { - tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg); - } else { - tcg_gen_shli_i64(ret, arg, 48); - tcg_gen_sari_i64(ret, ret, 48); - } + tcg_gen_sextract_i64(ret, arg, 0, 16); } void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); - } else if (TCG_TARGET_HAS_ext32s_i64) { - tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg); - } else { - tcg_gen_shli_i64(ret, arg, 32); - tcg_gen_sari_i64(ret, ret, 32); - } + tcg_gen_sextract_i64(ret, arg, 0, 32); } void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); - } else if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg); - } else { - tcg_gen_andi_i64(ret, arg, 0xffu); - } + tcg_gen_extract_i64(ret, arg, 0, 8); } void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); - } else if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg); - } else { - tcg_gen_andi_i64(ret, arg, 0xffffu); - } + tcg_gen_extract_i64(ret, arg, 0, 16); } void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); - } else if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg); - } else { - tcg_gen_andi_i64(ret, arg, 0xffffffffu); - } + tcg_gen_extract_i64(ret, arg, 0, 32); } /* @@ -2720,54 +2592,20 @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg, return; } } - /* To help two-operand hosts we prefer to zero-extend first, - which allows ARG to stay live. */ - switch (len) { - case 32: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_ext32u_i64(ret, arg); - tcg_gen_shli_i64(ret, ret, ofs); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_ext16u_i64(ret, arg); - tcg_gen_shli_i64(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_ext8u_i64(ret, arg); - tcg_gen_shli_i64(ret, ret, ofs); - return; - } - break; + /* + * To help two-operand hosts we prefer to zero-extend first, + * which allows ARG to stay live. + */ + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, len)) { + tcg_gen_extract_i64(ret, arg, 0, len); + tcg_gen_shli_i64(ret, ret, ofs); + return; } /* Otherwise prefer zero-extension over AND for code size. */ - switch (ofs + len) { - case 32: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_shli_i64(ret, arg, ofs); - tcg_gen_ext32u_i64(ret, ret); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_shli_i64(ret, arg, ofs); - tcg_gen_ext16u_i64(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_shli_i64(ret, arg, ofs); - tcg_gen_ext8u_i64(ret, ret); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, ofs + len)) { + tcg_gen_shli_i64(ret, arg, ofs); + tcg_gen_extract_i64(ret, ret, 0, ofs + len); + return; } tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); tcg_gen_shli_i64(ret, ret, ofs); @@ -2787,10 +2625,6 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, tcg_gen_shri_i64(ret, arg, 64 - len); return; } - if (ofs == 0) { - tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); - return; - } if (TCG_TARGET_REG_BITS == 32) { /* Look for a 32-bit extract within one of the two words. */ @@ -2822,30 +2656,16 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len); return; } + if (ofs == 0) { + tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); + return; + } /* Assume that zero-extension, if available, is cheaper than a shift. */ - switch (ofs + len) { - case 32: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_ext32u_i64(ret, arg); - tcg_gen_shri_i64(ret, ret, ofs); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_ext16u_i64(ret, arg); - tcg_gen_shri_i64(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_ext8u_i64(ret, arg); - tcg_gen_shri_i64(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, ofs + len)) { + tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, 0, ofs + len); + tcg_gen_shri_i64(ret, ret, ofs); + return; } /* ??? Ideally we'd know what values are available for immediate AND. @@ -2876,19 +2696,6 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, tcg_gen_sari_i64(ret, arg, 64 - len); return; } - if (ofs == 0) { - switch (len) { - case 32: - tcg_gen_ext32s_i64(ret, arg); - return; - case 16: - tcg_gen_ext16s_i64(ret, arg); - return; - case 8: - tcg_gen_ext8s_i64(ret, arg); - return; - } - } if (TCG_TARGET_REG_BITS == 32) { /* Look for a 32-bit extract within one of the two words. */ @@ -2928,52 +2735,17 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, } /* Assume that sign-extension, if available, is cheaper than a shift. */ - switch (ofs + len) { - case 32: - if (TCG_TARGET_HAS_ext32s_i64) { - tcg_gen_ext32s_i64(ret, arg); - tcg_gen_sari_i64(ret, ret, ofs); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16s_i64) { - tcg_gen_ext16s_i64(ret, arg); - tcg_gen_sari_i64(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i64) { - tcg_gen_ext8s_i64(ret, arg); - tcg_gen_sari_i64(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, 0, ofs + len)) { + tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, 0, ofs + len); + tcg_gen_sari_i64(ret, ret, ofs); + return; } - switch (len) { - case 32: - if (TCG_TARGET_HAS_ext32s_i64) { - tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_ext32s_i64(ret, ret); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16s_i64) { - tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_ext16s_i64(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i64) { - tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_ext8s_i64(ret, ret); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, 0, len)) { + tcg_gen_shri_i64(ret, arg, ofs); + tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, ret, 0, len); + return; } + tcg_gen_shli_i64(ret, arg, 64 - len - ofs); tcg_gen_sari_i64(ret, ret, 64 - len); } diff --git a/tcg/tcg.c b/tcg/tcg.c index 5090cdb3c6..e8fd89e4c8 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2242,14 +2242,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_muluh_i32; case INDEX_op_mulsh_i32: return TCG_TARGET_HAS_mulsh_i32; - case INDEX_op_ext8s_i32: - return TCG_TARGET_HAS_ext8s_i32; - case INDEX_op_ext16s_i32: - return TCG_TARGET_HAS_ext16s_i32; - case INDEX_op_ext8u_i32: - return TCG_TARGET_HAS_ext8u_i32; - case INDEX_op_ext16u_i32: - return TCG_TARGET_HAS_ext16u_i32; case INDEX_op_bswap16_i32: return TCG_TARGET_HAS_bswap16_i32; case INDEX_op_bswap32_i32: @@ -2328,18 +2320,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: return TCG_TARGET_HAS_extr_i64_i32; - case INDEX_op_ext8s_i64: - return TCG_TARGET_HAS_ext8s_i64; - case INDEX_op_ext16s_i64: - return TCG_TARGET_HAS_ext16s_i64; - case INDEX_op_ext32s_i64: - return TCG_TARGET_HAS_ext32s_i64; - case INDEX_op_ext8u_i64: - return TCG_TARGET_HAS_ext8u_i64; - case INDEX_op_ext16u_i64: - return TCG_TARGET_HAS_ext16u_i64; - case INDEX_op_ext32u_i64: - return TCG_TARGET_HAS_ext32u_i64; case INDEX_op_bswap16_i64: return TCG_TARGET_HAS_bswap16_i64; case INDEX_op_bswap32_i64: @@ -5430,32 +5410,6 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* emit instruction */ switch (op->opc) { - case INDEX_op_ext8s_i32: - tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]); - break; - case INDEX_op_ext8s_i64: - tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]); - break; - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - tcg_out_ext8u(s, new_args[0], new_args[1]); - break; - case INDEX_op_ext16s_i32: - tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]); - break; - case INDEX_op_ext16s_i64: - tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]); - break; - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - tcg_out_ext16u(s, new_args[0], new_args[1]); - break; - case INDEX_op_ext32s_i64: - tcg_out_ext32s(s, new_args[0], new_args[1]); - break; - case INDEX_op_ext32u_i64: - tcg_out_ext32u(s, new_args[0], new_args[1]); - break; case INDEX_op_ext_i32_i64: tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); break; diff --git a/tcg/tci.c b/tcg/tci.c index d223258efe..531cd83aae 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -689,31 +689,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_write_reg64(regs, r1, r0, tmp64); break; #endif -#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 - CASE_32_64(ext8s) - tci_args_rr(insn, &r0, &r1); - regs[r0] = (int8_t)regs[r1]; - break; -#endif -#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 || \ - TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 - CASE_32_64(ext16s) - tci_args_rr(insn, &r0, &r1); - regs[r0] = (int16_t)regs[r1]; - break; -#endif -#if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64 - CASE_32_64(ext8u) - tci_args_rr(insn, &r0, &r1); - regs[r0] = (uint8_t)regs[r1]; - break; -#endif -#if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64 - CASE_32_64(ext16u) - tci_args_rr(insn, &r0, &r1); - regs[r0] = (uint16_t)regs[r1]; - break; -#endif #if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 CASE_32_64(bswap16) tci_args_rr(insn, &r0, &r1); @@ -864,12 +839,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tb_ptr = ptr; } break; - case INDEX_op_ext32s_i64: case INDEX_op_ext_i32_i64: tci_args_rr(insn, &r0, &r1); regs[r0] = (int32_t)regs[r1]; break; - case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: tci_args_rr(insn, &r0, &r1); regs[r0] = (uint32_t)regs[r1]; @@ -1092,15 +1065,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_mov_i32: case INDEX_op_mov_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i32: diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index c8785ca8dc..cb0964c3d4 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -11,10 +11,6 @@ #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_eqv_i32 1 @@ -40,12 +36,6 @@ #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 36e018dd19..6f8f1dd8ae 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -59,16 +59,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_not_i64: case INDEX_op_neg_i32: case INDEX_op_neg_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i32: @@ -535,76 +525,54 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } +static void tcg_out_extract(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, unsigned pos, unsigned len) +{ + TCGOpcode opc = type == TCG_TYPE_I32 ? + INDEX_op_extract_i32 : + INDEX_op_extract_i64; + tcg_out_op_rrbb(s, opc, rd, rs, pos, len); +} + +static void tcg_out_sextract(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, unsigned pos, unsigned len) +{ + TCGOpcode opc = type == TCG_TYPE_I32 ? + INDEX_op_sextract_i32 : + INDEX_op_sextract_i64; + tcg_out_op_rrbb(s, opc, rd, rs, pos, len); +} + static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { - switch (type) { - case TCG_TYPE_I32: - tcg_debug_assert(TCG_TARGET_HAS_ext8s_i32); - tcg_out_op_rr(s, INDEX_op_ext8s_i32, rd, rs); - break; -#if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_debug_assert(TCG_TARGET_HAS_ext8s_i64); - tcg_out_op_rr(s, INDEX_op_ext8s_i64, rd, rs); - break; -#endif - default: - g_assert_not_reached(); - } + tcg_out_sextract(s, type, rd, rs, 0, 8); } static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rs) { - if (TCG_TARGET_REG_BITS == 64) { - tcg_debug_assert(TCG_TARGET_HAS_ext8u_i64); - tcg_out_op_rr(s, INDEX_op_ext8u_i64, rd, rs); - } else { - tcg_debug_assert(TCG_TARGET_HAS_ext8u_i32); - tcg_out_op_rr(s, INDEX_op_ext8u_i32, rd, rs); - } + tcg_out_extract(s, TCG_TYPE_REG, rd, rs, 0, 8); } static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { - switch (type) { - case TCG_TYPE_I32: - tcg_debug_assert(TCG_TARGET_HAS_ext16s_i32); - tcg_out_op_rr(s, INDEX_op_ext16s_i32, rd, rs); - break; -#if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_debug_assert(TCG_TARGET_HAS_ext16s_i64); - tcg_out_op_rr(s, INDEX_op_ext16s_i64, rd, rs); - break; -#endif - default: - g_assert_not_reached(); - } + tcg_out_sextract(s, type, rd, rs, 0, 16); } static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rs) { - if (TCG_TARGET_REG_BITS == 64) { - tcg_debug_assert(TCG_TARGET_HAS_ext16u_i64); - tcg_out_op_rr(s, INDEX_op_ext16u_i64, rd, rs); - } else { - tcg_debug_assert(TCG_TARGET_HAS_ext16u_i32); - tcg_out_op_rr(s, INDEX_op_ext16u_i32, rd, rs); - } + tcg_out_extract(s, TCG_TYPE_REG, rd, rs, 0, 16); } static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs) { tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_debug_assert(TCG_TARGET_HAS_ext32s_i64); - tcg_out_op_rr(s, INDEX_op_ext32s_i64, rd, rs); + tcg_out_sextract(s, TCG_TYPE_I64, rd, rs, 0, 32); } static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rs) { tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_debug_assert(TCG_TARGET_HAS_ext32u_i64); - tcg_out_op_rr(s, INDEX_op_ext32u_i64, rd, rs); + tcg_out_extract(s, TCG_TYPE_I64, rd, rs, 0, 32); } static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) @@ -690,7 +658,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGOpcode exts; + int width; switch (opc) { case INDEX_op_goto_ptr: @@ -777,18 +745,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ - exts = INDEX_op_ext16s_i32; - goto do_bswap; case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ - exts = INDEX_op_ext16s_i64; + width = 16; goto do_bswap; case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ - exts = INDEX_op_ext32s_i64; + width = 32; do_bswap: /* The base tci bswaps zero-extend, and ignore high bits. */ tcg_out_op_rr(s, opc, args[0], args[1]); if (args[2] & TCG_BSWAP_OS) { - tcg_out_op_rr(s, exts, args[0], args[0]); + tcg_out_sextract(s, TCG_TYPE_REG, args[0], args[0], 0, width); } break; @@ -838,17 +804,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: + case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: From b5701261da6607e61ef1fe605d85bf31806fcd34 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 28 Dec 2024 15:58:24 -0800 Subject: [PATCH 008/161] tcg: Merge INDEX_op_mov_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Begin to rely on TCGOp.type to discriminate operations, rather than two different opcodes. Convert mov first. Introduce TCG_OPF_INT in order to keep opcode dumps the same. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 4 ++-- include/tcg/tcg-opc.h | 4 ++-- include/tcg/tcg.h | 2 ++ tcg/aarch64/tcg-target.c.inc | 2 -- tcg/arm/tcg-target.c.inc | 1 - tcg/i386/tcg-target.c.inc | 2 -- tcg/loongarch64/tcg-target.c.inc | 2 -- tcg/mips/tcg-target.c.inc | 2 -- tcg/optimize.c | 7 +++---- tcg/ppc/tcg-target.c.inc | 2 -- tcg/riscv/tcg-target.c.inc | 2 -- tcg/s390x/tcg-target.c.inc | 2 -- tcg/sparc64/tcg-target.c.inc | 2 -- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 32 ++++++++++++++++++-------------- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 15 +-------------- 17 files changed, 32 insertions(+), 58 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 3db7b81637..e6ccc78fa1 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -391,10 +391,10 @@ Misc .. list-table:: - * - mov_i32/i64 *t0*, *t1* + * - mov *t0*, *t1* - | *t0* = *t1* - | Move *t1* to *t0* (both operands must have the same type). + | Move *t1* to *t0*. * - bswap16_i32/i64 *t0*, *t1*, *flags* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index c26cffaa3f..766fd00d99 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -37,7 +37,8 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT) -DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT) +DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) + DEF(setcond_i32, 1, 2, 1, 0) DEF(negsetcond_i32, 1, 2, 1, 0) DEF(movcond_i32, 1, 4, 1, 0) @@ -98,7 +99,6 @@ DEF(clz_i32, 1, 2, 0, 0) DEF(ctz_i32, 1, 2, 0, 0) DEF(ctpop_i32, 1, 1, 0, 0) -DEF(mov_i64, 1, 1, 0, TCG_OPF_NOT_PRESENT) DEF(setcond_i64, 1, 2, 1, 0) DEF(negsetcond_i64, 1, 2, 1, 0) DEF(movcond_i64, 1, 4, 1, 0) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 84d99508b6..c6b50b5226 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -741,6 +741,8 @@ enum { /* Instruction has side effects: it cannot be removed if its outputs are not used, and might trigger exceptions. */ TCG_OPF_SIDE_EFFECTS = 0x08, + /* Instruction operands may be I32 or I64 */ + TCG_OPF_INT = 0x10, /* Instruction is optional and not implemented by the host, or insn is generic and should not be implemented by the host. */ TCG_OPF_NOT_PRESENT = 0x20, diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index b8b26c1c93..466042a577 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2488,8 +2488,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_mb(s, a0); break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 0e48f790f9..0fafe97230 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2109,7 +2109,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mb(s, args[0]); break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 02024018cb..75c8665d74 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3011,8 +3011,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_mb: tcg_out_mb(s, a0); break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 6e77d3e79b..b0a4ec4c13 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1702,8 +1702,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false); break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index f77159bdc7..4d52e0bde0 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2101,8 +2101,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_mb: tcg_out_mb(s, a0); break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/optimize.c b/tcg/optimize.c index e9e654597d..8d5bad07aa 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -375,10 +375,8 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) switch (ctx->type) { case TCG_TYPE_I32: - new_op = INDEX_op_mov_i32; - break; case TCG_TYPE_I64: - new_op = INDEX_op_mov_i64; + new_op = INDEX_op_mov; break; case TCG_TYPE_V64: case TCG_TYPE_V128: @@ -2933,7 +2931,8 @@ void tcg_optimize(TCGContext *s) case INDEX_op_mb: done = fold_mb(&ctx, op); break; - CASE_OP_32_64_VEC(mov): + case INDEX_op_mov: + case INDEX_op_mov_vec: done = fold_mov(&ctx, op); break; CASE_OP_32_64(movcond): diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index e10c1c5162..11dcfe66f3 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3468,8 +3468,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mb(s, args[0]); break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index d525df4e1d..6f9d87df48 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2380,8 +2380,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 8421320928..30fa26e884 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2776,8 +2776,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 787e0d896c..cb5e8d554d 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1512,8 +1512,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_arithi(s, a0, a1, a2, SHIFT_SRA); break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 48793ed439..108dc61e9a 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -351,7 +351,7 @@ void tcg_gen_discard_i32(TCGv_i32 arg) void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) { if (ret != arg) { - tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg); + tcg_gen_op2_i32(INDEX_op_mov, ret, arg); } } @@ -1411,7 +1411,7 @@ void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) return; } if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg); + tcg_gen_op2_i64(INDEX_op_mov, ret, arg); } else { TCGTemp *ts = tcgv_i64_temp(arg); diff --git a/tcg/tcg.c b/tcg/tcg.c index e8fd89e4c8..7ff211081d 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2187,7 +2187,9 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return TCG_TARGET_HAS_qemu_ldst_i128; - case INDEX_op_mov_i32: + case INDEX_op_mov: + return has_type; + case INDEX_op_setcond_i32: case INDEX_op_brcond_i32: case INDEX_op_movcond_i32: @@ -2269,7 +2271,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; - case INDEX_op_mov_i64: case INDEX_op_setcond_i64: case INDEX_op_brcond_i64: case INDEX_op_movcond_i64: @@ -2840,18 +2841,23 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) col += ne_fprintf(f, ",%s", t); } } else { - col += ne_fprintf(f, " %s ", def->name); + if (def->flags & TCG_OPF_INT) { + col += ne_fprintf(f, " %s_i%d ", + def->name, + 8 * tcg_type_size(TCGOP_TYPE(op))); + } else if (def->flags & TCG_OPF_VECTOR) { + col += ne_fprintf(f, "%s v%d,e%d,", + def->name, + 8 * tcg_type_size(TCGOP_TYPE(op)), + 8 << TCGOP_VECE(op)); + } else { + col += ne_fprintf(f, " %s ", def->name); + } nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; nb_cargs = def->nb_cargs; - if (def->flags & TCG_OPF_VECTOR) { - col += ne_fprintf(f, "v%d,e%d,", - 8 * tcg_type_size(TCGOP_TYPE(op)), - 8 << TCGOP_VECE(op)); - } - k = 0; for (i = 0; i < nb_oargs; i++) { const char *sep = k ? "," : ""; @@ -4144,8 +4150,7 @@ liveness_pass_1(TCGContext *s) /* Incorporate constraints for this operand. */ switch (opc) { - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: + case INDEX_op_mov: /* Note that these are TCG_OPF_NOT_PRESENT and do not have proper constraints. That said, special case moves to propagate preferences backward. */ @@ -4304,7 +4309,7 @@ liveness_pass_2(TCGContext *s) } /* Outputs become available. */ - if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { + if (opc == INDEX_op_mov) { arg_ts = arg_temp(op->args[0]); dir_ts = arg_ts->state_ptr; if (dir_ts) { @@ -6435,8 +6440,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) TCGOpcode opc = op->opc; switch (opc) { - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: + case INDEX_op_mov: case INDEX_op_mov_vec: tcg_reg_alloc_mov(s, op); break; diff --git a/tcg/tci.c b/tcg/tci.c index 531cd83aae..78183ea47d 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -463,7 +463,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = regs[tmp32 ? r3 : r4]; break; #endif - CASE_32_64(mov) + case INDEX_op_mov: tci_args_rr(insn, &r0, &r1); regs[r0] = regs[r1]; break; @@ -1063,8 +1063,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), str_r(r1), s2); break; - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: + case INDEX_op_mov: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 6f8f1dd8ae..9a5d3c2875 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -483,18 +483,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base, static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - switch (type) { - case TCG_TYPE_I32: - tcg_out_op_rr(s, INDEX_op_mov_i32, ret, arg); - break; -#if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_out_op_rr(s, INDEX_op_mov_i64, ret, arg); - break; -#endif - default: - g_assert_not_reached(); - } + tcg_out_op_rr(s, INDEX_op_mov, ret, arg); return true; } @@ -799,8 +788,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_v(s, opc); break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ From 662cdbcf0073c4de8456f5e573523571e3d9da5b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 09:00:07 -0800 Subject: [PATCH 009/161] tcg: Convert add to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop all backend support for an immediate as the first operand. This should never happen in any case, as we swap commutative operands to place immediates as the second operand. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 51 +++++++------- tcg/arm/tcg-target.c.inc | 43 ++++++++---- tcg/i386/tcg-target.c.inc | 56 +++++++++------- tcg/loongarch64/tcg-target.c.inc | 38 +++++------ tcg/mips/tcg-target.c.inc | 31 ++++++--- tcg/ppc/tcg-target.c.inc | 47 +++++++------ tcg/riscv/tcg-target.c.inc | 39 ++++++----- tcg/s390x/tcg-target.c.inc | 110 +++++++++++++++---------------- tcg/sparc64/tcg-target-con-set.h | 1 + tcg/sparc64/tcg-target.c.inc | 25 +++++-- tcg/tcg.c | 41 +++++++++++- tcg/tci/tcg-target.c.inc | 15 ++++- 12 files changed, 302 insertions(+), 195 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 466042a577..a181b7e65a 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1592,16 +1592,6 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) tcg_out_mov(s, TCG_TYPE_I32, rd, rn); } -static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, - TCGReg rn, int64_t aimm) -{ - if (aimm >= 0) { - tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); - } else { - tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); - } -} - static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, TCGReg rh, TCGReg al, TCGReg ah, tcg_target_long bl, tcg_target_long bh, @@ -2115,6 +2105,30 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3502, ADD, type, a0, a1, a2); +} + +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 >= 0) { + tcg_out_insn(s, 3401, ADDI, type, a0, a1, a2); + } else { + tcg_out_insn(s, 3401, SUBI, type, a0, a1, -a2); + } +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rA), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2181,23 +2195,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_add_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_add_i64: - if (c2) { - tcg_out_addsubi(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); - } - break; - case INDEX_op_sub_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ case INDEX_op_sub_i64: if (c2) { - tcg_out_addsubi(s, ext, a0, a1, -a2); + tgen_addi(s, ext, a0, a1, -a2); } else { tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); } @@ -2984,8 +2985,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_add_i32: - case INDEX_op_add_i64: case INDEX_op_sub_i32: case INDEX_op_sub_i64: return C_O1_I2(r, r, rA); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 0fafe97230..a1f2184ac4 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -890,6 +890,17 @@ static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, ARMInsn opc, } } +static void tcg_out_dat_IN(TCGContext *s, ARMCond cond, ARMInsn opc, + ARMInsn opneg, TCGReg dst, TCGReg lhs, TCGArg rhs) +{ + int imm12 = encode_imm(rhs); + if (imm12 < 0) { + imm12 = encode_imm_nofail(-rhs); + opc = opneg; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12); +} + static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc, ARMInsn opneg, TCGReg dst, TCGReg lhs, TCGArg rhs, bool rhs_is_const) @@ -898,12 +909,7 @@ static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc, * rhs must satisfy the "rIN" constraint. */ if (rhs_is_const) { - int imm12 = encode_imm(rhs); - if (imm12 < 0) { - imm12 = encode_imm_nofail(-rhs); - opc = opneg; - } - tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12); + tcg_out_dat_IN(s, cond, opc, opneg, dst, lhs, rhs); } else { tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); } @@ -1821,6 +1827,26 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IN(s, COND_AL, ARITH_ADD, ARITH_SUB, a0, a1, a2); +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rIN), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1869,10 +1895,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[c], ARITH_MOV, ARITH_MVN, args[0], 0, args[3], const_args[3]); break; - case INDEX_op_add_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, - args[0], args[1], args[2], const_args[2]); - break; case INDEX_op_sub_i32: if (const_args[1]) { if (const_args[2]) { @@ -2142,7 +2164,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i32: return C_O0_I2(r, r); - case INDEX_op_add_i32: case INDEX_op_sub_i32: case INDEX_op_setcond_i32: case INDEX_op_negsetcond_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 75c8665d74..1115d1e38d 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2562,6 +2562,40 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, /* no need to flush icache explicitly */ } + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + if (a0 == a1) { + tgen_arithr(s, ARITH_ADD + rexw, a0, a2); + } else if (a0 == a2) { + tgen_arithr(s, ARITH_ADD + rexw, a0, a1); + } else { + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, 0); + } +} + +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + if (a0 == a1) { + tgen_arithi(s, ARITH_ADD + rexw, a0, a2, false); + } else { + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, -1, 0, a2); + } +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, re), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2642,24 +2676,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(add): - /* For 3-operand addition, use LEA. */ - if (a0 != a1) { - TCGArg c3 = 0; - if (const_a2) { - c3 = a2, a2 = -1; - } else if (a0 == a2) { - /* Watch out for dest = src + dest, since we've removed - the matching constraint on the add. */ - tgen_arithr(s, ARITH_ADD + rexw, a0, a1); - break; - } - - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); - break; - } - c = ARITH_ADD; - goto gen_arith; OP_32_64(sub): c = ARITH_SUB; goto gen_arith; @@ -3599,10 +3615,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_add_i32: - case INDEX_op_add_i64: - return C_O1_I2(r, r, re); - case INDEX_op_sub_i32: case INDEX_op_sub_i64: case INDEX_op_mul_i32: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index b0a4ec4c13..fee5e7c577 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1286,6 +1286,24 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_add_w(s, a0, a1, a2); + } else { + tcg_out_opc_add_d(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_add, + .out_rri = tcg_out_addi, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1544,21 +1562,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_add_i32: - if (c2) { - tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2); - } else { - tcg_out_opc_add_w(s, a0, a1, a2); - } - break; - case INDEX_op_add_i64: - if (c2) { - tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2); - } else { - tcg_out_opc_add_d(s, a0, a1, a2); - } - break; - case INDEX_op_sub_i32: if (c2) { tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2); @@ -2287,11 +2290,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); - case INDEX_op_add_i32: - return C_O1_I2(r, r, ri); - case INDEX_op_add_i64: - return C_O1_I2(r, r, rJ); - case INDEX_op_and_i32: case INDEX_op_and_i64: case INDEX_op_nor_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 4d52e0bde0..263e7e66c9 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1655,6 +1655,28 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, /* Always indirect, nothing to do */ } + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_ADDU : OPC_DADDU; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_ADDIU : OPC_DADDIU; + tcg_out_opc_imm(s, insn, a0, a1, a2); +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1727,12 +1749,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_add_i32: - i1 = OPC_ADDU, i2 = OPC_ADDIU; - goto do_binary; - case INDEX_op_add_i64: - i1 = OPC_DADDU, i2 = OPC_DADDIU; - goto do_binary; case INDEX_op_or_i32: case INDEX_op_or_i64: i1 = OPC_OR, i2 = OPC_ORI; @@ -2159,9 +2175,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_add_i32: - case INDEX_op_add_i64: - return C_O1_I2(r, r, rJ); case INDEX_op_sub_i32: case INDEX_op_sub_i64: return C_O1_I2(r, rz, rN); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 11dcfe66f3..6b27238499 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2902,6 +2902,26 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, ADD | TAB(a0, a1, a2)); +} + +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rT), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2971,15 +2991,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; - case INDEX_op_add_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_32: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; case INDEX_op_sub_i32: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[1]) { @@ -2989,8 +3000,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); } } else if (const_args[2]) { - a2 = -a2; - goto do_addi_32; + tgen_addi(s, type, a0, a1, (int32_t)-a2); } else { tcg_out32(s, SUBF | TAB(a0, a2, a1)); } @@ -3185,15 +3195,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); break; - case INDEX_op_add_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_64: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; case INDEX_op_sub_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[1]) { @@ -3203,8 +3204,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); } } else if (const_args[2]) { - a2 = -a2; - goto do_addi_64; + tgen_addi(s, type, a0, a1, -a2); } else { tcg_out32(s, SUBF | TAB(a0, a2, a1)); } @@ -4129,7 +4129,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_add_i32: case INDEX_op_and_i32: case INDEX_op_or_i32: case INDEX_op_xor_i32: @@ -4176,8 +4175,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i32: return C_O1_I2(r, rI, ri); - case INDEX_op_add_i64: - return C_O1_I2(r, r, rT); case INDEX_op_or_i64: case INDEX_op_xor_i64: return C_O1_I2(r, r, rU); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 6f9d87df48..135137ff53 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1957,6 +1957,28 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_ADDW : OPC_ADD; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI; + tcg_out_opc_imm(s, insn, a0, a1, a2); +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2019,21 +2041,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_add_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2); - } - break; - case INDEX_op_add_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2); - } - break; - case INDEX_op_sub_i32: if (c2) { tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2); @@ -2657,11 +2664,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_add_i32: case INDEX_op_and_i32: case INDEX_op_or_i32: case INDEX_op_xor_i32: - case INDEX_op_add_i64: case INDEX_op_and_i64: case INDEX_op_or_i64: case INDEX_op_xor_i64: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 30fa26e884..f5441d2033 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2145,6 +2145,58 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, /* no need to flush icache explicitly */ } + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (a0 != a1) { + tcg_out_insn(s, RX, LA, a0, a1, a2, 0); + } else if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, AR, a0, a2); + } else { + tcg_out_insn(s, RRE, AGR, a0, a2); + } +} + +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a0 == a1) { + if (type == TCG_TYPE_I32) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AHI, a0, a2); + } else { + tcg_out_insn(s, RIL, AFI, a0, a2); + } + return; + } + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AGHI, a0, a2); + return; + } + if (a2 == (int32_t)a2) { + tcg_out_insn(s, RIL, AGFI, a0, a2); + return; + } + if (a2 == (uint32_t)a2) { + tcg_out_insn(s, RIL, ALGFI, a0, a2); + return; + } + if (-a2 == (uint32_t)-a2) { + tcg_out_insn(s, RIL, SLGFI, a0, -a2); + return; + } + } + tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ case glue(glue(INDEX_op_,x),_i64) @@ -2201,30 +2253,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_add_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (const_args[2]) { - do_addi_32: - if (a0 == a1) { - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, AHI, a0, a2); - break; - } - tcg_out_insn(s, RIL, AFI, a0, a2); - break; - } - tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, AR, a0, a2); - } else { - tcg_out_insn(s, RX, LA, a0, a1, a2, 0); - } - break; case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - a2 = -a2; - goto do_addi_32; + tgen_addi(s, type, a0, a1, (int32_t)-a2); } else if (a0 == a1) { tcg_out_insn(s, RR, SR, a0, a2); } else { @@ -2494,40 +2526,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); break; - case INDEX_op_add_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_64: - if (a0 == a1) { - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, AGHI, a0, a2); - break; - } - if (a2 == (int32_t)a2) { - tcg_out_insn(s, RIL, AGFI, a0, a2); - break; - } - if (a2 == (uint32_t)a2) { - tcg_out_insn(s, RIL, ALGFI, a0, a2); - break; - } - if (-a2 == (uint32_t)-a2) { - tcg_out_insn(s, RIL, SLGFI, a0, -a2); - break; - } - } - tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RRE, AGR, a0, a2); - } else { - tcg_out_insn(s, RX, LA, a0, a1, a2, 0); - } - break; case INDEX_op_sub_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - a2 = -a2; - goto do_addi_64; + tgen_addi(s, type, a0, a1, -a2); } else { tcg_out_insn(s, RRFa, SGRK, a0, a1, a2); } @@ -3253,8 +3255,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_add_i32: - case INDEX_op_add_i64: case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index 61f9fa3d9f..d90ba11443 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -14,6 +14,7 @@ C_O0_I2(rz, r) C_O0_I2(rz, rJ) C_O1_I1(r, r) C_O1_I2(r, r, r) +C_O1_I2(r, r, rJ) C_O1_I2(r, rz, rJ) C_O1_I4(r, rz, rJ, rI, 0) C_O2_I2(r, r, rz, rJ) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index cb5e8d554d..f43d95b025 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1285,6 +1285,26 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, { } + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_ADD); +} + +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_ADD); +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1338,9 +1358,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_st32_i64: tcg_out_ldst(s, a0, a1, a2, STW); break; - OP_32_64(add): - c = ARITH_ADD; - goto gen_arith; OP_32_64(sub): c = ARITH_SUB; goto gen_arith; @@ -1564,8 +1581,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_add_i32: - case INDEX_op_add_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: case INDEX_op_div_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index 7ff211081d..18b2981c79 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -978,6 +978,14 @@ typedef struct TCGOutOp { TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags); } TCGOutOp; +typedef struct TCGOutOpBinary { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2); +} TCGOutOpBinary; + #include "tcg-target.c.inc" #ifndef CONFIG_TCG_INTERPRETER @@ -987,10 +995,21 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - < MIN_TLB_MASK_TABLE_OFS); #endif +/* + * Register V as the TCGOutOp for O. + * This verifies that V is of type T, otherwise give a nice compiler error. + * This prevents trivial mistakes within each arch/tcg-target.c.inc. + */ +#define OUTOP(O, T, V) [O] = _Generic(V, T: &V.base) + /* Register allocation descriptions for every TCGOpcode. */ static const TCGOutOp * const all_outop[NB_OPS] = { + OUTOP(INDEX_op_add_i32, TCGOutOpBinary, outop_add), + OUTOP(INDEX_op_add_i64, TCGOutOpBinary, outop_add), }; +#undef OUTOP + /* * All TCG threads except the parent (i.e. the one that called tcg_context_init * and registered the target's TCG globals) must register with this function @@ -5414,6 +5433,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } /* emit instruction */ + TCGType type = TCGOP_TYPE(op); switch (op->opc) { case INDEX_op_ext_i32_i64: tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); @@ -5424,12 +5444,29 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_extrl_i64_i32: tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); break; + + case INDEX_op_add_i32: + case INDEX_op_add_i64: + { + const TCGOutOpBinary *out = + container_of(all_outop[op->opc], TCGOutOpBinary, base); + + /* Constants should never appear in the first source operand. */ + tcg_debug_assert(!const_args[1]); + if (const_args[2]) { + out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } + break; + default: if (def->flags & TCG_OPF_VECTOR) { - tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64, + tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, TCGOP_VECE(op), new_args, const_args); } else { - tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args); + tcg_out_op(s, op->opc, type, new_args, const_args); } break; } diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 9a5d3c2875..e6ec31e351 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -91,8 +91,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: - case INDEX_op_add_i32: - case INDEX_op_add_i64: case INDEX_op_sub_i32: case INDEX_op_sub_i64: case INDEX_op_mul_i32: @@ -643,6 +641,18 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, /* Always indirect, nothing to do */ } +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_add_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_add, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -684,7 +694,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(add) CASE_32_64(sub) CASE_32_64(mul) CASE_32_64(and) From 79602f632a20228fc963161cd53f7d5f6a3bd953 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 09:11:39 -0800 Subject: [PATCH 010/161] tcg: Merge INDEX_op_add_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rely on TCGOP_TYPE instead of opcodes specific to each type. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 4 ++-- target/sh4/translate.c | 6 +++--- tcg/optimize.c | 13 +++++-------- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 15 +++++---------- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 6 ++---- 8 files changed, 22 insertions(+), 33 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index e6ccc78fa1..67387bfddf 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -261,7 +261,7 @@ Arithmetic .. list-table:: - * - add_i32/i64 *t0*, *t1*, *t2* + * - add *t0*, *t1*, *t2* - | *t0* = *t1* + *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 766fd00d99..0282779468 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -39,6 +39,8 @@ DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT) DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) +DEF(add, 1, 2, 0, TCG_OPF_INT) + DEF(setcond_i32, 1, 2, 1, 0) DEF(negsetcond_i32, 1, 2, 1, 0) DEF(movcond_i32, 1, 4, 1, 0) @@ -52,7 +54,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* arith */ -DEF(add_i32, 1, 2, 0, 0) DEF(sub_i32, 1, 2, 0, 0) DEF(mul_i32, 1, 2, 0, 0) DEF(div_i32, 1, 2, 0, 0) @@ -115,7 +116,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* arith */ -DEF(add_i64, 1, 2, 0, 0) DEF(sub_i64, 1, 2, 0, 0) DEF(mul_i64, 1, 2, 0, 0) DEF(div_i64, 1, 2, 0, 0) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index d796ad52c4..c20204cb52 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1940,7 +1940,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) NEXT_INSN; switch (ctx->opcode & 0xf00f) { case 0x300c: /* add Rm,Rn */ - op_opc = INDEX_op_add_i32; + op_opc = INDEX_op_add; goto do_reg_op; case 0x2009: /* and Rm,Rn */ op_opc = INDEX_op_and_i32; @@ -1984,7 +1984,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) if (op_dst != B11_8 || mv_src >= 0) { goto fail; } - op_opc = INDEX_op_add_i32; + op_opc = INDEX_op_add; op_arg = tcg_constant_i32(B7_0s); break; @@ -2087,7 +2087,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) ctx->memidx, ld_mop); break; - case INDEX_op_add_i32: + case INDEX_op_add: if (op_dst != st_src) { goto fail; } diff --git a/tcg/optimize.c b/tcg/optimize.c index 8d5bad07aa..a53e4f4675 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -424,7 +424,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) uint64_t l64, h64; switch (op) { - CASE_OP_32_64(add): + case INDEX_op_add: return x + y; CASE_OP_32_64(sub): @@ -2261,7 +2261,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) break; } if (convert) { - TCGOpcode add_opc, xor_opc, neg_opc; + TCGOpcode xor_opc, neg_opc; if (!inv && !neg) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); @@ -2269,12 +2269,10 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) switch (ctx->type) { case TCG_TYPE_I32: - add_opc = INDEX_op_add_i32; neg_opc = INDEX_op_neg_i32; xor_opc = INDEX_op_xor_i32; break; case TCG_TYPE_I64: - add_opc = INDEX_op_add_i64; neg_opc = INDEX_op_neg_i64; xor_opc = INDEX_op_xor_i64; break; @@ -2285,7 +2283,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) if (!inv) { op->opc = neg_opc; } else if (neg) { - op->opc = add_opc; + op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -1); } else { op->opc = xor_opc; @@ -2650,8 +2648,7 @@ static bool fold_sub(OptContext *ctx, TCGOp *op) if (arg_is_const(op->args[2])) { uint64_t val = arg_info(op->args[2])->val; - op->opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_add_i32 : INDEX_op_add_i64); + op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -val); } return finish_folding(ctx, op); @@ -2842,7 +2839,7 @@ void tcg_optimize(TCGContext *s) * Sorted alphabetically by opcode as much as possible. */ switch (opc) { - CASE_OP_32_64(add): + case INDEX_op_add: done = fold_add(&ctx, op); break; case INDEX_op_add_vec: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 108dc61e9a..344d490966 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -362,7 +362,7 @@ void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_add, ret, arg1, arg2); } void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -1555,7 +1555,7 @@ void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_add, ret, arg1, arg2); } else { tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); diff --git a/tcg/tcg.c b/tcg/tcg.c index 18b2981c79..0f0a3f56d8 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1004,8 +1004,7 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - /* Register allocation descriptions for every TCGOpcode. */ static const TCGOutOp * const all_outop[NB_OPS] = { - OUTOP(INDEX_op_add_i32, TCGOutOpBinary, outop_add), - OUTOP(INDEX_op_add_i64, TCGOutOpBinary, outop_add), + OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), }; #undef OUTOP @@ -2206,6 +2205,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return TCG_TARGET_HAS_qemu_ldst_i128; + case INDEX_op_add: case INDEX_op_mov: return has_type; @@ -2220,7 +2220,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_add_i32: case INDEX_op_sub_i32: case INDEX_op_neg_i32: case INDEX_op_mul_i32: @@ -2304,7 +2303,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st16_i64: case INDEX_op_st32_i64: case INDEX_op_st_i64: - case INDEX_op_add_i64: case INDEX_op_sub_i64: case INDEX_op_neg_i64: case INDEX_op_mul_i64: @@ -4015,14 +4013,12 @@ liveness_pass_1(TCGContext *s) break; case INDEX_op_add2_i32: - opc_new = INDEX_op_add_i32; + case INDEX_op_add2_i64: + opc_new = INDEX_op_add; goto do_addsub2; case INDEX_op_sub2_i32: opc_new = INDEX_op_sub_i32; goto do_addsub2; - case INDEX_op_add2_i64: - opc_new = INDEX_op_add_i64; - goto do_addsub2; case INDEX_op_sub2_i64: opc_new = INDEX_op_sub_i64; do_addsub2: @@ -5445,8 +5441,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); break; - case INDEX_op_add_i32: - case INDEX_op_add_i64: + case INDEX_op_add: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci.c b/tcg/tci.c index 78183ea47d..ceb791a735 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -523,7 +523,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Arithmetic operations (mixed 32/64 bit). */ - CASE_32_64(add) + case INDEX_op_add: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] + regs[r2]; break; @@ -1082,8 +1082,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), str_r(r1)); break; - case INDEX_op_add_i32: - case INDEX_op_add_i64: + case INDEX_op_add: case INDEX_op_sub_i32: case INDEX_op_sub_i64: case INDEX_op_mul_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index e6ec31e351..726b645da8 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -453,9 +453,7 @@ static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, stack_bounds_check(base, offset); if (offset != sextract32(offset, 0, 16)) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset); - tcg_out_op_rrr(s, (TCG_TARGET_REG_BITS == 32 - ? INDEX_op_add_i32 : INDEX_op_add_i64), - TCG_REG_TMP, TCG_REG_TMP, base); + tcg_out_op_rrr(s, INDEX_op_add, TCG_REG_TMP, TCG_REG_TMP, base); base = TCG_REG_TMP; offset = 0; } @@ -644,7 +642,7 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, static void tgen_add(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_add_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_add, a0, a1, a2); } static const TCGOutOpBinary outop_add = { From bf40f915fc24b97e27e852f0eae5f59eaf63de59 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 10:22:29 -0800 Subject: [PATCH 011/161] tcg: Convert and to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop all backend support for an immediate as the first operand. This should never happen in any case, as we swap commutative operands to place immediates as the second operand. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 31 ++++++++++-------- tcg/arm/tcg-target.c.inc | 41 +++++++++++++++++------- tcg/i386/tcg-target.c.inc | 27 ++++++++++++---- tcg/loongarch64/tcg-target.c.inc | 29 ++++++++++------- tcg/mips/tcg-target.c.inc | 55 +++++++++++++++++++------------- tcg/ppc/tcg-target.c.inc | 40 ++++++++++++----------- tcg/riscv/tcg-target.c.inc | 29 ++++++++++------- tcg/s390x/tcg-target.c.inc | 48 +++++++++++++++------------- tcg/sparc64/tcg-target.c.inc | 23 ++++++++++--- tcg/tcg.c | 4 +++ tcg/tci/tcg-target.c.inc | 14 ++++++-- 11 files changed, 216 insertions(+), 125 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index a181b7e65a..b7d11887e3 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2128,6 +2128,24 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, AND, type, a0, a1, a2); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_logicali(s, I3404_ANDI, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rL), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2209,17 +2227,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); break; - case INDEX_op_and_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_and_i64: - if (c2) { - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); - } - break; - case INDEX_op_andc_i32: a2 = (int32_t)a2; /* FALLTHRU */ @@ -3009,8 +3016,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_mulsh_i64: return C_O1_I2(r, r, r); - case INDEX_op_and_i32: - case INDEX_op_and_i64: case INDEX_op_or_i32: case INDEX_op_or_i64: case INDEX_op_xor_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index a1f2184ac4..cb4b2becef 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -874,17 +874,23 @@ static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, ARMInsn opc, * Emit either the reg,imm or reg,reg form of a data-processing insn. * rhs must satisfy the "rIK" constraint. */ +static void tcg_out_dat_IK(TCGContext *s, ARMCond cond, ARMInsn opc, + ARMInsn opinv, TCGReg dst, TCGReg lhs, TCGArg rhs) +{ + int imm12 = encode_imm(rhs); + if (imm12 < 0) { + imm12 = encode_imm_nofail(~rhs); + opc = opinv; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12); +} + static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, ARMInsn opc, ARMInsn opinv, TCGReg dst, TCGReg lhs, TCGArg rhs, bool rhs_is_const) { if (rhs_is_const) { - int imm12 = encode_imm(rhs); - if (imm12 < 0) { - imm12 = encode_imm_nofail(~rhs); - opc = opinv; - } - tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12); + tcg_out_dat_IK(s, cond, opc, opinv, dst, lhs, rhs); } else { tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); } @@ -1846,6 +1852,24 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_AND, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_AND, ARITH_BIC, a0, a1, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rIK), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1908,10 +1932,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, args[0], args[1], args[2], const_args[2]); } break; - case INDEX_op_and_i32: - tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, - args[0], args[1], args[2], const_args[2]); - break; case INDEX_op_andc_i32: tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, args[0], args[1], args[2], const_args[2]); @@ -2169,7 +2189,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i32: return C_O1_I2(r, r, rIN); - case INDEX_op_and_i32: case INDEX_op_andc_i32: case INDEX_op_clz_i32: case INDEX_op_ctz_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 1115d1e38d..4f4c5ebbb1 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2595,6 +2595,26 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_AND + rexw, a0, a2); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_AND + rexw, a0, a2, false); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, 0, reZ), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2679,9 +2699,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(sub): c = ARITH_SUB; goto gen_arith; - OP_32_64(and): - c = ARITH_AND; - goto gen_arith; OP_32_64(or): c = ARITH_OR; goto gen_arith; @@ -3625,10 +3642,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_xor_i64: return C_O1_I2(r, 0, re); - case INDEX_op_and_i32: - case INDEX_op_and_i64: - return C_O1_I2(r, 0, reZ); - case INDEX_op_andc_i32: case INDEX_op_andc_i64: return C_O1_I2(r, r, rI); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index fee5e7c577..b9c6e0d017 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1303,6 +1303,24 @@ static const TCGOutOpBinary outop_add = { .out_rri = tcg_out_addi, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_and(s, a0, a1, a2); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_andi(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1373,15 +1391,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_and_i32: - case INDEX_op_and_i64: - if (c2) { - tcg_out_opc_andi(s, a0, a1, a2); - } else { - tcg_out_opc_and(s, a0, a1, a2); - } - break; - case INDEX_op_or_i32: case INDEX_op_or_i64: if (c2) { @@ -2290,8 +2299,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); - case INDEX_op_and_i32: - case INDEX_op_and_i64: case INDEX_op_nor_i32: case INDEX_op_nor_i64: case INDEX_op_or_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 263e7e66c9..460f73d06a 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1676,6 +1676,38 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_AND, a0, a1, a2); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int msb; + + if (a2 == (uint16_t)a2) { + tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2); + return; + } + + tcg_debug_assert(use_mips32r2_instructions); + tcg_debug_assert(is_p2m1(a2)); + msb = ctz64(~a2) - 1; + if (type == TCG_TYPE_I32) { + tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); + } else { + tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1, msb, 0); + } +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rIK), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1776,26 +1808,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; } goto do_binaryv; - case INDEX_op_and_i32: - if (c2 && a2 != (uint16_t)a2) { - int msb = ctz32(~a2) - 1; - tcg_debug_assert(use_mips32r2_instructions); - tcg_debug_assert(is_p2m1(a2)); - tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); - break; - } - i1 = OPC_AND, i2 = OPC_ANDI; - goto do_binary; - case INDEX_op_and_i64: - if (c2 && a2 != (uint16_t)a2) { - int msb = ctz64(~a2) - 1; - tcg_debug_assert(use_mips32r2_instructions); - tcg_debug_assert(is_p2m1(a2)); - tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1, msb, 0); - break; - } - i1 = OPC_AND, i2 = OPC_ANDI; - goto do_binary; case INDEX_op_nor_i32: case INDEX_op_nor_i64: i1 = OPC_NOR; @@ -2202,9 +2214,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i64: case INDEX_op_mulu2_i64: return C_O2_I2(r, r, r, r); - case INDEX_op_and_i32: - case INDEX_op_and_i64: - return C_O1_I2(r, r, rIK); case INDEX_op_or_i32: case INDEX_op_xor_i32: case INDEX_op_or_i64: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 6b27238499..3d34edfa79 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2921,6 +2921,28 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, AND | SAB(a1, a0, a2)); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_andi32(s, a0, a1, a2); + } else { + tcg_out_andi64(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3006,22 +3028,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_and_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; - case INDEX_op_and_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; case INDEX_op_or_i64: case INDEX_op_or_i32: a0 = args[0], a1 = args[1], a2 = args[2]; @@ -4129,7 +4135,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_and_i32: case INDEX_op_or_i32: case INDEX_op_xor_i32: case INDEX_op_andc_i32: @@ -4140,7 +4145,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_and_i64: case INDEX_op_andc_i64: case INDEX_op_shl_i64: case INDEX_op_shr_i64: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 135137ff53..7f585bc4f9 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1978,6 +1978,24 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_AND, a0, a1, a2); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2056,15 +2074,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_and_i32: - case INDEX_op_and_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_AND, a0, a1, a2); - } - break; - case INDEX_op_or_i32: case INDEX_op_or_i64: if (c2) { @@ -2664,10 +2673,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_and_i32: case INDEX_op_or_i32: case INDEX_op_xor_i32: - case INDEX_op_and_i64: case INDEX_op_or_i64: case INDEX_op_xor_i64: case INDEX_op_setcond_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index f5441d2033..d60bdaba25 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2196,6 +2196,31 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, NR, a0, a2); + } else { + tcg_out_insn(s, RRFa, NRK, a0, a1, a2); + } +} + +static void tgen_andi_3(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + tgen_andi(s, type, a0, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rNKR), + .out_rrr = tgen_and, + .out_rri = tgen_andi_3, +}; + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ @@ -2264,17 +2289,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_and_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_andi(s, TCG_TYPE_I32, a0, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, NR, a0, a2); - } else { - tcg_out_insn(s, RRFa, NRK, a0, a1, a2); - } - break; case INDEX_op_or_i32: a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; if (const_args[2]) { @@ -2535,15 +2549,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_and_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_andi(s, TCG_TYPE_I64, args[0], args[2]); - } else { - tcg_out_insn(s, RRFa, NGRK, a0, a1, a2); - } - break; case INDEX_op_or_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { @@ -3274,12 +3279,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i32: case INDEX_op_sub_i64: - case INDEX_op_and_i32: case INDEX_op_or_i32: case INDEX_op_xor_i32: return C_O1_I2(r, r, ri); - case INDEX_op_and_i64: - return C_O1_I2(r, r, rNKR); case INDEX_op_or_i64: case INDEX_op_xor_i64: return C_O1_I2(r, r, rK); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index f43d95b025..b3fbe127c0 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1304,6 +1304,24 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_AND); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_AND); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1361,9 +1379,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(sub): c = ARITH_SUB; goto gen_arith; - OP_32_64(and): - c = ARITH_AND; - goto gen_arith; OP_32_64(andc): c = ARITH_ANDN; goto gen_arith; @@ -1589,8 +1604,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i64: case INDEX_op_sub_i32: case INDEX_op_sub_i64: - case INDEX_op_and_i32: - case INDEX_op_and_i64: case INDEX_op_andc_i32: case INDEX_op_andc_i64: case INDEX_op_or_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index 0f0a3f56d8..94574c90c5 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1005,6 +1005,8 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - /* Register allocation descriptions for every TCGOpcode. */ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), + OUTOP(INDEX_op_and_i32, TCGOutOpBinary, outop_and), + OUTOP(INDEX_op_and_i64, TCGOutOpBinary, outop_and), }; #undef OUTOP @@ -5442,6 +5444,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_add: + case INDEX_op_and_i32: + case INDEX_op_and_i64: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 726b645da8..fd38ecad39 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -95,8 +95,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_and_i32: - case INDEX_op_and_i64: case INDEX_op_andc_i32: case INDEX_op_andc_i64: case INDEX_op_eqv_i32: @@ -650,6 +648,17 @@ static const TCGOutOpBinary outop_add = { .out_rrr = tgen_add, }; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_and_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_and, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -694,7 +703,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sub) CASE_32_64(mul) - CASE_32_64(and) CASE_32_64(or) CASE_32_64(xor) CASE_32_64(andc) /* Optional (TCG_TARGET_HAS_andc_*). */ From c3b920b3d6a685484904d3060f3eb69401051bf0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 10:32:44 -0800 Subject: [PATCH 012/161] tcg: Merge INDEX_op_and_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- target/sh4/translate.c | 4 ++-- tcg/optimize.c | 40 ++++++++++++---------------------------- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 9 +++------ tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 8 files changed, 24 insertions(+), 45 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 67387bfddf..6a8025c0bf 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -303,7 +303,7 @@ Logical .. list-table:: - * - and_i32/i64 *t0*, *t1*, *t2* + * - and *t0*, *t1*, *t2* - | *t0* = *t1* & *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 0282779468..f45029caa7 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -40,6 +40,7 @@ DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT) DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) DEF(add, 1, 2, 0, TCG_OPF_INT) +DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(setcond_i32, 1, 2, 1, 0) DEF(negsetcond_i32, 1, 2, 1, 0) @@ -62,7 +63,6 @@ DEF(rem_i32, 1, 2, 0, 0) DEF(remu_i32, 1, 2, 0, 0) DEF(div2_i32, 2, 3, 0, 0) DEF(divu2_i32, 2, 3, 0, 0) -DEF(and_i32, 1, 2, 0, 0) DEF(or_i32, 1, 2, 0, 0) DEF(xor_i32, 1, 2, 0, 0) /* shifts/rotates */ @@ -124,7 +124,6 @@ DEF(rem_i64, 1, 2, 0, 0) DEF(remu_i64, 1, 2, 0, 0) DEF(div2_i64, 2, 3, 0, 0) DEF(divu2_i64, 2, 3, 0, 0) -DEF(and_i64, 1, 2, 0, 0) DEF(or_i64, 1, 2, 0, 0) DEF(xor_i64, 1, 2, 0, 0) /* shifts/rotates */ diff --git a/target/sh4/translate.c b/target/sh4/translate.c index c20204cb52..3d0eda2128 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1943,7 +1943,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) op_opc = INDEX_op_add; goto do_reg_op; case 0x2009: /* and Rm,Rn */ - op_opc = INDEX_op_and_i32; + op_opc = INDEX_op_and; goto do_reg_op; case 0x200a: /* xor Rm,Rn */ op_opc = INDEX_op_xor_i32; @@ -2105,7 +2105,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_and_i32: + case INDEX_op_and: if (op_dst != st_src) { goto fail; } diff --git a/tcg/optimize.c b/tcg/optimize.c index a53e4f4675..20cde598fb 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -433,7 +433,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) CASE_OP_32_64(mul): return x * y; - CASE_OP_32_64_VEC(and): + case INDEX_op_and: + case INDEX_op_and_vec: return x & y; CASE_OP_32_64_VEC(or): @@ -802,9 +803,7 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, /* Expand to AND with a temporary if no backend support. */ if (!TCG_TARGET_HAS_tst) { - TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_and_i32 : INDEX_op_and_i64); - TCGOp *op2 = opt_insert_before(ctx, op, and_opc, 3); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); TCGArg tmp = arg_new_temp(ctx); op2->args[0] = tmp; @@ -897,8 +896,8 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) /* Expand to AND with a temporary if no backend support. */ if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) { - TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and_i32, 3); - TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and_i32, 3); + TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); TCGArg t1 = arg_new_temp(ctx); TCGArg t2 = arg_new_temp(ctx); @@ -1709,8 +1708,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) TempOptInfo *t2 = arg_info(op->args[2]); int ofs = op->args[3]; int len = op->args[4]; - int width; - TCGOpcode and_opc; + int width = 8 * tcg_type_size(ctx->type); uint64_t z_mask, s_mask; if (ti_is_const(t1) && ti_is_const(t2)) { @@ -1719,24 +1717,11 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) ti_const_val(t2))); } - switch (ctx->type) { - case TCG_TYPE_I32: - and_opc = INDEX_op_and_i32; - width = 32; - break; - case TCG_TYPE_I64: - and_opc = INDEX_op_and_i64; - width = 64; - break; - default: - g_assert_not_reached(); - } - /* Inserting a value into zero at offset 0. */ if (ti_is_const_val(t1, 0) && ofs == 0) { uint64_t mask = MAKE_64BIT_MASK(0, len); - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[1] = op->args[2]; op->args[2] = arg_new_constant(ctx, mask); return fold_and(ctx, op); @@ -1746,7 +1731,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) if (ti_is_const_val(t2, 0)) { uint64_t mask = deposit64(-1, ofs, len, 0); - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[2] = arg_new_constant(ctx, mask); return fold_and(ctx, op); } @@ -2297,7 +2282,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc; + TCGOpcode sub_opc, xor_opc, neg_opc, shr_opc; TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; @@ -2319,7 +2304,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) switch (ctx->type) { case TCG_TYPE_I32: - and_opc = INDEX_op_and_i32; sub_opc = INDEX_op_sub_i32; xor_opc = INDEX_op_xor_i32; shr_opc = INDEX_op_shr_i32; @@ -2332,7 +2316,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } break; case TCG_TYPE_I64: - and_opc = INDEX_op_and_i64; sub_opc = INDEX_op_sub_i64; xor_opc = INDEX_op_xor_i64; shr_opc = INDEX_op_shr_i64; @@ -2371,7 +2354,7 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) op2->args[2] = arg_new_constant(ctx, sh); src1 = ret; } - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[1] = src1; op->args[2] = arg_new_constant(ctx, 1); } @@ -2848,7 +2831,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(add2): done = fold_add2(&ctx, op); break; - CASE_OP_32_64_VEC(and): + case INDEX_op_and: + case INDEX_op_and_vec: done = fold_and(&ctx, op); break; CASE_OP_32_64_VEC(andc): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 344d490966..82f3ad501f 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -401,7 +401,7 @@ void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_and, ret, arg1, arg2); } void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -1575,7 +1575,7 @@ void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_and, ret, arg1, arg2); } else { tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); diff --git a/tcg/tcg.c b/tcg/tcg.c index 94574c90c5..d60427eb7f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1005,8 +1005,7 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - /* Register allocation descriptions for every TCGOpcode. */ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), - OUTOP(INDEX_op_and_i32, TCGOutOpBinary, outop_and), - OUTOP(INDEX_op_and_i64, TCGOutOpBinary, outop_and), + OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), }; #undef OUTOP @@ -2208,6 +2207,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_qemu_ldst_i128; case INDEX_op_add: + case INDEX_op_and: case INDEX_op_mov: return has_type; @@ -2225,7 +2225,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i32: case INDEX_op_neg_i32: case INDEX_op_mul_i32: - case INDEX_op_and_i32: case INDEX_op_or_i32: case INDEX_op_xor_i32: case INDEX_op_shl_i32: @@ -2308,7 +2307,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_neg_i64: case INDEX_op_mul_i64: - case INDEX_op_and_i64: case INDEX_op_or_i64: case INDEX_op_xor_i64: case INDEX_op_shl_i64: @@ -5444,8 +5442,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_add: - case INDEX_op_and_i32: - case INDEX_op_and_i64: + case INDEX_op_and: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci.c b/tcg/tci.c index ceb791a735..8762a99fb6 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -535,7 +535,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] * regs[r2]; break; - CASE_32_64(and) + case INDEX_op_and: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & regs[r2]; break; @@ -1083,12 +1083,11 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) break; case INDEX_op_add: + case INDEX_op_and: case INDEX_op_sub_i32: case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_and_i32: - case INDEX_op_and_i64: case INDEX_op_or_i32: case INDEX_op_or_i64: case INDEX_op_xor_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index fd38ecad39..b0141f8ed6 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -651,7 +651,7 @@ static const TCGOutOpBinary outop_add = { static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_and_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_and, a0, a1, a2); } static const TCGOutOpBinary outop_and = { From 899281c8f589cce55951e13307661a7253eb4909 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 15 Nov 2023 11:18:55 -0800 Subject: [PATCH 013/161] tcg/optimize: Fold andc with immediate to and MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/optimize.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tcg/optimize.c b/tcg/optimize.c index 20cde598fb..1f6fdee734 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1343,6 +1343,25 @@ static bool fold_andc(OptContext *ctx, TCGOp *op) t2 = arg_info(op->args[2]); z_mask = t1->z_mask; + if (ti_is_const(t2)) { + /* Fold andc r,x,i to and r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_and; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_and_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_and(ctx, op); + } + /* * Known-zeros does not imply known-ones. Therefore unless * arg2 is constant, we can't infer anything from it. From 93a9ddb84ae534a04e8d7764caa9e29b8285b2b1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 22:06:08 -0800 Subject: [PATCH 014/161] tcg/optimize: Emit add r,r,-1 in fold_setcond_tst_pow2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We canonicalize subtract with constant to add with constant. Fix this missed instance. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/optimize.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 1f6fdee734..6c7b6af624 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2301,7 +2301,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode sub_opc, xor_opc, neg_opc, shr_opc; + TCGOpcode xor_opc, neg_opc, shr_opc; TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; @@ -2323,7 +2323,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) switch (ctx->type) { case TCG_TYPE_I32: - sub_opc = INDEX_op_sub_i32; xor_opc = INDEX_op_xor_i32; shr_opc = INDEX_op_shr_i32; neg_opc = INDEX_op_neg_i32; @@ -2335,7 +2334,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } break; case TCG_TYPE_I64: - sub_opc = INDEX_op_sub_i64; xor_opc = INDEX_op_xor_i64; shr_opc = INDEX_op_shr_i64; neg_opc = INDEX_op_neg_i64; @@ -2379,10 +2377,10 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } if (neg && inv) { - op2 = opt_insert_after(ctx, op, sub_opc, 3); + op2 = opt_insert_after(ctx, op, INDEX_op_add, 3); op2->args[0] = ret; op2->args[1] = ret; - op2->args[2] = arg_new_constant(ctx, 1); + op2->args[2] = arg_new_constant(ctx, -1); } else if (inv) { op2 = opt_insert_after(ctx, op, xor_opc, 3); op2->args[0] = ret; From a341c84e8153b7282b083e871ca534f15fa70898 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 12:26:28 -0800 Subject: [PATCH 015/161] tcg: Convert andc to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the same time, drop all backend support for immediate operands, as we now transform andc to and during optimize. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 24 ++++++++-------- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 16 +++++++---- tcg/i386/tcg-target-con-set.h | 2 +- tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 31 +++++++++++--------- tcg/loongarch64/tcg-target-con-set.h | 1 + tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 23 ++++++++------- tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 4 +++ tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 29 ++++++++----------- tcg/riscv/tcg-target-con-set.h | 1 + tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 27 +++++++++++------- tcg/s390x/tcg-target-con-set.h | 1 - tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 42 ++++++++++++++-------------- tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 16 +++++++---- tcg/tcg-has.h | 1 - tcg/tcg-op.c | 4 +-- tcg/tcg.c | 8 +++--- tcg/tci.c | 2 -- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 14 ++++++++-- 28 files changed, 135 insertions(+), 130 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index bfd587c0fc..851f6b01b4 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 0 @@ -45,7 +44,6 @@ #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 0 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index b7d11887e3..c7167cad15 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2146,6 +2146,17 @@ static const TCGOutOpBinary outop_and = { .out_rri = tgen_andi, }; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, BIC, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2227,17 +2238,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); break; - case INDEX_op_andc_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_andc_i64: - if (c2) { - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); - } - break; - case INDEX_op_or_i32: a2 = (int32_t)a2; /* FALLTHRU */ @@ -3020,8 +3020,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_or_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: case INDEX_op_orc_i32: case INDEX_op_orc_i64: case INDEX_op_eqv_i32: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 8398c80c8e..0268858a3b 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -28,7 +28,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index cb4b2becef..feea82145a 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1870,6 +1870,17 @@ static const TCGOutOpBinary outop_and = { .out_rri = tgen_andi, }; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_BIC, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1932,10 +1943,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, args[0], args[1], args[2], const_args[2]); } break; - case INDEX_op_andc_i32: - tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, - args[0], args[1], args[2], const_args[2]); - break; case INDEX_op_or_i32: c = ARITH_ORR; goto gen_arith; @@ -2189,7 +2196,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i32: return C_O1_I2(r, r, rIN); - case INDEX_op_andc_i32: case INDEX_op_clz_i32: case INDEX_op_ctz_i32: return C_O1_I2(r, r, rIK); diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 06e6521001..0ae9775944 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -42,9 +42,9 @@ C_O1_I2(r, 0, reZ) C_O1_I2(r, 0, ri) C_O1_I2(r, 0, rI) C_O1_I2(r, L, L) +C_O1_I2(r, r, r) C_O1_I2(r, r, re) C_O1_I2(r, r, ri) -C_O1_I2(r, r, rI) C_O1_I2(x, x, x) C_N1_I2(r, r, r) C_N1_I2(r, r, rW) diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index bbf55c86b6..b29b70357a 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -31,7 +31,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 have_bmi1 #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 @@ -57,7 +56,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 have_bmi1 #define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 4f4c5ebbb1..33c1fcc717 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2615,6 +2615,24 @@ static const TCGOutOpBinary outop_and = { .out_rri = tgen_andi, }; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1); +} + +static TCGConstraintSetIndex cset_andc(TCGType type, unsigned flags) +{ + return have_bmi1 ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_andc, + .out_rrr = tgen_andc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2713,15 +2731,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(andc): - if (const_a2) { - tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); - tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0); - } else { - tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1); - } - break; - OP_32_64(mul): if (const_a2) { int32_t val; @@ -3642,10 +3651,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_xor_i64: return C_O1_I2(r, 0, re); - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - return C_O1_I2(r, r, rI); - case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index 8afaee9476..b7c9b89e9e 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -22,6 +22,7 @@ C_O0_I3(r, r, r) C_O1_I1(r, r) C_O1_I1(w, r) C_O1_I1(w, w) +C_O1_I2(r, r, r) C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 166c9d7e41..71d91fec19 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 @@ -47,7 +46,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index b9c6e0d017..b7d2984a8a 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1321,6 +1321,17 @@ static const TCGOutOpBinary outop_and = { .out_rri = tgen_andi, }; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_andn(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1371,16 +1382,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - if (c2) { - /* guaranteed to fit due to constraint */ - tcg_out_opc_andi(s, a0, a1, ~a2); - } else { - tcg_out_opc_andn(s, a0, a1, a2); - } - break; - case INDEX_op_orc_i32: case INDEX_op_orc_i64: if (c2) { @@ -2276,8 +2277,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: case INDEX_op_orc_i32: case INDEX_op_orc_i64: /* diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index fd96905484..6a6d4377e7 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -43,7 +43,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_andc_i32 0 #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 @@ -63,7 +62,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_andc_i64 0 #define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 460f73d06a..ab57c78095 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1708,6 +1708,10 @@ static const TCGOutOpBinary outop_and = { .out_rri = tgen_andi, }; +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 9acfc574c5..63bb66f446 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -23,7 +23,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 1 @@ -50,7 +49,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 3d34edfa79..7b1a82c9fa 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2943,6 +2943,17 @@ static const TCGOutOpBinary outop_and = { .out_rri = tgen_andi, }; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, ANDC | SAB(a1, a0, a2)); +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3046,22 +3057,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, XOR | SAB(a1, a0, a2)); } break; - case INDEX_op_andc_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; - case INDEX_op_andc_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; case INDEX_op_orc_i32: if (const_args[2]) { tcg_out_ori32(s, args[0], args[1], ~args[2]); @@ -4137,7 +4132,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_or_i32: case INDEX_op_xor_i32: - case INDEX_op_andc_i32: case INDEX_op_orc_i32: case INDEX_op_eqv_i32: case INDEX_op_shl_i32: @@ -4145,7 +4139,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_andc_i64: case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index e92e815491..f1f5d415f7 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -13,6 +13,7 @@ C_O0_I1(r) C_O0_I2(rz, r) C_O0_I2(rz, rz) C_O1_I1(r, r) +C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index fc62049c78..a3918bf7f5 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_orc_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_eqv_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_nand_i32 0 @@ -46,7 +45,6 @@ #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_orc_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_eqv_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_nand_i64 0 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 7f585bc4f9..f637604e98 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1996,6 +1996,23 @@ static const TCGOutOpBinary outop_and = { .out_rri = tgen_andi, }; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2); +} + +static TCGConstraintSetIndex cset_zbb_rrr(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_zbb_rrr, + .out_rrr = tgen_andc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2092,14 +2109,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ANDI, a0, a1, ~a2); - } else { - tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2); - } - break; case INDEX_op_orc_i32: case INDEX_op_orc_i64: if (c2) { @@ -2683,8 +2692,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: case INDEX_op_orc_i32: case INDEX_op_orc_i64: case INDEX_op_eqv_i32: diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 370e4b1295..39903a60ad 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -31,7 +31,6 @@ C_O1_I2(r, r, rC) C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) C_O1_I2(r, r, rK) -C_O1_I2(r, r, rKR) C_O1_I2(r, r, rNK) C_O1_I2(r, r, rNKR) C_O1_I2(r, rZ, r) diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index aea805455f..15ec0dc2ff 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -34,7 +34,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_andc_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3) @@ -59,7 +58,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_andc_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index d60bdaba25..e4b60d1924 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2221,6 +2221,27 @@ static const TCGOutOpBinary outop_and = { .out_rri = tgen_andi_3, }; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NCRK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2); + } +} + +static TCGConstraintSetIndex cset_misc3_rrr(TCGType type, unsigned flags) +{ + return HAVE_FACILITY(MISC_INSN_EXT3) ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_andc, +}; + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ @@ -2312,15 +2333,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_andc_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2); - } else { - tcg_out_insn(s, RRFa, NCRK, a0, a1, a2); - } - break; case INDEX_op_orc_i32: a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; if (const_args[2]) { @@ -2568,15 +2580,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_andc_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_andi(s, TCG_TYPE_I64, a0, ~a2); - } else { - tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2); - } - break; case INDEX_op_orc_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { @@ -3286,12 +3289,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_xor_i64: return C_O1_I2(r, r, rK); - case INDEX_op_andc_i32: case INDEX_op_orc_i32: case INDEX_op_eqv_i32: return C_O1_I2(r, r, ri); - case INDEX_op_andc_i64: - return C_O1_I2(r, r, rKR); case INDEX_op_orc_i64: case INDEX_op_eqv_i64: return C_O1_I2(r, r, rNK); diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index ad6f35da17..510b9e64a4 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -20,7 +20,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 @@ -46,7 +45,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index b3fbe127c0..fe9175aa1a 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1322,6 +1322,17 @@ static const TCGOutOpBinary outop_and = { .out_rri = tgen_andi, }; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_ANDN); +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1379,9 +1390,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(sub): c = ARITH_SUB; goto gen_arith; - OP_32_64(andc): - c = ARITH_ANDN; - goto gen_arith; OP_32_64(or): c = ARITH_OR; goto gen_arith; @@ -1604,8 +1612,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i64: case INDEX_op_sub_i32: case INDEX_op_sub_i64: - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: case INDEX_op_or_i32: case INDEX_op_or_i64: case INDEX_op_orc_i32: diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 4ccdc6bbee..7e4301521e 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 0 -#define TCG_TARGET_HAS_andc_i64 0 #define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 82f3ad501f..68818cbb0c 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -668,7 +668,7 @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_andc_i32) { + if (tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -2264,7 +2264,7 @@ void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_andc_i64) { + } else if (tcg_op_supported(INDEX_op_andc_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index d60427eb7f..3d6dc9d1ca 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1006,6 +1006,8 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), + OUTOP(INDEX_op_andc_i32, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_andc_i64, TCGOutOpBinary, outop_andc), }; #undef OUTOP @@ -2269,8 +2271,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap32_i32; case INDEX_op_not_i32: return TCG_TARGET_HAS_not_i32; - case INDEX_op_andc_i32: - return TCG_TARGET_HAS_andc_i32; case INDEX_op_orc_i32: return TCG_TARGET_HAS_orc_i32; case INDEX_op_eqv_i32: @@ -2346,8 +2346,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap64_i64; case INDEX_op_not_i64: return TCG_TARGET_HAS_not_i64; - case INDEX_op_andc_i64: - return TCG_TARGET_HAS_andc_i64; case INDEX_op_orc_i64: return TCG_TARGET_HAS_orc_i64; case INDEX_op_eqv_i64: @@ -5443,6 +5441,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: + case INDEX_op_andc_i32: + case INDEX_op_andc_i64: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci.c b/tcg/tci.c index 8762a99fb6..95a61e9df1 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -547,12 +547,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ^ regs[r2]; break; -#if TCG_TARGET_HAS_andc_i32 || TCG_TARGET_HAS_andc_i64 CASE_32_64(andc) tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & ~regs[r2]; break; -#endif #if TCG_TARGET_HAS_orc_i32 || TCG_TARGET_HAS_orc_i64 CASE_32_64(orc) tci_args_rrr(insn, &r0, &r1, &r2); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index cb0964c3d4..e09d366517 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -11,7 +11,6 @@ #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 1 @@ -36,7 +35,6 @@ #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 #define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 1 #define TCG_TARGET_HAS_nor_i64 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index b0141f8ed6..fb7c648b63 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -95,8 +95,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: case INDEX_op_eqv_i32: case INDEX_op_eqv_i64: case INDEX_op_nand_i32: @@ -659,6 +657,17 @@ static const TCGOutOpBinary outop_and = { .out_rrr = tgen_and, }; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_andc_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -705,7 +714,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(mul) CASE_32_64(or) CASE_32_64(xor) - CASE_32_64(andc) /* Optional (TCG_TARGET_HAS_andc_*). */ CASE_32_64(orc) /* Optional (TCG_TARGET_HAS_orc_*). */ CASE_32_64(eqv) /* Optional (TCG_TARGET_HAS_eqv_*). */ CASE_32_64(nand) /* Optional (TCG_TARGET_HAS_nand_*). */ From 46f96bff163512f9f8f9959de4a18c0799001422 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 12:37:02 -0800 Subject: [PATCH 016/161] tcg: Merge INDEX_op_andc_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- target/arm/tcg/translate-a64.c | 2 +- target/tricore/translate.c | 2 +- tcg/optimize.c | 6 ++++-- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 6 ++---- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 9 files changed, 17 insertions(+), 19 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 6a8025c0bf..116a0438b1 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -319,7 +319,7 @@ Logical - | *t0* = ~\ *t1* - * - andc_i32/i64 *t0*, *t1*, *t2* + * - andc *t0*, *t1*, *t2* - | *t0* = *t1* & ~\ *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index f45029caa7..9bc511992d 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -41,6 +41,7 @@ DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) +DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(setcond_i32, 1, 2, 1, 0) DEF(negsetcond_i32, 1, 2, 1, 0) @@ -91,7 +92,6 @@ DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) DEF(not_i32, 1, 1, 0, 0) DEF(neg_i32, 1, 1, 0, 0) -DEF(andc_i32, 1, 2, 0, 0) DEF(orc_i32, 1, 2, 0, 0) DEF(eqv_i32, 1, 2, 0, 0) DEF(nand_i32, 1, 2, 0, 0) @@ -149,7 +149,6 @@ DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) DEF(not_i64, 1, 1, 0, 0) DEF(neg_i64, 1, 1, 0, 0) -DEF(andc_i64, 1, 2, 0, 0) DEF(orc_i64, 1, 2, 0, 0) DEF(eqv_i64, 1, 2, 0, 0) DEF(nand_i64, 1, 2, 0, 0) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 43408c71bb..e076d4aa05 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -8600,7 +8600,7 @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a) tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); nzcv = a->nzcv; - has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0); + has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0); if (nzcv & 8) { /* N */ tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); } else { diff --git a/target/tricore/translate.c b/target/tricore/translate.c index 7cd26d8eab..2d0cde0268 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -3981,7 +3981,7 @@ static void decode_bit_andacc(DisasContext *ctx) pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_and_tl); break; case OPC2_32_BIT_AND_NOR_T: - if (tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0)) { gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], pos1, pos2, &tcg_gen_or_tl, &tcg_gen_andc_tl); } else { diff --git a/tcg/optimize.c b/tcg/optimize.c index 6c7b6af624..875d80c254 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -479,7 +479,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) CASE_OP_32_64(neg): return -x; - CASE_OP_32_64_VEC(andc): + case INDEX_op_andc: + case INDEX_op_andc_vec: return x & ~y; CASE_OP_32_64_VEC(orc): @@ -2852,7 +2853,8 @@ void tcg_optimize(TCGContext *s) case INDEX_op_and_vec: done = fold_and(&ctx, op); break; - CASE_OP_32_64_VEC(andc): + case INDEX_op_andc: + case INDEX_op_andc_vec: done = fold_andc(&ctx, op); break; CASE_OP_32_64(brcond): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 68818cbb0c..d87bd13375 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -668,8 +668,8 @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_andc, ret, arg1, arg2); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_not_i32(t0, arg2); @@ -2264,8 +2264,8 @@ void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (tcg_op_supported(INDEX_op_andc_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_andc, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_andc, ret, arg1, arg2); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_not_i64(t0, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 3d6dc9d1ca..38b2dd1c44 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1006,8 +1006,7 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), - OUTOP(INDEX_op_andc_i32, TCGOutOpBinary, outop_andc), - OUTOP(INDEX_op_andc_i64, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), }; #undef OUTOP @@ -5441,8 +5440,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: + case INDEX_op_andc: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci.c b/tcg/tci.c index 95a61e9df1..e4a0408fec 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -547,7 +547,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ^ regs[r2]; break; - CASE_32_64(andc) + case INDEX_op_andc: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & ~regs[r2]; break; @@ -1082,6 +1082,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_add: case INDEX_op_and: + case INDEX_op_andc: case INDEX_op_sub_i32: case INDEX_op_sub_i64: case INDEX_op_mul_i32: @@ -1090,8 +1091,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_or_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: case INDEX_op_orc_i32: case INDEX_op_orc_i64: case INDEX_op_eqv_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index fb7c648b63..92c588305a 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -660,7 +660,7 @@ static const TCGOutOpBinary outop_and = { static void tgen_andc(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_andc_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_andc, a0, a1, a2); } static const TCGOutOpBinary outop_andc = { From 6497af5b0d76819475839de9ea1ef9faad6215c0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 13:54:22 -0800 Subject: [PATCH 017/161] tcg: Convert or to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 31 ++++++++++++--------- tcg/arm/tcg-target.c.inc | 24 ++++++++++++---- tcg/i386/tcg-target.c.inc | 25 +++++++++++++---- tcg/loongarch64/tcg-target.c.inc | 29 ++++++++++++-------- tcg/mips/tcg-target.c.inc | 25 ++++++++++++----- tcg/ppc/tcg-target.c.inc | 29 ++++++++++++-------- tcg/riscv/tcg-target.c.inc | 29 ++++++++++++-------- tcg/s390x/tcg-target.c.inc | 47 +++++++++++++++++--------------- tcg/sparc64/tcg-target.c.inc | 23 ++++++++++++---- tcg/tcg.c | 4 +++ tcg/tci/tcg-target.c.inc | 14 ++++++++-- 11 files changed, 186 insertions(+), 94 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index c7167cad15..4b62e4e382 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2157,6 +2157,24 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, ORR, type, a0, a1, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_logicali(s, I3404_ORRI, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rL), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2238,17 +2256,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); break; - case INDEX_op_or_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_or_i64: - if (c2) { - tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); - } - break; - case INDEX_op_orc_i32: a2 = (int32_t)a2; /* FALLTHRU */ @@ -3016,8 +3023,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_mulsh_i64: return C_O1_I2(r, r, r); - case INDEX_op_or_i32: - case INDEX_op_or_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: case INDEX_op_orc_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index feea82145a..0575d397c9 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1881,6 +1881,24 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_ORR, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_ORR, a0, a1, encode_imm_nofail(a2)); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1943,13 +1961,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, args[0], args[1], args[2], const_args[2]); } break; - case INDEX_op_or_i32: - c = ARITH_ORR; - goto gen_arith; case INDEX_op_xor_i32: c = ARITH_EOR; - /* Fall through. */ - gen_arith: tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); break; case INDEX_op_add2_i32: @@ -2209,7 +2222,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i32: return C_O2_I2(r, r, r, r); - case INDEX_op_or_i32: case INDEX_op_xor_i32: return C_O1_I2(r, r, rI); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 33c1fcc717..813092622c 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2633,6 +2633,26 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_OR + rexw, a0, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_OR + rexw, a0, a2, false); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2717,9 +2737,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(sub): c = ARITH_SUB; goto gen_arith; - OP_32_64(or): - c = ARITH_OR; - goto gen_arith; OP_32_64(xor): c = ARITH_XOR; goto gen_arith; @@ -3645,8 +3662,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: return C_O1_I2(r, 0, re); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index b7d2984a8a..a7ead51263 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1332,6 +1332,24 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_or(s, a0, a1, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_ori(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1392,15 +1410,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_or_i32: - case INDEX_op_or_i64: - if (c2) { - tcg_out_opc_ori(s, a0, a1, a2); - } else { - tcg_out_opc_or(s, a0, a1, a2); - } - break; - case INDEX_op_xor_i32: case INDEX_op_xor_i64: if (c2) { @@ -2300,8 +2309,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_nor_i32: case INDEX_op_nor_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: /* LoongArch reg-imm bitops have their imms ZERO-extended */ diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index ab57c78095..74eef1d3b3 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1712,6 +1712,24 @@ static const TCGOutOpBinary outop_andc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_OR, a0, a1, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1785,14 +1803,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_or_i32: - case INDEX_op_or_i64: - i1 = OPC_OR, i2 = OPC_ORI; - goto do_binary; case INDEX_op_xor_i32: case INDEX_op_xor_i64: i1 = OPC_XOR, i2 = OPC_XORI; - do_binary: if (c2) { tcg_out_opc_imm(s, i2, a0, a1, a2); break; @@ -2218,9 +2231,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i64: case INDEX_op_mulu2_i64: return C_O2_I2(r, r, r, r); - case INDEX_op_or_i32: case INDEX_op_xor_i32: - case INDEX_op_or_i64: case INDEX_op_xor_i64: return C_O1_I2(r, r, rI); case INDEX_op_shl_i32: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 7b1a82c9fa..b638a5f813 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2954,6 +2954,24 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, OR | SAB(a1, a0, a2)); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_ori32(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3039,15 +3057,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_or_i64: - case INDEX_op_or_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_ori32(s, a0, a1, a2); - } else { - tcg_out32(s, OR | SAB(a1, a0, a2)); - } - break; case INDEX_op_xor_i64: case INDEX_op_xor_i32: a0 = args[0], a1 = args[1], a2 = args[2]; @@ -4130,7 +4139,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_or_i32: case INDEX_op_xor_i32: case INDEX_op_orc_i32: case INDEX_op_eqv_i32: @@ -4172,7 +4180,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i32: return C_O1_I2(r, rI, ri); - case INDEX_op_or_i64: case INDEX_op_xor_i64: return C_O1_I2(r, r, rU); case INDEX_op_sub_i64: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index f637604e98..9bacd109d4 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2013,6 +2013,24 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_OR, a0, a1, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2091,15 +2109,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_or_i32: - case INDEX_op_or_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_OR, a0, a1, a2); - } - break; - case INDEX_op_xor_i32: case INDEX_op_xor_i64: if (c2) { @@ -2682,9 +2691,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_or_i32: case INDEX_op_xor_i32: - case INDEX_op_or_i64: case INDEX_op_xor_i64: case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index e4b60d1924..9267aef544 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2242,6 +2242,31 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, OGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, OR, a0, a2); + } else { + tcg_out_insn(s, RRFa, ORK, a0, a1, a2); + } +} + +static void tgen_ori_3(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + tgen_ori(s, a0, type == TCG_TYPE_I32 ? (uint32_t)a2 : a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rK), + .out_rrr = tgen_or, + .out_rri = tgen_ori_3, +}; + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ @@ -2310,17 +2335,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_or_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_ori(s, a0, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, OR, a0, a2); - } else { - tcg_out_insn(s, RRFa, ORK, a0, a1, a2); - } - break; case INDEX_op_xor_i32: a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; if (const_args[2]) { @@ -2561,15 +2575,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_or_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_ori(s, a0, a2); - } else { - tcg_out_insn(s, RRFa, OGRK, a0, a1, a2); - } - break; case INDEX_op_xor_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { @@ -3282,10 +3287,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i32: case INDEX_op_sub_i64: - case INDEX_op_or_i32: case INDEX_op_xor_i32: return C_O1_I2(r, r, ri); - case INDEX_op_or_i64: case INDEX_op_xor_i64: return C_O1_I2(r, r, rK); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index fe9175aa1a..b01d55c80b 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1333,6 +1333,24 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_OR); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_OR); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1390,9 +1408,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(sub): c = ARITH_SUB; goto gen_arith; - OP_32_64(or): - c = ARITH_OR; - goto gen_arith; OP_32_64(orc): c = ARITH_ORN; goto gen_arith; @@ -1612,8 +1627,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i64: case INDEX_op_sub_i32: case INDEX_op_sub_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: case INDEX_op_orc_i32: case INDEX_op_orc_i64: case INDEX_op_xor_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index 38b2dd1c44..7357b5c127 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1007,6 +1007,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_or_i32, TCGOutOpBinary, outop_or), + OUTOP(INDEX_op_or_i64, TCGOutOpBinary, outop_or), }; #undef OUTOP @@ -5441,6 +5443,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: + case INDEX_op_or_i32: + case INDEX_op_or_i64: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 92c588305a..6fdfcab061 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -101,8 +101,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_nand_i64: case INDEX_op_nor_i32: case INDEX_op_nor_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: case INDEX_op_orc_i32: case INDEX_op_orc_i64: case INDEX_op_xor_i32: @@ -668,6 +666,17 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_or_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_or, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -712,7 +721,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sub) CASE_32_64(mul) - CASE_32_64(or) CASE_32_64(xor) CASE_32_64(orc) /* Optional (TCG_TARGET_HAS_orc_*). */ CASE_32_64(eqv) /* Optional (TCG_TARGET_HAS_eqv_*). */ From 49bd751497f3b71550b152ef9da0e265a94a64c1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 14:00:40 -0800 Subject: [PATCH 018/161] tcg: Merge INDEX_op_or_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- target/sh4/translate.c | 4 ++-- tcg/optimize.c | 6 ++++-- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 9 +++------ tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 8 files changed, 16 insertions(+), 19 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 116a0438b1..8d67b0cdeb 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -307,7 +307,7 @@ Logical - | *t0* = *t1* & *t2* - * - or_i32/i64 *t0*, *t1*, *t2* + * - or *t0*, *t1*, *t2* - | *t0* = *t1* | *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 9bc511992d..95608d6d31 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -42,6 +42,7 @@ DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) +DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(setcond_i32, 1, 2, 1, 0) DEF(negsetcond_i32, 1, 2, 1, 0) @@ -64,7 +65,6 @@ DEF(rem_i32, 1, 2, 0, 0) DEF(remu_i32, 1, 2, 0, 0) DEF(div2_i32, 2, 3, 0, 0) DEF(divu2_i32, 2, 3, 0, 0) -DEF(or_i32, 1, 2, 0, 0) DEF(xor_i32, 1, 2, 0, 0) /* shifts/rotates */ DEF(shl_i32, 1, 2, 0, 0) @@ -124,7 +124,6 @@ DEF(rem_i64, 1, 2, 0, 0) DEF(remu_i64, 1, 2, 0, 0) DEF(div2_i64, 2, 3, 0, 0) DEF(divu2_i64, 2, 3, 0, 0) -DEF(or_i64, 1, 2, 0, 0) DEF(xor_i64, 1, 2, 0, 0) /* shifts/rotates */ DEF(shl_i64, 1, 2, 0, 0) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 3d0eda2128..094613d312 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1949,7 +1949,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) op_opc = INDEX_op_xor_i32; goto do_reg_op; case 0x200b: /* or Rm,Rn */ - op_opc = INDEX_op_or_i32; + op_opc = INDEX_op_or; do_reg_op: /* The operation register should be as expected, and the other input cannot depend on the load. */ @@ -2119,7 +2119,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_or_i32: + case INDEX_op_or: if (op_dst != st_src) { goto fail; } diff --git a/tcg/optimize.c b/tcg/optimize.c index 875d80c254..7d5f7af223 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -437,7 +437,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_and_vec: return x & y; - CASE_OP_32_64_VEC(or): + case INDEX_op_or: + case INDEX_op_or_vec: return x | y; CASE_OP_32_64_VEC(xor): @@ -2961,7 +2962,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64_VEC(not): done = fold_not(&ctx, op); break; - CASE_OP_32_64_VEC(or): + case INDEX_op_or: + case INDEX_op_or_vec: done = fold_or(&ctx, op); break; CASE_OP_32_64_VEC(orc): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index d87bd13375..6807f4eebd 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -436,7 +436,7 @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_or, ret, arg1, arg2); } void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -1585,7 +1585,7 @@ void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_or, ret, arg1, arg2); } else { tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); diff --git a/tcg/tcg.c b/tcg/tcg.c index 7357b5c127..f31ae4e56b 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1007,8 +1007,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), - OUTOP(INDEX_op_or_i32, TCGOutOpBinary, outop_or), - OUTOP(INDEX_op_or_i64, TCGOutOpBinary, outop_or), + OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), }; #undef OUTOP @@ -2212,6 +2211,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_add: case INDEX_op_and: case INDEX_op_mov: + case INDEX_op_or: return has_type; case INDEX_op_setcond_i32: @@ -2228,7 +2228,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i32: case INDEX_op_neg_i32: case INDEX_op_mul_i32: - case INDEX_op_or_i32: case INDEX_op_xor_i32: case INDEX_op_shl_i32: case INDEX_op_shr_i32: @@ -2308,7 +2307,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_neg_i64: case INDEX_op_mul_i64: - case INDEX_op_or_i64: case INDEX_op_xor_i64: case INDEX_op_shl_i64: case INDEX_op_shr_i64: @@ -5443,8 +5441,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: - case INDEX_op_or_i32: - case INDEX_op_or_i64: + case INDEX_op_or: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci.c b/tcg/tci.c index e4a0408fec..3e361be6bd 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -539,7 +539,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & regs[r2]; break; - CASE_32_64(or) + case INDEX_op_or: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] | regs[r2]; break; @@ -1083,12 +1083,11 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: + case INDEX_op_or: case INDEX_op_sub_i32: case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: case INDEX_op_orc_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 6fdfcab061..4214b76b34 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -669,7 +669,7 @@ static const TCGOutOpBinary outop_andc = { static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_or_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_or, a0, a1, a2); } static const TCGOutOpBinary outop_or = { From 50e40ecd7a5eb803a67d02aa586b5671968ac58b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 10 Dec 2024 08:13:10 -0600 Subject: [PATCH 019/161] tcg/optimize: Fold orc with immediate to or MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/optimize.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 7d5f7af223..684b1099d0 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2143,6 +2143,7 @@ static bool fold_or(OptContext *ctx, TCGOp *op) static bool fold_orc(OptContext *ctx, TCGOp *op) { uint64_t s_mask; + TempOptInfo *t1, *t2; if (fold_const2(ctx, op) || fold_xx_to_i(ctx, op, -1) || @@ -2151,8 +2152,28 @@ static bool fold_orc(OptContext *ctx, TCGOp *op) return true; } - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold orc r,x,i to or r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_or; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_or_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_or(ctx, op); + } + + t1 = arg_info(op->args[1]); + s_mask = t1->s_mask & t2->s_mask; return fold_masks_s(ctx, op, s_mask); } From d262ae608115d2e6fc61ba6998d9813a5208b0e5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 14:30:50 -0800 Subject: [PATCH 020/161] tcg: Convert orc to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the same time, drop all backend support for immediate operands, as we now transform orc to or during optimize. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 24 ++++++++--------- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 4 +++ tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 4 +++ tcg/loongarch64/tcg-target-con-set.h | 1 - tcg/loongarch64/tcg-target-con-str.h | 1 - tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 40 ++++++++++------------------ tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 4 +++ tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 22 +++++++-------- tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 22 ++++++++------- tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 36 +++++++++++-------------- tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 16 +++++++---- tcg/tcg-has.h | 1 - tcg/tcg-op.c | 4 +-- tcg/tcg.c | 8 +++--- tcg/tci.c | 2 -- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 14 +++++++--- 26 files changed, 104 insertions(+), 118 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 851f6b01b4..8469a9446f 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 @@ -44,7 +43,6 @@ #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 4b62e4e382..13592303a8 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2175,6 +2175,17 @@ static const TCGOutOpBinary outop_or = { .out_rri = tgen_ori, }; +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, ORN, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2256,17 +2267,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); break; - case INDEX_op_orc_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_orc_i64: - if (c2) { - tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); - } - break; - case INDEX_op_xor_i32: a2 = (int32_t)a2; /* FALLTHRU */ @@ -3025,8 +3025,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_xor_i32: case INDEX_op_xor_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: case INDEX_op_eqv_i32: case INDEX_op_eqv_i64: return C_O1_I2(r, r, rL); diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 0268858a3b..39dcc87fe8 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -28,7 +28,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 0575d397c9..48cbcd67b9 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1899,6 +1899,10 @@ static const TCGOutOpBinary outop_or = { .out_rri = tgen_ori, }; +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index b29b70357a..e525f23c05 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -31,7 +31,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 @@ -56,7 +55,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 813092622c..3fe1d9d9cc 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2653,6 +2653,10 @@ static const TCGOutOpBinary outop_or = { .out_rri = tgen_ori, }; +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index b7c9b89e9e..b4af4f5423 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -23,7 +23,6 @@ C_O1_I1(r, r) C_O1_I1(w, r) C_O1_I1(w, w) C_O1_I2(r, r, r) -C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h index 99759120b4..e5e57452d6 100644 --- a/tcg/loongarch64/tcg-target-con-str.h +++ b/tcg/loongarch64/tcg-target-con-str.h @@ -23,7 +23,6 @@ REGS('w', ALL_VECTOR_REGS) CONST('I', TCG_CT_CONST_S12) CONST('J', TCG_CT_CONST_S32) CONST('U', TCG_CT_CONST_U12) -CONST('C', TCG_CT_CONST_C12) CONST('W', TCG_CT_CONST_WSZ) CONST('M', TCG_CT_CONST_VCMP) CONST('A', TCG_CT_CONST_VADD) diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 71d91fec19..fb1142958c 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 1 @@ -46,7 +45,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index a7ead51263..b6f13090b9 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -176,10 +176,9 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define TCG_CT_CONST_S12 0x100 #define TCG_CT_CONST_S32 0x200 #define TCG_CT_CONST_U12 0x400 -#define TCG_CT_CONST_C12 0x800 -#define TCG_CT_CONST_WSZ 0x1000 -#define TCG_CT_CONST_VCMP 0x2000 -#define TCG_CT_CONST_VADD 0x4000 +#define TCG_CT_CONST_WSZ 0x800 +#define TCG_CT_CONST_VCMP 0x1000 +#define TCG_CT_CONST_VADD 0x2000 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) @@ -205,9 +204,6 @@ static bool tcg_target_const_match(int64_t val, int ct, if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { return true; } - if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) { - return true; - } if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { return true; } @@ -1350,6 +1346,17 @@ static const TCGOutOpBinary outop_or = { .out_rri = tgen_ori, }; +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_orn(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1400,16 +1407,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - if (c2) { - /* guaranteed to fit due to constraint */ - tcg_out_opc_ori(s, a0, a1, ~a2); - } else { - tcg_out_opc_orn(s, a0, a1, a2); - } - break; - case INDEX_op_xor_i32: case INDEX_op_xor_i64: if (c2) { @@ -2286,15 +2283,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - /* - * LoongArch insns for these ops don't have reg-imm forms, but we - * can express using andi/ori if ~constant satisfies - * TCG_CT_CONST_U12. - */ - return C_O1_I2(r, r, rC); - case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 6a6d4377e7..b3dfa390f9 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -43,7 +43,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) @@ -62,7 +61,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_add2_i64 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 74eef1d3b3..f6987963ec 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1730,6 +1730,10 @@ static const TCGOutOpBinary outop_or = { .out_rri = tgen_ori, }; +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 63bb66f446..6f3ab41ebb 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -23,7 +23,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 @@ -49,7 +48,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 1 #define TCG_TARGET_HAS_nor_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index b638a5f813..ccd7812016 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2972,6 +2972,17 @@ static const TCGOutOpBinary outop_or = { .out_rri = tgen_ori, }; +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, ORC | SAB(a1, a0, a2)); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3066,15 +3077,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, XOR | SAB(a1, a0, a2)); } break; - case INDEX_op_orc_i32: - if (const_args[2]) { - tcg_out_ori32(s, args[0], args[1], ~args[2]); - break; - } - /* FALLTHRU */ - case INDEX_op_orc_i64: - tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); - break; case INDEX_op_eqv_i32: if (const_args[2]) { tcg_out_xori32(s, args[0], args[1], ~args[2]); @@ -4140,7 +4142,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O0_I2(r, r); case INDEX_op_xor_i32: - case INDEX_op_orc_i32: case INDEX_op_eqv_i32: case INDEX_op_shl_i32: case INDEX_op_shr_i32: @@ -4166,7 +4167,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_nor_i32: case INDEX_op_muluh_i32: case INDEX_op_mulsh_i32: - case INDEX_op_orc_i64: case INDEX_op_eqv_i64: case INDEX_op_nand_i64: case INDEX_op_nor_i64: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index a3918bf7f5..7b8f4386c9 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_orc_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_eqv_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 @@ -45,7 +44,6 @@ #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_orc_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_eqv_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 9bacd109d4..14216e9dff 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2031,6 +2031,18 @@ static const TCGOutOpBinary outop_or = { .out_rri = tgen_ori, }; +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_zbb_rrr, + .out_rrr = tgen_orc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2118,14 +2130,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ORI, a0, a1, ~a2); - } else { - tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2); - } - break; case INDEX_op_eqv_i32: case INDEX_op_eqv_i64: if (c2) { @@ -2699,8 +2703,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: case INDEX_op_eqv_i32: case INDEX_op_eqv_i64: return C_O1_I2(r, r, rJ); diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 15ec0dc2ff..850c16a164 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -34,7 +34,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3) @@ -58,7 +57,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 9267aef544..97587939bd 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2267,6 +2267,22 @@ static const TCGOutOpBinary outop_or = { .out_rri = tgen_ori_3, }; +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, OCRK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_orc, +}; + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ @@ -2347,15 +2363,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_orc_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_ori(s, a0, (uint32_t)~a2); - } else { - tcg_out_insn(s, RRFa, OCRK, a0, a1, a2); - } - break; case INDEX_op_eqv_i32: a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; if (const_args[2]) { @@ -2585,15 +2592,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_orc_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_ori(s, a0, ~a2); - } else { - tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2); - } - break; case INDEX_op_eqv_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { @@ -3292,10 +3290,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_xor_i64: return C_O1_I2(r, r, rK); - case INDEX_op_orc_i32: case INDEX_op_eqv_i32: return C_O1_I2(r, r, ri); - case INDEX_op_orc_i64: case INDEX_op_eqv_i64: return C_O1_I2(r, r, rNK); diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 510b9e64a4..8e20e4cdeb 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -20,7 +20,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 @@ -45,7 +44,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index b01d55c80b..38b325e8a9 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1351,6 +1351,17 @@ static const TCGOutOpBinary outop_or = { .out_rri = tgen_ori, }; +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_ORN); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1408,9 +1419,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(sub): c = ARITH_SUB; goto gen_arith; - OP_32_64(orc): - c = ARITH_ORN; - goto gen_arith; OP_32_64(xor): c = ARITH_XOR; goto gen_arith; @@ -1627,8 +1635,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i64: case INDEX_op_sub_i32: case INDEX_op_sub_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: case INDEX_op_shl_i32: diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 7e4301521e..df9c951262 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 0 -#define TCG_TARGET_HAS_orc_i64 0 #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 6807f4eebd..503d395ac8 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -710,7 +710,7 @@ void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_orc_i32) { + if (tcg_op_supported(INDEX_op_orc_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -2318,7 +2318,7 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_orc_i64) { + } else if (tcg_op_supported(INDEX_op_orc_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index f31ae4e56b..4737a6b2cc 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1008,6 +1008,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), + OUTOP(INDEX_op_orc_i32, TCGOutOpBinary, outop_orc), + OUTOP(INDEX_op_orc_i64, TCGOutOpBinary, outop_orc), }; #undef OUTOP @@ -2271,8 +2273,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap32_i32; case INDEX_op_not_i32: return TCG_TARGET_HAS_not_i32; - case INDEX_op_orc_i32: - return TCG_TARGET_HAS_orc_i32; case INDEX_op_eqv_i32: return TCG_TARGET_HAS_eqv_i32; case INDEX_op_nand_i32: @@ -2345,8 +2345,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap64_i64; case INDEX_op_not_i64: return TCG_TARGET_HAS_not_i64; - case INDEX_op_orc_i64: - return TCG_TARGET_HAS_orc_i64; case INDEX_op_eqv_i64: return TCG_TARGET_HAS_eqv_i64; case INDEX_op_nand_i64: @@ -5442,6 +5440,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_or: + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci.c b/tcg/tci.c index 3e361be6bd..7a926b30db 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -551,12 +551,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & ~regs[r2]; break; -#if TCG_TARGET_HAS_orc_i32 || TCG_TARGET_HAS_orc_i64 CASE_32_64(orc) tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] | ~regs[r2]; break; -#endif #if TCG_TARGET_HAS_eqv_i32 || TCG_TARGET_HAS_eqv_i64 CASE_32_64(eqv) tci_args_rrr(insn, &r0, &r1, &r2); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index e09d366517..d247774e52 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 @@ -42,7 +41,6 @@ #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_muls2_i64 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 4214b76b34..2e45cc4768 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -101,8 +101,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_nand_i64: case INDEX_op_nor_i32: case INDEX_op_nor_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: case INDEX_op_shl_i32: @@ -677,6 +675,17 @@ static const TCGOutOpBinary outop_or = { .out_rrr = tgen_or, }; +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_orc_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -722,7 +731,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sub) CASE_32_64(mul) CASE_32_64(xor) - CASE_32_64(orc) /* Optional (TCG_TARGET_HAS_orc_*). */ CASE_32_64(eqv) /* Optional (TCG_TARGET_HAS_eqv_*). */ CASE_32_64(nand) /* Optional (TCG_TARGET_HAS_nand_*). */ CASE_32_64(nor) /* Optional (TCG_TARGET_HAS_nor_*). */ From 6aba25ebb9eb6e1e86398294694aa0ab1f12076f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 14:46:26 -0800 Subject: [PATCH 021/161] tcg: Merge INDEX_op_orc_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- target/arm/tcg/translate-sve.c | 2 +- target/tricore/translate.c | 2 +- tcg/optimize.c | 6 ++++-- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 6 ++---- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 9 files changed, 17 insertions(+), 19 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 8d67b0cdeb..c5c5a4d19e 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -335,7 +335,7 @@ Logical - | *t0* = ~(*t1* | *t2*) - * - orc_i32/i64 *t0*, *t1*, *t2* + * - orc *t0*, *t1*, *t2* - | *t0* = *t1* | ~\ *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 95608d6d31..caf0f01042 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -43,6 +43,7 @@ DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) +DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(setcond_i32, 1, 2, 1, 0) DEF(negsetcond_i32, 1, 2, 1, 0) @@ -92,7 +93,6 @@ DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) DEF(not_i32, 1, 1, 0, 0) DEF(neg_i32, 1, 1, 0, 0) -DEF(orc_i32, 1, 2, 0, 0) DEF(eqv_i32, 1, 2, 0, 0) DEF(nand_i32, 1, 2, 0, 0) DEF(nor_i32, 1, 2, 0, 0) @@ -148,7 +148,6 @@ DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) DEF(not_i64, 1, 1, 0, 0) DEF(neg_i64, 1, 1, 0, 0) -DEF(orc_i64, 1, 2, 0, 0) DEF(eqv_i64, 1, 2, 0, 0) DEF(nand_i64, 1, 2, 0, 0) DEF(nor_i64, 1, 2, 0, 0) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index d23be477b4..f3cf028cb9 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -629,7 +629,7 @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) * = | ~(m | k) */ tcg_gen_and_i64(n, n, k); - if (tcg_op_supported(INDEX_op_orc_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) { tcg_gen_or_i64(m, m, k); tcg_gen_orc_i64(d, n, m); } else { diff --git a/target/tricore/translate.c b/target/tricore/translate.c index 2d0cde0268..ede0c92c1e 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -4114,7 +4114,7 @@ static void decode_bit_orand(DisasContext *ctx) pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_or_tl); break; case OPC2_32_BIT_OR_NOR_T: - if (tcg_op_supported(INDEX_op_orc_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I32, 0)) { gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], pos1, pos2, &tcg_gen_or_tl, &tcg_gen_orc_tl); } else { diff --git a/tcg/optimize.c b/tcg/optimize.c index 684b1099d0..5f0ab354d6 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -484,7 +484,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_andc_vec: return x & ~y; - CASE_OP_32_64_VEC(orc): + case INDEX_op_orc: + case INDEX_op_orc_vec: return x | ~y; CASE_OP_32_64_VEC(eqv): @@ -2987,7 +2988,8 @@ void tcg_optimize(TCGContext *s) case INDEX_op_or_vec: done = fold_or(&ctx, op); break; - CASE_OP_32_64_VEC(orc): + case INDEX_op_orc: + case INDEX_op_orc_vec: done = fold_orc(&ctx, op); break; case INDEX_op_qemu_ld_i32: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 503d395ac8..bf481060fa 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -710,8 +710,8 @@ void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_orc_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_orc, ret, arg1, arg2); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_not_i32(t0, arg2); @@ -2318,8 +2318,8 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (tcg_op_supported(INDEX_op_orc_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_orc, ret, arg1, arg2); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_not_i64(t0, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 4737a6b2cc..1b7e230219 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1008,8 +1008,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), - OUTOP(INDEX_op_orc_i32, TCGOutOpBinary, outop_orc), - OUTOP(INDEX_op_orc_i64, TCGOutOpBinary, outop_orc), + OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), }; #undef OUTOP @@ -5440,8 +5439,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_or: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: + case INDEX_op_orc: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci.c b/tcg/tci.c index 7a926b30db..68636e70da 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -551,7 +551,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & ~regs[r2]; break; - CASE_32_64(orc) + case INDEX_op_orc: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] | ~regs[r2]; break; @@ -1082,14 +1082,13 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_or: + case INDEX_op_orc: case INDEX_op_sub_i32: case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: case INDEX_op_eqv_i32: case INDEX_op_eqv_i64: case INDEX_op_nand_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 2e45cc4768..b9309e2fb9 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -678,7 +678,7 @@ static const TCGOutOpBinary outop_or = { static void tgen_orc(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_orc_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_orc, a0, a1, a2); } static const TCGOutOpBinary outop_orc = { From d3e56f382493540b3a46a432a7e09261f4f5dbe7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 15:11:22 -0800 Subject: [PATCH 022/161] tcg: Convert xor to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 31 +++++++++++--------- tcg/arm/tcg-target.c.inc | 25 +++++++++++----- tcg/i386/tcg-target.c.inc | 27 ++++++++++++----- tcg/loongarch64/tcg-target.c.inc | 29 +++++++++++------- tcg/mips/tcg-target.c.inc | 28 +++++++++++------- tcg/ppc/tcg-target.c.inc | 30 +++++++++++-------- tcg/riscv/tcg-target.c.inc | 29 +++++++++++------- tcg/s390x/tcg-target.c.inc | 50 ++++++++++++++++---------------- tcg/sparc64/tcg-target.c.inc | 23 +++++++++++---- tcg/tcg.c | 4 +++ tcg/tci/tcg-target.c.inc | 14 +++++++-- 11 files changed, 186 insertions(+), 104 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 13592303a8..d575635fe0 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2186,6 +2186,24 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, EOR, type, a0, a1, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_logicali(s, I3404_EORI, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rL), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2267,17 +2285,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); break; - case INDEX_op_xor_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_xor_i64: - if (c2) { - tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); - } - break; - case INDEX_op_eqv_i32: a2 = (int32_t)a2; /* FALLTHRU */ @@ -3023,8 +3030,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_mulsh_i64: return C_O1_I2(r, r, r); - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: case INDEX_op_eqv_i32: case INDEX_op_eqv_i64: return C_O1_I2(r, r, rL); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 48cbcd67b9..98cb3cf5e2 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1903,6 +1903,24 @@ static const TCGOutOpBinary outop_orc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_EOR, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_EOR, a0, a1, encode_imm_nofail(a2)); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1965,10 +1983,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, args[0], args[1], args[2], const_args[2]); } break; - case INDEX_op_xor_i32: - c = ARITH_EOR; - tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); - break; case INDEX_op_add2_i32: a0 = args[0], a1 = args[1], a2 = args[2]; a3 = args[3], a4 = args[4], a5 = args[5]; @@ -2226,9 +2240,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i32: return C_O2_I2(r, r, r, r); - case INDEX_op_xor_i32: - return C_O1_I2(r, r, rI); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 3fe1d9d9cc..9126f9aeff 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2657,6 +2657,26 @@ static const TCGOutOpBinary outop_orc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_XOR + rexw, a0, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_XOR + rexw, a0, a2, false); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2740,11 +2760,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(sub): c = ARITH_SUB; - goto gen_arith; - OP_32_64(xor): - c = ARITH_XOR; - goto gen_arith; - gen_arith: if (const_a2) { tgen_arithi(s, c + rexw, a0, a2, 0); } else { @@ -3666,8 +3681,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: return C_O1_I2(r, 0, re); case INDEX_op_shl_i32: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index b6f13090b9..296a84af79 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1357,6 +1357,24 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_xor(s, a0, a1, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_xori(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1407,15 +1425,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - if (c2) { - tcg_out_opc_xori(s, a0, a1, a2); - } else { - tcg_out_opc_xor(s, a0, a1, a2); - } - break; - case INDEX_op_extract_i32: if (a2 == 0 && args[3] <= 12) { tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1); @@ -2297,8 +2306,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_nor_i32: case INDEX_op_nor_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: /* LoongArch reg-imm bitops have their imms ZERO-extended */ return C_O1_I2(r, r, rU); diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index f6987963ec..30fb01cb0a 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1734,6 +1734,24 @@ static const TCGOutOpBinary outop_orc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1807,13 +1825,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - i1 = OPC_XOR, i2 = OPC_XORI; - if (c2) { - tcg_out_opc_imm(s, i2, a0, a1, a2); - break; - } do_binaryv: tcg_out_opc_reg(s, i1, a0, a1, a2); break; @@ -2235,9 +2246,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i64: case INDEX_op_mulu2_i64: return C_O2_I2(r, r, r, r); - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - return C_O1_I2(r, r, rI); case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index ccd7812016..16d3dbd841 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2983,6 +2983,24 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, XOR | SAB(a1, a0, a2)); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_xori32(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3068,15 +3086,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_xor_i64: - case INDEX_op_xor_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_xori32(s, a0, a1, a2); - } else { - tcg_out32(s, XOR | SAB(a1, a0, a2)); - } - break; case INDEX_op_eqv_i32: if (const_args[2]) { tcg_out_xori32(s, args[0], args[1], ~args[2]); @@ -4141,7 +4150,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_xor_i32: case INDEX_op_eqv_i32: case INDEX_op_shl_i32: case INDEX_op_shr_i32: @@ -4180,8 +4188,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i32: return C_O1_I2(r, rI, ri); - case INDEX_op_xor_i64: - return C_O1_I2(r, r, rU); case INDEX_op_sub_i64: return C_O1_I2(r, rI, rT); case INDEX_op_clz_i32: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 14216e9dff..c981ea389a 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2043,6 +2043,24 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2121,15 +2139,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2); - } - break; - case INDEX_op_eqv_i32: case INDEX_op_eqv_i64: if (c2) { @@ -2695,8 +2704,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: case INDEX_op_negsetcond_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 97587939bd..bedad7137b 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2283,6 +2283,31 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, XGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, XR, a0, a2); + } else { + tcg_out_insn(s, RRFa, XRK, a0, a1, a2); + } +} + +static void tgen_xori_3(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + tgen_xori(s, a0, type == TCG_TYPE_I32 ? (uint32_t)a2 : a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rK), + .out_rrr = tgen_xor, + .out_rri = tgen_xori_3, +}; + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ @@ -2351,18 +2376,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_xor_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tcg_out_insn(s, RIL, XILF, a0, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, XR, args[0], args[2]); - } else { - tcg_out_insn(s, RRFa, XRK, a0, a1, a2); - } - break; - case INDEX_op_eqv_i32: a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; if (const_args[2]) { @@ -2582,16 +2595,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_xor_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_xori(s, a0, a2); - } else { - tcg_out_insn(s, RRFa, XGRK, a0, a1, a2); - } - break; - case INDEX_op_eqv_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { @@ -3285,10 +3288,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i32: case INDEX_op_sub_i64: - case INDEX_op_xor_i32: return C_O1_I2(r, r, ri); - case INDEX_op_xor_i64: - return C_O1_I2(r, r, rK); case INDEX_op_eqv_i32: return C_O1_I2(r, r, ri); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 38b325e8a9..8a6c9852d2 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1362,6 +1362,24 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_XOR); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_XOR); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1419,9 +1437,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(sub): c = ARITH_SUB; goto gen_arith; - OP_32_64(xor): - c = ARITH_XOR; - goto gen_arith; case INDEX_op_shl_i32: c = SHIFT_SLL; do_shift32: @@ -1635,8 +1650,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i64: case INDEX_op_sub_i32: case INDEX_op_sub_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index 1b7e230219..042f177966 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1009,6 +1009,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), + OUTOP(INDEX_op_xor_i32, TCGOutOpBinary, outop_xor), + OUTOP(INDEX_op_xor_i64, TCGOutOpBinary, outop_xor), }; #undef OUTOP @@ -5440,6 +5442,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_andc: case INDEX_op_or: case INDEX_op_orc: + case INDEX_op_xor_i32: + case INDEX_op_xor_i64: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index b9309e2fb9..85caff300f 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -101,8 +101,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_nand_i64: case INDEX_op_nor_i32: case INDEX_op_nor_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: @@ -686,6 +684,17 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_xor_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_xor, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -730,7 +739,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sub) CASE_32_64(mul) - CASE_32_64(xor) CASE_32_64(eqv) /* Optional (TCG_TARGET_HAS_eqv_*). */ CASE_32_64(nand) /* Optional (TCG_TARGET_HAS_nand_*). */ CASE_32_64(nor) /* Optional (TCG_TARGET_HAS_nor_*). */ From fffd3dc9022efe89b9196d738127c294cf43a4d6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 15:18:35 -0800 Subject: [PATCH 023/161] tcg: Merge INDEX_op_xor_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- target/sh4/translate.c | 6 +++--- tcg/optimize.c | 18 ++++++++---------- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 9 +++------ tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 8 files changed, 21 insertions(+), 28 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index c5c5a4d19e..a4aa4f8824 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -311,7 +311,7 @@ Logical - | *t0* = *t1* | *t2* - * - xor_i32/i64 *t0*, *t1*, *t2* + * - xor *t0*, *t1*, *t2* - | *t0* = *t1* ^ *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index caf0f01042..8f6115bedb 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -44,6 +44,7 @@ DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) +DEF(xor, 1, 2, 0, TCG_OPF_INT) DEF(setcond_i32, 1, 2, 1, 0) DEF(negsetcond_i32, 1, 2, 1, 0) @@ -66,7 +67,6 @@ DEF(rem_i32, 1, 2, 0, 0) DEF(remu_i32, 1, 2, 0, 0) DEF(div2_i32, 2, 3, 0, 0) DEF(divu2_i32, 2, 3, 0, 0) -DEF(xor_i32, 1, 2, 0, 0) /* shifts/rotates */ DEF(shl_i32, 1, 2, 0, 0) DEF(shr_i32, 1, 2, 0, 0) @@ -124,7 +124,6 @@ DEF(rem_i64, 1, 2, 0, 0) DEF(remu_i64, 1, 2, 0, 0) DEF(div2_i64, 2, 3, 0, 0) DEF(divu2_i64, 2, 3, 0, 0) -DEF(xor_i64, 1, 2, 0, 0) /* shifts/rotates */ DEF(shl_i64, 1, 2, 0, 0) DEF(shr_i64, 1, 2, 0, 0) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 094613d312..8248648c0c 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1946,7 +1946,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) op_opc = INDEX_op_and; goto do_reg_op; case 0x200a: /* xor Rm,Rn */ - op_opc = INDEX_op_xor_i32; + op_opc = INDEX_op_xor; goto do_reg_op; case 0x200b: /* or Rm,Rn */ op_opc = INDEX_op_or; @@ -1976,7 +1976,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) goto fail; } op_dst = B11_8; - op_opc = INDEX_op_xor_i32; + op_opc = INDEX_op_xor; op_arg = tcg_constant_i32(-1); break; @@ -2133,7 +2133,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_xor_i32: + case INDEX_op_xor: if (op_dst != st_src) { goto fail; } diff --git a/tcg/optimize.c b/tcg/optimize.c index 5f0ab354d6..9303bb5b64 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -441,7 +441,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_or_vec: return x | y; - CASE_OP_32_64_VEC(xor): + case INDEX_op_xor: + case INDEX_op_xor_vec: return x ^ y; case INDEX_op_shl_i32: @@ -2289,7 +2290,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) break; } if (convert) { - TCGOpcode xor_opc, neg_opc; + TCGOpcode neg_opc; if (!inv && !neg) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); @@ -2298,11 +2299,9 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) switch (ctx->type) { case TCG_TYPE_I32: neg_opc = INDEX_op_neg_i32; - xor_opc = INDEX_op_xor_i32; break; case TCG_TYPE_I64: neg_opc = INDEX_op_neg_i64; - xor_opc = INDEX_op_xor_i64; break; default: g_assert_not_reached(); @@ -2314,7 +2313,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -1); } else { - op->opc = xor_opc; + op->opc = INDEX_op_xor; op->args[2] = arg_new_constant(ctx, 1); } return -1; @@ -2325,7 +2324,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode xor_opc, neg_opc, shr_opc; + TCGOpcode neg_opc, shr_opc; TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; @@ -2347,7 +2346,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) switch (ctx->type) { case TCG_TYPE_I32: - xor_opc = INDEX_op_xor_i32; shr_opc = INDEX_op_shr_i32; neg_opc = INDEX_op_neg_i32; if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) { @@ -2358,7 +2356,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } break; case TCG_TYPE_I64: - xor_opc = INDEX_op_xor_i64; shr_opc = INDEX_op_shr_i64; neg_opc = INDEX_op_neg_i64; if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) { @@ -2406,7 +2403,7 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) op2->args[1] = ret; op2->args[2] = arg_new_constant(ctx, -1); } else if (inv) { - op2 = opt_insert_after(ctx, op, xor_opc, 3); + op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3); op2->args[0] = ret; op2->args[1] = ret; op2->args[2] = arg_new_constant(ctx, 1); @@ -3051,7 +3048,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(sub2): done = fold_sub2(&ctx, op); break; - CASE_OP_32_64_VEC(xor): + case INDEX_op_xor: + case INDEX_op_xor_vec: done = fold_xor(&ctx, op); break; case INDEX_op_set_label: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index bf481060fa..b10f61435c 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -453,7 +453,7 @@ void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_xor, ret, arg1, arg2); } void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -1595,7 +1595,7 @@ void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_xor, ret, arg1, arg2); } else { tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); diff --git a/tcg/tcg.c b/tcg/tcg.c index 042f177966..3c4905aa68 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1009,8 +1009,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), - OUTOP(INDEX_op_xor_i32, TCGOutOpBinary, outop_xor), - OUTOP(INDEX_op_xor_i64, TCGOutOpBinary, outop_xor), + OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; #undef OUTOP @@ -2215,6 +2214,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_and: case INDEX_op_mov: case INDEX_op_or: + case INDEX_op_xor: return has_type; case INDEX_op_setcond_i32: @@ -2231,7 +2231,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i32: case INDEX_op_neg_i32: case INDEX_op_mul_i32: - case INDEX_op_xor_i32: case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: @@ -2308,7 +2307,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_neg_i64: case INDEX_op_mul_i64: - case INDEX_op_xor_i64: case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: @@ -5442,8 +5440,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_andc: case INDEX_op_or: case INDEX_op_orc: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: + case INDEX_op_xor: { const TCGOutOpBinary *out = container_of(all_outop[op->opc], TCGOutOpBinary, base); diff --git a/tcg/tci.c b/tcg/tci.c index 68636e70da..cb300c4846 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -543,7 +543,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] | regs[r2]; break; - CASE_32_64(xor) + case INDEX_op_xor: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ^ regs[r2]; break; @@ -1083,12 +1083,11 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_andc: case INDEX_op_or: case INDEX_op_orc: + case INDEX_op_xor: case INDEX_op_sub_i32: case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: case INDEX_op_eqv_i32: case INDEX_op_eqv_i64: case INDEX_op_nand_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 85caff300f..0a912744b3 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -687,7 +687,7 @@ static const TCGOutOpBinary outop_orc = { static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_xor_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_xor, a0, a1, a2); } static const TCGOutOpBinary outop_xor = { From 46c68d75063d2d1119d5907e24e64e068ff64ba4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 15 Nov 2023 11:51:28 -0800 Subject: [PATCH 024/161] tcg/optimize: Fold eqv with immediate to xor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/optimize.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 9303bb5b64..e18fe37ad2 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1807,6 +1807,7 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op) static bool fold_eqv(OptContext *ctx, TCGOp *op) { uint64_t s_mask; + TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || fold_xi_to_x(ctx, op, -1) || @@ -1814,8 +1815,28 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op) return true; } - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold eqv r,x,i to xor r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_xor; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_xor_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_xor(ctx, op); + } + + t1 = arg_info(op->args[1]); + s_mask = t1->s_mask & t2->s_mask; return fold_masks_s(ctx, op, s_mask); } From 18bc92091641880121d43584093ff207b0c44cfb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 15:37:43 -0800 Subject: [PATCH 025/161] tcg: Convert eqv to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 26 +++++++++------------ tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 4 ++++ tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 4 ++++ tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 4 ++++ tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 4 ++++ tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 22 +++++++++--------- tcg/riscv/tcg-target-con-set.h | 1 - tcg/riscv/tcg-target-con-str.h | 1 - tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 37 ++++++++++++------------------ tcg/s390x/tcg-target-con-set.h | 1 - tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 39 +++++++++++++------------------- tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 4 ++++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 8 +++---- tcg/tci.c | 2 -- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 14 +++++++++--- 27 files changed, 89 insertions(+), 106 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 8469a9446f..c17aafc3bb 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 @@ -43,7 +42,6 @@ #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index d575635fe0..83813af63e 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2157,6 +2157,17 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, EON, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_eqv, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2285,17 +2296,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); break; - case INDEX_op_eqv_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_eqv_i64: - if (c2) { - tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); - } - break; - case INDEX_op_not_i64: case INDEX_op_not_i32: tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); @@ -3030,10 +3030,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_mulsh_i64: return C_O1_I2(r, r, r); - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - return C_O1_I2(r, r, rL); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 39dcc87fe8..9ed85798e7 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -28,7 +28,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 98cb3cf5e2..57acb44c7a 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1881,6 +1881,10 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index e525f23c05..0183cafe61 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -31,7 +31,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 @@ -55,7 +54,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 9126f9aeff..1fd53cb94f 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2633,6 +2633,10 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index fb1142958c..d3697ee0f2 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 @@ -45,7 +44,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 296a84af79..6bd1826ef9 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1328,6 +1328,10 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index b3dfa390f9..9745c64db1 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -43,7 +43,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) @@ -61,7 +60,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 30fb01cb0a..3a3c72cb11 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1712,6 +1712,10 @@ static const TCGOutOpBinary outop_andc = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 6f3ab41ebb..8ede19bfad 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -23,7 +23,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 @@ -48,7 +47,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 1 #define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 16d3dbd841..203f089cd7 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2954,6 +2954,17 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, EQV | SAB(a1, a0, a2)); +} + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_eqv, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3086,15 +3097,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_eqv_i32: - if (const_args[2]) { - tcg_out_xori32(s, args[0], args[1], ~args[2]); - break; - } - /* FALLTHRU */ - case INDEX_op_eqv_i64: - tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); - break; case INDEX_op_nand_i32: case INDEX_op_nand_i64: tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); @@ -4150,7 +4152,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_eqv_i32: case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: @@ -4175,7 +4176,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_nor_i32: case INDEX_op_muluh_i32: case INDEX_op_mulsh_i32: - case INDEX_op_eqv_i64: case INDEX_op_nand_i64: case INDEX_op_nor_i64: case INDEX_op_div_i64: diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index f1f5d415f7..21f8833b3b 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -16,7 +16,6 @@ C_O1_I1(r, r) C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) -C_O1_I2(r, r, rJ) C_O1_I2(r, rz, rN) C_O1_I2(r, rz, rz) C_N1_I2(r, r, rM) diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h index 2f9700638c..1956f75f9a 100644 --- a/tcg/riscv/tcg-target-con-str.h +++ b/tcg/riscv/tcg-target-con-str.h @@ -16,7 +16,6 @@ REGS('v', ALL_VECTOR_REGS) * CONST(letter, TCG_CT_CONST_* bit set) */ CONST('I', TCG_CT_CONST_S12) -CONST('J', TCG_CT_CONST_J12) CONST('K', TCG_CT_CONST_S5) CONST('L', TCG_CT_CONST_CMP_VI) CONST('N', TCG_CT_CONST_N12) diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 7b8f4386c9..2faa2895e3 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_eqv_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB) @@ -44,7 +43,6 @@ #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_eqv_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index c981ea389a..ff2a412821 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -115,9 +115,8 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define TCG_CT_CONST_S12 0x100 #define TCG_CT_CONST_N12 0x200 #define TCG_CT_CONST_M12 0x400 -#define TCG_CT_CONST_J12 0x800 -#define TCG_CT_CONST_S5 0x1000 -#define TCG_CT_CONST_CMP_VI 0x2000 +#define TCG_CT_CONST_S5 0x800 +#define TCG_CT_CONST_CMP_VI 0x1000 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) @@ -416,13 +415,6 @@ static bool tcg_target_const_match(int64_t val, int ct, if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) { return 1; } - /* - * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff]. - * Used to map ANDN back to ANDI, etc. - */ - if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) { - return 1; - } /* * Sign extended from 5 bits: [-0x10, 0x0f]. * Used for vector-immediate. @@ -2013,6 +2005,18 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_zbb_rrr, + .out_rrr = tgen_eqv, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2139,15 +2143,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_XORI, a0, a1, ~a2); - } else { - tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2); - } - break; - case INDEX_op_not_i32: case INDEX_op_not_i64: tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1); @@ -2710,10 +2705,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - return C_O1_I2(r, r, rJ); - case INDEX_op_sub_i32: case INDEX_op_sub_i64: return C_O1_I2(r, rz, rN); diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 39903a60ad..86af067965 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -31,7 +31,6 @@ C_O1_I2(r, r, rC) C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) C_O1_I2(r, r, rK) -C_O1_I2(r, r, rNK) C_O1_I2(r, r, rNKR) C_O1_I2(r, rZ, r) C_O1_I2(v, v, r) diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 850c16a164..722a2ede1c 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -34,7 +34,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i32 0 @@ -57,7 +56,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index bedad7137b..6c32aa286d 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2242,6 +2242,22 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NXRK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_eqv, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2376,15 +2392,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_eqv_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tcg_out_insn(s, RIL, XILF, a0, ~a2); - } else { - tcg_out_insn(s, RRFa, NXRK, a0, a1, a2); - } - break; case INDEX_op_nand_i32: tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]); break; @@ -2595,15 +2602,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_eqv_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_xori(s, a0, ~a2); - } else { - tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2); - } - break; case INDEX_op_nand_i64: tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]); break; @@ -3290,11 +3288,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: return C_O1_I2(r, r, ri); - case INDEX_op_eqv_i32: - return C_O1_I2(r, r, ri); - case INDEX_op_eqv_i64: - return C_O1_I2(r, r, rNK); - case INDEX_op_nand_i32: case INDEX_op_nand_i64: case INDEX_op_nor_i32: diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 8e20e4cdeb..2ec5f5657c 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -20,7 +20,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 @@ -44,7 +43,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 8a6c9852d2..6d7ee19db1 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1333,6 +1333,10 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index df9c951262..a5808dcc0a 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index b10f61435c..8008b0d3e0 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -680,7 +680,7 @@ void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_eqv_i32) { + if (tcg_op_supported(INDEX_op_eqv_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2); } else { tcg_gen_xor_i32(ret, arg1, arg2); @@ -2279,7 +2279,7 @@ void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_eqv_i64) { + } else if (tcg_op_supported(INDEX_op_eqv_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2); } else { tcg_gen_xor_i64(ret, arg1, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 3c4905aa68..53158a292b 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1007,6 +1007,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_eqv_i32, TCGOutOpBinary, outop_eqv), + OUTOP(INDEX_op_eqv_i64, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), @@ -2273,8 +2275,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap32_i32; case INDEX_op_not_i32: return TCG_TARGET_HAS_not_i32; - case INDEX_op_eqv_i32: - return TCG_TARGET_HAS_eqv_i32; case INDEX_op_nand_i32: return TCG_TARGET_HAS_nand_i32; case INDEX_op_nor_i32: @@ -2344,8 +2344,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap64_i64; case INDEX_op_not_i64: return TCG_TARGET_HAS_not_i64; - case INDEX_op_eqv_i64: - return TCG_TARGET_HAS_eqv_i64; case INDEX_op_nand_i64: return TCG_TARGET_HAS_nand_i64; case INDEX_op_nor_i64: @@ -5438,6 +5436,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: + case INDEX_op_eqv_i32: + case INDEX_op_eqv_i64: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_xor: diff --git a/tcg/tci.c b/tcg/tci.c index cb300c4846..26a271e71f 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -555,12 +555,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] | ~regs[r2]; break; -#if TCG_TARGET_HAS_eqv_i32 || TCG_TARGET_HAS_eqv_i64 CASE_32_64(eqv) tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] ^ regs[r2]); break; -#endif #if TCG_TARGET_HAS_nand_i32 || TCG_TARGET_HAS_nand_i64 CASE_32_64(nand) tci_args_rrr(insn, &r0, &r1, &r2); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index d247774e52..2c0876a0fd 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -12,7 +12,6 @@ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_eqv_i32 1 #define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 @@ -34,7 +33,6 @@ #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_eqv_i64 1 #define TCG_TARGET_HAS_nand_i64 1 #define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 0a912744b3..4c9e055614 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -95,8 +95,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: case INDEX_op_nand_i32: case INDEX_op_nand_i64: case INDEX_op_nor_i32: @@ -662,6 +660,17 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_eqv_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_eqv, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -739,7 +748,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sub) CASE_32_64(mul) - CASE_32_64(eqv) /* Optional (TCG_TARGET_HAS_eqv_*). */ CASE_32_64(nand) /* Optional (TCG_TARGET_HAS_nand_*). */ CASE_32_64(nor) /* Optional (TCG_TARGET_HAS_nor_*). */ CASE_32_64(shl) From 5c0968a7e1da73f91f148d563a29af529427c5a5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 15:47:53 -0800 Subject: [PATCH 026/161] tcg: Merge INDEX_op_eqv_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 6 ++++-- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 6 ++---- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index a4aa4f8824..fe149e012d 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -323,7 +323,7 @@ Logical - | *t0* = *t1* & ~\ *t2* - * - eqv_i32/i64 *t0*, *t1*, *t2* + * - eqv *t0*, *t1*, *t2* - | *t0* = ~(*t1* ^ *t2*), or equivalently, *t0* = *t1* ^ ~\ *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 8f6115bedb..c6869de244 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -42,6 +42,7 @@ DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) +DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) @@ -93,7 +94,6 @@ DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) DEF(not_i32, 1, 1, 0, 0) DEF(neg_i32, 1, 1, 0, 0) -DEF(eqv_i32, 1, 2, 0, 0) DEF(nand_i32, 1, 2, 0, 0) DEF(nor_i32, 1, 2, 0, 0) DEF(clz_i32, 1, 2, 0, 0) @@ -147,7 +147,6 @@ DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) DEF(not_i64, 1, 1, 0, 0) DEF(neg_i64, 1, 1, 0, 0) -DEF(eqv_i64, 1, 2, 0, 0) DEF(nand_i64, 1, 2, 0, 0) DEF(nor_i64, 1, 2, 0, 0) DEF(clz_i64, 1, 2, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index e18fe37ad2..47898b7086 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -489,7 +489,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_orc_vec: return x | ~y; - CASE_OP_32_64_VEC(eqv): + case INDEX_op_eqv: + case INDEX_op_eqv_vec: return ~(x ^ y); CASE_OP_32_64_VEC(nand): @@ -2929,7 +2930,8 @@ void tcg_optimize(TCGContext *s) case INDEX_op_dup2_vec: done = fold_dup2(&ctx, op); break; - CASE_OP_32_64_VEC(eqv): + case INDEX_op_eqv: + case INDEX_op_eqv_vec: done = fold_eqv(&ctx, op); break; CASE_OP_32_64(extract): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 8008b0d3e0..2520a60cee 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -680,8 +680,8 @@ void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_eqv_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_eqv, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_eqv, ret, arg1, arg2); } else { tcg_gen_xor_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); @@ -2279,8 +2279,8 @@ void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (tcg_op_supported(INDEX_op_eqv_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_eqv, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_eqv, ret, arg1, arg2); } else { tcg_gen_xor_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); diff --git a/tcg/tcg.c b/tcg/tcg.c index 53158a292b..6642429df6 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1007,8 +1007,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), - OUTOP(INDEX_op_eqv_i32, TCGOutOpBinary, outop_eqv), - OUTOP(INDEX_op_eqv_i64, TCGOutOpBinary, outop_eqv), + OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), @@ -5436,8 +5435,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: + case INDEX_op_eqv: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_xor: diff --git a/tcg/tci.c b/tcg/tci.c index 26a271e71f..d2baa8d3fc 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -555,7 +555,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] | ~regs[r2]; break; - CASE_32_64(eqv) + case INDEX_op_eqv: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] ^ regs[r2]); break; @@ -1079,6 +1079,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: + case INDEX_op_eqv: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_xor: @@ -1086,8 +1087,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: case INDEX_op_nand_i32: case INDEX_op_nand_i64: case INDEX_op_nor_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 4c9e055614..fe3450373e 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -663,7 +663,7 @@ static const TCGOutOpBinary outop_andc = { static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_eqv_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_eqv, a0, a1, a2); } static const TCGOutOpBinary outop_eqv = { From e5a3162cb6c1aec4f47c6b51671e989b5975f345 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 16:18:19 -0800 Subject: [PATCH 027/161] tcg: Convert nand to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 4 ++++ tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 4 ++++ tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 4 ++++ tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 4 ++++ tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 4 ++++ tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 17 +++++++++++------ tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 4 ++++ tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 24 ++++++++++++++++-------- tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 4 ++++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 8 ++++---- tcg/tci.c | 2 -- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 14 +++++++++++--- 24 files changed, 72 insertions(+), 45 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index c17aafc3bb..2acc9bd3b7 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 @@ -42,7 +41,6 @@ #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 83813af63e..093bb0afb7 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2168,6 +2168,10 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 9ed85798e7..8d7b176993 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -28,7 +28,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 57acb44c7a..55d28be15b 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1885,6 +1885,10 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 0183cafe61..93552f2337 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -31,7 +31,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 @@ -54,7 +53,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 1fd53cb94f..51c3711ee5 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2637,6 +2637,10 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index d3697ee0f2..55249de465 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 @@ -44,7 +43,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 6bd1826ef9..814596608a 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1332,6 +1332,10 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 9745c64db1..2f8325d56f 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -43,7 +43,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muluh_i32 1 @@ -60,7 +59,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 3a3c72cb11..46cf393041 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1716,6 +1716,10 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 8ede19bfad..810f20d120 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -23,7 +23,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 have_isa_3_00 @@ -47,7 +46,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nand_i64 1 #define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 have_isa_3_00 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 203f089cd7..29341aff2c 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2965,6 +2965,17 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_nand(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, NAND | SAB(a1, a0, a2)); +} + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nand, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3097,10 +3108,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); - break; case INDEX_op_nor_i32: case INDEX_op_nor_i64: tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); @@ -4172,11 +4179,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_nand_i32: case INDEX_op_nor_i32: case INDEX_op_muluh_i32: case INDEX_op_mulsh_i32: - case INDEX_op_nand_i64: case INDEX_op_nor_i64: case INDEX_op_div_i64: case INDEX_op_divu_i64: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 2faa2895e3..3736a52d56 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB) @@ -43,7 +42,6 @@ #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index ff2a412821..cb2b58e495 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2017,6 +2017,10 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 722a2ede1c..d8afd73814 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -34,7 +34,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 @@ -56,7 +55,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 0 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 6c32aa286d..33eece6e5d 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2258,6 +2258,22 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_nand(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NNRK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, NNGRK, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_nand, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2392,9 +2408,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_nand_i32: - tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]); - break; case INDEX_op_nor_i32: tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]); break; @@ -2602,9 +2615,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_nand_i64: - tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]); - break; case INDEX_op_nor_i64: tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]); break; @@ -3288,8 +3298,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: return C_O1_I2(r, r, ri); - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: case INDEX_op_nor_i32: case INDEX_op_nor_i64: return C_O1_I2(r, r, r); diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 2ec5f5657c..9bc0474107 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -20,7 +20,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 @@ -43,7 +42,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 6d7ee19db1..02c443efb9 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1337,6 +1337,10 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index a5808dcc0a..e2a99067ac 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 2520a60cee..3921bac48d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -690,7 +690,7 @@ void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_nand_i32) { + if (tcg_op_supported(INDEX_op_nand_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2); } else { tcg_gen_and_i32(ret, arg1, arg2); @@ -2292,7 +2292,7 @@ void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_nand_i64) { + } else if (tcg_op_supported(INDEX_op_nand_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2); } else { tcg_gen_and_i64(ret, arg1, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 6642429df6..50361864aa 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1008,6 +1008,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), + OUTOP(INDEX_op_nand_i32, TCGOutOpBinary, outop_nand), + OUTOP(INDEX_op_nand_i64, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), @@ -2274,8 +2276,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap32_i32; case INDEX_op_not_i32: return TCG_TARGET_HAS_not_i32; - case INDEX_op_nand_i32: - return TCG_TARGET_HAS_nand_i32; case INDEX_op_nor_i32: return TCG_TARGET_HAS_nor_i32; case INDEX_op_clz_i32: @@ -2343,8 +2343,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap64_i64; case INDEX_op_not_i64: return TCG_TARGET_HAS_not_i64; - case INDEX_op_nand_i64: - return TCG_TARGET_HAS_nand_i64; case INDEX_op_nor_i64: return TCG_TARGET_HAS_nor_i64; case INDEX_op_clz_i64: @@ -5436,6 +5434,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_eqv: + case INDEX_op_nand_i32: + case INDEX_op_nand_i64: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_xor: diff --git a/tcg/tci.c b/tcg/tci.c index d2baa8d3fc..8be59a0193 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -559,12 +559,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] ^ regs[r2]); break; -#if TCG_TARGET_HAS_nand_i32 || TCG_TARGET_HAS_nand_i64 CASE_32_64(nand) tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] & regs[r2]); break; -#endif #if TCG_TARGET_HAS_nor_i32 || TCG_TARGET_HAS_nor_i64 CASE_32_64(nor) tci_args_rrr(insn, &r0, &r1, &r2); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 2c0876a0fd..8be70297f5 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -12,7 +12,6 @@ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 @@ -33,7 +32,6 @@ #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_nand_i64 1 #define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index fe3450373e..2a5c72705d 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -95,8 +95,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: case INDEX_op_nor_i32: case INDEX_op_nor_i64: case INDEX_op_shl_i32: @@ -671,6 +669,17 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_nand(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_nand_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nand, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -748,7 +757,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sub) CASE_32_64(mul) - CASE_32_64(nand) /* Optional (TCG_TARGET_HAS_nand_*). */ CASE_32_64(nor) /* Optional (TCG_TARGET_HAS_nor_*). */ CASE_32_64(shl) CASE_32_64(shr) From 59379a45af1f4d62fc8c1ae0ddee988f47075787 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 20:32:54 -0800 Subject: [PATCH 028/161] tcg: Merge INDEX_op_nand_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 6 ++++-- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 6 ++---- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index fe149e012d..7703dfbc51 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -327,7 +327,7 @@ Logical - | *t0* = ~(*t1* ^ *t2*), or equivalently, *t0* = *t1* ^ ~\ *t2* - * - nand_i32/i64 *t0*, *t1*, *t2* + * - nand *t0*, *t1*, *t2* - | *t0* = ~(*t1* & *t2*) diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index c6869de244..1acdd7cfda 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -43,6 +43,7 @@ DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) +DEF(nand, 1, 2, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) @@ -94,7 +95,6 @@ DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) DEF(not_i32, 1, 1, 0, 0) DEF(neg_i32, 1, 1, 0, 0) -DEF(nand_i32, 1, 2, 0, 0) DEF(nor_i32, 1, 2, 0, 0) DEF(clz_i32, 1, 2, 0, 0) DEF(ctz_i32, 1, 2, 0, 0) @@ -147,7 +147,6 @@ DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) DEF(not_i64, 1, 1, 0, 0) DEF(neg_i64, 1, 1, 0, 0) -DEF(nand_i64, 1, 2, 0, 0) DEF(nor_i64, 1, 2, 0, 0) DEF(clz_i64, 1, 2, 0, 0) DEF(ctz_i64, 1, 2, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 47898b7086..e8e6a0c2ce 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -493,7 +493,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_eqv_vec: return ~(x ^ y); - CASE_OP_32_64_VEC(nand): + case INDEX_op_nand: + case INDEX_op_nand_vec: return ~(x & y); CASE_OP_32_64_VEC(nor): @@ -2992,7 +2993,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(mulu2): done = fold_multiply2(&ctx, op); break; - CASE_OP_32_64_VEC(nand): + case INDEX_op_nand: + case INDEX_op_nand_vec: done = fold_nand(&ctx, op); break; CASE_OP_32_64(neg): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 3921bac48d..57782864fa 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -690,8 +690,8 @@ void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_nand_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_nand, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_nand, ret, arg1, arg2); } else { tcg_gen_and_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); @@ -2292,8 +2292,8 @@ void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (tcg_op_supported(INDEX_op_nand_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_nand, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_nand, ret, arg1, arg2); } else { tcg_gen_and_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); diff --git a/tcg/tcg.c b/tcg/tcg.c index 50361864aa..72e9175d06 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1008,8 +1008,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), - OUTOP(INDEX_op_nand_i32, TCGOutOpBinary, outop_nand), - OUTOP(INDEX_op_nand_i64, TCGOutOpBinary, outop_nand), + OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), @@ -5434,8 +5433,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_eqv: - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: + case INDEX_op_nand: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_xor: diff --git a/tcg/tci.c b/tcg/tci.c index 8be59a0193..9886ddf001 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -559,7 +559,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] ^ regs[r2]); break; - CASE_32_64(nand) + case INDEX_op_nand: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] & regs[r2]); break; @@ -1078,6 +1078,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_eqv: + case INDEX_op_nand: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_xor: @@ -1085,8 +1086,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: case INDEX_op_nor_i32: case INDEX_op_nor_i64: case INDEX_op_div_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 2a5c72705d..34a44a7674 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -672,7 +672,7 @@ static const TCGOutOpBinary outop_eqv = { static void tgen_nand(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_nand_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_nand, a0, a1, a2); } static const TCGOutOpBinary outop_nand = { From 8fb04b8295994e7416a222906939696d496638f3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 20:37:25 -0800 Subject: [PATCH 029/161] tcg/loongarch64: Do not accept constant argument to nor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The instruction set does not implement nor with immediate. There is no reason to pretend that we do. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 814596608a..e74c7d8a87 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1425,12 +1425,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_nor_i32: case INDEX_op_nor_i64: - if (c2) { - tcg_out_opc_ori(s, a0, a1, a2); - tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO); - } else { - tcg_out_opc_nor(s, a0, a1, a2); - } + tcg_out_opc_nor(s, a0, a1, a2); break; case INDEX_op_extract_i32: @@ -2314,8 +2309,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_nor_i32: case INDEX_op_nor_i64: - /* LoongArch reg-imm bitops have their imms ZERO-extended */ - return C_O1_I2(r, r, rU); + return C_O1_I2(r, r, r); case INDEX_op_clz_i32: case INDEX_op_clz_i64: From 3f6b223012f62bb3f73552bea4489383f08cdc23 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 20:57:21 -0800 Subject: [PATCH 030/161] tcg: Convert nor to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 4 ++++ tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 4 ++++ tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 4 ++++ tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 20 +++++++++++--------- tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 17 +++++++++++------ tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 18 +++++++++++------- tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 4 ++++ tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 28 ++++++++++++++++------------ tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 4 ++++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 8 ++++---- tcg/tci.c | 2 -- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 14 +++++++++++--- 24 files changed, 86 insertions(+), 65 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 2acc9bd3b7..240fcac2cc 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 @@ -41,7 +40,6 @@ #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 0 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 093bb0afb7..30cad937b7 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2172,6 +2172,10 @@ static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 8d7b176993..e80711ee40 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -28,7 +28,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions #define TCG_TARGET_HAS_ctpop_i32 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 55d28be15b..8e9edeb7c6 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1889,6 +1889,10 @@ static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 93552f2337..b27f853dcd 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -31,7 +31,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 have_popcnt @@ -53,7 +52,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 have_popcnt diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 51c3711ee5..9185f6879c 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2641,6 +2641,10 @@ static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 55249de465..7860432489 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 @@ -43,7 +42,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index e74c7d8a87..dc4ed0e8b0 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1336,6 +1336,17 @@ static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_nor(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nor, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1423,11 +1434,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO); break; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - tcg_out_opc_nor(s, a0, a1, a2); - break; - case INDEX_op_extract_i32: if (a2 == 0 && args[3] <= 12) { tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1); @@ -2307,10 +2313,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - return C_O1_I2(r, r, r); - case INDEX_op_clz_i32: case INDEX_op_clz_i64: case INDEX_op_ctz_i32: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 2f8325d56f..987f83f761 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -42,7 +42,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muluh_i32 1 @@ -58,7 +57,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 46cf393041..bfe329b3ef 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1720,6 +1720,17 @@ static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_NOR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nor, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1848,10 +1859,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; } goto do_binaryv; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - i1 = OPC_NOR; - goto do_binaryv; case INDEX_op_mul_i32: if (use_mips32_instructions) { @@ -2237,7 +2244,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_nor_i32: case INDEX_op_setcond_i32: case INDEX_op_mul_i64: case INDEX_op_mulsh_i64: @@ -2246,7 +2252,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: - case INDEX_op_nor_i64: case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rz); case INDEX_op_muls2_i32: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 810f20d120..6be6d7f994 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -23,7 +23,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 have_isa_3_00 #define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06 @@ -46,7 +45,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 have_isa_3_00 #define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 29341aff2c..c3366e4316 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2976,6 +2976,17 @@ static const TCGOutOpBinary outop_nand = { .out_rrr = tgen_nand, }; +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, NOR | SAB(a1, a0, a2)); +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nor, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3108,11 +3119,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_clz_i32: tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], args[2], const_args[2]); @@ -4179,10 +4185,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_nor_i32: case INDEX_op_muluh_i32: case INDEX_op_mulsh_i32: - case INDEX_op_nor_i64: case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 3736a52d56..0fcf940a8a 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -25,7 +25,6 @@ #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB) @@ -42,7 +41,6 @@ #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index cb2b58e495..887f20d4cb 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2021,6 +2021,10 @@ static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index d8afd73814..374db3cf9d 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -34,7 +34,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 1 @@ -55,7 +54,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 33eece6e5d..29570d3be1 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2274,6 +2274,22 @@ static const TCGOutOpBinary outop_nand = { .out_rrr = tgen_nand, }; +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NORK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, NOGRK, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_nor, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2408,10 +2424,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_nor_i32: - tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]); - break; - case INDEX_op_neg_i32: tcg_out_insn(s, RR, LCR, args[0], args[1]); break; @@ -2615,10 +2627,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_nor_i64: - tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]); - break; - case INDEX_op_neg_i64: tcg_out_insn(s, RRE, LCGR, args[0], args[1]); break; @@ -3298,10 +3306,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: return C_O1_I2(r, r, ri); - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - return C_O1_I2(r, r, r); - case INDEX_op_mul_i32: return (HAVE_FACILITY(MISC_INSN_EXT2) ? C_O1_I2(r, r, ri) diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 9bc0474107..35ae536879 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -20,7 +20,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 @@ -42,7 +41,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 02c443efb9..1ebff04af4 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1341,6 +1341,10 @@ static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index e2a99067ac..7de13ef383 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 57782864fa..ac939bb4ea 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -700,7 +700,7 @@ void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_nor_i32) { + if (tcg_op_supported(INDEX_op_nor_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2); } else { tcg_gen_or_i32(ret, arg1, arg2); @@ -2305,7 +2305,7 @@ void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_nor_i64) { + } else if (tcg_op_supported(INDEX_op_nor_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2); } else { tcg_gen_or_i64(ret, arg1, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 72e9175d06..d9807b77dc 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1009,6 +1009,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), + OUTOP(INDEX_op_nor_i32, TCGOutOpBinary, outop_nor), + OUTOP(INDEX_op_nor_i64, TCGOutOpBinary, outop_nor), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), @@ -2275,8 +2277,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap32_i32; case INDEX_op_not_i32: return TCG_TARGET_HAS_not_i32; - case INDEX_op_nor_i32: - return TCG_TARGET_HAS_nor_i32; case INDEX_op_clz_i32: return TCG_TARGET_HAS_clz_i32; case INDEX_op_ctz_i32: @@ -2342,8 +2342,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap64_i64; case INDEX_op_not_i64: return TCG_TARGET_HAS_not_i64; - case INDEX_op_nor_i64: - return TCG_TARGET_HAS_nor_i64; case INDEX_op_clz_i64: return TCG_TARGET_HAS_clz_i64; case INDEX_op_ctz_i64: @@ -5434,6 +5432,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_andc: case INDEX_op_eqv: case INDEX_op_nand: + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_xor: diff --git a/tcg/tci.c b/tcg/tci.c index 9886ddf001..3ea93fa5a6 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -563,12 +563,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] & regs[r2]); break; -#if TCG_TARGET_HAS_nor_i32 || TCG_TARGET_HAS_nor_i64 CASE_32_64(nor) tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] | regs[r2]); break; -#endif /* Arithmetic operations (32 bit). */ diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 8be70297f5..13c9dc3dfa 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -12,7 +12,6 @@ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 1 @@ -32,7 +31,6 @@ #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_nor_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 34a44a7674..a0f4c58be8 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -95,8 +95,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: @@ -680,6 +678,17 @@ static const TCGOutOpBinary outop_nand = { .out_rrr = tgen_nand, }; +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_nor_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nor, +}; + static void tgen_or(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -757,7 +766,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sub) CASE_32_64(mul) - CASE_32_64(nor) /* Optional (TCG_TARGET_HAS_nor_*). */ CASE_32_64(shl) CASE_32_64(shr) CASE_32_64(sar) From 3a8c4e9e53c6f4aa7c590971950000b174e74fa1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 21:02:17 -0800 Subject: [PATCH 031/161] tcg: Merge INDEX_op_nor_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 6 ++++-- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 6 ++---- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 7703dfbc51..26d464fa38 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -331,7 +331,7 @@ Logical - | *t0* = ~(*t1* & *t2*) - * - nor_i32/i64 *t0*, *t1*, *t2* + * - nor *t0*, *t1*, *t2* - | *t0* = ~(*t1* | *t2*) diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 1acdd7cfda..aa9ed393c9 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -44,6 +44,7 @@ DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(nand, 1, 2, 0, TCG_OPF_INT) +DEF(nor, 1, 2, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) @@ -95,7 +96,6 @@ DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) DEF(not_i32, 1, 1, 0, 0) DEF(neg_i32, 1, 1, 0, 0) -DEF(nor_i32, 1, 2, 0, 0) DEF(clz_i32, 1, 2, 0, 0) DEF(ctz_i32, 1, 2, 0, 0) DEF(ctpop_i32, 1, 1, 0, 0) @@ -147,7 +147,6 @@ DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) DEF(not_i64, 1, 1, 0, 0) DEF(neg_i64, 1, 1, 0, 0) -DEF(nor_i64, 1, 2, 0, 0) DEF(clz_i64, 1, 2, 0, 0) DEF(ctz_i64, 1, 2, 0, 0) DEF(ctpop_i64, 1, 1, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index e8e6a0c2ce..e4c319fe45 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -497,7 +497,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_nand_vec: return ~(x & y); - CASE_OP_32_64_VEC(nor): + case INDEX_op_nor: + case INDEX_op_nor_vec: return ~(x | y); case INDEX_op_clz_i32: @@ -3000,7 +3001,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(neg): done = fold_neg(&ctx, op); break; - CASE_OP_32_64_VEC(nor): + case INDEX_op_nor: + case INDEX_op_nor_vec: done = fold_nor(&ctx, op); break; CASE_OP_32_64_VEC(not): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index ac939bb4ea..228aa8f088 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -700,8 +700,8 @@ void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_nor_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_nor, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_nor, ret, arg1, arg2); } else { tcg_gen_or_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); @@ -2305,8 +2305,8 @@ void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (tcg_op_supported(INDEX_op_nor_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_nor, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_nor, ret, arg1, arg2); } else { tcg_gen_or_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); diff --git a/tcg/tcg.c b/tcg/tcg.c index d9807b77dc..c0178030ce 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1009,8 +1009,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), - OUTOP(INDEX_op_nor_i32, TCGOutOpBinary, outop_nor), - OUTOP(INDEX_op_nor_i64, TCGOutOpBinary, outop_nor), + OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), @@ -5432,8 +5431,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_andc: case INDEX_op_eqv: case INDEX_op_nand: - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: + case INDEX_op_nor: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_xor: diff --git a/tcg/tci.c b/tcg/tci.c index 3ea93fa5a6..ff129266c2 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -563,7 +563,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] & regs[r2]); break; - CASE_32_64(nor) + case INDEX_op_nor: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] | regs[r2]); break; @@ -1077,6 +1077,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_andc: case INDEX_op_eqv: case INDEX_op_nand: + case INDEX_op_nor: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_xor: @@ -1084,8 +1085,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: case INDEX_op_div_i32: case INDEX_op_div_i64: case INDEX_op_rem_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index a0f4c58be8..dec51692f0 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -681,7 +681,7 @@ static const TCGOutOpBinary outop_nand = { static void tgen_nor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_nor_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_nor, a0, a1, a2); } static const TCGOutOpBinary outop_nor = { From a3b37bc6faafe5932739c997b2b34a8f6dd57bfd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 2 Jan 2025 13:25:15 -0800 Subject: [PATCH 032/161] tcg/arm: Fix constraints for sub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 7536b82d288 we lost the rI constraint that allowed the use of RSB to perform reg = imm - reg. At the same time, drop support for reg = reg - imm, which is now transformed generically to addition, and need not be handled by the backend. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/arm/tcg-target-con-set.h | 1 + tcg/arm/tcg-target.c.inc | 11 ++++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h index 229ae258ac..f46a8444fb 100644 --- a/tcg/arm/tcg-target-con-set.h +++ b/tcg/arm/tcg-target-con-set.h @@ -30,6 +30,7 @@ C_O1_I2(r, r, rI) C_O1_I2(r, r, rIK) C_O1_I2(r, r, rIN) C_O1_I2(r, r, ri) +C_O1_I2(r, rI, r) C_O1_I2(r, rZ, rZ) C_O1_I2(w, 0, w) C_O1_I2(w, w, w) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 8e9edeb7c6..47c09ff2b1 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1984,12 +1984,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; case INDEX_op_sub_i32: if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]); - } else { - tcg_out_dat_rI(s, COND_AL, ARITH_RSB, - args[0], args[2], args[1], 1); - } + tcg_out_dat_imm(s, COND_AL, ARITH_RSB, + args[0], args[2], encode_imm_nofail(args[1])); } else { tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, args[0], args[1], args[2], const_args[2]); @@ -2234,10 +2230,11 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i32: return C_O0_I2(r, r); - case INDEX_op_sub_i32: case INDEX_op_setcond_i32: case INDEX_op_negsetcond_i32: return C_O1_I2(r, r, rIN); + case INDEX_op_sub_i32: + return C_O1_I2(r, rI, r); case INDEX_op_clz_i32: case INDEX_op_ctz_i32: From 3f057e24006fbc5eaf42278ba9368d383a1c7bed Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 21:57:43 -0800 Subject: [PATCH 033/161] tcg: Convert sub to TCGOutOpSubtract Create a special subclass for sub, because two backends can support "subtract from immediate". Drop all backend support for an immediate as the second operand, as we transform sub to add during optimize. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 24 +++++++-------- tcg/arm/tcg-target.c.inc | 29 +++++++++++------- tcg/i386/tcg-target.c.inc | 23 +++++++------- tcg/loongarch64/tcg-target.c.inc | 32 +++++++++----------- tcg/mips/tcg-target-con-set.h | 1 - tcg/mips/tcg-target.c.inc | 31 ++++++++----------- tcg/ppc/tcg-target-con-set.h | 3 +- tcg/ppc/tcg-target.c.inc | 52 +++++++++++--------------------- tcg/riscv/tcg-target-con-set.h | 1 - tcg/riscv/tcg-target-con-str.h | 1 - tcg/riscv/tcg-target.c.inc | 45 +++++++++------------------ tcg/s390x/tcg-target.c.inc | 41 +++++++++++-------------- tcg/sparc64/tcg-target.c.inc | 16 +++++++--- tcg/tcg.c | 30 ++++++++++++++++-- tcg/tci/tcg-target.c.inc | 14 +++++++-- 15 files changed, 169 insertions(+), 174 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 30cad937b7..dfe67c1261 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2205,6 +2205,17 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3502, SUB, type, a0, a1, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2290,15 +2301,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - if (c2) { - tgen_addi(s, ext, a0, a1, -a2); - } else { - tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); - } - break; - case INDEX_op_neg_i64: case INDEX_op_neg_i32: tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); @@ -3014,10 +3016,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - return C_O1_I2(r, r, rA); - case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: case INDEX_op_negsetcond_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 47c09ff2b1..13b78f0ada 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1915,6 +1915,24 @@ static const TCGOutOpBinary outop_orc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_SUB, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_subfi(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_RSB, a0, a2, encode_imm_nofail(a1)); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, rI, r), + .out_rrr = tgen_sub, + .out_rir = tgen_subfi, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1982,15 +2000,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[c], ARITH_MOV, ARITH_MVN, args[0], 0, args[3], const_args[3]); break; - case INDEX_op_sub_i32: - if (const_args[1]) { - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, - args[0], args[2], encode_imm_nofail(args[1])); - } else { - tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, - args[0], args[1], args[2], const_args[2]); - } - break; case INDEX_op_add2_i32: a0 = args[0], a1 = args[1], a2 = args[2]; a3 = args[3], a4 = args[4], a5 = args[5]; @@ -2233,8 +2242,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i32: case INDEX_op_negsetcond_i32: return C_O1_I2(r, r, rIN); - case INDEX_op_sub_i32: - return C_O1_I2(r, rI, r); case INDEX_op_clz_i32: case INDEX_op_ctz_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 9185f6879c..104f1b010a 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2669,6 +2669,18 @@ static const TCGOutOpBinary outop_orc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_SUB + rexw, a0, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_sub, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2770,15 +2782,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(sub): - c = ARITH_SUB; - if (const_a2) { - tgen_arithi(s, c + rexw, a0, a2, 0); - } else { - tgen_arithr(s, c + rexw, a0, a2); - } - break; - OP_32_64(mul): if (const_a2) { int32_t val; @@ -3689,8 +3692,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: return C_O1_I2(r, 0, re); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index dc4ed0e8b0..f01b19463b 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1376,6 +1376,21 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sub_w(s, a0, a1, a2); + } else { + tcg_out_opc_sub_d(s, a0, a1, a2); + } +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1596,21 +1611,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_sub_i32: - if (c2) { - tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2); - } else { - tcg_out_opc_sub_w(s, a0, a1, a2); - } - break; - case INDEX_op_sub_i64: - if (c2) { - tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2); - } else { - tcg_out_opc_sub_d(s, a0, a1, a2); - } - break; - case INDEX_op_neg_i32: tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1); break; @@ -2324,10 +2324,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) /* Must deposit into the same register as input */ return C_O1_I2(r, 0, rz); - case INDEX_op_sub_i32: case INDEX_op_setcond_i32: return C_O1_I2(r, rz, ri); - case INDEX_op_sub_i64: case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rJ); diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index 06ab04cc4d..248bc95d9b 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -24,7 +24,6 @@ C_O1_I2(r, r, rI) C_O1_I2(r, r, rIK) C_O1_I2(r, r, rJ) C_O1_I2(r, r, rzW) -C_O1_I2(r, rz, rN) C_O1_I2(r, rz, rz) C_O1_I4(r, rz, rz, rz, 0) C_O1_I4(r, rz, rz, rz, rz) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index bfe329b3ef..15c5661fb8 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1753,6 +1753,18 @@ static const TCGOutOpBinary outop_orc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_SUBU : OPC_DSUBU; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1844,22 +1856,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - do_binaryv: - tcg_out_opc_reg(s, i1, a0, a1, a2); - break; - - case INDEX_op_sub_i32: - i1 = OPC_SUBU, i2 = OPC_ADDIU; - goto do_subtract; - case INDEX_op_sub_i64: - i1 = OPC_DSUBU, i2 = OPC_DADDIU; - do_subtract: - if (c2) { - tcg_out_opc_imm(s, i2, a0, a1, -a2); - break; - } - goto do_binaryv; - case INDEX_op_mul_i32: if (use_mips32_instructions) { tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); @@ -2234,9 +2230,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - return C_O1_I2(r, rz, rN); case INDEX_op_mul_i32: case INDEX_op_mulsh_i32: case INDEX_op_muluh_i32: diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h index 453abde6c1..77a1038d51 100644 --- a/tcg/ppc/tcg-target-con-set.h +++ b/tcg/ppc/tcg-target-con-set.h @@ -22,8 +22,7 @@ C_O1_I1(v, r) C_O1_I1(v, v) C_O1_I1(v, vr) C_O1_I2(r, 0, rZ) -C_O1_I2(r, rI, ri) -C_O1_I2(r, rI, rT) +C_O1_I2(r, rI, r) C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rC) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index c3366e4316..bfbfdc2dfa 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3016,6 +3016,24 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, SUBF | TAB(a0, a2, a1)); +} + +static void tgen_subfi(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, rI, r), + .out_rrr = tgen_sub, + .out_rir = tgen_subfi, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3104,21 +3122,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; - case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - tgen_addi(s, type, a0, a1, (int32_t)-a2); - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; - case INDEX_op_clz_i32: tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], args[2], const_args[2]); @@ -3231,21 +3234,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); break; - case INDEX_op_sub_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - tgen_addi(s, type, a0, a1, -a2); - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; - case INDEX_op_shl_i64: if (const_args[2]) { /* Limit immediate shift count lest we create an illegal insn. */ @@ -4195,10 +4183,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muluh_i64: return C_O1_I2(r, r, r); - case INDEX_op_sub_i32: - return C_O1_I2(r, rI, ri); - case INDEX_op_sub_i64: - return C_O1_I2(r, rI, rT); case INDEX_op_clz_i32: case INDEX_op_ctz_i32: case INDEX_op_clz_i64: diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index 21f8833b3b..f3a6f7a7ed 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -16,7 +16,6 @@ C_O1_I1(r, r) C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) -C_O1_I2(r, rz, rN) C_O1_I2(r, rz, rz) C_N1_I2(r, r, rM) C_O1_I4(r, r, rI, rM, rM) diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h index 1956f75f9a..c04e15ddfa 100644 --- a/tcg/riscv/tcg-target-con-str.h +++ b/tcg/riscv/tcg-target-con-str.h @@ -18,5 +18,4 @@ REGS('v', ALL_VECTOR_REGS) CONST('I', TCG_CT_CONST_S12) CONST('K', TCG_CT_CONST_S5) CONST('L', TCG_CT_CONST_CMP_VI) -CONST('N', TCG_CT_CONST_N12) CONST('M', TCG_CT_CONST_M12) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 887f20d4cb..54da432ab1 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -113,10 +113,9 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) } #define TCG_CT_CONST_S12 0x100 -#define TCG_CT_CONST_N12 0x200 -#define TCG_CT_CONST_M12 0x400 -#define TCG_CT_CONST_S5 0x800 -#define TCG_CT_CONST_CMP_VI 0x1000 +#define TCG_CT_CONST_M12 0x200 +#define TCG_CT_CONST_S5 0x400 +#define TCG_CT_CONST_CMP_VI 0x800 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) @@ -400,13 +399,6 @@ static bool tcg_target_const_match(int64_t val, int ct, if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) { return 1; } - /* - * Sign extended from 12 bits, negated: [-0x7ff, 0x800]. - * Used for subtraction, where a constant must be handled by ADDI. - */ - if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) { - return 1; - } /* * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff]. * Used by addsub2 and movcond, which may need the negative value, @@ -2055,6 +2047,18 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SUBW : OPC_SUB; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2136,21 +2140,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_sub_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2); - } else { - tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2); - } - break; - case INDEX_op_sub_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2); - } else { - tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2); - } - break; - case INDEX_op_not_i32: case INDEX_op_not_i64: tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1); @@ -2713,10 +2702,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - return C_O1_I2(r, rz, rN); - case INDEX_op_mul_i32: case INDEX_op_mulsh_i32: case INDEX_op_muluh_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 29570d3be1..662984f733 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2331,6 +2331,23 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, SGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, SR, a0, a2); + } else { + tcg_out_insn(s, RRFa, SRK, a0, a1, a2); + } +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2413,17 +2430,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tgen_addi(s, type, a0, a1, (int32_t)-a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, SR, a0, a2); - } else { - tcg_out_insn(s, RRFa, SRK, a0, a1, a2); - } - break; - case INDEX_op_neg_i32: tcg_out_insn(s, RR, LCR, args[0], args[1]); break; @@ -2618,15 +2624,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); break; - case INDEX_op_sub_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tgen_addi(s, type, a0, a1, -a2); - } else { - tcg_out_insn(s, RRFa, SGRK, a0, a1, a2); - } - break; - case INDEX_op_neg_i64: tcg_out_insn(s, RRE, LCGR, args[0], args[1]); break; @@ -3302,10 +3299,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_clz_i64: return C_O1_I2(r, r, rI); - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - return C_O1_I2(r, r, ri); - case INDEX_op_mul_i32: return (HAVE_FACILITY(MISC_INSN_EXT2) ? C_O1_I2(r, r, ri) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 1ebff04af4..04b2b3b195 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1374,6 +1374,17 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_SUB); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1446,9 +1457,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_st32_i64: tcg_out_ldst(s, a0, a1, a2, STW); break; - OP_32_64(sub): - c = ARITH_SUB; - goto gen_arith; case INDEX_op_shl_i32: c = SHIFT_SLL; do_shift32: @@ -1660,8 +1668,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_div_i64: case INDEX_op_divu_i32: case INDEX_op_divu_i64: - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index c0178030ce..d7a44ac1b1 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -986,6 +986,14 @@ typedef struct TCGOutOpBinary { TCGReg a0, TCGReg a1, tcg_target_long a2); } TCGOutOpBinary; +typedef struct TCGOutOpSubtract { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rir)(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2); +} TCGOutOpSubtract; + #include "tcg-target.c.inc" #ifndef CONFIG_TCG_INTERPRETER @@ -1012,6 +1020,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), + OUTOP(INDEX_op_sub_i32, TCGOutOpSubtract, outop_sub), + OUTOP(INDEX_op_sub_i64, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -2231,7 +2241,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_sub_i32: case INDEX_op_neg_i32: case INDEX_op_mul_i32: case INDEX_op_shl_i32: @@ -2301,7 +2310,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st16_i64: case INDEX_op_st32_i64: case INDEX_op_st_i64: - case INDEX_op_sub_i64: case INDEX_op_neg_i64: case INDEX_op_mul_i64: case INDEX_op_shl_i64: @@ -5449,6 +5457,24 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_sub_i32: + case INDEX_op_sub_i64: + { + const TCGOutOpSubtract *out = &outop_sub; + + /* + * Constants should never appear in the second source operand. + * These are folded to add with negative constant. + */ + tcg_debug_assert(!const_args[2]); + if (const_args[1]) { + out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } + break; + default: if (def->flags & TCG_OPF_VECTOR) { tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index dec51692f0..353994e83f 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -91,8 +91,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: case INDEX_op_shl_i32: @@ -711,6 +709,17 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_sub_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -764,7 +773,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(sub) CASE_32_64(mul) CASE_32_64(shl) CASE_32_64(shr) From 60f34f55f1a708c071774bd7f837163d6b686867 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 22:06:32 -0800 Subject: [PATCH 034/161] tcg: Merge INDEX_op_sub_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 4 ++-- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 10 +++------- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 12 insertions(+), 18 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 26d464fa38..96b7f05919 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -265,7 +265,7 @@ Arithmetic - | *t0* = *t1* + *t2* - * - sub_i32/i64 *t0*, *t1*, *t2* + * - sub *t0*, *t1*, *t2* - | *t0* = *t1* - *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index aa9ed393c9..1be9b01caf 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -47,6 +47,7 @@ DEF(nand, 1, 2, 0, TCG_OPF_INT) DEF(nor, 1, 2, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) +DEF(sub, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) DEF(setcond_i32, 1, 2, 1, 0) @@ -62,7 +63,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* arith */ -DEF(sub_i32, 1, 2, 0, 0) DEF(mul_i32, 1, 2, 0, 0) DEF(div_i32, 1, 2, 0, 0) DEF(divu_i32, 1, 2, 0, 0) @@ -116,7 +116,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* arith */ -DEF(sub_i64, 1, 2, 0, 0) DEF(mul_i64, 1, 2, 0, 0) DEF(div_i64, 1, 2, 0, 0) DEF(divu_i64, 1, 2, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index e4c319fe45..718809ab8d 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -427,7 +427,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_add: return x + y; - CASE_OP_32_64(sub): + case INDEX_op_sub: return x - y; CASE_OP_32_64(mul): @@ -3066,7 +3066,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(sextract): done = fold_sextract(&ctx, op); break; - CASE_OP_32_64(sub): + case INDEX_op_sub: done = fold_sub(&ctx, op); break; case INDEX_op_sub_vec: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 228aa8f088..15faf4dc57 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -377,7 +377,7 @@ void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_sub, ret, arg1, arg2); } void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2) @@ -1565,7 +1565,7 @@ void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_sub, ret, arg1, arg2); } else { tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); diff --git a/tcg/tcg.c b/tcg/tcg.c index d7a44ac1b1..b31e9798c5 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1020,8 +1020,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), - OUTOP(INDEX_op_sub_i32, TCGOutOpSubtract, outop_sub), - OUTOP(INDEX_op_sub_i64, TCGOutOpSubtract, outop_sub), + OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -4010,10 +4009,8 @@ liveness_pass_1(TCGContext *s) opc_new = INDEX_op_add; goto do_addsub2; case INDEX_op_sub2_i32: - opc_new = INDEX_op_sub_i32; - goto do_addsub2; case INDEX_op_sub2_i64: - opc_new = INDEX_op_sub_i64; + opc_new = INDEX_op_sub; do_addsub2: nb_iargs = 4; nb_oargs = 2; @@ -5457,8 +5454,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: + case INDEX_op_sub: { const TCGOutOpSubtract *out = &outop_sub; diff --git a/tcg/tci.c b/tcg/tci.c index ff129266c2..508d1405cd 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -527,7 +527,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] + regs[r2]; break; - CASE_32_64(sub) + case INDEX_op_sub: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] - regs[r2]; break; @@ -1080,9 +1080,8 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_nor: case INDEX_op_or: case INDEX_op_orc: + case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: case INDEX_op_div_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 353994e83f..67a46c6321 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -712,7 +712,7 @@ static const TCGOutOpBinary outop_orc = { static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_sub_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_sub, a0, a1, a2); } static const TCGOutOpSubtract outop_sub = { From e126a91c38f85750d84cabf27f44c055e17ef6df Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 22:37:07 -0800 Subject: [PATCH 035/161] tcg: Convert neg to TCGOutOpUnary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 18 +++++++++++------- tcg/arm/tcg-target.c.inc | 14 ++++++++++---- tcg/i386/tcg-target.c.inc | 16 +++++++++++----- tcg/loongarch64/tcg-target.c.inc | 19 ++++++++++--------- tcg/mips/tcg-target.c.inc | 18 ++++++++++-------- tcg/ppc/tcg-target.c.inc | 17 ++++++++++------- tcg/riscv/tcg-target.c.inc | 19 ++++++++++--------- tcg/s390x/tcg-target.c.inc | 22 ++++++++++++++-------- tcg/sparc64/tcg-target.c.inc | 15 ++++++++++----- tcg/tcg.c | 21 +++++++++++++++++++-- tcg/tci/tcg-target.c.inc | 13 ++++++++++--- 11 files changed, 125 insertions(+), 67 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index dfe67c1261..cf7a3f2632 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2235,6 +2235,17 @@ static const TCGOutOpBinary outop_xor = { }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_XZR, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2301,11 +2312,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_neg_i64: - case INDEX_op_neg_i32: - tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); - break; - case INDEX_op_not_i64: case INDEX_op_not_i32: tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); @@ -2990,8 +2996,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: case INDEX_op_bswap16_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 13b78f0ada..5ea4488606 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1951,6 +1951,16 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_subfi(s, type, a0, 0, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2040,9 +2050,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } tcg_out_mov_reg(s, COND_AL, args[0], a0); break; - case INDEX_op_neg_i32: - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0); - break; case INDEX_op_not_i32: tcg_out_dat_reg(s, COND_AL, ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0)); @@ -2226,7 +2233,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_neg_i32: case INDEX_op_not_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 104f1b010a..082aa982fb 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2701,6 +2701,17 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_neg, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2900,9 +2911,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(neg): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0); - break; OP_32_64(not): tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0); break; @@ -3719,8 +3727,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: case INDEX_op_extrh_i64_i32: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index f01b19463b..31ec7262e0 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1409,6 +1409,16 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_ZERO, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1611,13 +1621,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_neg_i32: - tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1); - break; - case INDEX_op_neg_i64: - tcg_out_opc_sub_d(s, a0, TCG_REG_ZERO, a1); - break; - case INDEX_op_mul_i32: tcg_out_opc_mul_w(s, a0, a1, a2); break; @@ -2272,8 +2275,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ext_i32_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: case INDEX_op_extract_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 15c5661fb8..0fda255a7b 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1783,6 +1783,16 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_ZERO, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1975,12 +1985,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); break; - case INDEX_op_neg_i32: - i1 = OPC_SUBU; - goto do_unary; - case INDEX_op_neg_i64: - i1 = OPC_DSUBU; - goto do_unary; case INDEX_op_not_i32: case INDEX_op_not_i64: i1 = OPC_NOR; @@ -2195,7 +2199,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_neg_i32: case INDEX_op_not_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: @@ -2208,7 +2211,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_neg_i64: case INDEX_op_not_i64: case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index bfbfdc2dfa..da45436a5a 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3052,6 +3052,16 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out32(s, NEG | RT(a0) | RA(a1)); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3224,11 +3234,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_brcond2(s, args, const_args); break; - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); - break; - case INDEX_op_not_i32: case INDEX_op_not_i64: tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); @@ -4119,7 +4124,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: case INDEX_op_ctpop_i32: - case INDEX_op_neg_i32: case INDEX_op_not_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: @@ -4133,7 +4137,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: case INDEX_op_ctpop_i64: - case INDEX_op_neg_i64: case INDEX_op_not_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 54da432ab1..4e16c44aa5 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2077,6 +2077,16 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_ZERO, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2145,13 +2155,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1); break; - case INDEX_op_neg_i32: - tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1); - break; - case INDEX_op_neg_i64: - tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1); - break; - case INDEX_op_mul_i32: tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2); break; @@ -2660,7 +2663,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: case INDEX_op_not_i32: - case INDEX_op_neg_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i64: case INDEX_op_ld16u_i64: @@ -2669,7 +2671,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: case INDEX_op_not_i64: - case INDEX_op_neg_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 662984f733..08e65834d7 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2373,6 +2373,20 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori_3, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, LCR, a0, a1); + } else { + tcg_out_insn(s, RRE, LCGR, a0, a1); + } +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ @@ -2430,9 +2444,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_neg_i32: - tcg_out_insn(s, RR, LCR, args[0], args[1]); - break; case INDEX_op_not_i32: tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]); break; @@ -2624,9 +2635,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); break; - case INDEX_op_neg_i64: - tcg_out_insn(s, RRE, LCGR, args[0], args[1]); - break; case INDEX_op_not_i64: tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]); break; @@ -3323,8 +3331,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: case INDEX_op_ext_i32_i64: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 04b2b3b195..a3926ea1c3 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1403,6 +1403,16 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_G0, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1473,9 +1483,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, c = ARITH_UMUL; goto gen_arith; - OP_32_64(neg): - c = ARITH_SUB; - goto gen_arith1; OP_32_64(not): c = ARITH_ORN; goto gen_arith1; @@ -1639,8 +1646,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: case INDEX_op_ext_i32_i64: diff --git a/tcg/tcg.c b/tcg/tcg.c index b31e9798c5..b5de69e4a9 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -986,6 +986,11 @@ typedef struct TCGOutOpBinary { TCGReg a0, TCGReg a1, tcg_target_long a2); } TCGOutOpBinary; +typedef struct TCGOutOpUnary { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); +} TCGOutOpUnary; + typedef struct TCGOutOpSubtract { TCGOutOp base; void (*out_rrr)(TCGContext *s, TCGType type, @@ -1017,6 +1022,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), + OUTOP(INDEX_op_neg_i32, TCGOutOpUnary, outop_neg), + OUTOP(INDEX_op_neg_i64, TCGOutOpUnary, outop_neg), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), @@ -2240,7 +2247,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_neg_i32: case INDEX_op_mul_i32: case INDEX_op_shl_i32: case INDEX_op_shr_i32: @@ -2309,7 +2315,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st16_i64: case INDEX_op_st32_i64: case INDEX_op_st_i64: - case INDEX_op_neg_i64: case INDEX_op_mul_i64: case INDEX_op_shl_i64: case INDEX_op_shr_i64: @@ -5471,6 +5476,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_neg_i32: + case INDEX_op_neg_i64: + { + const TCGOutOpUnary *out = + container_of(all_outop[op->opc], TCGOutOpUnary, base); + + /* Constants should have been folded. */ + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1]); + } + break; + default: if (def->flags & TCG_OPF_VECTOR) { tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 67a46c6321..200b256e73 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -57,8 +57,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i32: @@ -731,6 +729,16 @@ static const TCGOutOpBinary outop_xor = { .out_rrr = tgen_xor, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, glue(INDEX_op_neg_i,TCG_TARGET_REG_BITS), a0, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -804,7 +812,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3])); break; - CASE_32_64(neg) /* Optional (TCG_TARGET_HAS_neg_*). */ CASE_32_64(not) /* Optional (TCG_TARGET_HAS_not_*). */ CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */ case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ From 6971358747d8998a5770d1bf997495d3061d6c6a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 22:48:57 -0800 Subject: [PATCH 036/161] tcg: Merge INDEX_op_neg_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 30 ++++++------------------------ tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 6 ++---- tcg/tci.c | 11 +++++------ tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 18 insertions(+), 40 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 96b7f05919..fb51691538 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -269,7 +269,7 @@ Arithmetic - | *t0* = *t1* - *t2* - * - neg_i32/i64 *t0*, *t1* + * - neg *t0*, *t1* - | *t0* = -*t1* (two's complement) diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 1be9b01caf..13b7650cec 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -44,6 +44,7 @@ DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(nand, 1, 2, 0, TCG_OPF_INT) +DEF(neg, 1, 1, 0, TCG_OPF_INT) DEF(nor, 1, 2, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) @@ -95,7 +96,6 @@ DEF(setcond2_i32, 1, 4, 1, 0) DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) DEF(not_i32, 1, 1, 0, 0) -DEF(neg_i32, 1, 1, 0, 0) DEF(clz_i32, 1, 2, 0, 0) DEF(ctz_i32, 1, 2, 0, 0) DEF(ctpop_i32, 1, 1, 0, 0) @@ -145,7 +145,6 @@ DEF(bswap16_i64, 1, 1, 1, 0) DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) DEF(not_i64, 1, 1, 0, 0) -DEF(neg_i64, 1, 1, 0, 0) DEF(clz_i64, 1, 2, 0, 0) DEF(ctz_i64, 1, 2, 0, 0) DEF(ctpop_i64, 1, 1, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 718809ab8d..eb360e2b63 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -478,7 +478,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) CASE_OP_32_64_VEC(not): return ~x; - CASE_OP_32_64(neg): + case INDEX_op_neg: return -x; case INDEX_op_andc: @@ -2314,25 +2314,12 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) break; } if (convert) { - TCGOpcode neg_opc; - if (!inv && !neg) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); } - switch (ctx->type) { - case TCG_TYPE_I32: - neg_opc = INDEX_op_neg_i32; - break; - case TCG_TYPE_I64: - neg_opc = INDEX_op_neg_i64; - break; - default: - g_assert_not_reached(); - } - if (!inv) { - op->opc = neg_opc; + op->opc = INDEX_op_neg; } else if (neg) { op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -1); @@ -2348,7 +2335,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode neg_opc, shr_opc; + TCGOpcode shr_opc; TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; @@ -2371,7 +2358,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) switch (ctx->type) { case TCG_TYPE_I32: shr_opc = INDEX_op_shr_i32; - neg_opc = INDEX_op_neg_i32; if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) { uext_opc = INDEX_op_extract_i32; } @@ -2381,7 +2367,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) break; case TCG_TYPE_I64: shr_opc = INDEX_op_shr_i64; - neg_opc = INDEX_op_neg_i64; if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) { uext_opc = INDEX_op_extract_i64; } @@ -2432,7 +2417,7 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) op2->args[1] = ret; op2->args[2] = arg_new_constant(ctx, 1); } else if (neg) { - op2 = opt_insert_after(ctx, op, neg_opc, 2); + op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2); op2->args[0] = ret; op2->args[1] = ret; } @@ -2644,11 +2629,8 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) switch (ctx->type) { case TCG_TYPE_I32: - neg_op = INDEX_op_neg_i32; - have_neg = true; - break; case TCG_TYPE_I64: - neg_op = INDEX_op_neg_i64; + neg_op = INDEX_op_neg; have_neg = true; break; case TCG_TYPE_V64: @@ -2998,7 +2980,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_nand_vec: done = fold_nand(&ctx, op); break; - CASE_OP_32_64(neg): + case INDEX_op_neg: done = fold_neg(&ctx, op); break; case INDEX_op_nor: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 15faf4dc57..cb2eb9ae52 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -396,7 +396,7 @@ void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) { - tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); + tcg_gen_op2_i32(INDEX_op_neg, ret, arg); } void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) @@ -1691,7 +1691,7 @@ void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); + tcg_gen_op2_i64(INDEX_op_neg, ret, arg); } else { TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), diff --git a/tcg/tcg.c b/tcg/tcg.c index b5de69e4a9..92d185558f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1022,8 +1022,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), - OUTOP(INDEX_op_neg_i32, TCGOutOpUnary, outop_neg), - OUTOP(INDEX_op_neg_i64, TCGOutOpUnary, outop_neg), + OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), @@ -5476,8 +5475,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: + case INDEX_op_neg: { const TCGOutOpUnary *out = container_of(all_outop[op->opc], TCGOutOpUnary, base); diff --git a/tcg/tci.c b/tcg/tci.c index 508d1405cd..c736691e9f 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -567,6 +567,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] | regs[r2]); break; + case INDEX_op_neg: + tci_args_rr(insn, &r0, &r1); + regs[r0] = -regs[r1]; + break; /* Arithmetic operations (32 bit). */ @@ -697,10 +701,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = ~regs[r1]; break; #endif - CASE_32_64(neg) - tci_args_rr(insn, &r0, &r1); - regs[r0] = -regs[r1]; - break; #if TCG_TARGET_REG_BITS == 64 /* Load/store operations (64 bit). */ @@ -1054,6 +1054,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) break; case INDEX_op_mov: + case INDEX_op_neg: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i32: @@ -1063,8 +1064,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_bswap64_i64: case INDEX_op_not_i32: case INDEX_op_not_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: case INDEX_op_ctpop_i32: case INDEX_op_ctpop_i64: tci_args_rr(insn, &r0, &r1); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 200b256e73..c42f9dff11 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -731,7 +731,7 @@ static const TCGOutOpBinary outop_xor = { static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { - tcg_out_op_rr(s, glue(INDEX_op_neg_i,TCG_TARGET_REG_BITS), a0, a1); + tcg_out_op_rr(s, INDEX_op_neg, a0, a1); } static const TCGOutOpUnary outop_neg = { From 592982bf04cfa92dc4c992056c7330ac46f4882f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 23:37:54 -0800 Subject: [PATCH 037/161] tcg: Convert not to TCGOutOpUnary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 17 ++++++++++------- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 15 ++++++++++----- tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 17 +++++++++++------ tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 17 ++++++++++------- tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 20 ++++++++++---------- tcg/optimize.c | 4 ++-- tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 17 ++++++++++------- tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 17 ++++++++++------- tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 25 ++++++++++++++++--------- tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 20 ++++++++++---------- tcg/tcg-has.h | 1 - tcg/tcg-op.c | 10 ++++++---- tcg/tcg.c | 8 ++++---- tcg/tci.c | 2 -- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 13 ++++++++++--- 25 files changed, 119 insertions(+), 103 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 240fcac2cc..7f18727686 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -17,7 +17,6 @@ #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 @@ -38,7 +37,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index cf7a3f2632..97b444bc17 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2245,6 +2245,16 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_orc(s, type, a0, TCG_REG_XZR, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2312,11 +2322,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_not_i64: - case INDEX_op_not_i32: - tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); - break; - case INDEX_op_mul_i64: case INDEX_op_mul_i32: tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); @@ -2996,8 +3001,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: case INDEX_op_bswap16_i64: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index e80711ee40..e766c6d628 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -26,7 +26,6 @@ extern bool use_neon_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 5ea4488606..2477b1c4ab 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1961,6 +1961,16 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MVN, a0, 0, a1, SHIFT_IMM_LSL(0)); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2050,10 +2060,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } tcg_out_mov_reg(s, COND_AL, args[0], a0); break; - case INDEX_op_not_i32: - tcg_out_dat_reg(s, COND_AL, - ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0)); - break; case INDEX_op_mul_i32: tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]); break; @@ -2233,7 +2239,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_not_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index b27f853dcd..3d36fe58f2 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -30,7 +30,6 @@ #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 have_popcnt @@ -51,7 +50,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 have_popcnt diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 082aa982fb..24fef09c9e 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2712,6 +2712,17 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_not, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2911,10 +2922,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(not): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I32); break; @@ -3727,8 +3734,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: case INDEX_op_extrh_i64_i32: return C_O1_I1(r, 0); diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 7860432489..ffacb41e80 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -24,7 +24,6 @@ #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 @@ -41,7 +40,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 31ec7262e0..c24882b6ba 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1419,6 +1419,16 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_nor(s, type, a0, a1, TCG_REG_ZERO); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1454,11 +1464,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_srai_d(s, a0, a1, 32); break; - case INDEX_op_not_i32: - case INDEX_op_not_i64: - tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO); - break; - case INDEX_op_extract_i32: if (a2 == 0 && args[3] <= 12) { tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1); @@ -2275,8 +2280,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ext_i32_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: case INDEX_op_extract_i32: case INDEX_op_extract_i64: case INDEX_op_sextract_i32: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 987f83f761..9d8e0fb8df 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -41,7 +41,6 @@ extern bool use_mips32r2_instructions; /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muluh_i32 1 @@ -56,7 +55,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 0fda255a7b..4942855189 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1793,6 +1793,16 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_nor(s, type, a0, TCG_REG_ZERO, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1985,14 +1995,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); break; - case INDEX_op_not_i32: - case INDEX_op_not_i64: - i1 = OPC_NOR; - goto do_unary; - do_unary: - tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); - break; - case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: tcg_out_bswap16(s, a0, a1, a2); @@ -2199,7 +2201,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_not_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: @@ -2211,7 +2212,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_not_i64: case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: diff --git a/tcg/optimize.c b/tcg/optimize.c index eb360e2b63..b4a675ea37 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1104,11 +1104,11 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) switch (ctx->type) { case TCG_TYPE_I32: not_op = INDEX_op_not_i32; - have_not = TCG_TARGET_HAS_not_i32; + have_not = tcg_op_supported(INDEX_op_not_i32, TCG_TYPE_I32, 0); break; case TCG_TYPE_I64: not_op = INDEX_op_not_i64; - have_not = TCG_TARGET_HAS_not_i64; + have_not = tcg_op_supported(INDEX_op_not_i64, TCG_TYPE_I64, 0); break; case TCG_TYPE_V64: case TCG_TYPE_V128: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 6be6d7f994..7ebcb49a19 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -22,7 +22,6 @@ #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 have_isa_3_00 #define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06 @@ -44,7 +43,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 have_isa_3_00 #define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index da45436a5a..38cbe5223b 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3062,6 +3062,16 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_nor(s, type, a0, a1, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3234,11 +3244,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_brcond2(s, args, const_args); break; - case INDEX_op_not_i32: - case INDEX_op_not_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); - break; - case INDEX_op_shl_i64: if (const_args[2]) { /* Limit immediate shift count lest we create an illegal insn. */ @@ -4124,7 +4129,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: case INDEX_op_ctpop_i32: - case INDEX_op_not_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: @@ -4137,7 +4141,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: case INDEX_op_ctpop_i64: - case INDEX_op_not_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i64: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 0fcf940a8a..e3018717ea 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -24,7 +24,6 @@ #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB) @@ -40,7 +39,6 @@ #define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 4e16c44aa5..5e9e14815d 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2087,6 +2087,16 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_xori(s, type, a0, a1, -1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2150,11 +2160,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_not_i32: - case INDEX_op_not_i64: - tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1); - break; - case INDEX_op_mul_i32: tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2); break; @@ -2662,7 +2667,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_not_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i64: case INDEX_op_ld16u_i64: @@ -2670,7 +2674,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_not_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 374db3cf9d..e5c132cf12 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -33,7 +33,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 1 @@ -53,7 +52,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 08e65834d7..98bf3ee19e 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2387,6 +2387,22 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_nor(s, type, a0, a1, a1); +} + +static TCGConstraintSetIndex cset_not(TCGType type, unsigned flags) +{ + return HAVE_FACILITY(MISC_INSN_EXT3) ? C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_not, + .out_rr = tgen_not, +}; + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ @@ -2444,10 +2460,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_not_i32: - tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]); - break; - case INDEX_op_mul_i32: a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; if (const_args[2]) { @@ -2635,9 +2647,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); break; - case INDEX_op_not_i64: - tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]); - break; case INDEX_op_bswap64_i64: tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); break; @@ -3331,8 +3340,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extract_i32: diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 35ae536879..df87249df2 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -19,7 +19,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_rot_i32 0 #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 @@ -40,7 +39,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index a3926ea1c3..5819dc44fe 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1413,6 +1413,16 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_orc(s, type, a0, TCG_REG_G0, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1483,10 +1493,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, c = ARITH_UMUL; goto gen_arith; - OP_32_64(not): - c = ARITH_ORN; - goto gen_arith1; - case INDEX_op_div_i32: tcg_out_div32(s, a0, a1, a2, c2, 0); break; @@ -1600,10 +1606,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_arithc(s, a0, a1, a2, c2, c); break; - gen_arith1: - tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -1646,8 +1648,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extract_i64: diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 7de13ef383..a84ed1313a 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_not_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index cb2eb9ae52..e0f8ab28b8 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -461,7 +461,8 @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) /* Some cases can be optimized here. */ if (arg2 == 0) { tcg_gen_mov_i32(ret, arg1); - } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) { + } else if (arg2 == -1 && + tcg_op_supported(INDEX_op_not_i32, TCG_TYPE_I32, 0)) { /* Don't recurse with tcg_gen_not_i32. */ tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1); } else { @@ -471,7 +472,7 @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_not_i32) { + if (tcg_op_supported(INDEX_op_not_i32, TCG_TYPE_I32, 0)) { tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); } else { tcg_gen_xori_i32(ret, arg, -1); @@ -1762,7 +1763,8 @@ void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) /* Some cases can be optimized here. */ if (arg2 == 0) { tcg_gen_mov_i64(ret, arg1); - } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) { + } else if (arg2 == -1 && + tcg_op_supported(INDEX_op_not_i64, TCG_TYPE_I64, 0)) { /* Don't recurse with tcg_gen_not_i64. */ tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1); } else { @@ -2252,7 +2254,7 @@ void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg)); tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); - } else if (TCG_TARGET_HAS_not_i64) { + } else if (tcg_op_supported(INDEX_op_not_i64, TCG_TYPE_I64, 0)) { tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg); } else { tcg_gen_xori_i64(ret, arg, -1); diff --git a/tcg/tcg.c b/tcg/tcg.c index 92d185558f..acd666d30b 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1024,6 +1024,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), + OUTOP(INDEX_op_not_i32, TCGOutOpUnary, outop_not), + OUTOP(INDEX_op_not_i64, TCGOutOpUnary, outop_not), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), @@ -2287,8 +2289,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap16_i32; case INDEX_op_bswap32_i32: return TCG_TARGET_HAS_bswap32_i32; - case INDEX_op_not_i32: - return TCG_TARGET_HAS_not_i32; case INDEX_op_clz_i32: return TCG_TARGET_HAS_clz_i32; case INDEX_op_ctz_i32: @@ -2350,8 +2350,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap32_i64; case INDEX_op_bswap64_i64: return TCG_TARGET_HAS_bswap64_i64; - case INDEX_op_not_i64: - return TCG_TARGET_HAS_not_i64; case INDEX_op_clz_i64: return TCG_TARGET_HAS_clz_i64; case INDEX_op_ctz_i64: @@ -5476,6 +5474,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_neg: + case INDEX_op_not_i32: + case INDEX_op_not_i64: { const TCGOutOpUnary *out = container_of(all_outop[op->opc], TCGOutOpUnary, base); diff --git a/tcg/tci.c b/tcg/tci.c index c736691e9f..25ad37fcd5 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -695,12 +695,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = bswap32(regs[r1]); break; #endif -#if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64 CASE_32_64(not) tci_args_rr(insn, &r0, &r1); regs[r0] = ~regs[r1]; break; -#endif #if TCG_TARGET_REG_BITS == 64 /* Load/store operations (64 bit). */ diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 13c9dc3dfa..f147da5c0e 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -15,7 +15,6 @@ #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 1 -#define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 @@ -34,7 +33,6 @@ #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 1 -#define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_muls2_i64 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index c42f9dff11..d3da498098 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -55,8 +55,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i32: @@ -739,6 +737,16 @@ static const TCGOutOpUnary outop_neg = { .out_rr = tgen_neg, }; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, glue(INDEX_op_not_i,TCG_TARGET_REG_BITS), a0, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -812,7 +820,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3])); break; - CASE_32_64(not) /* Optional (TCG_TARGET_HAS_not_*). */ CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */ case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ From 5c62d3779b8b1075782672751165c0e4f716762f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Jan 2025 23:46:47 -0800 Subject: [PATCH 038/161] tcg: Merge INDEX_op_not_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 13 ++++++------- tcg/tcg-op.c | 16 ++++++++-------- tcg/tcg.c | 6 ++---- tcg/tci.c | 11 +++++------ tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 24 insertions(+), 29 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index fb51691538..96dddc5fd3 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -315,7 +315,7 @@ Logical - | *t0* = *t1* ^ *t2* - * - not_i32/i64 *t0*, *t1* + * - not *t0*, *t1* - | *t0* = ~\ *t1* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 13b7650cec..d0fcdfd241 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -46,6 +46,7 @@ DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(nand, 1, 2, 0, TCG_OPF_INT) DEF(neg, 1, 1, 0, TCG_OPF_INT) DEF(nor, 1, 2, 0, TCG_OPF_INT) +DEF(not, 1, 1, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) @@ -95,7 +96,6 @@ DEF(setcond2_i32, 1, 4, 1, 0) DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) -DEF(not_i32, 1, 1, 0, 0) DEF(clz_i32, 1, 2, 0, 0) DEF(ctz_i32, 1, 2, 0, 0) DEF(ctpop_i32, 1, 1, 0, 0) @@ -144,7 +144,6 @@ DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(bswap16_i64, 1, 1, 1, 0) DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) -DEF(not_i64, 1, 1, 0, 0) DEF(clz_i64, 1, 2, 0, 0) DEF(ctz_i64, 1, 2, 0, 0) DEF(ctpop_i64, 1, 1, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index b4a675ea37..315ee0a8bc 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -475,7 +475,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_rotl_i64: return rol64(x, y & 63); - CASE_OP_32_64_VEC(not): + case INDEX_op_not: + case INDEX_op_not_vec: return ~x; case INDEX_op_neg: @@ -1103,12 +1104,9 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) switch (ctx->type) { case TCG_TYPE_I32: - not_op = INDEX_op_not_i32; - have_not = tcg_op_supported(INDEX_op_not_i32, TCG_TYPE_I32, 0); - break; case TCG_TYPE_I64: - not_op = INDEX_op_not_i64; - have_not = tcg_op_supported(INDEX_op_not_i64, TCG_TYPE_I64, 0); + not_op = INDEX_op_not; + have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0); break; case TCG_TYPE_V64: case TCG_TYPE_V128: @@ -2987,7 +2985,8 @@ void tcg_optimize(TCGContext *s) case INDEX_op_nor_vec: done = fold_nor(&ctx, op); break; - CASE_OP_32_64_VEC(not): + case INDEX_op_not: + case INDEX_op_not_vec: done = fold_not(&ctx, op); break; case INDEX_op_or: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index e0f8ab28b8..ddc1f465a4 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -462,9 +462,9 @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) if (arg2 == 0) { tcg_gen_mov_i32(ret, arg1); } else if (arg2 == -1 && - tcg_op_supported(INDEX_op_not_i32, TCG_TYPE_I32, 0)) { + tcg_op_supported(INDEX_op_not, TCG_TYPE_I32, 0)) { /* Don't recurse with tcg_gen_not_i32. */ - tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1); + tcg_gen_op2_i32(INDEX_op_not, ret, arg1); } else { tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2)); } @@ -472,8 +472,8 @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (tcg_op_supported(INDEX_op_not_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); + if (tcg_op_supported(INDEX_op_not, TCG_TYPE_I32, 0)) { + tcg_gen_op2_i32(INDEX_op_not, ret, arg); } else { tcg_gen_xori_i32(ret, arg, -1); } @@ -1764,9 +1764,9 @@ void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) if (arg2 == 0) { tcg_gen_mov_i64(ret, arg1); } else if (arg2 == -1 && - tcg_op_supported(INDEX_op_not_i64, TCG_TYPE_I64, 0)) { + tcg_op_supported(INDEX_op_not, TCG_TYPE_I64, 0)) { /* Don't recurse with tcg_gen_not_i64. */ - tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1); + tcg_gen_op2_i64(INDEX_op_not, ret, arg1); } else { tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2)); } @@ -2254,8 +2254,8 @@ void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg)); tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); - } else if (tcg_op_supported(INDEX_op_not_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg); + } else if (tcg_op_supported(INDEX_op_not, TCG_TYPE_I64, 0)) { + tcg_gen_op2_i64(INDEX_op_not, ret, arg); } else { tcg_gen_xori_i64(ret, arg, -1); } diff --git a/tcg/tcg.c b/tcg/tcg.c index acd666d30b..5e43a304c0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1024,8 +1024,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), - OUTOP(INDEX_op_not_i32, TCGOutOpUnary, outop_not), - OUTOP(INDEX_op_not_i64, TCGOutOpUnary, outop_not), + OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), @@ -5474,8 +5473,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_neg: - case INDEX_op_not_i32: - case INDEX_op_not_i64: + case INDEX_op_not: { const TCGOutOpUnary *out = container_of(all_outop[op->opc], TCGOutOpUnary, base); diff --git a/tcg/tci.c b/tcg/tci.c index 25ad37fcd5..96e3667ab2 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -571,6 +571,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = -regs[r1]; break; + case INDEX_op_not: + tci_args_rr(insn, &r0, &r1); + regs[r0] = ~regs[r1]; + break; /* Arithmetic operations (32 bit). */ @@ -695,10 +699,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = bswap32(regs[r1]); break; #endif - CASE_32_64(not) - tci_args_rr(insn, &r0, &r1); - regs[r0] = ~regs[r1]; - break; #if TCG_TARGET_REG_BITS == 64 /* Load/store operations (64 bit). */ @@ -1053,6 +1053,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_mov: case INDEX_op_neg: + case INDEX_op_not: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i32: @@ -1060,8 +1061,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: case INDEX_op_ctpop_i32: case INDEX_op_ctpop_i64: tci_args_rr(insn, &r0, &r1); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d3da498098..a1f9a3a2f0 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -739,7 +739,7 @@ static const TCGOutOpUnary outop_neg = { static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { - tcg_out_op_rr(s, glue(INDEX_op_not_i,TCG_TARGET_REG_BITS), a0, a1); + tcg_out_op_rr(s, INDEX_op_not, a0, a1); } static const TCGOutOpUnary outop_not = { From 1e4ac0ea5dc86c54e97bbc4db9638f2febdfde8b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 09:15:09 -0800 Subject: [PATCH 039/161] tcg: Convert mul to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 18 ++++--- tcg/arm/tcg-target.c.inc | 23 ++++---- tcg/i386/tcg-target.c.inc | 47 +++++++++------- tcg/loongarch64/tcg-target.c.inc | 24 +++++---- tcg/mips/tcg-target.c.inc | 43 +++++++++------ tcg/ppc/tcg-target.c.inc | 42 +++++++-------- tcg/riscv/tcg-target.c.inc | 21 ++++---- tcg/s390x/tcg-target.c.inc | 92 ++++++++++++++++++-------------- tcg/sparc64/tcg-target.c.inc | 28 +++++++--- tcg/tcg.c | 6 ++- tcg/tci/tcg-target.c.inc | 14 +++-- 11 files changed, 210 insertions(+), 148 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 97b444bc17..4513140f58 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2168,6 +2168,17 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3509, MADD, type, a0, a1, a2, TCG_REG_XZR); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -2322,11 +2333,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_mul_i64: - case INDEX_op_mul_i32: - tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); - break; - case INDEX_op_div_i64: case INDEX_op_div_i32: tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); @@ -3029,8 +3035,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: case INDEX_op_div_i32: case INDEX_op_div_i64: case INDEX_op_divu_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 2477b1c4ab..93e5c70ae3 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -921,13 +921,6 @@ static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc, } } -static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd, - TCGReg rn, TCGReg rm) -{ - /* mul */ - tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); -} - static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0, TCGReg rd1, TCGReg rn, TCGReg rm) { @@ -1885,6 +1878,18 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* mul */ + tcg_out32(s, (COND_AL << 28) | 0x90 | (a0 << 16) | (a1 << 8) | a2); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -2060,9 +2065,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } tcg_out_mov_reg(s, COND_AL, args[0], a0); break; - case INDEX_op_mul_i32: - tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]); - break; case INDEX_op_mulu2_i32: tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; @@ -2258,7 +2260,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ctz_i32: return C_O1_I2(r, r, rIK); - case INDEX_op_mul_i32: case INDEX_op_div_i32: case INDEX_op_divu_i32: return C_O1_I2(r, r, r); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 24fef09c9e..4abe89d06e 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2637,6 +2637,33 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2); +} + +static void tgen_muli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + if (a2 == (int8_t)a2) { + tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0); + tcg_out8(s, a2); + } else { + tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0); + tcg_out32(s, a2); + } +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_mul, + .out_rri = tgen_muli, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -2804,22 +2831,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(mul): - if (const_a2) { - int32_t val; - val = a2; - if (val == (int8_t)val) { - tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0); - tcg_out8(s, val); - } else { - tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0); - tcg_out32(s, val); - } - } else { - tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2); - } - break; - OP_32_64(div2): tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); break; @@ -3707,10 +3718,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - return C_O1_I2(r, 0, re); - case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index c24882b6ba..448896ac0d 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1332,6 +1332,21 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_mul_w(s, a0, a1, a2); + } else { + tcg_out_opc_mul_d(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -1626,13 +1641,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_mul_i32: - tcg_out_opc_mul_w(s, a0, a1, a2); - break; - case INDEX_op_mul_i64: - tcg_out_opc_mul_d(s, a0, a1, a2); - break; - case INDEX_op_mulsh_i32: tcg_out_opc_mulh_w(s, a0, a1, a2); break; @@ -2333,8 +2341,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rJ); - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: case INDEX_op_mulsh_i32: case INDEX_op_mulsh_i64: case INDEX_op_muluh_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 4942855189..95c2645226 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1716,6 +1716,33 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + MIPSInsn insn; + + if (type == TCG_TYPE_I32) { + if (use_mips32_instructions) { + tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); + return; + } + insn = OPC_MULT; + } else { + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DMUL, a0, a1, a2); + return; + } + insn = OPC_DMULT; + } + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -1876,13 +1903,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_mul_i32: - if (use_mips32_instructions) { - tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); - break; - } - i1 = OPC_MULT, i2 = OPC_MFLO; - goto do_hilo1; case INDEX_op_mulsh_i32: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); @@ -1925,13 +1945,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } i1 = OPC_DIVU, i2 = OPC_MFHI; goto do_hilo1; - case INDEX_op_mul_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DMUL, a0, a1, a2); - break; - } - i1 = OPC_DMULT, i2 = OPC_MFLO; - goto do_hilo1; case INDEX_op_mulsh_i64: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DMUH, a0, a1, a2); @@ -2232,7 +2245,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_mul_i32: case INDEX_op_mulsh_i32: case INDEX_op_muluh_i32: case INDEX_op_div_i32: @@ -2240,7 +2252,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i32: case INDEX_op_remu_i32: case INDEX_op_setcond_i32: - case INDEX_op_mul_i64: case INDEX_op_mulsh_i64: case INDEX_op_muluh_i64: case INDEX_op_div_i64: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 38cbe5223b..a7cc9d0bc7 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2965,6 +2965,25 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static void tgen_muli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out32(s, MULLI | TAI(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_mul, + .out_rri = tgen_muli, +}; + static void tgen_nand(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3077,7 +3096,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0, a1, a2; + TCGArg a0, a1; switch (opc) { case INDEX_op_goto_ptr: @@ -3166,15 +3185,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; - case INDEX_op_mul_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLW | TAB(a0, a1, a2)); - } - break; - case INDEX_op_div_i32: tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); break; @@ -3283,14 +3293,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_mul_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLD | TAB(a0, a1, a2)); - } - break; case INDEX_op_div_i64: tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); break; @@ -4171,10 +4173,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - return C_O1_I2(r, r, rI); - case INDEX_op_div_i32: case INDEX_op_divu_i32: case INDEX_op_rem_i32: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 5e9e14815d..ff685037d7 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2009,6 +2009,18 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_MULW : OPC_MUL; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -2160,13 +2172,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_mul_i32: - tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2); - break; - case INDEX_op_mul_i64: - tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); - break; - case INDEX_op_div_i32: tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2); break; @@ -2706,14 +2711,12 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_mul_i32: case INDEX_op_mulsh_i32: case INDEX_op_muluh_i32: case INDEX_op_div_i32: case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_mul_i64: case INDEX_op_mulsh_i64: case INDEX_op_muluh_i64: case INDEX_op_div_i64: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 98bf3ee19e..1ba9741fdd 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2258,6 +2258,57 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + if (a0 == a1) { + tcg_out_insn(s, RRE, MSR, a0, a2); + } else { + tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2); + } + } else { + if (a0 == a1) { + tcg_out_insn(s, RRE, MSGR, a0, a2); + } else { + tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2); + } + } +} + +static void tgen_muli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + if (type == TCG_TYPE_I32) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, MHI, a0, a2); + } else { + tcg_out_insn(s, RIL, MSFI, a0, a2); + } + } else { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, MGHI, a0, a2); + } else { + tcg_out_insn(s, RIL, MSGFI, a0, a2); + } + } +} + +static TCGConstraintSetIndex cset_mul(TCGType type, unsigned flags) +{ + return (HAVE_FACILITY(MISC_INSN_EXT2) + ? C_O1_I2(r, r, rJ) + : C_O1_I2(r, 0, rJ)); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul, + .out_rrr = tgen_mul, + .out_rri = tgen_muli, +}; + static void tgen_nand(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2460,22 +2511,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_mul_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, MHI, a0, a2); - } else { - tcg_out_insn(s, RIL, MSFI, a0, a2); - } - } else if (a0 == a1) { - tcg_out_insn(s, RRE, MSR, a0, a2); - } else { - tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2); - } - break; - case INDEX_op_div2_i32: tcg_debug_assert(args[0] == args[2]); tcg_debug_assert(args[1] == args[3]); @@ -2651,22 +2686,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); break; - case INDEX_op_mul_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, MGHI, a0, a2); - } else { - tcg_out_insn(s, RIL, MSGFI, a0, a2); - } - } else if (a0 == a1) { - tcg_out_insn(s, RRE, MSGR, a0, a2); - } else { - tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2); - } - break; - case INDEX_op_div2_i64: /* * ??? We get an unnecessary sign-extension of the dividend @@ -3316,15 +3335,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_clz_i64: return C_O1_I2(r, r, rI); - case INDEX_op_mul_i32: - return (HAVE_FACILITY(MISC_INSN_EXT2) - ? C_O1_I2(r, r, ri) - : C_O1_I2(r, 0, ri)); - case INDEX_op_mul_i64: - return (HAVE_FACILITY(MISC_INSN_EXT2) - ? C_O1_I2(r, r, rJ) - : C_O1_I2(r, 0, rJ)); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 5819dc44fe..0a13a91166 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1337,6 +1337,26 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? ARITH_UMUL : ARITH_MULX; + tcg_out_arith(s, a0, a1, a2, insn); +} + +static void tgen_muli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? ARITH_UMUL : ARITH_MULX; + tcg_out_arithi(s, a0, a1, a2, insn); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_mul, + .out_rri = tgen_muli, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -1489,9 +1509,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_sar_i32: c = SHIFT_SRA; goto do_shift32; - case INDEX_op_mul_i32: - c = ARITH_UMUL; - goto gen_arith; case INDEX_op_div_i32: tcg_out_div32(s, a0, a1, a2, c2, 0); @@ -1568,9 +1585,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_sar_i64: c = SHIFT_SRAX; goto do_shift64; - case INDEX_op_mul_i64: - c = ARITH_MULX; - goto gen_arith; case INDEX_op_div_i64: c = ARITH_SDIVX; goto gen_arith; @@ -1667,8 +1681,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: case INDEX_op_div_i32: case INDEX_op_div_i64: case INDEX_op_divu_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index 5e43a304c0..4d221cea6f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1021,6 +1021,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), + OUTOP(INDEX_op_mul_i32, TCGOutOpBinary, outop_mul), + OUTOP(INDEX_op_mul_i64, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), @@ -2247,7 +2249,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_mul_i32: case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: @@ -2313,7 +2314,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st16_i64: case INDEX_op_st32_i64: case INDEX_op_st_i64: - case INDEX_op_mul_i64: case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: @@ -5436,6 +5436,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_eqv: + case INDEX_op_mul_i32: + case INDEX_op_mul_i64: case INDEX_op_nand: case INDEX_op_nor: case INDEX_op_or: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index a1f9a3a2f0..ce17079ffc 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -87,8 +87,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: @@ -661,6 +659,17 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_mul_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + static void tgen_nand(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -789,7 +798,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(mul) CASE_32_64(shl) CASE_32_64(shr) CASE_32_64(sar) From d2c3ecadea89832ab82566e881bc3a288b020473 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 09:32:18 -0800 Subject: [PATCH 040/161] tcg: Merge INDEX_op_mul_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 4 ++-- tcg/tcg-op.c | 12 ++++++------ tcg/tcg.c | 14 ++++++-------- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 19 insertions(+), 23 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 96dddc5fd3..6c36e72242 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -273,7 +273,7 @@ Arithmetic - | *t0* = -*t1* (two's complement) - * - mul_i32/i64 *t0*, *t1*, *t2* + * - mul *t0*, *t1*, *t2* - | *t0* = *t1* * *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index d0fcdfd241..4ecba62fda 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -43,6 +43,7 @@ DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) +DEF(mul, 1, 2, 0, TCG_OPF_INT) DEF(nand, 1, 2, 0, TCG_OPF_INT) DEF(neg, 1, 1, 0, TCG_OPF_INT) DEF(nor, 1, 2, 0, TCG_OPF_INT) @@ -65,7 +66,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* arith */ -DEF(mul_i32, 1, 2, 0, 0) DEF(div_i32, 1, 2, 0, 0) DEF(divu_i32, 1, 2, 0, 0) DEF(rem_i32, 1, 2, 0, 0) @@ -116,7 +116,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* arith */ -DEF(mul_i64, 1, 2, 0, 0) DEF(div_i64, 1, 2, 0, 0) DEF(divu_i64, 1, 2, 0, 0) DEF(rem_i64, 1, 2, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 315ee0a8bc..653246f3d2 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -430,7 +430,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_sub: return x - y; - CASE_OP_32_64(mul): + case INDEX_op_mul: return x * y; case INDEX_op_and: @@ -2963,7 +2963,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(movcond): done = fold_movcond(&ctx, op); break; - CASE_OP_32_64(mul): + case INDEX_op_mul: done = fold_mul(&ctx, op); break; CASE_OP_32_64(mulsh): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index ddc1f465a4..76d5b67fba 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -585,7 +585,7 @@ void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret, void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mul, ret, arg1, arg2); } void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -1134,7 +1134,7 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); } else if (TCG_TARGET_HAS_muluh_i32) { TCGv_i32 t = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); @@ -1158,7 +1158,7 @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); } else if (TCG_TARGET_HAS_mulsh_i32) { TCGv_i32 t = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); @@ -1636,7 +1636,7 @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) TCGv_i32 t1; if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mul, ret, arg1, arg2); return; } @@ -2844,7 +2844,7 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); } else if (TCG_TARGET_HAS_muluh_i64) { TCGv_i64 t = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); @@ -2863,7 +2863,7 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); } else if (TCG_TARGET_HAS_mulsh_i64) { TCGv_i64 t = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); diff --git a/tcg/tcg.c b/tcg/tcg.c index 4d221cea6f..d16ed332c8 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1021,8 +1021,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), - OUTOP(INDEX_op_mul_i32, TCGOutOpBinary, outop_mul), - OUTOP(INDEX_op_mul_i64, TCGOutOpBinary, outop_mul), + OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), @@ -4035,22 +4034,22 @@ liveness_pass_1(TCGContext *s) goto do_not_remove; case INDEX_op_mulu2_i32: - opc_new = INDEX_op_mul_i32; + opc_new = INDEX_op_mul; opc_new2 = INDEX_op_muluh_i32; have_opc_new2 = TCG_TARGET_HAS_muluh_i32; goto do_mul2; case INDEX_op_muls2_i32: - opc_new = INDEX_op_mul_i32; + opc_new = INDEX_op_mul; opc_new2 = INDEX_op_mulsh_i32; have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; goto do_mul2; case INDEX_op_mulu2_i64: - opc_new = INDEX_op_mul_i64; + opc_new = INDEX_op_mul; opc_new2 = INDEX_op_muluh_i64; have_opc_new2 = TCG_TARGET_HAS_muluh_i64; goto do_mul2; case INDEX_op_muls2_i64: - opc_new = INDEX_op_mul_i64; + opc_new = INDEX_op_mul; opc_new2 = INDEX_op_mulsh_i64; have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; goto do_mul2; @@ -5436,8 +5435,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_eqv: - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: + case INDEX_op_mul: case INDEX_op_nand: case INDEX_op_nor: case INDEX_op_or: diff --git a/tcg/tci.c b/tcg/tci.c index 96e3667ab2..61c0ccf21e 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -531,7 +531,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] - regs[r2]; break; - CASE_32_64(mul) + case INDEX_op_mul: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] * regs[r2]; break; @@ -1072,14 +1072,13 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_eqv: + case INDEX_op_mul: case INDEX_op_nand: case INDEX_op_nor: case INDEX_op_or: case INDEX_op_orc: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: case INDEX_op_div_i32: case INDEX_op_div_i64: case INDEX_op_rem_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index ce17079ffc..ffc8654427 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -662,7 +662,7 @@ static const TCGOutOpBinary outop_eqv = { static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_mul_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_mul, a0, a1, a2); } static const TCGOutOpBinary outop_mul = { From 937246f2ee87d01062bac7c356c1766b8c5038a8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 10:16:03 -0800 Subject: [PATCH 041/161] tcg: Convert muluh to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unreachable mul[su]h_i32 leftovers from commit aeb6326ec5e ("tcg/riscv: Require TCG_TARGET_REG_BITS == 64"). Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 21 ++++++++++++++++---- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 4 ++++ tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 4 ++++ tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 24 +++++++++++++--------- tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 34 +++++++++++++++++--------------- tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 20 +++++++++++-------- tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 24 +++++++++++++++------- tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 4 ++++ tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 23 ++++++++++++++++----- tcg/tcg-has.h | 1 - tcg/tcg-op.c | 7 ++++--- tcg/tcg.c | 16 ++++++--------- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 4 ++++ 23 files changed, 123 insertions(+), 82 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 7f18727686..207a85ed61 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -27,7 +27,6 @@ #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -47,7 +46,6 @@ #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 /* diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 4513140f58..bd0b7938c8 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2179,6 +2179,23 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_I64 ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulh, + .out_rrr = tgen_muluh, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -2526,9 +2543,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, args[5], const_args[4], const_args[5], true); break; - case INDEX_op_muluh_i64: - tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); - break; case INDEX_op_mulsh_i64: tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); break; @@ -3043,7 +3057,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: - case INDEX_op_muluh_i64: case INDEX_op_mulsh_i64: return C_O1_I2(r, r, r); diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index e766c6d628..d6b06e96bf 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -34,7 +34,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 93e5c70ae3..b08e23d0dc 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1890,6 +1890,10 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 3d36fe58f2..f4487ac1fc 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -39,7 +39,6 @@ #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -59,7 +58,6 @@ #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 1 -#define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #else diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 4abe89d06e..5fdca05c9d 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2664,6 +2664,10 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index ffacb41e80..53335b2cdb 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 @@ -47,7 +46,6 @@ #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 448896ac0d..738fdd1a9e 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1347,6 +1347,21 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_mulh_wu(s, a0, a1, a2); + } else { + tcg_out_opc_mulh_du(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_muluh, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -1648,13 +1663,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_mulh_d(s, a0, a1, a2); break; - case INDEX_op_muluh_i32: - tcg_out_opc_mulh_wu(s, a0, a1, a2); - break; - case INDEX_op_muluh_i64: - tcg_out_opc_mulh_du(s, a0, a1, a2); - break; - case INDEX_op_div_i32: tcg_out_opc_div_w(s, a0, a1, a2); break; @@ -2343,8 +2351,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_mulsh_i32: case INDEX_op_mulsh_i64: - case INDEX_op_muluh_i32: - case INDEX_op_muluh_i64: case INDEX_op_div_i32: case INDEX_op_div_i64: case INDEX_op_divu_i32: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 9d8e0fb8df..b559ab3846 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -43,7 +43,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 @@ -59,7 +58,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 95c2645226..24f8184c33 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1743,6 +1743,24 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MUHU : OPC_DMUHU; + tcg_out_opc_reg(s, insn, a0, a1, a2); + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MULTU : OPC_DMULTU; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_muluh, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -1910,13 +1928,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } i1 = OPC_MULT, i2 = OPC_MFHI; goto do_hilo1; - case INDEX_op_muluh_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2); - break; - } - i1 = OPC_MULTU, i2 = OPC_MFHI; - goto do_hilo1; case INDEX_op_div_i32: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); @@ -1952,13 +1963,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } i1 = OPC_DMULT, i2 = OPC_MFHI; goto do_hilo1; - case INDEX_op_muluh_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DMUHU, a0, a1, a2); - break; - } - i1 = OPC_DMULTU, i2 = OPC_MFHI; - goto do_hilo1; case INDEX_op_div_i64: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DDIV_R6, a0, a1, a2); @@ -2246,14 +2250,12 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O0_I2(rz, r); case INDEX_op_mulsh_i32: - case INDEX_op_muluh_i32: case INDEX_op_div_i32: case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: case INDEX_op_setcond_i32: case INDEX_op_mulsh_i64: - case INDEX_op_muluh_i64: case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 7ebcb49a19..18ec573f7e 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -29,7 +29,6 @@ #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -52,7 +51,6 @@ #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 #endif diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index a7cc9d0bc7..06a7abf2ba 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2984,6 +2984,18 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? MULHWU : MULHDU; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_muluh, +}; + static void tgen_nand(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3487,15 +3499,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_muluh_i32: - tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); - break; case INDEX_op_mulsh_i32: tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); break; - case INDEX_op_muluh_i64: - tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); - break; case INDEX_op_mulsh_i64: tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); break; @@ -4177,14 +4183,12 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_muluh_i32: case INDEX_op_mulsh_i32: case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: case INDEX_op_mulsh_i64: - case INDEX_op_muluh_i64: return C_O1_I2(r, r, r); case INDEX_op_clz_i32: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index e3018717ea..453942a6a5 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) @@ -46,7 +45,6 @@ #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index ff685037d7..65246cc450 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2021,6 +2021,23 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_I32 ? C_NotImplemented : C_O1_I2(r, r, r); +} + +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2); +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulh, + .out_rrr = tgen_muluh, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -2379,11 +2396,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2); break; - case INDEX_op_muluh_i32: - case INDEX_op_muluh_i64: - tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -2712,13 +2724,11 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O1_I2(r, r, rI); case INDEX_op_mulsh_i32: - case INDEX_op_muluh_i32: case INDEX_op_div_i32: case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: case INDEX_op_mulsh_i64: - case INDEX_op_muluh_i64: case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index e5c132cf12..ac808e21e5 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -42,7 +42,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -61,7 +60,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2) -#define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 1ba9741fdd..e3d70ca236 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2309,6 +2309,10 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_nand(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index df87249df2..093de87a1d 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -28,7 +28,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -48,7 +47,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions #define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 0a13a91166..31bdaecafa 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1357,6 +1357,24 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_UMULXHI); +} + +static TCGConstraintSetIndex cset_muluh(TCGType type, unsigned flags) +{ + return (type == TCG_TYPE_I64 && use_vis3_instructions + ? C_O1_I2(r, r, r) : C_NotImplemented); +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_muluh, + .out_rrr = tgen_muluh, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -1612,9 +1630,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], const_args[4], args[5], const_args[5], true); break; - case INDEX_op_muluh_i64: - tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI); - break; gen_arith: tcg_out_arithc(s, a0, a1, a2, c2, c); @@ -1711,8 +1726,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_mulu2_i32: case INDEX_op_muls2_i32: return C_O2_I2(r, r, rz, rJ); - case INDEX_op_muluh_i64: - return C_O1_I2(r, r, r); default: return C_NotImplemented; diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index a84ed1313a..7f3ef73f2e 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -28,7 +28,6 @@ #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 /* Turn some undef macros into true macros. */ #define TCG_TARGET_HAS_add2_i32 1 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 76d5b67fba..39581465f2 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1132,7 +1132,7 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_mulu2_i32) { tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_muluh_i32) { + } else if (tcg_op_supported(INDEX_op_muluh_i32, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); @@ -2842,7 +2842,7 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_mulu2_i64) { tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_muluh_i64) { + } else if (tcg_op_supported(INDEX_op_muluh_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); @@ -2867,7 +2867,8 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); - } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) { + } else if (TCG_TARGET_HAS_mulu2_i64 || + tcg_op_supported(INDEX_op_muluh_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index d16ed332c8..cd85967229 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1022,6 +1022,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), + OUTOP(INDEX_op_muluh_i32, TCGOutOpBinary, outop_muluh), + OUTOP(INDEX_op_muluh_i64, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), @@ -2280,8 +2282,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_mulu2_i32; case INDEX_op_muls2_i32: return TCG_TARGET_HAS_muls2_i32; - case INDEX_op_muluh_i32: - return TCG_TARGET_HAS_muluh_i32; case INDEX_op_mulsh_i32: return TCG_TARGET_HAS_mulsh_i32; case INDEX_op_bswap16_i32: @@ -2362,8 +2362,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_mulu2_i64; case INDEX_op_muls2_i64: return TCG_TARGET_HAS_muls2_i64; - case INDEX_op_muluh_i64: - return TCG_TARGET_HAS_muluh_i64; case INDEX_op_mulsh_i64: return TCG_TARGET_HAS_mulsh_i64; @@ -3876,7 +3874,6 @@ liveness_pass_1(TCGContext *s) QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { int nb_iargs, nb_oargs; TCGOpcode opc_new, opc_new2; - bool have_opc_new2; TCGLifeData arg_life = 0; TCGTemp *ts; TCGOpcode opc = op->opc; @@ -4036,22 +4033,18 @@ liveness_pass_1(TCGContext *s) case INDEX_op_mulu2_i32: opc_new = INDEX_op_mul; opc_new2 = INDEX_op_muluh_i32; - have_opc_new2 = TCG_TARGET_HAS_muluh_i32; goto do_mul2; case INDEX_op_muls2_i32: opc_new = INDEX_op_mul; opc_new2 = INDEX_op_mulsh_i32; - have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; goto do_mul2; case INDEX_op_mulu2_i64: opc_new = INDEX_op_mul; opc_new2 = INDEX_op_muluh_i64; - have_opc_new2 = TCG_TARGET_HAS_muluh_i64; goto do_mul2; case INDEX_op_muls2_i64: opc_new = INDEX_op_mul; opc_new2 = INDEX_op_mulsh_i64; - have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; goto do_mul2; do_mul2: nb_iargs = 2; @@ -4065,7 +4058,8 @@ liveness_pass_1(TCGContext *s) op->opc = opc = opc_new; op->args[1] = op->args[2]; op->args[2] = op->args[3]; - } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { + } else if (arg_temp(op->args[0])->state == TS_DEAD && + tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { /* The low part of the operation is dead; generate the high. */ op->opc = opc = opc_new2; op->args[0] = op->args[1]; @@ -5436,6 +5430,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_andc: case INDEX_op_eqv: case INDEX_op_mul: + case INDEX_op_muluh_i32: + case INDEX_op_muluh_i64: case INDEX_op_nand: case INDEX_op_nor: case INDEX_op_or: diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index f147da5c0e..b99b12c24c 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -18,7 +18,6 @@ #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -42,7 +41,6 @@ #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 #else #define TCG_TARGET_HAS_mulu2_i32 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index ffc8654427..e4a2b171df 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -670,6 +670,10 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_nand(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { From aa28c9ef8e109db40d4781d82452805486f2a2bf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 10:36:24 -0800 Subject: [PATCH 042/161] tcg: Merge INDEX_op_muluh_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 19 +++++++++++-------- tcg/tcg-op.c | 10 +++++----- tcg/tcg.c | 13 ++++--------- 5 files changed, 22 insertions(+), 25 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 6c36e72242..4fed5a77c6 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -591,7 +591,7 @@ Multiword arithmetic support * - mulsh_i32/i64 *t0*, *t1*, *t2* - muluh_i32/i64 *t0*, *t1*, *t2* + muluh *t0*, *t1*, *t2* - | Provide the high part of a signed or unsigned multiply, respectively. | diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 4ecba62fda..28a5128537 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -44,6 +44,7 @@ DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) +DEF(muluh, 1, 2, 0, TCG_OPF_INT) DEF(nand, 1, 2, 0, TCG_OPF_INT) DEF(neg, 1, 1, 0, TCG_OPF_INT) DEF(nor, 1, 2, 0, TCG_OPF_INT) @@ -89,7 +90,6 @@ DEF(add2_i32, 2, 4, 0, 0) DEF(sub2_i32, 2, 4, 0, 0) DEF(mulu2_i32, 2, 2, 0, 0) DEF(muls2_i32, 2, 2, 0, 0) -DEF(muluh_i32, 1, 2, 0, 0) DEF(mulsh_i32, 1, 2, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) @@ -151,7 +151,6 @@ DEF(add2_i64, 2, 4, 0, 0) DEF(sub2_i64, 2, 4, 0, 0) DEF(mulu2_i64, 2, 2, 0, 0) DEF(muls2_i64, 2, 2, 0, 0) -DEF(muluh_i64, 1, 2, 0, 0) DEF(mulsh_i64, 1, 2, 0, 0) #define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) diff --git a/tcg/optimize.c b/tcg/optimize.c index 653246f3d2..e19bccf906 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -419,7 +419,8 @@ static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); } -static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) +static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, + uint64_t x, uint64_t y) { uint64_t l64, h64; @@ -541,14 +542,16 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) case INDEX_op_extrh_i64_i32: return (uint64_t)x >> 32; - case INDEX_op_muluh_i32: - return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; + case INDEX_op_muluh: + if (type == TCG_TYPE_I32) { + return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; + } + mulu64(&l64, &h64, x, y); + return h64; + case INDEX_op_mulsh_i32: return ((int64_t)(int32_t)x * (int32_t)y) >> 32; - case INDEX_op_muluh_i64: - mulu64(&l64, &h64, x, y); - return h64; case INDEX_op_mulsh_i64: muls64(&l64, &h64, x, y); return h64; @@ -580,7 +583,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) static uint64_t do_constant_folding(TCGOpcode op, TCGType type, uint64_t x, uint64_t y) { - uint64_t res = do_constant_folding_2(op, x, y); + uint64_t res = do_constant_folding_2(op, type, x, y); if (type == TCG_TYPE_I32) { res = (int32_t)res; } @@ -2967,7 +2970,7 @@ void tcg_optimize(TCGContext *s) done = fold_mul(&ctx, op); break; CASE_OP_32_64(mulsh): - CASE_OP_32_64(muluh): + case INDEX_op_muluh: done = fold_mul_highpart(&ctx, op); break; CASE_OP_32_64(muls2): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 39581465f2..7a37b21c56 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1132,10 +1132,10 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_mulu2_i32) { tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_muluh_i32, TCG_TYPE_I32, 0)) { + } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); - tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_muluh, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); } else if (TCG_TARGET_REG_BITS == 64) { @@ -2842,10 +2842,10 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_mulu2_i64) { tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_muluh_i64, TCG_TYPE_I64, 0)) { + } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); - tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_muluh, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); } else { @@ -2868,7 +2868,7 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); } else if (TCG_TARGET_HAS_mulu2_i64 || - tcg_op_supported(INDEX_op_muluh_i64, TCG_TYPE_I64, 0)) { + tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index cd85967229..808ac8c431 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1022,8 +1022,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), - OUTOP(INDEX_op_muluh_i32, TCGOutOpBinary, outop_muluh), - OUTOP(INDEX_op_muluh_i64, TCGOutOpBinary, outop_muluh), + OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), @@ -4030,17 +4029,14 @@ liveness_pass_1(TCGContext *s) } goto do_not_remove; - case INDEX_op_mulu2_i32: - opc_new = INDEX_op_mul; - opc_new2 = INDEX_op_muluh_i32; - goto do_mul2; case INDEX_op_muls2_i32: opc_new = INDEX_op_mul; opc_new2 = INDEX_op_mulsh_i32; goto do_mul2; + case INDEX_op_mulu2_i32: case INDEX_op_mulu2_i64: opc_new = INDEX_op_mul; - opc_new2 = INDEX_op_muluh_i64; + opc_new2 = INDEX_op_muluh; goto do_mul2; case INDEX_op_muls2_i64: opc_new = INDEX_op_mul; @@ -5430,8 +5426,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_andc: case INDEX_op_eqv: case INDEX_op_mul: - case INDEX_op_muluh_i32: - case INDEX_op_muluh_i64: + case INDEX_op_muluh: case INDEX_op_nand: case INDEX_op_nor: case INDEX_op_or: From a9983f81290d41ed614a193a33d03be936f6435c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 11:13:05 -0800 Subject: [PATCH 043/161] tcg: Convert mulsh to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 17 +++++++++++----- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 4 ++++ tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 4 ++++ tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 24 +++++++++++++--------- tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 34 +++++++++++++++++--------------- tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 21 +++++++++++--------- tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 19 +++++++++++------- tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 4 ++++ tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 4 ++++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 8 ++++---- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 4 ++++ 23 files changed, 95 insertions(+), 72 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 207a85ed61..bde6db8f2a 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -27,7 +27,6 @@ #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -46,7 +45,6 @@ #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 1 /* * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load, diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index bd0b7938c8..493c504682 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2184,6 +2184,18 @@ static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) return type == TCG_TYPE_I64 ? C_O1_I2(r, r, r) : C_NotImplemented; } +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); +} + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulh, + .out_rrr = tgen_mulsh, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2543,10 +2555,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, args[5], const_args[4], const_args[5], true); break; - case INDEX_op_mulsh_i64: - tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -3057,7 +3065,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: - case INDEX_op_mulsh_i64: return C_O1_I2(r, r, r); case INDEX_op_shl_i32: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index d6b06e96bf..ab9b7b6162 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -34,7 +34,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index b08e23d0dc..1c19004e6e 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1890,6 +1890,10 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_muluh = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index f4487ac1fc..121fb95ee0 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -39,7 +39,6 @@ #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ @@ -58,7 +57,6 @@ #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 1 -#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #else #define TCG_TARGET_HAS_qemu_st8_i32 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 5fdca05c9d..d0391157a4 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2664,6 +2664,10 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_muluh = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 53335b2cdb..e29c892756 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_clz_i32 1 @@ -46,7 +45,6 @@ #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 738fdd1a9e..7503270ca3 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1347,6 +1347,21 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_mulh_w(s, a0, a1, a2); + } else { + tcg_out_opc_mulh_d(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mulsh, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1656,13 +1671,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_mulsh_i32: - tcg_out_opc_mulh_w(s, a0, a1, a2); - break; - case INDEX_op_mulsh_i64: - tcg_out_opc_mulh_d(s, a0, a1, a2); - break; - case INDEX_op_div_i32: tcg_out_opc_div_w(s, a0, a1, a2); break; @@ -2349,8 +2357,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rJ); - case INDEX_op_mulsh_i32: - case INDEX_op_mulsh_i64: case INDEX_op_div_i32: case INDEX_op_div_i64: case INDEX_op_divu_i32: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index b559ab3846..ebaaa49cdd 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -43,7 +43,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 @@ -58,7 +57,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 24f8184c33..a1c215c25d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1743,6 +1743,24 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MUH : OPC_DMUH; + tcg_out_opc_reg(s, insn, a0, a1, a2); + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MULT : OPC_DMULT; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mulsh, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1921,13 +1939,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_mulsh_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); - break; - } - i1 = OPC_MULT, i2 = OPC_MFHI; - goto do_hilo1; case INDEX_op_div_i32: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); @@ -1956,13 +1967,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } i1 = OPC_DIVU, i2 = OPC_MFHI; goto do_hilo1; - case INDEX_op_mulsh_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DMUH, a0, a1, a2); - break; - } - i1 = OPC_DMULT, i2 = OPC_MFHI; - goto do_hilo1; case INDEX_op_div_i64: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DDIV_R6, a0, a1, a2); @@ -2249,13 +2253,11 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_mulsh_i32: case INDEX_op_div_i32: case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: case INDEX_op_setcond_i32: - case INDEX_op_mulsh_i64: case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 18ec573f7e..bbbd8de2c7 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -29,7 +29,6 @@ #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -51,7 +50,6 @@ #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 1 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 \ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 06a7abf2ba..7ebadf396a 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2984,6 +2984,18 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? MULHW : MULHD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mulsh, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3499,13 +3511,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_mulsh_i32: - tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_mulsh_i64: - tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_mb: tcg_out_mb(s, args[0]); break; @@ -4183,12 +4188,10 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_mulsh_i32: case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: - case INDEX_op_mulsh_i64: return C_O1_I2(r, r, r); case INDEX_op_clz_i32: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 453942a6a5..f7e1ef82fc 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB) @@ -45,7 +44,6 @@ #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 65246cc450..82f76b8e0c 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2026,6 +2026,18 @@ static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) return type == TCG_TYPE_I32 ? C_NotImplemented : C_O1_I2(r, r, r); } +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2); +} + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulh, + .out_rrr = tgen_mulsh, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2391,11 +2403,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32); break; - case INDEX_op_mulsh_i32: - case INDEX_op_mulsh_i64: - tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -2723,12 +2730,10 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_mulsh_i32: case INDEX_op_div_i32: case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_mulsh_i64: case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index ac808e21e5..64f1805641 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -42,7 +42,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -60,7 +59,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2) -#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index e3d70ca236..2685e6ffa1 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2309,6 +2309,10 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_muluh = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 093de87a1d..5a517b6835 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -28,7 +28,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 @@ -47,7 +46,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 31bdaecafa..95a138ef56 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1357,6 +1357,10 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 7f3ef73f2e..3d4c67698f 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -28,7 +28,6 @@ #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 0 /* Turn some undef macros into true macros. */ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 7a37b21c56..a043c4554b 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1156,7 +1156,7 @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_muls2_i32) { tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_mulsh_i32) { + } else if (tcg_op_supported(INDEX_op_mulsh_i32, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); @@ -2861,7 +2861,7 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_muls2_i64) { tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_mulsh_i64) { + } else if (tcg_op_supported(INDEX_op_mulsh_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 808ac8c431..4bfda0a38f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1022,6 +1022,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), + OUTOP(INDEX_op_mulsh_i32, TCGOutOpBinary, outop_mulsh), + OUTOP(INDEX_op_mulsh_i64, TCGOutOpBinary, outop_mulsh), OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), @@ -2281,8 +2283,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_mulu2_i32; case INDEX_op_muls2_i32: return TCG_TARGET_HAS_muls2_i32; - case INDEX_op_mulsh_i32: - return TCG_TARGET_HAS_mulsh_i32; case INDEX_op_bswap16_i32: return TCG_TARGET_HAS_bswap16_i32; case INDEX_op_bswap32_i32: @@ -2361,8 +2361,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_mulu2_i64; case INDEX_op_muls2_i64: return TCG_TARGET_HAS_muls2_i64; - case INDEX_op_mulsh_i64: - return TCG_TARGET_HAS_mulsh_i64; case INDEX_op_mov_vec: case INDEX_op_dup_vec: @@ -5426,6 +5424,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_andc: case INDEX_op_eqv: case INDEX_op_mul: + case INDEX_op_mulsh_i32: + case INDEX_op_mulsh_i64: case INDEX_op_muluh: case INDEX_op_nand: case INDEX_op_nor: diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index b99b12c24c..0627585097 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -18,7 +18,6 @@ #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -41,7 +40,6 @@ #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_mulsh_i64 0 #else #define TCG_TARGET_HAS_mulu2_i32 1 #endif /* TCG_TARGET_REG_BITS == 64 */ diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index e4a2b171df..1dcce543ec 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -670,6 +670,10 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_muluh = { .base.static_constraint = C_NotImplemented, }; From c742824dd8df3283098d5339291d49e65e515751 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 11:19:29 -0800 Subject: [PATCH 044/161] tcg: Merge INDEX_op_mulsh_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 10 +++++----- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 14 ++++---------- 5 files changed, 15 insertions(+), 22 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 4fed5a77c6..fe922d1dac 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -589,7 +589,7 @@ Multiword arithmetic support - | Similar to mulu2, except the two inputs *t1* and *t2* are signed. - * - mulsh_i32/i64 *t0*, *t1*, *t2* + * - mulsh *t0*, *t1*, *t2* muluh *t0*, *t1*, *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 28a5128537..a9d7938a52 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -44,6 +44,7 @@ DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) +DEF(mulsh, 1, 2, 0, TCG_OPF_INT) DEF(muluh, 1, 2, 0, TCG_OPF_INT) DEF(nand, 1, 2, 0, TCG_OPF_INT) DEF(neg, 1, 1, 0, TCG_OPF_INT) @@ -90,7 +91,6 @@ DEF(add2_i32, 2, 4, 0, 0) DEF(sub2_i32, 2, 4, 0, 0) DEF(mulu2_i32, 2, 2, 0, 0) DEF(muls2_i32, 2, 2, 0, 0) -DEF(mulsh_i32, 1, 2, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) @@ -151,7 +151,6 @@ DEF(add2_i64, 2, 4, 0, 0) DEF(sub2_i64, 2, 4, 0, 0) DEF(mulu2_i64, 2, 2, 0, 0) DEF(muls2_i64, 2, 2, 0, 0) -DEF(mulsh_i64, 1, 2, 0, 0) #define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) diff --git a/tcg/optimize.c b/tcg/optimize.c index e19bccf906..fd446fc47d 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -549,10 +549,10 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, mulu64(&l64, &h64, x, y); return h64; - case INDEX_op_mulsh_i32: - return ((int64_t)(int32_t)x * (int32_t)y) >> 32; - - case INDEX_op_mulsh_i64: + case INDEX_op_mulsh: + if (type == TCG_TYPE_I32) { + return ((int64_t)(int32_t)x * (int32_t)y) >> 32; + } muls64(&l64, &h64, x, y); return h64; @@ -2969,7 +2969,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_mul: done = fold_mul(&ctx, op); break; - CASE_OP_32_64(mulsh): + case INDEX_op_mulsh: case INDEX_op_muluh: done = fold_mul_highpart(&ctx, op); break; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index a043c4554b..664c698187 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1156,10 +1156,10 @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_muls2_i32) { tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_mulsh_i32, TCG_TYPE_I32, 0)) { + } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); - tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mulsh, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); } else if (TCG_TARGET_REG_BITS == 32) { @@ -2861,10 +2861,10 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_muls2_i64) { tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_mulsh_i64, TCG_TYPE_I64, 0)) { + } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); - tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mulsh, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); } else if (TCG_TARGET_HAS_mulu2_i64 || diff --git a/tcg/tcg.c b/tcg/tcg.c index 4bfda0a38f..d4b5872128 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1022,8 +1022,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), - OUTOP(INDEX_op_mulsh_i32, TCGOutOpBinary, outop_mulsh), - OUTOP(INDEX_op_mulsh_i64, TCGOutOpBinary, outop_mulsh), + OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), @@ -4028,18 +4027,14 @@ liveness_pass_1(TCGContext *s) goto do_not_remove; case INDEX_op_muls2_i32: + case INDEX_op_muls2_i64: opc_new = INDEX_op_mul; - opc_new2 = INDEX_op_mulsh_i32; + opc_new2 = INDEX_op_mulsh; goto do_mul2; case INDEX_op_mulu2_i32: case INDEX_op_mulu2_i64: opc_new = INDEX_op_mul; opc_new2 = INDEX_op_muluh; - goto do_mul2; - case INDEX_op_muls2_i64: - opc_new = INDEX_op_mul; - opc_new2 = INDEX_op_mulsh_i64; - goto do_mul2; do_mul2: nb_iargs = 2; nb_oargs = 2; @@ -5424,8 +5419,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_andc: case INDEX_op_eqv: case INDEX_op_mul: - case INDEX_op_mulsh_i32: - case INDEX_op_mulsh_i64: + case INDEX_op_mulsh: case INDEX_op_muluh: case INDEX_op_nand: case INDEX_op_nor: From 0cdacacebbe8a5e0d8a68a8c0007bff364c0e79a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 13:04:24 -0800 Subject: [PATCH 045/161] tcg: Convert div to TCGOutOpBinary For TCI, we're losing type information in the interpreter. Introduce a tci-specific opcode to handle the difference. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 17 ++++++++----- tcg/arm/tcg-target.c.inc | 28 +++++++++++++-------- tcg/i386/tcg-target.c.inc | 4 +++ tcg/loongarch64/tcg-target.c.inc | 24 +++++++++++------- tcg/mips/tcg-target.c.inc | 37 ++++++++++++++++------------ tcg/ppc/tcg-target.c.inc | 21 +++++++++------- tcg/riscv/tcg-target.c.inc | 21 +++++++++------- tcg/s390x/tcg-target.c.inc | 4 +++ tcg/sparc64/tcg-target.c.inc | 42 ++++++++++++++++++++++++++------ tcg/tcg-op.c | 8 +++--- tcg/tcg.c | 6 +++-- tcg/tci.c | 3 ++- tcg/tci/tcg-target-opc.h.inc | 1 + tcg/tci/tcg-target.c.inc | 17 ++++++++++--- 14 files changed, 156 insertions(+), 77 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 493c504682..52069f1445 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2157,6 +2157,17 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, SDIV, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2362,10 +2373,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_div_i64: - case INDEX_op_div_i32: - tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); - break; case INDEX_op_divu_i64: case INDEX_op_divu_i32: tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); @@ -3057,8 +3064,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_div_i32: - case INDEX_op_div_i64: case INDEX_op_divu_i32: case INDEX_op_divu_i64: case INDEX_op_rem_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 1c19004e6e..e07e4c06d9 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -937,12 +937,6 @@ static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0, (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); } -static void tcg_out_sdiv(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, TCGReg rm) -{ - tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); -} - static void tcg_out_udiv(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, TCGReg rm) { @@ -1874,6 +1868,24 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static TCGConstraintSetIndex cset_idiv(TCGType type, unsigned flags) +{ + return use_idiv_instructions ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* sdiv */ + tcg_out32(s, 0x0710f010 | (COND_AL << 28) | (a0 << 16) | a1 | (a2 << 8)); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_idiv, + .out_rrr = tgen_divs, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; @@ -2218,9 +2230,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_div_i32: - tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); - break; case INDEX_op_divu_i32: tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); break; @@ -2268,7 +2277,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ctz_i32: return C_O1_I2(r, r, rIK); - case INDEX_op_div_i32: case INDEX_op_divu_i32: return C_O1_I2(r, r, r); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index d0391157a4..e132dd0c88 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2633,6 +2633,10 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 7503270ca3..c42d8d690a 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1328,6 +1328,21 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_div_w(s, a0, a1, a2); + } else { + tcg_out_opc_div_d(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; @@ -1671,13 +1686,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_div_i32: - tcg_out_opc_div_w(s, a0, a1, a2); - break; - case INDEX_op_div_i64: - tcg_out_opc_div_d(s, a0, a1, a2); - break; - case INDEX_op_divu_i32: tcg_out_opc_div_wu(s, a0, a1, a2); break; @@ -2357,8 +2365,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rJ); - case INDEX_op_div_i32: - case INDEX_op_div_i64: case INDEX_op_divu_i32: case INDEX_op_divu_i64: case INDEX_op_rem_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index a1c215c25d..7762d88e6b 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1712,6 +1712,27 @@ static const TCGOutOpBinary outop_andc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + if (type == TCG_TYPE_I32) { + tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_DDIV_R6, a0, a1, a2); + } + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_DIV : OPC_DDIV; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; @@ -1939,13 +1960,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_div_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); - break; - } - i1 = OPC_DIV, i2 = OPC_MFLO; - goto do_hilo1; case INDEX_op_divu_i32: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); @@ -1967,13 +1981,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } i1 = OPC_DIVU, i2 = OPC_MFHI; goto do_hilo1; - case INDEX_op_div_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DDIV_R6, a0, a1, a2); - break; - } - i1 = OPC_DDIV, i2 = OPC_MFLO; - goto do_hilo1; case INDEX_op_divu_i64: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DDIVU_R6, a0, a1, a2); @@ -2253,12 +2260,10 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_div_i32: case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: case INDEX_op_setcond_i32: - case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 7ebadf396a..9fdf8df082 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2960,6 +2960,18 @@ static void tgen_eqv(TCGContext *s, TCGType type, tcg_out32(s, EQV | SAB(a1, a0, a2)); } +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_O1_I2(r, r, r), .out_rrr = tgen_eqv, @@ -3209,10 +3221,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; - case INDEX_op_div_i32: - tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_divu_i32: tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); break; @@ -3317,9 +3325,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_div_i64: - tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); - break; case INDEX_op_divu_i64: tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); break; @@ -4184,11 +4189,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); - case INDEX_op_div_i32: case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 82f76b8e0c..15925729dc 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1997,6 +1997,18 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_DIVW : OPC_DIV; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2201,13 +2213,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_div_i32: - tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2); - break; - case INDEX_op_div_i64: - tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2); - break; - case INDEX_op_divu_i32: tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2); break; @@ -2730,11 +2735,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_div_i32: case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_div_i64: case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 2685e6ffa1..fd0e717c49 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2242,6 +2242,10 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 95a138ef56..779d0ce882 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1333,6 +1333,40 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_divs_rJ(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGArg a2, bool c2) +{ + uint32_t insn; + + if (type == TCG_TYPE_I32) { + /* Load Y with the sign extension of a1 to 64-bits. */ + tcg_out_arithi(s, TCG_REG_T1, a1, 31, SHIFT_SRA); + tcg_out_sety(s, TCG_REG_T1); + insn = ARITH_SDIV; + } else { + insn = ARITH_SDIVX; + } + tcg_out_arithc(s, a0, a1, a2, c2, insn); +} + +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_divs_rJ(s, type, a0, a1, a2, false); +} + +static void tgen_divsi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_divs_rJ(s, type, a0, a1, a2, true); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_divs, + .out_rri = tgen_divsi, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; @@ -1532,9 +1566,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, c = SHIFT_SRA; goto do_shift32; - case INDEX_op_div_i32: - tcg_out_div32(s, a0, a1, a2, c2, 0); - break; case INDEX_op_divu_i32: tcg_out_div32(s, a0, a1, a2, c2, 1); break; @@ -1607,9 +1638,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_sar_i64: c = SHIFT_SRAX; goto do_shift64; - case INDEX_op_div_i64: - c = ARITH_SDIVX; - goto gen_arith; case INDEX_op_divu_i64: c = ARITH_UDIVX; goto gen_arith; @@ -1700,8 +1728,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_div_i32: - case INDEX_op_div_i64: case INDEX_op_divu_i32: case INDEX_op_divu_i64: case INDEX_op_shl_i32: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 664c698187..69e50f968f 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -601,7 +601,7 @@ void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_div_i32) { + if (tcg_op_supported(INDEX_op_div_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2); } else if (TCG_TARGET_HAS_div2_i32) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -617,7 +617,7 @@ void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_rem_i32) { tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i32) { + } else if (tcg_op_supported(INDEX_op_div_i32, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2); tcg_gen_mul_i32(t0, t0, arg2); @@ -1969,7 +1969,7 @@ void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_div_i64) { + if (tcg_op_supported(INDEX_op_div_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2); } else if (TCG_TARGET_HAS_div2_i64) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); @@ -1985,7 +1985,7 @@ void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_rem_i64) { tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i64) { + } else if (tcg_op_supported(INDEX_op_div_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2); tcg_gen_mul_i64(t0, t0, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index d4b5872128..f99213a154 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1020,6 +1020,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_div_i32, TCGOutOpBinary, outop_divs), + OUTOP(INDEX_op_div_i64, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -2260,7 +2262,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i32: return TCG_TARGET_HAS_negsetcond_i32; - case INDEX_op_div_i32: case INDEX_op_divu_i32: return TCG_TARGET_HAS_div_i32; case INDEX_op_rem_i32: @@ -2323,7 +2324,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return TCG_TARGET_HAS_negsetcond_i64; - case INDEX_op_div_i64: case INDEX_op_divu_i64: return TCG_TARGET_HAS_div_i64; case INDEX_op_rem_i64: @@ -5417,6 +5417,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: + case INDEX_op_div_i32: + case INDEX_op_div_i64: case INDEX_op_eqv: case INDEX_op_mul: case INDEX_op_mulsh: diff --git a/tcg/tci.c b/tcg/tci.c index 61c0ccf21e..4ecbb2d335 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -578,7 +578,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Arithmetic operations (32 bit). */ - case INDEX_op_div_i32: + case INDEX_op_tci_divs32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2]; break; @@ -1101,6 +1101,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_clz_i64: case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: + case INDEX_op_tci_divs32: tci_args_rrr(insn, &r0, &r1, &r2); info->fprintf_func(info->stream, "%-12s %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2)); diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index ecc8c4e55e..f503374643 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -2,3 +2,4 @@ /* These opcodes for use between the tci generator and interpreter. */ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 1dcce543ec..c8e86a3253 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -79,8 +79,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_div_i32: - case INDEX_op_div_i64: case INDEX_op_divu_i32: case INDEX_op_divu_i64: case INDEX_op_rem_i32: @@ -648,6 +646,20 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_divs32 + : INDEX_op_div_i64); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -811,7 +823,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sar) CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */ - CASE_32_64(div) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(divu) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */ From b2c514f9d5cab89814dc8a6b7c98c653ca8523d3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 13:22:56 -0800 Subject: [PATCH 046/161] tcg: Merge INDEX_op_div_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename to INDEX_op_divs to emphasize signed inputs, and mirroring INDEX_op_divu_*. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 12 +++++++----- tcg/tcg-op.c | 16 ++++++++-------- tcg/tcg.c | 6 ++---- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 22 insertions(+), 24 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index fe922d1dac..a833b3b7b2 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -277,7 +277,7 @@ Arithmetic - | *t0* = *t1* * *t2* - * - div_i32/i64 *t0*, *t1*, *t2* + * - divs *t0*, *t1*, *t2* - | *t0* = *t1* / *t2* (signed) | Undefined behavior if division by zero or overflow. diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index a9d7938a52..6d4edd0b16 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -42,6 +42,7 @@ DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) +DEF(divs, 1, 2, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) DEF(mulsh, 1, 2, 0, TCG_OPF_INT) @@ -68,7 +69,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* arith */ -DEF(div_i32, 1, 2, 0, 0) DEF(divu_i32, 1, 2, 0, 0) DEF(rem_i32, 1, 2, 0, 0) DEF(remu_i32, 1, 2, 0, 0) @@ -116,7 +116,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* arith */ -DEF(div_i64, 1, 2, 0, 0) DEF(divu_i64, 1, 2, 0, 0) DEF(rem_i64, 1, 2, 0, 0) DEF(remu_i64, 1, 2, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index fd446fc47d..af9054be37 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -556,13 +556,15 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, muls64(&l64, &h64, x, y); return h64; - case INDEX_op_div_i32: + case INDEX_op_divs: /* Avoid crashing on divide by zero, otherwise undefined. */ - return (int32_t)x / ((int32_t)y ? : 1); + if (type == TCG_TYPE_I32) { + return (int32_t)x / ((int32_t)y ? : 1); + } + return (int64_t)x / ((int64_t)y ? : 1); + case INDEX_op_divu_i32: return (uint32_t)x / ((uint32_t)y ? : 1); - case INDEX_op_div_i64: - return (int64_t)x / ((int64_t)y ? : 1); case INDEX_op_divu_i64: return (uint64_t)x / ((uint64_t)y ? : 1); @@ -2905,7 +2907,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(deposit): done = fold_deposit(&ctx, op); break; - CASE_OP_32_64(div): + case INDEX_op_divs: CASE_OP_32_64(divu): done = fold_divide(&ctx, op); break; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 69e50f968f..9dba520d40 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -601,8 +601,8 @@ void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_div_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_divs, ret, arg1, arg2); } else if (TCG_TARGET_HAS_div2_i32) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t0, arg1, 31); @@ -617,9 +617,9 @@ void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_rem_i32) { tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_div_i32, TCG_TYPE_I32, 0)) { + } else if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_divs, t0, arg1, arg2); tcg_gen_mul_i32(t0, t0, arg2); tcg_gen_sub_i32(ret, arg1, t0); tcg_temp_free_i32(t0); @@ -1969,8 +1969,8 @@ void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (tcg_op_supported(INDEX_op_div_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_divs, ret, arg1, arg2); } else if (TCG_TARGET_HAS_div2_i64) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t0, arg1, 63); @@ -1985,9 +1985,9 @@ void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_rem_i64) { tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_div_i64, TCG_TYPE_I64, 0)) { + } else if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_divs, t0, arg1, arg2); tcg_gen_mul_i64(t0, t0, arg2); tcg_gen_sub_i64(ret, arg1, t0); tcg_temp_free_i64(t0); diff --git a/tcg/tcg.c b/tcg/tcg.c index f99213a154..d4e30d0b33 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1020,8 +1020,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), - OUTOP(INDEX_op_div_i32, TCGOutOpBinary, outop_divs), - OUTOP(INDEX_op_div_i64, TCGOutOpBinary, outop_divs), + OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -5417,8 +5416,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: - case INDEX_op_div_i32: - case INDEX_op_div_i64: + case INDEX_op_divs: case INDEX_op_eqv: case INDEX_op_mul: case INDEX_op_mulsh: diff --git a/tcg/tci.c b/tcg/tci.c index 4ecbb2d335..4b3ca53bc5 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -720,7 +720,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Arithmetic operations (64 bit). */ - case INDEX_op_div_i64: + case INDEX_op_divs: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2]; break; @@ -1071,6 +1071,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: + case INDEX_op_divs: case INDEX_op_eqv: case INDEX_op_mul: case INDEX_op_nand: @@ -1079,8 +1080,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_orc: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_div_i32: - case INDEX_op_div_i64: case INDEX_op_rem_i32: case INDEX_op_rem_i64: case INDEX_op_divu_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index c8e86a3253..4a556e2ce7 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -651,7 +651,7 @@ static void tgen_divs(TCGContext *s, TCGType type, { TCGOpcode opc = (type == TCG_TYPE_I32 ? INDEX_op_tci_divs32 - : INDEX_op_div_i64); + : INDEX_op_divs); tcg_out_op_rrr(s, opc, a0, a1, a2); } From 6d1a2365eaee0603347fd2fabd89a8dc935c8ac7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 14:10:27 -0800 Subject: [PATCH 047/161] tcg: Convert divu to TCGOutOpBinary For TCI, we're losing type information in the interpreter. Introduce a tci-specific opcode to handle the difference. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 18 ++++++---- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 26 +++++++------- tcg/i386/tcg-target.c.inc | 4 +++ tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 24 ++++++++----- tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 37 ++++++++++--------- tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 21 ++++++----- tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 21 ++++++----- tcg/s390x/tcg-target.c.inc | 4 +++ tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 61 +++++++++++++++++--------------- tcg/tcg-has.h | 15 ++++---- tcg/tcg-op.c | 8 ++--- tcg/tcg.c | 8 ++--- tcg/tci.c | 3 +- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target-opc.h.inc | 1 + tcg/tci/tcg-target.c.inc | 17 +++++++-- 23 files changed, 157 insertions(+), 126 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index bde6db8f2a..e961668ef0 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -13,7 +13,6 @@ #define have_lse2 (cpuinfo & CPUINFO_LSE2) /* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 @@ -30,7 +29,6 @@ #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 52069f1445..167c51c897 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2168,6 +2168,17 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, UDIV, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2373,11 +2384,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_divu_i64: - case INDEX_op_divu_i32: - tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); - break; - case INDEX_op_rem_i64: case INDEX_op_rem_i32: tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2); @@ -3064,8 +3070,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: case INDEX_op_rem_i32: case INDEX_op_rem_i64: case INDEX_op_remu_i32: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index ab9b7b6162..6ed2b49c84 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -34,7 +34,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index e07e4c06d9..65d0ae83b2 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -937,12 +937,6 @@ static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0, (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); } -static void tcg_out_udiv(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, TCGReg rm) -{ - tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); -} - static void tcg_out_ext8s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn) { /* sxtb */ @@ -1886,6 +1880,19 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* udiv */ + tcg_out32(s, 0x0730f010 | (COND_AL << 28) | (a0 << 16) | a1 | (a2 << 8)); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_idiv, + .out_rrr = tgen_divu, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; @@ -2230,10 +2237,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_divu_i32: - tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_mb: tcg_out_mb(s, args[0]); break; @@ -2277,9 +2280,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ctz_i32: return C_O1_I2(r, r, rIK); - case INDEX_op_divu_i32: - return C_O1_I2(r, r, r); - case INDEX_op_mulu2_i32: case INDEX_op_muls2_i32: return C_O2_I2(r, r, r, r); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index e132dd0c88..f258d6383b 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2637,6 +2637,10 @@ static const TCGOutOpBinary outop_divs = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index e29c892756..96a99b6d4c 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -11,7 +11,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_negsetcond_i32 0 -#define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_div2_i32 0 #define TCG_TARGET_HAS_rot_i32 1 @@ -29,7 +28,6 @@ /* 64-bit operations */ #define TCG_TARGET_HAS_negsetcond_i64 0 -#define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_div2_i64 0 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index c42d8d690a..e82a62d09e 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1343,6 +1343,21 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_div_wu(s, a0, a1, a2); + } else { + tcg_out_opc_div_du(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; @@ -1686,13 +1701,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_divu_i32: - tcg_out_opc_div_wu(s, a0, a1, a2); - break; - case INDEX_op_divu_i64: - tcg_out_opc_div_du(s, a0, a1, a2); - break; - case INDEX_op_rem_i32: tcg_out_opc_mod_w(s, a0, a1, a2); break; @@ -2365,8 +2373,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rJ); - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: case INDEX_op_rem_i32: case INDEX_op_rem_i64: case INDEX_op_remu_i32: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index ebaaa49cdd..9aa5bf9f1b 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -39,7 +39,6 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) @@ -51,7 +50,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 7762d88e6b..ab9546f104 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1733,6 +1733,27 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + if (type == TCG_TYPE_I32) { + tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_DDIVU_R6, a0, a1, a2); + } + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_DIVU : OPC_DDIVU; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; @@ -1960,13 +1981,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_divu_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); - break; - } - i1 = OPC_DIVU, i2 = OPC_MFLO; - goto do_hilo1; case INDEX_op_rem_i32: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); @@ -1981,13 +1995,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } i1 = OPC_DIVU, i2 = OPC_MFHI; goto do_hilo1; - case INDEX_op_divu_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DDIVU_R6, a0, a1, a2); - break; - } - i1 = OPC_DDIVU, i2 = OPC_MFLO; - goto do_hilo1; case INDEX_op_rem_i64: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DMOD, a0, a1, a2); @@ -2260,11 +2267,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: case INDEX_op_setcond_i32: - case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: case INDEX_op_setcond_i64: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index bbbd8de2c7..f8e4c0ad3c 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -17,7 +17,6 @@ #define have_vsx (cpuinfo & CPUINFO_VSX) /* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 have_isa_3_00 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 @@ -35,7 +34,6 @@ #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 have_isa_3_00 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 9fdf8df082..b347595131 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2972,6 +2972,18 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_O1_I2(r, r, r), .out_rrr = tgen_eqv, @@ -3221,10 +3233,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; - case INDEX_op_divu_i32: - tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_rem_i32: tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); break; @@ -3325,9 +3333,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_divu_i64: - tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); - break; case INDEX_op_rem_i64: tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); break; @@ -4189,10 +4194,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); - case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: return C_O1_I2(r, r, r); diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index f7e1ef82fc..ae6624b9a4 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -11,7 +11,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_div2_i32 0 #define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB) @@ -28,7 +27,6 @@ #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_negsetcond_i64 1 -#define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_div2_i64 0 #define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 15925729dc..74fa38d273 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2009,6 +2009,18 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_DIVUW : OPC_DIVU; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2213,13 +2225,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_divu_i32: - tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2); - break; - case INDEX_op_divu_i64: - tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2); - break; - case INDEX_op_rem_i32: tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2); break; @@ -2735,10 +2740,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: return C_O1_I2(r, rz, rz); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index fd0e717c49..f55309f48e 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2246,6 +2246,10 @@ static const TCGOutOpBinary outop_divs = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 5a517b6835..35f0dd4230 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -14,7 +14,6 @@ extern bool use_vis3_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_rot_i32 0 #define TCG_TARGET_HAS_bswap16_i32 0 @@ -31,7 +30,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 #define TCG_TARGET_HAS_bswap16_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 779d0ce882..3a3372d7aa 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -596,21 +596,6 @@ static void tcg_out_sety(TCGContext *s, TCGReg rs) tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); } -static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int uns) -{ - /* Load Y with the sign/zero extension of RS1 to 64-bits. */ - if (uns) { - tcg_out_sety(s, TCG_REG_G0); - } else { - tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); - tcg_out_sety(s, TCG_REG_T1); - } - - tcg_out_arithc(s, rd, rs1, val2, val2const, - uns ? ARITH_UDIV : ARITH_SDIV); -} - static const uint8_t tcg_cond_to_bcond[16] = { [TCG_COND_EQ] = COND_E, [TCG_COND_NE] = COND_NE, @@ -1367,6 +1352,39 @@ static const TCGOutOpBinary outop_divs = { .out_rri = tgen_divsi, }; +static void tgen_divu_rJ(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGArg a2, bool c2) +{ + uint32_t insn; + + if (type == TCG_TYPE_I32) { + /* Load Y with the zero extension to 64-bits. */ + tcg_out_sety(s, TCG_REG_G0); + insn = ARITH_UDIV; + } else { + insn = ARITH_UDIVX; + } + tcg_out_arithc(s, a0, a1, a2, c2, insn); +} + +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_divu_rJ(s, type, a0, a1, a2, false); +} + +static void tgen_divui(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_divu_rJ(s, type, a0, a1, a2, true); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_divu, + .out_rri = tgen_divui, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; @@ -1566,10 +1584,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, c = SHIFT_SRA; goto do_shift32; - case INDEX_op_divu_i32: - tcg_out_div32(s, a0, a1, a2, c2, 1); - break; - case INDEX_op_brcond_i32: tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); break; @@ -1638,9 +1652,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_sar_i64: c = SHIFT_SRAX; goto do_shift64; - case INDEX_op_divu_i64: - c = ARITH_UDIVX; - goto gen_arith; case INDEX_op_brcond_i64: tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); @@ -1663,10 +1674,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const_args[4], args[5], const_args[5], true); break; - gen_arith: - tcg_out_arithc(s, a0, a1, a2, c2, c); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -1728,8 +1735,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 3d4c67698f..9680ccfc53 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -33,17 +33,16 @@ #define TCG_TARGET_HAS_sub2_i32 1 #endif -/* Only one of DIV or DIV2 should be defined. */ -#if defined(TCG_TARGET_HAS_div_i32) +#ifndef TCG_TARGET_HAS_div2_i32 #define TCG_TARGET_HAS_div2_i32 0 -#elif defined(TCG_TARGET_HAS_div2_i32) -#define TCG_TARGET_HAS_div_i32 0 +#endif +#ifndef TCG_TARGET_HAS_div2_i64 +#define TCG_TARGET_HAS_div2_i64 0 +#endif +#ifndef TCG_TARGET_HAS_rem_i32 #define TCG_TARGET_HAS_rem_i32 0 #endif -#if defined(TCG_TARGET_HAS_div_i64) -#define TCG_TARGET_HAS_div2_i64 0 -#elif defined(TCG_TARGET_HAS_div2_i64) -#define TCG_TARGET_HAS_div_i64 0 +#ifndef TCG_TARGET_HAS_rem_i64 #define TCG_TARGET_HAS_rem_i64 0 #endif diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 9dba520d40..19be461214 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -635,7 +635,7 @@ void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_div_i32) { + if (tcg_op_supported(INDEX_op_divu_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2); } else if (TCG_TARGET_HAS_div2_i32) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -651,7 +651,7 @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_rem_i32) { tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i32) { + } else if (tcg_op_supported(INDEX_op_divu_i32, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2); tcg_gen_mul_i32(t0, t0, arg2); @@ -2003,7 +2003,7 @@ void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_div_i64) { + if (tcg_op_supported(INDEX_op_divu_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2); } else if (TCG_TARGET_HAS_div2_i64) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); @@ -2019,7 +2019,7 @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_rem_i64) { tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i64) { + } else if (tcg_op_supported(INDEX_op_divu_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2); tcg_gen_mul_i64(t0, t0, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index d4e30d0b33..a0e58c07d7 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1021,6 +1021,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), + OUTOP(INDEX_op_divu_i32, TCGOutOpBinary, outop_divu), + OUTOP(INDEX_op_divu_i64, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -2261,8 +2263,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i32: return TCG_TARGET_HAS_negsetcond_i32; - case INDEX_op_divu_i32: - return TCG_TARGET_HAS_div_i32; case INDEX_op_rem_i32: case INDEX_op_remu_i32: return TCG_TARGET_HAS_rem_i32; @@ -2323,8 +2323,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return TCG_TARGET_HAS_negsetcond_i64; - case INDEX_op_divu_i64: - return TCG_TARGET_HAS_div_i64; case INDEX_op_rem_i64: case INDEX_op_remu_i64: return TCG_TARGET_HAS_rem_i64; @@ -5417,6 +5415,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_divs: + case INDEX_op_divu_i32: + case INDEX_op_divu_i64: case INDEX_op_eqv: case INDEX_op_mul: case INDEX_op_mulsh: diff --git a/tcg/tci.c b/tcg/tci.c index 4b3ca53bc5..0691824f97 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -582,7 +582,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2]; break; - case INDEX_op_divu_i32: + case INDEX_op_tci_divu32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2]; break; @@ -1101,6 +1101,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: case INDEX_op_tci_divs32: + case INDEX_op_tci_divu32: tci_args_rrr(insn, &r0, &r1, &r2); info->fprintf_func(info->stream, "%-12s %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2)); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 0627585097..ccec96b610 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -9,7 +9,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_clz_i32 1 @@ -26,7 +25,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index f503374643..43c07a269f 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -3,3 +3,4 @@ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 4a556e2ce7..18a10156a6 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -79,8 +79,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: case INDEX_op_rem_i32: case INDEX_op_rem_i64: case INDEX_op_remu_i32: @@ -660,6 +658,20 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_divu32 + : INDEX_op_divu_i64); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -823,7 +835,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sar) CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */ - CASE_32_64(divu) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */ From 961b80aecd1a503eedb885c309a1d5267d89c98c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 14:27:19 -0800 Subject: [PATCH 048/161] tcg: Merge INDEX_op_divu_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 9 +++++---- tcg/tcg-op.c | 16 ++++++++-------- tcg/tcg.c | 6 ++---- tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 20 insertions(+), 23 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index a833b3b7b2..41985be012 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -282,7 +282,7 @@ Arithmetic - | *t0* = *t1* / *t2* (signed) | Undefined behavior if division by zero or overflow. - * - divu_i32/i64 *t0*, *t1*, *t2* + * - divu *t0*, *t1*, *t2* - | *t0* = *t1* / *t2* (unsigned) | Undefined behavior if division by zero. diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 6d4edd0b16..243f002a61 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -43,6 +43,7 @@ DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(divs, 1, 2, 0, TCG_OPF_INT) +DEF(divu, 1, 2, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) DEF(mulsh, 1, 2, 0, TCG_OPF_INT) @@ -69,7 +70,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* arith */ -DEF(divu_i32, 1, 2, 0, 0) DEF(rem_i32, 1, 2, 0, 0) DEF(remu_i32, 1, 2, 0, 0) DEF(div2_i32, 2, 3, 0, 0) @@ -116,7 +116,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* arith */ -DEF(divu_i64, 1, 2, 0, 0) DEF(rem_i64, 1, 2, 0, 0) DEF(remu_i64, 1, 2, 0, 0) DEF(div2_i64, 2, 3, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index af9054be37..c11cce782a 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -563,9 +563,10 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, } return (int64_t)x / ((int64_t)y ? : 1); - case INDEX_op_divu_i32: - return (uint32_t)x / ((uint32_t)y ? : 1); - case INDEX_op_divu_i64: + case INDEX_op_divu: + if (type == TCG_TYPE_I32) { + return (uint32_t)x / ((uint32_t)y ? : 1); + } return (uint64_t)x / ((uint64_t)y ? : 1); case INDEX_op_rem_i32: @@ -2908,7 +2909,7 @@ void tcg_optimize(TCGContext *s) done = fold_deposit(&ctx, op); break; case INDEX_op_divs: - CASE_OP_32_64(divu): + case INDEX_op_divu: done = fold_divide(&ctx, op); break; case INDEX_op_dup_vec: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 19be461214..f326c452a4 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -635,8 +635,8 @@ void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_divu_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_divu, ret, arg1, arg2); } else if (TCG_TARGET_HAS_div2_i32) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 zero = tcg_constant_i32(0); @@ -651,9 +651,9 @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_rem_i32) { tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_divu_i32, TCG_TYPE_I32, 0)) { + } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_divu, t0, arg1, arg2); tcg_gen_mul_i32(t0, t0, arg2); tcg_gen_sub_i32(ret, arg1, t0); tcg_temp_free_i32(t0); @@ -2003,8 +2003,8 @@ void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (tcg_op_supported(INDEX_op_divu_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_divu, ret, arg1, arg2); } else if (TCG_TARGET_HAS_div2_i64) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 zero = tcg_constant_i64(0); @@ -2019,9 +2019,9 @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_rem_i64) { tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_divu_i64, TCG_TYPE_I64, 0)) { + } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_divu, t0, arg1, arg2); tcg_gen_mul_i64(t0, t0, arg2); tcg_gen_sub_i64(ret, arg1, t0); tcg_temp_free_i64(t0); diff --git a/tcg/tcg.c b/tcg/tcg.c index a0e58c07d7..9aa6d40905 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1021,8 +1021,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), - OUTOP(INDEX_op_divu_i32, TCGOutOpBinary, outop_divu), - OUTOP(INDEX_op_divu_i64, TCGOutOpBinary, outop_divu), + OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -5415,8 +5414,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_divs: - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: + case INDEX_op_divu: case INDEX_op_eqv: case INDEX_op_mul: case INDEX_op_mulsh: diff --git a/tcg/tci.c b/tcg/tci.c index 0691824f97..bf97849bfe 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -724,7 +724,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2]; break; - case INDEX_op_divu_i64: + case INDEX_op_divu: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2]; break; @@ -1072,6 +1072,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_divs: + case INDEX_op_divu: case INDEX_op_eqv: case INDEX_op_mul: case INDEX_op_nand: @@ -1082,8 +1083,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_xor: case INDEX_op_rem_i32: case INDEX_op_rem_i64: - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: case INDEX_op_shl_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 18a10156a6..dfa8aecc7a 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -663,7 +663,7 @@ static void tgen_divu(TCGContext *s, TCGType type, { TCGOpcode opc = (type == TCG_TYPE_I32 ? INDEX_op_tci_divu32 - : INDEX_op_divu_i64); + : INDEX_op_divu); tcg_out_op_rrr(s, opc, a0, a1, a2); } From d6cad9c9278566deb7f646f027c056ba24f7988d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 16:32:29 -0800 Subject: [PATCH 049/161] tcg: Convert div2 to TCGOutOpDivRem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 4 +++ tcg/arm/tcg-target.c.inc | 4 +++ tcg/i386/tcg-target.c.inc | 17 ++++++++---- tcg/loongarch64/tcg-target.c.inc | 4 +++ tcg/mips/tcg-target.c.inc | 4 +++ tcg/ppc/tcg-target.c.inc | 4 +++ tcg/riscv/tcg-target.c.inc | 4 +++ tcg/s390x/tcg-target.c.inc | 44 ++++++++++++++++---------------- tcg/sparc64/tcg-target.c.inc | 4 +++ tcg/tcg.c | 24 +++++++++++++++-- tcg/tci/tcg-target.c.inc | 4 +++ 11 files changed, 88 insertions(+), 29 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 167c51c897..ea5766414d 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2168,6 +2168,10 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divu(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 65d0ae83b2..ff750e2df8 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1880,6 +1880,10 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divu(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index f258d6383b..9238e0e8e4 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2637,6 +2637,18 @@ static const TCGOutOpBinary outop_divs = { .base.static_constraint = C_NotImplemented, }; +static void tgen_divs2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, a4); +} + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_O2_I3(a, d, 0, 1, r), + .out_rr01r = tgen_divs2, +}; + static const TCGOutOpBinary outop_divu = { .base.static_constraint = C_NotImplemented, }; @@ -2847,9 +2859,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(div2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); - break; OP_32_64(divu2): tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); break; @@ -3789,8 +3798,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_movcond_i64: return C_O1_I4(r, r, reT, r, 0); - case INDEX_op_div2_i32: - case INDEX_op_div2_i64: case INDEX_op_divu2_i32: case INDEX_op_divu2_i64: return C_O2_I3(a, d, 0, 1, r); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index e82a62d09e..8ec46114b8 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1343,6 +1343,10 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divu(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index ab9546f104..adbc7ee39d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1733,6 +1733,10 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divu(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index b347595131..1eb3e785c0 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2972,6 +2972,10 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divu(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 74fa38d273..19c690c1c2 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2009,6 +2009,10 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divu(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index f55309f48e..b434ce423a 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2246,6 +2246,28 @@ static const TCGOutOpBinary outop_divs = { .base.static_constraint = C_NotImplemented, }; +static void tgen_divs2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4) +{ + tcg_debug_assert((a1 & 1) == 0); + tcg_debug_assert(a0 == a1 + 1); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, DR, a1, a4); + } else { + /* + * TODO: Move the sign-extend of the numerator from a2 into a3 + * into the tcg backend, instead of in early expansion. It is + * required for 32-bit DR, but not 64-bit DSGR. + */ + tcg_out_insn(s, RRE, DSGR, a1, a4); + } +} + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_O2_I3(o, m, 0, 1, r), + .out_rr01r = tgen_divs2, +}; + static const TCGOutOpBinary outop_divu = { .base.static_constraint = C_NotImplemented, }; @@ -2527,13 +2549,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_div2_i32: - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert(args[1] == args[3]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RR, DR, args[1], args[4]); - break; case INDEX_op_divu2_i32: tcg_debug_assert(args[0] == args[2]); tcg_debug_assert(args[1] == args[3]); @@ -2702,19 +2717,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); break; - case INDEX_op_div2_i64: - /* - * ??? We get an unnecessary sign-extension of the dividend - * into op0 with this definition, but as we do in fact always - * produce both quotient and remainder using INDEX_op_div_i64 - * instead requires jumping through even more hoops. - */ - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert(args[1] == args[3]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRE, DSGR, args[1], args[4]); - break; case INDEX_op_divu2_i64: tcg_debug_assert(args[0] == args[2]); tcg_debug_assert(args[1] == args[3]); @@ -3396,8 +3398,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_movcond_i64: return C_O1_I4(r, r, rC, rI, r); - case INDEX_op_div2_i32: - case INDEX_op_div2_i64: case INDEX_op_divu2_i32: case INDEX_op_divu2_i64: return C_O2_I3(o, m, 0, 1, r); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 3a3372d7aa..472ccd7608 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1352,6 +1352,10 @@ static const TCGOutOpBinary outop_divs = { .out_rri = tgen_divsi, }; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divu_rJ(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGArg a2, bool c2) { diff --git a/tcg/tcg.c b/tcg/tcg.c index 9aa6d40905..ef3af4157a 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -986,6 +986,12 @@ typedef struct TCGOutOpBinary { TCGReg a0, TCGReg a1, tcg_target_long a2); } TCGOutOpBinary; +typedef struct TCGOutOpDivRem { + TCGOutOp base; + void (*out_rr01r)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4); +} TCGOutOpDivRem; + typedef struct TCGOutOpUnary { TCGOutOp base; void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); @@ -1022,6 +1028,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), + OUTOP(INDEX_op_div2_i32, TCGOutOpDivRem, outop_divs2), + OUTOP(INDEX_op_div2_i64, TCGOutOpDivRem, outop_divs2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -2265,7 +2273,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i32: case INDEX_op_remu_i32: return TCG_TARGET_HAS_rem_i32; - case INDEX_op_div2_i32: case INDEX_op_divu2_i32: return TCG_TARGET_HAS_div2_i32; case INDEX_op_rotl_i32: @@ -2325,7 +2332,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i64: case INDEX_op_remu_i64: return TCG_TARGET_HAS_rem_i64; - case INDEX_op_div2_i64: case INDEX_op_divu2_i64: return TCG_TARGET_HAS_div2_i64; case INDEX_op_rotl_i64: @@ -5467,6 +5473,20 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_div2_i32: + case INDEX_op_div2_i64: + { + const TCGOutOpDivRem *out = + container_of(all_outop[op->opc], TCGOutOpDivRem, base); + + /* Only used by x86 and s390x, which use matching constraints. */ + tcg_debug_assert(new_args[0] == new_args[2]); + tcg_debug_assert(new_args[1] == new_args[3]); + tcg_debug_assert(!const_args[4]); + out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); + } + break; + default: if (def->flags & TCG_OPF_VECTOR) { tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index dfa8aecc7a..6646be224d 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -658,6 +658,10 @@ static const TCGOutOpBinary outop_divs = { .out_rrr = tgen_divs, }; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divu(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { From ee1805b9e66d6b6229270a339586058bbf275412 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 16:44:23 -0800 Subject: [PATCH 050/161] tcg: Merge INDEX_op_div2_{i32,i64} Rename to INDEX_op_divs2 to emphasize signed inputs, and mirroring INDEX_op_divu2_*. Document the opcode. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 9 +++++++++ include/tcg/tcg-opc.h | 3 +-- tcg/tcg-op.c | 16 ++++++++-------- tcg/tcg.c | 6 ++---- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 41985be012..62af390854 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -297,6 +297,15 @@ Arithmetic - | *t0* = *t1* % *t2* (unsigned) | Undefined behavior if division by zero. + * - divs2 *q*, *r*, *nl*, *nh*, *d* + + - | *q* = *nh:nl* / *d* (signed) + | *r* = *nh:nl* % *d* + | Undefined behaviour if division by zero, or the double-word + numerator divided by the single-word divisor does not fit + within the single-word quotient. The code generator will + pass *nh* as a simple sign-extension of *nl*, so the only + overflow should be *INT_MIN* / -1. Logical ------- diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 243f002a61..36dfbf80ad 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -43,6 +43,7 @@ DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(divs, 1, 2, 0, TCG_OPF_INT) +DEF(divs2, 2, 3, 0, TCG_OPF_INT) DEF(divu, 1, 2, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) @@ -72,7 +73,6 @@ DEF(st_i32, 0, 2, 1, 0) /* arith */ DEF(rem_i32, 1, 2, 0, 0) DEF(remu_i32, 1, 2, 0, 0) -DEF(div2_i32, 2, 3, 0, 0) DEF(divu2_i32, 2, 3, 0, 0) /* shifts/rotates */ DEF(shl_i32, 1, 2, 0, 0) @@ -118,7 +118,6 @@ DEF(st_i64, 0, 2, 1, 0) /* arith */ DEF(rem_i64, 1, 2, 0, 0) DEF(remu_i64, 1, 2, 0, 0) -DEF(div2_i64, 2, 3, 0, 0) DEF(divu2_i64, 2, 3, 0, 0) /* shifts/rotates */ DEF(shl_i64, 1, 2, 0, 0) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index f326c452a4..f95beb8b5d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -603,10 +603,10 @@ void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_divs, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i32) { + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t0, arg1, 31); - tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2); + tcg_gen_op5_i32(INDEX_op_divs2, ret, t0, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { gen_helper_div_i32(ret, arg1, arg2); @@ -623,10 +623,10 @@ void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_mul_i32(t0, t0, arg2); tcg_gen_sub_i32(ret, arg1, t0); tcg_temp_free_i32(t0); - } else if (TCG_TARGET_HAS_div2_i32) { + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t0, arg1, 31); - tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2); + tcg_gen_op5_i32(INDEX_op_divs2, t0, ret, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { gen_helper_rem_i32(ret, arg1, arg2); @@ -1971,10 +1971,10 @@ void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_divs, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i64) { + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t0, arg1, 63); - tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2); + tcg_gen_op5_i64(INDEX_op_divs2, ret, t0, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { gen_helper_div_i64(ret, arg1, arg2); @@ -1991,10 +1991,10 @@ void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_mul_i64(t0, t0, arg2); tcg_gen_sub_i64(ret, arg1, t0); tcg_temp_free_i64(t0); - } else if (TCG_TARGET_HAS_div2_i64) { + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t0, arg1, 63); - tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2); + tcg_gen_op5_i64(INDEX_op_divs2, t0, ret, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { gen_helper_rem_i64(ret, arg1, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index ef3af4157a..30b7f8ee19 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1028,8 +1028,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), - OUTOP(INDEX_op_div2_i32, TCGOutOpDivRem, outop_divs2), - OUTOP(INDEX_op_div2_i64, TCGOutOpDivRem, outop_divs2), + OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -5473,8 +5472,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_div2_i32: - case INDEX_op_div2_i64: + case INDEX_op_divs2: { const TCGOutOpDivRem *out = container_of(all_outop[op->opc], TCGOutOpDivRem, base); From 9bf558ed17c274b172549894e8e343e6a1a1508c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 18:10:14 -0800 Subject: [PATCH 051/161] tcg: Convert divu2 to TCGOutOpDivRem Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 4 ++++ tcg/arm/tcg-target.c.inc | 4 ++++ tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 20 +++++++++++------- tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 4 ++++ tcg/mips/tcg-target.c.inc | 4 ++++ tcg/ppc/tcg-target.c.inc | 4 ++++ tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 4 ++++ tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 36 +++++++++++++++----------------- tcg/sparc64/tcg-target.c.inc | 4 ++++ tcg/tcg-has.h | 7 ------- tcg/tcg-op.c | 8 +++---- tcg/tcg.c | 8 +++---- tcg/tci/tcg-target.c.inc | 4 ++++ 17 files changed, 69 insertions(+), 50 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index ea5766414d..456159cdc6 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2183,6 +2183,10 @@ static const TCGOutOpBinary outop_divu = { .out_rrr = tgen_divu, }; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index ff750e2df8..b2c08bba3e 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1897,6 +1897,10 @@ static const TCGOutOpBinary outop_divu = { .out_rrr = tgen_divu, }; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 121fb95ee0..aee6066579 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -26,7 +26,6 @@ #define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) /* optional instructions */ -#define TCG_TARGET_HAS_div2_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 @@ -43,7 +42,6 @@ #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 9238e0e8e4..0e6b743fb2 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2653,6 +2653,18 @@ static const TCGOutOpBinary outop_divu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_divu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, a4); +} + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_O2_I3(a, d, 0, 1, r), + .out_rr01r = tgen_divu2, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; @@ -2859,10 +2871,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(divu2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); - break; - OP_32_64(shl): /* For small constant 3-operand shift, use LEA. */ if (const_a2 && a0 != a1 && (a2 - 1) < 3) { @@ -3798,10 +3806,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_movcond_i64: return C_O1_I4(r, r, reT, r, 0); - case INDEX_op_divu2_i32: - case INDEX_op_divu2_i64: - return C_O2_I3(a, d, 0, 1, r); - case INDEX_op_mulu2_i32: case INDEX_op_mulu2_i64: case INDEX_op_muls2_i32: diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 96a99b6d4c..aecd2879b8 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -12,7 +12,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_div2_i32 0 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 0 @@ -29,7 +28,6 @@ /* 64-bit operations */ #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_div2_i64 0 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 8ec46114b8..be09c362cb 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1362,6 +1362,10 @@ static const TCGOutOpBinary outop_divu = { .out_rrr = tgen_divu, }; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index adbc7ee39d..280afbf297 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1758,6 +1758,10 @@ static const TCGOutOpBinary outop_divu = { .out_rrr = tgen_divu, }; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 1eb3e785c0..8b14d57d1c 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2988,6 +2988,10 @@ static const TCGOutOpBinary outop_divu = { .out_rrr = tgen_divu, }; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_O1_I2(r, r, r), .out_rrr = tgen_eqv, diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index ae6624b9a4..e5861e5260 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -12,7 +12,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_div2_i32 0 #define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 @@ -28,7 +27,6 @@ #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_div2_i64 0 #define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 19c690c1c2..72910b0f25 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2025,6 +2025,10 @@ static const TCGOutOpBinary outop_divu = { .out_rrr = tgen_divu, }; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 64f1805641..d61cc7a144 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -29,7 +29,6 @@ extern uint64_t s390_facilities[3]; ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1) /* optional instructions */ -#define TCG_TARGET_HAS_div2_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 @@ -45,7 +44,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index b434ce423a..9af626eec2 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2272,6 +2272,23 @@ static const TCGOutOpBinary outop_divu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_divu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4) +{ + tcg_debug_assert((a1 & 1) == 0); + tcg_debug_assert(a0 == a1 + 1); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRE, DLR, a1, a4); + } else { + tcg_out_insn(s, RRE, DLGR, a1, a4); + } +} + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_O2_I3(o, m, 0, 1, r), + .out_rr01r = tgen_divu2, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2549,14 +2566,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_divu2_i32: - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert(args[1] == args[3]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRE, DLR, args[1], args[4]); - break; - case INDEX_op_shl_i32: op = RS_SLL; op2 = RSY_SLLK; @@ -2717,13 +2726,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); break; - case INDEX_op_divu2_i64: - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert(args[1] == args[3]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRE, DLGR, args[1], args[4]); - break; case INDEX_op_mulu2_i64: tcg_debug_assert(args[0] == args[2]); tcg_debug_assert((args[1] & 1) == 0); @@ -3398,10 +3400,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_movcond_i64: return C_O1_I4(r, r, rC, rI, r); - case INDEX_op_divu2_i32: - case INDEX_op_divu2_i64: - return C_O2_I3(o, m, 0, 1, r); - case INDEX_op_mulu2_i64: return C_O2_I2(o, m, 0, r); case INDEX_op_muls2_i64: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 472ccd7608..a4659653b3 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1389,6 +1389,10 @@ static const TCGOutOpBinary outop_divu = { .out_rri = tgen_divui, }; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 9680ccfc53..bae9918024 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -14,7 +14,6 @@ #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 -#define TCG_TARGET_HAS_div2_i64 0 #define TCG_TARGET_HAS_rot_i64 0 #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 @@ -33,12 +32,6 @@ #define TCG_TARGET_HAS_sub2_i32 1 #endif -#ifndef TCG_TARGET_HAS_div2_i32 -#define TCG_TARGET_HAS_div2_i32 0 -#endif -#ifndef TCG_TARGET_HAS_div2_i64 -#define TCG_TARGET_HAS_div2_i64 0 -#endif #ifndef TCG_TARGET_HAS_rem_i32 #define TCG_TARGET_HAS_rem_i32 0 #endif diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index f95beb8b5d..5511106554 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -637,7 +637,7 @@ void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_divu, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i32) { + } else if (tcg_op_supported(INDEX_op_divu2_i32, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, zero, arg2); @@ -657,7 +657,7 @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_mul_i32(t0, t0, arg2); tcg_gen_sub_i32(ret, arg1, t0); tcg_temp_free_i32(t0); - } else if (TCG_TARGET_HAS_div2_i32) { + } else if (tcg_op_supported(INDEX_op_divu2_i32, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, zero, arg2); @@ -2005,7 +2005,7 @@ void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_divu, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i64) { + } else if (tcg_op_supported(INDEX_op_divu2_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 zero = tcg_constant_i64(0); tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, zero, arg2); @@ -2025,7 +2025,7 @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_mul_i64(t0, t0, arg2); tcg_gen_sub_i64(ret, arg1, t0); tcg_temp_free_i64(t0); - } else if (TCG_TARGET_HAS_div2_i64) { + } else if (tcg_op_supported(INDEX_op_divu2_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 zero = tcg_constant_i64(0); tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, zero, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 30b7f8ee19..1029cba3f0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1029,6 +1029,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), + OUTOP(INDEX_op_divu2_i32, TCGOutOpDivRem, outop_divu2), + OUTOP(INDEX_op_divu2_i64, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -2272,8 +2274,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i32: case INDEX_op_remu_i32: return TCG_TARGET_HAS_rem_i32; - case INDEX_op_divu2_i32: - return TCG_TARGET_HAS_div2_i32; case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: return TCG_TARGET_HAS_rot_i32; @@ -2331,8 +2331,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rem_i64: case INDEX_op_remu_i64: return TCG_TARGET_HAS_rem_i64; - case INDEX_op_divu2_i64: - return TCG_TARGET_HAS_div2_i64; case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: return TCG_TARGET_HAS_rot_i64; @@ -5473,6 +5471,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_divs2: + case INDEX_op_divu2_i32: + case INDEX_op_divu2_i64: { const TCGOutOpDivRem *out = container_of(all_outop[op->opc], TCGOutOpDivRem, base); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 6646be224d..27271c178c 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -676,6 +676,10 @@ static const TCGOutOpBinary outop_divu = { .out_rrr = tgen_divu, }; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { From 8109598b683ad2b6b02cd9c79dc15b7fc0b685aa Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 18:23:17 -0800 Subject: [PATCH 052/161] tcg: Merge INDEX_op_divu2_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 10 ++++++++++ include/tcg/tcg-opc.h | 3 +-- tcg/tcg-op.c | 16 ++++++++-------- tcg/tcg.c | 6 ++---- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 62af390854..8f3b5e91b2 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -307,6 +307,16 @@ Arithmetic pass *nh* as a simple sign-extension of *nl*, so the only overflow should be *INT_MIN* / -1. + * - divu2 *q*, *r*, *nl*, *nh*, *d* + + - | *q* = *nh:nl* / *d* (unsigned) + | *r* = *nh:nl* % *d* + | Undefined behaviour if division by zero, or the double-word + numerator divided by the single-word divisor does not fit + within the single-word quotient. The code generator will + pass 0 to *nh* to make a simple zero-extension of *nl*, + so overflow should never occur. + Logical ------- diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 36dfbf80ad..61e5e185cc 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -45,6 +45,7 @@ DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(divs, 1, 2, 0, TCG_OPF_INT) DEF(divs2, 2, 3, 0, TCG_OPF_INT) DEF(divu, 1, 2, 0, TCG_OPF_INT) +DEF(divu2, 2, 3, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) DEF(mulsh, 1, 2, 0, TCG_OPF_INT) @@ -73,7 +74,6 @@ DEF(st_i32, 0, 2, 1, 0) /* arith */ DEF(rem_i32, 1, 2, 0, 0) DEF(remu_i32, 1, 2, 0, 0) -DEF(divu2_i32, 2, 3, 0, 0) /* shifts/rotates */ DEF(shl_i32, 1, 2, 0, 0) DEF(shr_i32, 1, 2, 0, 0) @@ -118,7 +118,6 @@ DEF(st_i64, 0, 2, 1, 0) /* arith */ DEF(rem_i64, 1, 2, 0, 0) DEF(remu_i64, 1, 2, 0, 0) -DEF(divu2_i64, 2, 3, 0, 0) /* shifts/rotates */ DEF(shl_i64, 1, 2, 0, 0) DEF(shr_i64, 1, 2, 0, 0) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 5511106554..7ed92157de 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -637,10 +637,10 @@ void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_divu, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_divu2_i32, TCG_TYPE_I32, 0)) { + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 zero = tcg_constant_i32(0); - tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, zero, arg2); + tcg_gen_op5_i32(INDEX_op_divu2, ret, t0, arg1, zero, arg2); tcg_temp_free_i32(t0); } else { gen_helper_divu_i32(ret, arg1, arg2); @@ -657,10 +657,10 @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_mul_i32(t0, t0, arg2); tcg_gen_sub_i32(ret, arg1, t0); tcg_temp_free_i32(t0); - } else if (tcg_op_supported(INDEX_op_divu2_i32, TCG_TYPE_I32, 0)) { + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 zero = tcg_constant_i32(0); - tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, zero, arg2); + tcg_gen_op5_i32(INDEX_op_divu2, t0, ret, arg1, zero, arg2); tcg_temp_free_i32(t0); } else { gen_helper_remu_i32(ret, arg1, arg2); @@ -2005,10 +2005,10 @@ void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_divu, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_divu2_i64, TCG_TYPE_I64, 0)) { + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, zero, arg2); + tcg_gen_op5_i64(INDEX_op_divu2, ret, t0, arg1, zero, arg2); tcg_temp_free_i64(t0); } else { gen_helper_divu_i64(ret, arg1, arg2); @@ -2025,10 +2025,10 @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_mul_i64(t0, t0, arg2); tcg_gen_sub_i64(ret, arg1, t0); tcg_temp_free_i64(t0); - } else if (tcg_op_supported(INDEX_op_divu2_i64, TCG_TYPE_I64, 0)) { + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, zero, arg2); + tcg_gen_op5_i64(INDEX_op_divu2, t0, ret, arg1, zero, arg2); tcg_temp_free_i64(t0); } else { gen_helper_remu_i64(ret, arg1, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 1029cba3f0..2c1307e3fc 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1029,8 +1029,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), - OUTOP(INDEX_op_divu2_i32, TCGOutOpDivRem, outop_divu2), - OUTOP(INDEX_op_divu2_i64, TCGOutOpDivRem, outop_divu2), + OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -5471,8 +5470,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_divs2: - case INDEX_op_divu2_i32: - case INDEX_op_divu2_i64: + case INDEX_op_divu2: { const TCGOutOpDivRem *out = container_of(all_outop[op->opc], TCGOutOpDivRem, base); From 646f36fead167e924e83d23d0e1dc59c37b5cca5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 18:52:30 -0800 Subject: [PATCH 053/161] tcg: Convert rem to TCGOutOpBinary For TCI, we're losing type information in the interpreter. Introduce a tci-specific opcode to handle the difference. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 19 ++++++++++------ tcg/arm/tcg-target.c.inc | 4 ++++ tcg/i386/tcg-target.c.inc | 4 ++++ tcg/loongarch64/tcg-target.c.inc | 24 +++++++++++++-------- tcg/mips/tcg-target.c.inc | 37 ++++++++++++++++++-------------- tcg/ppc/tcg-target.c.inc | 27 +++++++++++++++-------- tcg/riscv/tcg-target.c.inc | 21 ++++++++++-------- tcg/s390x/tcg-target.c.inc | 4 ++++ tcg/sparc64/tcg-target.c.inc | 4 ++++ tcg/tcg.c | 6 ++++-- tcg/tci.c | 4 ++-- tcg/tci/tcg-target-opc.h.inc | 1 + tcg/tci/tcg-target.c.inc | 17 ++++++++++++--- 13 files changed, 115 insertions(+), 57 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 456159cdc6..6e80e18a6a 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2275,6 +2275,18 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, SDIV, type, TCG_REG_TMP0, a1, a2); + tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1); +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2392,11 +2404,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_rem_i64: - case INDEX_op_rem_i32: - tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2); - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); - break; case INDEX_op_remu_i64: case INDEX_op_remu_i32: tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2); @@ -3078,8 +3085,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_rem_i32: - case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: return C_O1_I2(r, r, r); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index b2c08bba3e..673c8fb7a6 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1955,6 +1955,10 @@ static const TCGOutOpBinary outop_orc = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 0e6b743fb2..ac0721d71c 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2736,6 +2736,10 @@ static const TCGOutOpBinary outop_orc = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index be09c362cb..ef37b4daa7 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1459,6 +1459,21 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_mod_w(s, a0, a1, a2); + } else { + tcg_out_opc_mod_d(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1709,13 +1724,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_rem_i32: - tcg_out_opc_mod_w(s, a0, a1, a2); - break; - case INDEX_op_rem_i64: - tcg_out_opc_mod_d(s, a0, a1, a2); - break; - case INDEX_op_remu_i32: tcg_out_opc_mod_wu(s, a0, a1, a2); break; @@ -2381,8 +2389,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rJ); - case INDEX_op_rem_i32: - case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: return C_O1_I2(r, rz, rz); diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 280afbf297..37b878ec61 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1866,6 +1866,27 @@ static const TCGOutOpBinary outop_orc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + if (type == TCG_TYPE_I32) { + tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_DMOD, a0, a1, a2); + } + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_DIV : OPC_DDIV; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1989,13 +2010,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_rem_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); - break; - } - i1 = OPC_DIV, i2 = OPC_MFHI; - goto do_hilo1; case INDEX_op_remu_i32: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); @@ -2003,13 +2017,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } i1 = OPC_DIVU, i2 = OPC_MFHI; goto do_hilo1; - case INDEX_op_rem_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DMOD, a0, a1, a2); - break; - } - i1 = OPC_DDIV, i2 = OPC_MFHI; - goto do_hilo1; case INDEX_op_remu_i64: if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DMODU, a0, a1, a2); @@ -2275,10 +2282,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_rem_i32: case INDEX_op_remu_i32: case INDEX_op_setcond_i32: - case INDEX_op_rem_i64: case INDEX_op_remu_i64: case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rz); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 8b14d57d1c..c331f0d672 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3091,6 +3091,24 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags) +{ + return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mod, + .out_rrr = tgen_rems, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3241,10 +3259,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; - case INDEX_op_rem_i32: - tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_remu_i32: tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); break; @@ -3341,9 +3355,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_rem_i64: - tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); - break; case INDEX_op_remu_i64: tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); break; @@ -4202,9 +4213,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); - case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_rem_i64: case INDEX_op_remu_i64: return C_O1_I2(r, r, r); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 72910b0f25..b0a98273f1 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2120,6 +2120,18 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_REMW : OPC_REM; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2233,13 +2245,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_rem_i32: - tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2); - break; - case INDEX_op_rem_i64: - tcg_out_opc_reg(s, OPC_REM, a0, a1, a2); - break; - case INDEX_op_remu_i32: tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2); break; @@ -2748,9 +2753,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_rem_i64: case INDEX_op_remu_i64: return C_O1_I2(r, rz, rz); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 9af626eec2..320268669a 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2437,6 +2437,10 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index a4659653b3..23cca5c664 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1476,6 +1476,10 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/tcg.c b/tcg/tcg.c index 2c1307e3fc..5af5529284 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1040,6 +1040,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), + OUTOP(INDEX_op_rem_i32, TCGOutOpBinary, outop_rems), + OUTOP(INDEX_op_rem_i64, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -2270,7 +2272,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i32: return TCG_TARGET_HAS_negsetcond_i32; - case INDEX_op_rem_i32: case INDEX_op_remu_i32: return TCG_TARGET_HAS_rem_i32; case INDEX_op_rotl_i32: @@ -2327,7 +2328,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return TCG_TARGET_HAS_negsetcond_i64; - case INDEX_op_rem_i64: case INDEX_op_remu_i64: return TCG_TARGET_HAS_rem_i64; case INDEX_op_rotl_i64: @@ -5425,6 +5425,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_nor: case INDEX_op_or: case INDEX_op_orc: + case INDEX_op_rem_i32: + case INDEX_op_rem_i64: case INDEX_op_xor: { const TCGOutOpBinary *out = diff --git a/tcg/tci.c b/tcg/tci.c index bf97849bfe..65f493c3d4 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -586,7 +586,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2]; break; - case INDEX_op_rem_i32: + case INDEX_op_tci_rems32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2]; break; @@ -1081,7 +1081,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_orc: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_rem_i32: case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: @@ -1101,6 +1100,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_ctz_i64: case INDEX_op_tci_divs32: case INDEX_op_tci_divu32: + case INDEX_op_tci_rems32: tci_args_rrr(insn, &r0, &r1, &r2); info->fprintf_func(info->stream, "%-12s %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2)); diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index 43c07a269f..2822fbffc8 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -4,3 +4,4 @@ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 27271c178c..4d9c142a00 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -79,8 +79,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_rem_i32: - case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: case INDEX_op_shl_i32: @@ -754,6 +752,20 @@ static const TCGOutOpBinary outop_orc = { .out_rrr = tgen_orc, }; +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rems32 + : INDEX_op_rem_i64); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -843,7 +855,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sar) CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */ - CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */ CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */ From 9a6bc1840ec105902bda1a59c42e9e0c56a9ed05 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 19:00:51 -0800 Subject: [PATCH 054/161] tcg: Merge INDEX_op_rem_{i32,i64} Rename to INDEX_op_rems to emphasize signed inputs, and mirroring INDEX_op_remu_*. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 12 +++++++----- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 6 ++---- tcg/tci.c | 4 ++-- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 8f3b5e91b2..1f4160a585 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -287,7 +287,7 @@ Arithmetic - | *t0* = *t1* / *t2* (unsigned) | Undefined behavior if division by zero. - * - rem_i32/i64 *t0*, *t1*, *t2* + * - rems *t0*, *t1*, *t2* - | *t0* = *t1* % *t2* (signed) | Undefined behavior if division by zero or overflow. diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 61e5e185cc..040f4da835 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -56,6 +56,7 @@ DEF(nor, 1, 2, 0, TCG_OPF_INT) DEF(not, 1, 1, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) +DEF(rems, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) @@ -72,7 +73,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* arith */ -DEF(rem_i32, 1, 2, 0, 0) DEF(remu_i32, 1, 2, 0, 0) /* shifts/rotates */ DEF(shl_i32, 1, 2, 0, 0) @@ -116,7 +116,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* arith */ -DEF(rem_i64, 1, 2, 0, 0) DEF(remu_i64, 1, 2, 0, 0) /* shifts/rotates */ DEF(shl_i64, 1, 2, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index c11cce782a..01ec365175 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -569,12 +569,14 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, } return (uint64_t)x / ((uint64_t)y ? : 1); - case INDEX_op_rem_i32: - return (int32_t)x % ((int32_t)y ? : 1); + case INDEX_op_rems: + if (type == TCG_TYPE_I32) { + return (int32_t)x % ((int32_t)y ? : 1); + } + return (int64_t)x % ((int64_t)y ? : 1); + case INDEX_op_remu_i32: return (uint32_t)x % ((uint32_t)y ? : 1); - case INDEX_op_rem_i64: - return (int64_t)x % ((int64_t)y ? : 1); case INDEX_op_remu_i64: return (uint64_t)x % ((uint64_t)y ? : 1); @@ -3021,7 +3023,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_qemu_st_i128: done = fold_qemu_st(&ctx, op); break; - CASE_OP_32_64(rem): + case INDEX_op_rems: CASE_OP_32_64(remu): done = fold_remainder(&ctx, op); break; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 7ed92157de..6da8b30547 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -615,8 +615,8 @@ void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rem_i32) { - tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rems, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rems, ret, arg1, arg2); } else if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_divs, t0, arg1, arg2); @@ -1983,8 +1983,8 @@ void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rem_i64) { - tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rems, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rems, ret, arg1, arg2); } else if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_divs, t0, arg1, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 5af5529284..fad828fa2f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1040,8 +1040,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), - OUTOP(INDEX_op_rem_i32, TCGOutOpBinary, outop_rems), - OUTOP(INDEX_op_rem_i64, TCGOutOpBinary, outop_rems), + OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -5425,8 +5424,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_nor: case INDEX_op_or: case INDEX_op_orc: - case INDEX_op_rem_i32: - case INDEX_op_rem_i64: + case INDEX_op_rems: case INDEX_op_xor: { const TCGOutOpBinary *out = diff --git a/tcg/tci.c b/tcg/tci.c index 65f493c3d4..6ca033f3be 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -728,7 +728,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2]; break; - case INDEX_op_rem_i64: + case INDEX_op_rems: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2]; break; @@ -1079,9 +1079,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_nor: case INDEX_op_or: case INDEX_op_orc: + case INDEX_op_rems: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_rem_i64: case INDEX_op_remu_i32: case INDEX_op_remu_i64: case INDEX_op_shl_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 4d9c142a00..2b05da7d06 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -757,7 +757,7 @@ static void tgen_rems(TCGContext *s, TCGType type, { TCGOpcode opc = (type == TCG_TYPE_I32 ? INDEX_op_tci_rems32 - : INDEX_op_rem_i64); + : INDEX_op_rems); tcg_out_op_rrr(s, opc, a0, a1, a2); } From 967e7ccd9c5c6a5a2cc5d7a101cd24acced22749 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 20:12:08 -0800 Subject: [PATCH 055/161] tcg: Convert remu to TCGOutOpBinary For TCI, we're losing type information in the interpreter. Introduce a tci-specific opcode to handle the difference. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 22 ++++++++------- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 4 +++ tcg/i386/tcg-target.c.inc | 4 +++ tcg/loongarch64/tcg-target-con-set.h | 1 - tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 26 ++++++++++-------- tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 41 ++++++++++++++-------------- tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 25 +++++++++-------- tcg/riscv/tcg-target-con-set.h | 1 - tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 23 ++++++++-------- tcg/s390x/tcg-target.c.inc | 4 +++ tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 4 +++ tcg/tcg-has.h | 9 ------ tcg/tcg-op.c | 4 +-- tcg/tcg.c | 8 +++--- tcg/tci.c | 4 +-- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target-opc.h.inc | 1 + tcg/tci/tcg-target.c.inc | 17 ++++++++++-- 25 files changed, 112 insertions(+), 101 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index e961668ef0..1fdff25d05 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -13,7 +13,6 @@ #define have_lse2 (cpuinfo & CPUINFO_LSE2) /* optional instructions */ -#define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_rot_i32 1 @@ -29,7 +28,6 @@ #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 6e80e18a6a..8aa11e9d9d 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2287,6 +2287,18 @@ static const TCGOutOpBinary outop_rems = { .out_rrr = tgen_rems, }; +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, UDIV, type, TCG_REG_TMP0, a1, a2); + tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2404,12 +2416,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_remu_i64: - case INDEX_op_remu_i32: - tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2); - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); - break; - case INDEX_op_shl_i64: case INDEX_op_shl_i32: if (c2) { @@ -3085,10 +3091,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: - return C_O1_I2(r, r, r); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 6ed2b49c84..32d73d3443 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -34,7 +34,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 673c8fb7a6..c08cd712b1 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1959,6 +1959,10 @@ static const TCGOutOpBinary outop_rems = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index ac0721d71c..02dd440052 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2740,6 +2740,10 @@ static const TCGOutOpBinary outop_rems = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index b4af4f5423..da84e4d49c 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -31,7 +31,6 @@ C_O1_I2(r, r, rW) C_O1_I2(r, 0, rz) C_O1_I2(r, rz, ri) C_O1_I2(r, rz, rJ) -C_O1_I2(r, rz, rz) C_O1_I2(w, w, w) C_O1_I2(w, w, wM) C_O1_I2(w, w, wA) diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index aecd2879b8..5dfc69ae6a 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -11,7 +11,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_negsetcond_i32 0 -#define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 0 @@ -27,7 +26,6 @@ /* 64-bit operations */ #define TCG_TARGET_HAS_negsetcond_i64 0 -#define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index ef37b4daa7..a0313b1140 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1474,6 +1474,21 @@ static const TCGOutOpBinary outop_rems = { .out_rrr = tgen_rems, }; +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_mod_wu(s, a0, a1, a2); + } else { + tcg_out_opc_mod_du(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1724,13 +1739,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_remu_i32: - tcg_out_opc_mod_wu(s, a0, a1, a2); - break; - case INDEX_op_remu_i64: - tcg_out_opc_mod_du(s, a0, a1, a2); - break; - case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: tcg_out_setcond(s, args[3], a0, a1, a2, c2); @@ -2389,10 +2397,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rJ); - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: - return C_O1_I2(r, rz, rz); - case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: return C_O1_I4(r, rz, rJ, rz, rz); diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 9aa5bf9f1b..ab6a134796 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -39,7 +39,6 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_bswap16_i32 1 @@ -50,7 +49,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 37b878ec61..bd38c7ab95 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1887,6 +1887,27 @@ static const TCGOutOpBinary outop_rems = { .out_rrr = tgen_rems, }; +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + if (type == TCG_TYPE_I32) { + tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); + } else { + tcg_out_opc_reg(s, OPC_DMODU, a0, a1, a2); + } + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_DIVU : OPC_DDIVU; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2010,24 +2031,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_remu_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); - break; - } - i1 = OPC_DIVU, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_remu_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DMODU, a0, a1, a2); - break; - } - i1 = OPC_DDIVU, i2 = OPC_MFHI; - do_hilo1: - tcg_out_opc_reg(s, i1, 0, a1, a2); - tcg_out_opc_reg(s, i2, a0, 0, 0); - break; - case INDEX_op_muls2_i32: i1 = OPC_MULT; goto do_hilo2; @@ -2282,9 +2285,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_remu_i32: case INDEX_op_setcond_i32: - case INDEX_op_remu_i64: case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rz); case INDEX_op_muls2_i32: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index f8e4c0ad3c..37e88a3193 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -17,7 +17,6 @@ #define have_vsx (cpuinfo & CPUINFO_VSX) /* optional instructions */ -#define TCG_TARGET_HAS_rem_i32 have_isa_3_00 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 @@ -34,7 +33,6 @@ #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_rem_i64 have_isa_3_00 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index c331f0d672..80ee4d04c9 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3109,6 +3109,19 @@ static const TCGOutOpBinary outop_rems = { .out_rrr = tgen_rems, }; +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mod, + .out_rrr = tgen_remu, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3259,10 +3272,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; - case INDEX_op_remu_i32: - tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_shl_i32: if (const_args[2]) { /* Limit immediate shift count lest we create an illegal insn. */ @@ -3355,10 +3364,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_remu_i64: - tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; @@ -4213,10 +4218,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: - return C_O1_I2(r, r, r); - case INDEX_op_clz_i32: case INDEX_op_ctz_i32: case INDEX_op_clz_i64: diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index f3a6f7a7ed..f0d3cb81bd 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -16,7 +16,6 @@ C_O1_I1(r, r) C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) -C_O1_I2(r, rz, rz) C_N1_I2(r, r, rM) C_O1_I4(r, r, rI, rM, rM) C_O2_I4(r, r, rz, rz, rM, rM) diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index e5861e5260..b3c6899887 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -11,7 +11,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 @@ -26,7 +25,6 @@ #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_negsetcond_i64 1 -#define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index b0a98273f1..38ba898042 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2132,6 +2132,18 @@ static const TCGOutOpBinary outop_rems = { .out_rrr = tgen_rems, }; +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_REMUW : OPC_REMU; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2245,13 +2257,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_remu_i32: - tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2); - break; - case INDEX_op_remu_i64: - tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2); - break; - case INDEX_op_shl_i32: if (c2) { tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f); @@ -2753,10 +2758,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: - return C_O1_I2(r, rz, rz); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 320268669a..8702d8c928 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2441,6 +2441,10 @@ static const TCGOutOpBinary outop_rems = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 35f0dd4230..42de99efbf 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -14,7 +14,6 @@ extern bool use_vis3_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_rot_i32 0 #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 @@ -30,7 +29,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 23cca5c664..d465c8dd06 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1480,6 +1480,10 @@ static const TCGOutOpBinary outop_rems = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index bae9918024..0bb829be36 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -12,8 +12,6 @@ #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. */ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_div_i64 0 -#define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 @@ -32,13 +30,6 @@ #define TCG_TARGET_HAS_sub2_i32 1 #endif -#ifndef TCG_TARGET_HAS_rem_i32 -#define TCG_TARGET_HAS_rem_i32 0 -#endif -#ifndef TCG_TARGET_HAS_rem_i64 -#define TCG_TARGET_HAS_rem_i64 0 -#endif - #if !defined(TCG_TARGET_HAS_v64) \ && !defined(TCG_TARGET_HAS_v128) \ && !defined(TCG_TARGET_HAS_v256) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 6da8b30547..4ff6c9f0ab 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -649,7 +649,7 @@ void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rem_i32) { + if (tcg_op_supported(INDEX_op_remu_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -2017,7 +2017,7 @@ void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rem_i64) { + if (tcg_op_supported(INDEX_op_remu_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index fad828fa2f..537b665d07 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1041,6 +1041,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), + OUTOP(INDEX_op_remu_i32, TCGOutOpBinary, outop_remu), + OUTOP(INDEX_op_remu_i64, TCGOutOpBinary, outop_remu), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -2271,8 +2273,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i32: return TCG_TARGET_HAS_negsetcond_i32; - case INDEX_op_remu_i32: - return TCG_TARGET_HAS_rem_i32; case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: return TCG_TARGET_HAS_rot_i32; @@ -2327,8 +2327,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return TCG_TARGET_HAS_negsetcond_i64; - case INDEX_op_remu_i64: - return TCG_TARGET_HAS_rem_i64; case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: return TCG_TARGET_HAS_rot_i64; @@ -5425,6 +5423,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_or: case INDEX_op_orc: case INDEX_op_rems: + case INDEX_op_remu_i32: + case INDEX_op_remu_i64: case INDEX_op_xor: { const TCGOutOpBinary *out = diff --git a/tcg/tci.c b/tcg/tci.c index 6ca033f3be..bd5817a382 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -590,7 +590,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2]; break; - case INDEX_op_remu_i32: + case INDEX_op_tci_remu32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2]; break; @@ -1082,7 +1082,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_rems: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_remu_i32: case INDEX_op_remu_i64: case INDEX_op_shl_i32: case INDEX_op_shl_i64: @@ -1101,6 +1100,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_tci_divs32: case INDEX_op_tci_divu32: case INDEX_op_tci_rems32: + case INDEX_op_tci_remu32: tci_args_rrr(insn, &r0, &r1, &r2); info->fprintf_func(info->stream, "%-12s %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2)); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index ccec96b610..bd51b9346d 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -9,7 +9,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 @@ -25,7 +24,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 1 diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index 2822fbffc8..82d2a38cae 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -5,3 +5,4 @@ DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_remu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 2b05da7d06..421a2a8ac7 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -79,8 +79,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: @@ -766,6 +764,20 @@ static const TCGOutOpBinary outop_rems = { .out_rrr = tgen_rems, }; +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_remu32 + : INDEX_op_remu_i64); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -855,7 +867,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, CASE_32_64(sar) CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */ - CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */ CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */ tcg_out_op_rrr(s, opc, args[0], args[1], args[2]); From cd9acd2049a385e54314d58f35b4bfce7c031d55 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 20:25:14 -0800 Subject: [PATCH 056/161] tcg: Merge INDEX_op_remu_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 5 +---- tcg/optimize.c | 9 +++++---- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 6 ++---- tcg/tci.c | 4 ++-- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 16 insertions(+), 20 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 1f4160a585..bceecb0596 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -292,7 +292,7 @@ Arithmetic - | *t0* = *t1* % *t2* (signed) | Undefined behavior if division by zero or overflow. - * - remu_i32/i64 *t0*, *t1*, *t2* + * - remu *t0*, *t1*, *t2* - | *t0* = *t1* % *t2* (unsigned) | Undefined behavior if division by zero. diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 040f4da835..ebb23347e9 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -57,6 +57,7 @@ DEF(not, 1, 1, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(rems, 1, 2, 0, TCG_OPF_INT) +DEF(remu, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) @@ -72,8 +73,6 @@ DEF(ld_i32, 1, 1, 1, 0) DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) -/* arith */ -DEF(remu_i32, 1, 2, 0, 0) /* shifts/rotates */ DEF(shl_i32, 1, 2, 0, 0) DEF(shr_i32, 1, 2, 0, 0) @@ -115,8 +114,6 @@ DEF(st8_i64, 0, 2, 1, 0) DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) -/* arith */ -DEF(remu_i64, 1, 2, 0, 0) /* shifts/rotates */ DEF(shl_i64, 1, 2, 0, 0) DEF(shr_i64, 1, 2, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 01ec365175..69f9ba1555 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -575,9 +575,10 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, } return (int64_t)x % ((int64_t)y ? : 1); - case INDEX_op_remu_i32: - return (uint32_t)x % ((uint32_t)y ? : 1); - case INDEX_op_remu_i64: + case INDEX_op_remu: + if (type == TCG_TYPE_I32) { + return (uint32_t)x % ((uint32_t)y ? : 1); + } return (uint64_t)x % ((uint64_t)y ? : 1); default: @@ -3024,7 +3025,7 @@ void tcg_optimize(TCGContext *s) done = fold_qemu_st(&ctx, op); break; case INDEX_op_rems: - CASE_OP_32_64(remu): + case INDEX_op_remu: done = fold_remainder(&ctx, op); break; CASE_OP_32_64(rotl): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 4ff6c9f0ab..0f1e83a49f 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -649,8 +649,8 @@ void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_remu_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_remu, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_remu, ret, arg1, arg2); } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_divu, t0, arg1, arg2); @@ -2017,8 +2017,8 @@ void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (tcg_op_supported(INDEX_op_remu_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_remu, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_remu, ret, arg1, arg2); } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_divu, t0, arg1, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index 537b665d07..cd89ef1faa 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1041,8 +1041,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), - OUTOP(INDEX_op_remu_i32, TCGOutOpBinary, outop_remu), - OUTOP(INDEX_op_remu_i64, TCGOutOpBinary, outop_remu), + OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -5423,8 +5422,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_or: case INDEX_op_orc: case INDEX_op_rems: - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: + case INDEX_op_remu: case INDEX_op_xor: { const TCGOutOpBinary *out = diff --git a/tcg/tci.c b/tcg/tci.c index bd5817a382..5d2cba4941 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -732,7 +732,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2]; break; - case INDEX_op_remu_i64: + case INDEX_op_remu: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2]; break; @@ -1080,9 +1080,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_or: case INDEX_op_orc: case INDEX_op_rems: + case INDEX_op_remu: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_remu_i64: case INDEX_op_shl_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 421a2a8ac7..eb30fd04ba 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -769,7 +769,7 @@ static void tgen_remu(TCGContext *s, TCGType type, { TCGOpcode opc = (type == TCG_TYPE_I32 ? INDEX_op_tci_remu32 - : INDEX_op_remu_i64); + : INDEX_op_remu); tcg_out_op_rrr(s, opc, a0, a1, a2); } From 27d21ee7c79133e5dfdab8f160f547e833dc5dd0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 21:33:33 -0800 Subject: [PATCH 057/161] tcg: Convert shl to TCGOutOpBinary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 38 ++++++++++---------- tcg/arm/tcg-target.c.inc | 25 +++++++++---- tcg/i386/tcg-target.c.inc | 60 +++++++++++++++++++++++--------- tcg/loongarch64/tcg-target.c.inc | 43 ++++++++++++++--------- tcg/mips/tcg-target.c.inc | 35 ++++++++++++------- tcg/ppc/tcg-target.c.inc | 42 ++++++++++++---------- tcg/riscv/tcg-target.c.inc | 38 +++++++++++--------- tcg/s390x/tcg-target.c.inc | 37 ++++++++++++++++---- tcg/sparc64/tcg-target.c.inc | 27 ++++++++++---- tcg/tcg.c | 6 ++-- tcg/tci/tcg-target.c.inc | 14 ++++++-- 11 files changed, 241 insertions(+), 124 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 8aa11e9d9d..b57baa1eec 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1347,14 +1347,6 @@ static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); } -static inline void tcg_out_shl(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int bits = ext ? 64 : 32; - int max = bits - 1; - tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max); -} - static inline void tcg_out_shr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn, unsigned int m) { @@ -2299,6 +2291,25 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, LSLV, type, a0, a1, a2); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int max = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_ubfm(s, type, a0, a1, -a2 & max, ~a2 & max); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2416,15 +2427,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_shl_i64: - case INDEX_op_shl_i32: - if (c2) { - tcg_out_shl(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); - } - break; - case INDEX_op_shr_i64: case INDEX_op_shr_i32: if (c2) { @@ -3091,12 +3093,10 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotl_i64: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index c08cd712b1..2b9e52914c 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1963,6 +1963,25 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_REG_LSL(a2)); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, + SHIFT_IMM_LSL(a2 & 0x1f)); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2114,11 +2133,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_muls2_i32: tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; - /* XXX: Perhaps args[2] & 0x1f is wrong */ - case INDEX_op_shl_i32: - c = const_args[2] ? - SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]); - goto gen_shift32; case INDEX_op_shr_i32: c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) : SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]); @@ -2300,7 +2314,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i32: return C_O2_I2(r, r, r, r); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotl_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 02dd440052..648d9ee66c 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2744,6 +2744,49 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static TCGConstraintSetIndex cset_shift(TCGType type, unsigned flags) +{ + return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); +} + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + if (have_bmi2) { + tcg_out_vex_modrm(s, OPC_SHLX + rexw, a0, a2, a1); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_SHL, a0); + } +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + /* For small constant 3-operand shift, use LEA. */ + if (a0 != a1 && a2 >= 1 && a2 <= 3) { + if (a2 == 1) { + /* shl $1,a1,a0 -> lea (a1,a1),a0 */ + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0); + } else { + /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */ + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0); + } + return; + } + tcg_out_mov(s, type, a0, a1); + tcg_out_shifti(s, SHIFT_SHL + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_shift, + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2879,21 +2922,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(shl): - /* For small constant 3-operand shift, use LEA. */ - if (const_a2 && a0 != a1 && (a2 - 1) < 3) { - if (a2 - 1 == 0) { - /* shl $1,a1,a0 -> lea (a1,a1),a0 */ - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0); - } else { - /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */ - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0); - } - break; - } - c = SHIFT_SHL; - vexop = OPC_SHLX; - goto gen_shift_maybe_vex; OP_32_64(shr): c = SHIFT_SHR; vexop = OPC_SHRX; @@ -3759,8 +3787,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: case INDEX_op_shr_i32: case INDEX_op_shr_i64: case INDEX_op_sar_i32: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index a0313b1140..9e34c37e62 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1489,6 +1489,32 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sll_w(s, a0, a1, a2); + } else { + tcg_out_opc_sll_d(s, a0, a1, a2); + } +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f); + } +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1660,21 +1686,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); break; - case INDEX_op_shl_i32: - if (c2) { - tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_sll_w(s, a0, a1, a2); - } - break; - case INDEX_op_shl_i64: - if (c2) { - tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_sll_d(s, a0, a1, a2); - } - break; - case INDEX_op_shr_i32: if (c2) { tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f); @@ -2369,8 +2380,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: case INDEX_op_shr_i32: case INDEX_op_shr_i64: case INDEX_op_sar_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index bd38c7ab95..30d8872b4f 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1908,6 +1908,29 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_SLLV : OPC_DSLLV; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sa(s, OPC_SLL, a0, a1, a2); + } else { + tcg_out_dsll(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2068,9 +2091,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_sar_i32: i1 = OPC_SRAV, i2 = OPC_SRA; goto do_shift; - case INDEX_op_shl_i32: - i1 = OPC_SLLV, i2 = OPC_SLL; - goto do_shift; case INDEX_op_shr_i32: i1 = OPC_SRLV, i2 = OPC_SRL; goto do_shift; @@ -2099,13 +2119,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } i1 = OPC_DSRAV; goto do_shiftv; - case INDEX_op_shl_i64: - if (c2) { - tcg_out_dsll(s, a0, a1, a2); - break; - } - i1 = OPC_DSLLV; - goto do_shiftv; case INDEX_op_shr_i64: if (c2) { tcg_out_dsrl(s, a0, a1, a2); @@ -2293,12 +2306,10 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i64: case INDEX_op_mulu2_i64: return C_O2_I2(r, r, r, r); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotr_i32: case INDEX_op_rotl_i32: - case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotr_i64: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 80ee4d04c9..88cfcd1d91 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3122,6 +3122,30 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD; + tcg_out32(s, insn | SAB(a1, a0, a2)); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* Limit immediate shift count lest we create an illegal insn. */ + if (type == TCG_TYPE_I32) { + tcg_out_shli32(s, a0, a1, a2 & 31); + } else { + tcg_out_shli64(s, a0, a1, a2 & 63); + } +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3272,14 +3296,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; - case INDEX_op_shl_i32: - if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_shli32(s, args[0], args[1], args[2] & 31); - } else { - tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); - } - break; case INDEX_op_shr_i32: if (const_args[2]) { /* Limit immediate shift count lest we create an illegal insn. */ @@ -3325,14 +3341,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_brcond2(s, args, const_args); break; - case INDEX_op_shl_i64: - if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_shli64(s, args[0], args[1], args[2] & 63); - } else { - tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); - } - break; case INDEX_op_shr_i64: if (const_args[2]) { /* Limit immediate shift count lest we create an illegal insn. */ @@ -4206,12 +4214,10 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotl_i64: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 38ba898042..372c4e1651 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2144,6 +2144,27 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SLLW : OPC_SLL; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SLLIW : OPC_SLLI; + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_opc_imm(s, insn, a0, a1, a2 & mask); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2257,21 +2278,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_shl_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2); - } - break; - case INDEX_op_shl_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2); - } - break; - case INDEX_op_shr_i32: if (c2) { tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f); @@ -2758,12 +2764,10 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotl_i64: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 8702d8c928..ed68054664 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2445,6 +2445,36 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_shl_int(TCGContext *s, TCGType type, TCGReg dst, + TCGReg src, TCGReg v, tcg_target_long i) +{ + if (type != TCG_TYPE_I32) { + tcg_out_sh64(s, RSY_SLLG, dst, src, v, i); + } else if (dst == src) { + tcg_out_sh32(s, RS_SLL, dst, v, i); + } else { + tcg_out_sh64(s, RSY_SLLK, dst, src, v, i); + } +} + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_shl_int(s, type, a0, a1, a2, 0); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_shl_int(s, type, a0, a1, TCG_REG_NONE, a2); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2574,9 +2604,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_shl_i32: - op = RS_SLL; - op2 = RSY_SLLK; do_shift32: a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; if (a0 == a1) { @@ -2746,8 +2773,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]); break; - case INDEX_op_shl_i64: - op = RSY_SLLG; do_shift64: if (const_args[2]) { tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); @@ -3346,7 +3371,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotl_i32: @@ -3363,7 +3387,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_clz_i64: return C_O1_I2(r, r, rI); - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: return C_O1_I2(r, r, ri); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index d465c8dd06..6b320a8622 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1484,6 +1484,27 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SLL : SHIFT_SLLX; + tcg_out_arith(s, a0, a1, a2, insn); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SLL : SHIFT_SLLX; + uint32_t mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_arithi(s, a0, a1, a2 & mask, insn); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1587,8 +1608,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_st32_i64: tcg_out_ldst(s, a0, a1, a2, STW); break; - case INDEX_op_shl_i32: - c = SHIFT_SLL; do_shift32: /* Limit immediate shift count lest we create an illegal insn. */ tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); @@ -1656,8 +1675,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_st_i64: tcg_out_ldst(s, a0, a1, a2, STX); break; - case INDEX_op_shl_i64: - c = SHIFT_SLLX; do_shift64: /* Limit immediate shift count lest we create an illegal insn. */ tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); @@ -1751,8 +1768,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: case INDEX_op_shr_i32: case INDEX_op_shr_i64: case INDEX_op_sar_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index cd89ef1faa..369a1e6d48 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1042,6 +1042,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), + OUTOP(INDEX_op_shl_i32, TCGOutOpBinary, outop_shl), + OUTOP(INDEX_op_shl_i64, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -2262,7 +2264,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_extract_i32: @@ -2314,7 +2315,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st16_i64: case INDEX_op_st32_i64: case INDEX_op_st_i64: - case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_ext_i32_i64: @@ -5423,6 +5423,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_orc: case INDEX_op_rems: case INDEX_op_remu: + case INDEX_op_shl_i32: + case INDEX_op_shl_i64: case INDEX_op_xor: { const TCGOutOpBinary *out = diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index eb30fd04ba..748bb8118f 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -79,8 +79,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: case INDEX_op_shr_i32: case INDEX_op_shr_i64: case INDEX_op_sar_i32: @@ -778,6 +776,17 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, glue(INDEX_op_shl_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_shl, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -862,7 +871,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(shl) CASE_32_64(shr) CASE_32_64(sar) CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ From 6ca594517ab389f3095c4aab745e168cdd8e8ff5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 21:50:04 -0800 Subject: [PATCH 058/161] tcg: Merge INDEX_op_shl_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 4 ++-- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 10 +++++----- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 6 ++---- tcg/tci.c | 13 ++++--------- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 17 insertions(+), 25 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index bceecb0596..f64c881530 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -379,10 +379,10 @@ Shifts/Rotates .. list-table:: - * - shl_i32/i64 *t0*, *t1*, *t2* + * - shl *t0*, *t1*, *t2* - | *t0* = *t1* << *t2* - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. * - shr_i32/i64 *t0*, *t1*, *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index ebb23347e9..c2ac25d1b6 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -58,6 +58,7 @@ DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(rems, 1, 2, 0, TCG_OPF_INT) DEF(remu, 1, 2, 0, TCG_OPF_INT) +DEF(shl, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) @@ -74,7 +75,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* shifts/rotates */ -DEF(shl_i32, 1, 2, 0, 0) DEF(shr_i32, 1, 2, 0, 0) DEF(sar_i32, 1, 2, 0, 0) DEF(rotl_i32, 1, 2, 0, 0) @@ -115,7 +115,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* shifts/rotates */ -DEF(shl_i64, 1, 2, 0, 0) DEF(shr_i64, 1, 2, 0, 0) DEF(sar_i64, 1, 2, 0, 0) DEF(rotl_i64, 1, 2, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 69f9ba1555..3142daa800 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -446,10 +446,10 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, case INDEX_op_xor_vec: return x ^ y; - case INDEX_op_shl_i32: - return (uint32_t)x << (y & 31); - - case INDEX_op_shl_i64: + case INDEX_op_shl: + if (type == TCG_TYPE_I32) { + return (uint32_t)x << (y & 31); + } return (uint64_t)x << (y & 63); case INDEX_op_shr_i32: @@ -3031,7 +3031,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(rotl): CASE_OP_32_64(rotr): CASE_OP_32_64(sar): - CASE_OP_32_64(shl): + case INDEX_op_shl: CASE_OP_32_64(shr): done = fold_shift(&ctx, op); break; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 0f1e83a49f..c85c056726 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -481,7 +481,7 @@ void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_shl, ret, arg1, arg2); } void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -1606,7 +1606,7 @@ void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_shl, ret, arg1, arg2); } else { gen_helper_shl_i64(ret, arg1, arg2); } diff --git a/tcg/tcg.c b/tcg/tcg.c index 369a1e6d48..50935b7e03 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1042,8 +1042,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), - OUTOP(INDEX_op_shl_i32, TCGOutOpBinary, outop_shl), - OUTOP(INDEX_op_shl_i64, TCGOutOpBinary, outop_shl), + OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -5423,8 +5422,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_orc: case INDEX_op_rems: case INDEX_op_remu: - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: + case INDEX_op_shl: case INDEX_op_xor: { const TCGOutOpBinary *out = diff --git a/tcg/tci.c b/tcg/tci.c index 5d2cba4941..22401ce1f6 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -615,11 +615,11 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, break; #endif - /* Shift/rotate operations (32 bit). */ + /* Shift/rotate operations. */ - case INDEX_op_shl_i32: + case INDEX_op_shl: tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = (uint32_t)regs[r1] << (regs[r2] & 31); + regs[r0] = regs[r1] << (regs[r2] % TCG_TARGET_REG_BITS); break; case INDEX_op_shr_i32: tci_args_rrr(insn, &r0, &r1, &r2); @@ -787,10 +787,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Shift/rotate operations (64 bit). */ - case INDEX_op_shl_i64: - tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = regs[r1] << (regs[r2] & 63); - break; case INDEX_op_shr_i64: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] >> (regs[r2] & 63); @@ -1081,10 +1077,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_orc: case INDEX_op_rems: case INDEX_op_remu: + case INDEX_op_shl: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: case INDEX_op_shr_i32: case INDEX_op_shr_i64: case INDEX_op_sar_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 748bb8118f..ca83a097ab 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -779,7 +779,7 @@ static const TCGOutOpBinary outop_remu = { static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { - tcg_out_op_rrr(s, glue(INDEX_op_shl_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_shl, a0, a1, a2); } static const TCGOutOpBinary outop_shl = { From edd6ba8a6bc804153a8fe643a7e2dae0802db98c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 22:22:36 -0800 Subject: [PATCH 059/161] tcg: Convert shr to TCGOutOpBinary Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 37 ++++++++++++++------------- tcg/arm/tcg-target.c.inc | 24 ++++++++++++++---- tcg/i386/tcg-target.c.inc | 33 +++++++++++++++++++----- tcg/loongarch64/tcg-target.c.inc | 43 +++++++++++++++++++------------- tcg/mips/tcg-target.c.inc | 35 +++++++++++++++++--------- tcg/ppc/tcg-target.c.inc | 42 ++++++++++++++++++------------- tcg/riscv/tcg-target.c.inc | 38 +++++++++++++++------------- tcg/s390x/tcg-target.c.inc | 39 ++++++++++++++++++++++------- tcg/sparc64/tcg-target.c.inc | 29 +++++++++++++++------ tcg/tcg.c | 6 +++-- tcg/tci/tcg-target.c.inc | 18 ++++++++++--- 11 files changed, 229 insertions(+), 115 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index b57baa1eec..87b97e852a 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1347,13 +1347,6 @@ static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); } -static inline void tcg_out_shr(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_ubfm(s, ext, rd, rn, m & max, max); -} - static inline void tcg_out_sar(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn, unsigned int m) { @@ -2310,6 +2303,25 @@ static const TCGOutOpBinary outop_shl = { .out_rri = tgen_shli, }; +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, LSRV, type, a0, a1, a2); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int max = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_ubfm(s, type, a0, a1, a2 & max, max); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2427,15 +2439,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_shr_i64: - case INDEX_op_shr_i32: - if (c2) { - tcg_out_shr(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); - } - break; - case INDEX_op_sar_i64: case INDEX_op_sar_i32: if (c2) { @@ -3093,11 +3096,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 2b9e52914c..247aefd0a1 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1982,6 +1982,25 @@ static const TCGOutOpBinary outop_shl = { .out_rri = tgen_shli, }; +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_REG_LSR(a2)); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, + SHIFT_IMM_LSR(a2 & 0x1f)); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2133,10 +2152,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_muls2_i32: tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; - case INDEX_op_shr_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]); - goto gen_shift32; case INDEX_op_sar_i32: c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) : SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]); @@ -2314,7 +2329,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i32: return C_O2_I2(r, r, r, r); - case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 648d9ee66c..93d94e7881 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2787,6 +2787,33 @@ static const TCGOutOpBinary outop_shl = { .out_rri = tgen_shli, }; +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + if (have_bmi2) { + tcg_out_vex_modrm(s, OPC_SHRX + rexw, a0, a2, a1); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_SHR, a0); + } +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + tcg_out_mov(s, type, a0, a1); + tcg_out_shifti(s, SHIFT_SHR + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_shift, + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2922,10 +2949,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(shr): - c = SHIFT_SHR; - vexop = OPC_SHRX; - goto gen_shift_maybe_vex; OP_32_64(sar): c = SHIFT_SAR; vexop = OPC_SARX; @@ -3787,8 +3810,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: case INDEX_op_sar_i32: case INDEX_op_sar_i64: return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 9e34c37e62..2699079073 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1515,6 +1515,32 @@ static const TCGOutOpBinary outop_shl = { .out_rri = tgen_shli, }; +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_srl_w(s, a0, a1, a2); + } else { + tcg_out_opc_srl_d(s, a0, a1, a2); + } +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f); + } +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1686,21 +1712,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); break; - case INDEX_op_shr_i32: - if (c2) { - tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_srl_w(s, a0, a1, a2); - } - break; - case INDEX_op_shr_i64: - if (c2) { - tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_srl_d(s, a0, a1, a2); - } - break; - case INDEX_op_sar_i32: if (c2) { tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f); @@ -2380,8 +2391,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: case INDEX_op_sar_i32: case INDEX_op_sar_i64: case INDEX_op_rotl_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 30d8872b4f..03b4248ea9 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1931,6 +1931,29 @@ static const TCGOutOpBinary outop_shl = { .out_rri = tgen_shli, }; +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_SRLV : OPC_DSRLV; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sa(s, OPC_SRL, a0, a1, a2); + } else { + tcg_out_dsrl(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2091,9 +2114,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_sar_i32: i1 = OPC_SRAV, i2 = OPC_SRA; goto do_shift; - case INDEX_op_shr_i32: - i1 = OPC_SRLV, i2 = OPC_SRL; - goto do_shift; case INDEX_op_rotr_i32: i1 = OPC_ROTRV, i2 = OPC_ROTR; do_shift: @@ -2119,13 +2139,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } i1 = OPC_DSRAV; goto do_shiftv; - case INDEX_op_shr_i64: - if (c2) { - tcg_out_dsrl(s, a0, a1, a2); - break; - } - i1 = OPC_DSRLV; - goto do_shiftv; case INDEX_op_rotr_i64: if (c2) { tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, a2); @@ -2306,11 +2319,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i64: case INDEX_op_mulu2_i64: return C_O2_I2(r, r, r, r); - case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotr_i32: case INDEX_op_rotl_i32: - case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotr_i64: case INDEX_op_rotl_i64: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 88cfcd1d91..2012734bb3 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3146,6 +3146,30 @@ static const TCGOutOpBinary outop_shl = { .out_rri = tgen_shli, }; +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD; + tcg_out32(s, insn | SAB(a1, a0, a2)); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* Limit immediate shift count lest we create an illegal insn. */ + if (type == TCG_TYPE_I32) { + tcg_out_shri32(s, a0, a1, a2 & 31); + } else { + tcg_out_shri64(s, a0, a1, a2 & 63); + } +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3296,14 +3320,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; - case INDEX_op_shr_i32: - if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_shri32(s, args[0], args[1], args[2] & 31); - } else { - tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); - } - break; case INDEX_op_sar_i32: if (const_args[2]) { tcg_out_sari32(s, args[0], args[1], args[2]); @@ -3341,14 +3357,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_brcond2(s, args, const_args); break; - case INDEX_op_shr_i64: - if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_shri64(s, args[0], args[1], args[2] & 63); - } else { - tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); - } - break; case INDEX_op_sar_i64: if (const_args[2]) { tcg_out_sari64(s, args[0], args[1], args[2]); @@ -4214,11 +4222,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 372c4e1651..8020cc0b3f 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2165,6 +2165,27 @@ static const TCGOutOpBinary outop_shl = { .out_rri = tgen_shli, }; +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRLW : OPC_SRL; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRLIW : OPC_SRLI; + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_opc_imm(s, insn, a0, a1, a2 & mask); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2278,21 +2299,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_shr_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2); - } - break; - case INDEX_op_shr_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2); - } - break; - case INDEX_op_sar_i32: if (c2) { tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f); @@ -2764,11 +2770,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index ed68054664..0417bbef50 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2475,6 +2475,36 @@ static const TCGOutOpBinary outop_shl = { .out_rri = tgen_shli, }; +static void tgen_shr_int(TCGContext *s, TCGType type, TCGReg dst, + TCGReg src, TCGReg v, tcg_target_long i) +{ + if (type != TCG_TYPE_I32) { + tcg_out_sh64(s, RSY_SRLG, dst, src, v, i); + } else if (dst == src) { + tcg_out_sh32(s, RS_SRL, dst, v, i); + } else { + tcg_out_sh64(s, RSY_SRLK, dst, src, v, i); + } +} + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_shr_int(s, type, a0, a1, a2, 0); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_shr_int(s, type, a0, a1, TCG_REG_NONE, a2); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2621,10 +2651,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } } break; - case INDEX_op_shr_i32: - op = RS_SRL; - op2 = RSY_SRLK; - goto do_shift32; case INDEX_op_sar_i32: op = RS_SRA; op2 = RSY_SRAK; @@ -2780,9 +2806,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_sh64(s, op, args[0], args[1], args[2], 0); } break; - case INDEX_op_shr_i64: - op = RSY_SRLG; - goto do_shift64; case INDEX_op_sar_i64: op = RSY_SRAG; goto do_shift64; @@ -3371,7 +3394,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_rotl_i32: case INDEX_op_rotl_i64: @@ -3387,7 +3409,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_clz_i64: return C_O1_I2(r, r, rI); - case INDEX_op_shr_i32: case INDEX_op_sar_i32: return C_O1_I2(r, r, ri); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 6b320a8622..f679fa04ea 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1505,6 +1505,27 @@ static const TCGOutOpBinary outop_shl = { .out_rri = tgen_shli, }; +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SRL : SHIFT_SRLX; + tcg_out_arith(s, a0, a1, a2, insn); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SRL : SHIFT_SRLX; + uint32_t mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_arithi(s, a0, a1, a2 & mask, insn); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1612,9 +1633,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, /* Limit immediate shift count lest we create an illegal insn. */ tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); break; - case INDEX_op_shr_i32: - c = SHIFT_SRL; - goto do_shift32; case INDEX_op_sar_i32: c = SHIFT_SRA; goto do_shift32; @@ -1679,9 +1697,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, /* Limit immediate shift count lest we create an illegal insn. */ tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); break; - case INDEX_op_shr_i64: - c = SHIFT_SRLX; - goto do_shift64; case INDEX_op_sar_i64: c = SHIFT_SRAX; goto do_shift64; @@ -1768,8 +1783,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: case INDEX_op_sar_i32: case INDEX_op_sar_i64: case INDEX_op_setcond_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index 50935b7e03..134ab9c6c2 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1043,6 +1043,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), + OUTOP(INDEX_op_shr_i32, TCGOutOpBinary, outop_shr), + OUTOP(INDEX_op_shr_i64, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -2263,7 +2265,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_shr_i32: case INDEX_op_sar_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: @@ -2314,7 +2315,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st16_i64: case INDEX_op_st32_i64: case INDEX_op_st_i64: - case INDEX_op_shr_i64: case INDEX_op_sar_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: @@ -5423,6 +5423,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_rems: case INDEX_op_remu: case INDEX_op_shl: + case INDEX_op_shr_i32: + case INDEX_op_shr_i64: case INDEX_op_xor: { const TCGOutOpBinary *out = diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index ca83a097ab..5651833ac9 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -79,8 +79,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: case INDEX_op_sar_i32: case INDEX_op_sar_i64: case INDEX_op_rotl_i32: @@ -787,6 +785,21 @@ static const TCGOutOpBinary outop_shl = { .out_rrr = tgen_shl, }; +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type < TCG_TYPE_REG) { + tcg_out_ext32u(s, TCG_REG_TMP, a1); + a1 = TCG_REG_TMP; + } + tcg_out_op_rrr(s, glue(INDEX_op_shr_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_shr, +}; + static void tgen_sub(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -871,7 +884,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(shr) CASE_32_64(sar) CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */ From 74dbd36f1f87bd7fc4705644d63c5561a23b0567 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 22:52:10 -0800 Subject: [PATCH 060/161] tcg: Merge INDEX_op_shr_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 4 ++-- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 17 +++++++---------- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 6 ++---- tcg/tci.c | 11 +++-------- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 18 insertions(+), 29 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index f64c881530..f9fd4b0087 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -384,10 +384,10 @@ Shifts/Rotates - | *t0* = *t1* << *t2* | Unspecified behavior for negative or out-of-range shifts. - * - shr_i32/i64 *t0*, *t1*, *t2* + * - shr *t0*, *t1*, *t2* - | *t0* = *t1* >> *t2* (unsigned) - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. * - sar_i32/i64 *t0*, *t1*, *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index c2ac25d1b6..35e0be8f80 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -59,6 +59,7 @@ DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(rems, 1, 2, 0, TCG_OPF_INT) DEF(remu, 1, 2, 0, TCG_OPF_INT) DEF(shl, 1, 2, 0, TCG_OPF_INT) +DEF(shr, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) @@ -75,7 +76,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* shifts/rotates */ -DEF(shr_i32, 1, 2, 0, 0) DEF(sar_i32, 1, 2, 0, 0) DEF(rotl_i32, 1, 2, 0, 0) DEF(rotr_i32, 1, 2, 0, 0) @@ -115,7 +115,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* shifts/rotates */ -DEF(shr_i64, 1, 2, 0, 0) DEF(sar_i64, 1, 2, 0, 0) DEF(rotl_i64, 1, 2, 0, 0) DEF(rotr_i64, 1, 2, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 3142daa800..43db079693 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -452,10 +452,10 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, } return (uint64_t)x << (y & 63); - case INDEX_op_shr_i32: - return (uint32_t)x >> (y & 31); - - case INDEX_op_shr_i64: + case INDEX_op_shr: + if (type == TCG_TYPE_I32) { + return (uint32_t)x >> (y & 31); + } return (uint64_t)x >> (y & 63); case INDEX_op_sar_i32: @@ -2342,7 +2342,6 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode shr_opc; TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; @@ -2364,7 +2363,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) switch (ctx->type) { case TCG_TYPE_I32: - shr_opc = INDEX_op_shr_i32; if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) { uext_opc = INDEX_op_extract_i32; } @@ -2373,7 +2371,6 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } break; case TCG_TYPE_I64: - shr_opc = INDEX_op_shr_i64; if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) { uext_opc = INDEX_op_extract_i64; } @@ -2402,7 +2399,7 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) op->args[3] = 1; } else { if (sh) { - op2 = opt_insert_before(ctx, op, shr_opc, 3); + op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3); op2->args[0] = ret; op2->args[1] = src1; op2->args[2] = arg_new_constant(ctx, sh); @@ -2609,7 +2606,7 @@ static bool fold_shift(OptContext *ctx, TCGOp *op) * input sign repetitions. */ return fold_masks_s(ctx, op, s_mask); - CASE_OP_32_64(shr): + case INDEX_op_shr: /* * If the sign bit is known zero, then logical right shift * will not reduce the number of input sign repetitions. @@ -3032,7 +3029,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(rotr): CASE_OP_32_64(sar): case INDEX_op_shl: - CASE_OP_32_64(shr): + case INDEX_op_shr: done = fold_shift(&ctx, op); break; CASE_OP_32_64(setcond): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index c85c056726..ef8cf5a1ac 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -496,7 +496,7 @@ void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_shr, ret, arg1, arg2); } void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -1615,7 +1615,7 @@ void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_shr, ret, arg1, arg2); } else { gen_helper_shr_i64(ret, arg1, arg2); } diff --git a/tcg/tcg.c b/tcg/tcg.c index 134ab9c6c2..939bbe86e9 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1043,8 +1043,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), - OUTOP(INDEX_op_shr_i32, TCGOutOpBinary, outop_shr), - OUTOP(INDEX_op_shr_i64, TCGOutOpBinary, outop_shr), + OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), }; @@ -5423,8 +5422,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_rems: case INDEX_op_remu: case INDEX_op_shl: - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: + case INDEX_op_shr: case INDEX_op_xor: { const TCGOutOpBinary *out = diff --git a/tcg/tci.c b/tcg/tci.c index 22401ce1f6..376b1b1ece 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -621,9 +621,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] << (regs[r2] % TCG_TARGET_REG_BITS); break; - case INDEX_op_shr_i32: + case INDEX_op_shr: tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = (uint32_t)regs[r1] >> (regs[r2] & 31); + regs[r0] = regs[r1] >> (regs[r2] % TCG_TARGET_REG_BITS); break; case INDEX_op_sar_i32: tci_args_rrr(insn, &r0, &r1, &r2); @@ -787,10 +787,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Shift/rotate operations (64 bit). */ - case INDEX_op_shr_i64: - tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = regs[r1] >> (regs[r2] & 63); - break; case INDEX_op_sar_i64: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] >> (regs[r2] & 63); @@ -1078,10 +1074,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_rems: case INDEX_op_remu: case INDEX_op_shl: + case INDEX_op_shr: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: case INDEX_op_sar_i32: case INDEX_op_sar_i64: case INDEX_op_rotl_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 5651833ac9..c0dbe873f1 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -792,7 +792,7 @@ static void tgen_shr(TCGContext *s, TCGType type, tcg_out_ext32u(s, TCG_REG_TMP, a1); a1 = TCG_REG_TMP; } - tcg_out_op_rrr(s, glue(INDEX_op_shr_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_shr, a0, a1, a2); } static const TCGOutOpBinary outop_shr = { From b5aafbaa834653abd4c208794bacbc53a4c1bc16 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 7 Jan 2025 23:36:22 -0800 Subject: [PATCH 061/161] tcg: Convert sar to TCGOutOpBinary Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 37 ++++++++--------- tcg/arm/tcg-target.c.inc | 26 ++++++++---- tcg/i386/tcg-target.c.inc | 46 ++++++++++++--------- tcg/loongarch64/tcg-target.c.inc | 43 ++++++++++++-------- tcg/mips/tcg-target.c.inc | 36 +++++++++++------ tcg/ppc/tcg-target.c.inc | 40 +++++++++++-------- tcg/riscv/tcg-target.c.inc | 38 ++++++++++-------- tcg/s390x/tcg-target.c.inc | 68 ++++++++++++++------------------ tcg/sparc64/tcg-target.c.inc | 37 +++++++++-------- tcg/tcg.c | 6 ++- tcg/tci/tcg-target.c.inc | 17 +++++++- 11 files changed, 230 insertions(+), 164 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 87b97e852a..90bdbf8387 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1347,13 +1347,6 @@ static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); } -static inline void tcg_out_sar(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_sbfm(s, ext, rd, rn, m & max, max); -} - static inline void tcg_out_rotr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn, unsigned int m) { @@ -2284,6 +2277,25 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, ASRV, type, a0, a1, a2); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int max = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_sbfm(s, type, a0, a1, a2 & max, max); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2439,15 +2451,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_sar_i64: - case INDEX_op_sar_i32: - if (c2) { - tcg_out_sar(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); - } - break; - case INDEX_op_rotr_i64: case INDEX_op_rotr_i32: if (c2) { @@ -3096,10 +3099,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_sar_i64: case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 247aefd0a1..058677650b 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1963,6 +1963,25 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_REG_ASR(a2)); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, + SHIFT_IMM_ASR(a2 & 0x1f)); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2152,15 +2171,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_muls2_i32: tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; - case INDEX_op_sar_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]); - goto gen_shift32; case INDEX_op_rotr_i32: c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) : SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]); - /* Fall through. */ - gen_shift32: tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c); break; @@ -2329,7 +2342,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i32: return C_O2_I2(r, r, r, r); - case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: return C_O1_I2(r, r, ri); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 93d94e7881..1e81455461 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2749,6 +2749,33 @@ static TCGConstraintSetIndex cset_shift(TCGType type, unsigned flags) return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); } +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + if (have_bmi2) { + tcg_out_vex_modrm(s, OPC_SARX + rexw, a0, a2, a1); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_SAR, a0); + } +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + tcg_out_mov(s, type, a0, a1); + tcg_out_shifti(s, SHIFT_SAR + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_shift, + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2874,7 +2901,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; - int c, const_a2, vexop, rexw; + int c, const_a2, rexw; #if TCG_TARGET_REG_BITS == 64 # define OP_32_64(x) \ @@ -2949,25 +2976,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(sar): - c = SHIFT_SAR; - vexop = OPC_SARX; - goto gen_shift_maybe_vex; OP_32_64(rotl): c = SHIFT_ROL; goto gen_shift; OP_32_64(rotr): c = SHIFT_ROR; goto gen_shift; - gen_shift_maybe_vex: - if (have_bmi2) { - if (!const_a2) { - tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1); - break; - } - tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); - } - /* FALLTHRU */ gen_shift: if (const_a2) { tcg_out_shifti(s, c + rexw, a0, a2); @@ -3810,10 +3824,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: - return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); - case INDEX_op_rotl_i32: case INDEX_op_rotl_i64: case INDEX_op_rotr_i32: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 2699079073..aae0f03505 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1489,6 +1489,32 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sra_w(s, a0, a1, a2); + } else { + tcg_out_opc_sra_d(s, a0, a1, a2); + } +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f); + } +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1712,21 +1738,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); break; - case INDEX_op_sar_i32: - if (c2) { - tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_sra_w(s, a0, a1, a2); - } - break; - case INDEX_op_sar_i64: - if (c2) { - tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_sra_d(s, a0, a1, a2); - } - break; - case INDEX_op_rotl_i32: /* transform into equivalent rotr/rotri */ if (c2) { @@ -2391,8 +2402,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: case INDEX_op_rotl_i32: case INDEX_op_rotl_i64: case INDEX_op_rotr_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 03b4248ea9..16c3d59c19 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1908,6 +1908,29 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_SRAV : OPC_DSRAV; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sa(s, OPC_SRA, a0, a1, a2); + } else { + tcg_out_dsra(s, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2111,12 +2134,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_dsra(s, a0, a1, 32); break; - case INDEX_op_sar_i32: - i1 = OPC_SRAV, i2 = OPC_SRA; - goto do_shift; case INDEX_op_rotr_i32: i1 = OPC_ROTRV, i2 = OPC_ROTR; - do_shift: if (c2) { tcg_out_opc_sa(s, i2, a0, a1, a2); break; @@ -2132,13 +2151,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); } break; - case INDEX_op_sar_i64: - if (c2) { - tcg_out_dsra(s, a0, a1, a2); - break; - } - i1 = OPC_DSRAV; - goto do_shiftv; case INDEX_op_rotr_i64: if (c2) { tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, a2); @@ -2319,10 +2331,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i64: case INDEX_op_mulu2_i64: return C_O2_I2(r, r, r, r); - case INDEX_op_sar_i32: case INDEX_op_rotr_i32: case INDEX_op_rotl_i32: - case INDEX_op_sar_i64: case INDEX_op_rotr_i64: case INDEX_op_rotl_i64: return C_O1_I2(r, r, ri); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 2012734bb3..24e8f675bb 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3122,6 +3122,30 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SRAW : SRAD; + tcg_out32(s, insn | SAB(a1, a0, a2)); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* Limit immediate shift count lest we create an illegal insn. */ + if (type == TCG_TYPE_I32) { + tcg_out_sari32(s, a0, a1, a2 & 31); + } else { + tcg_out_sari64(s, a0, a1, a2 & 63); + } +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3320,13 +3344,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; - case INDEX_op_sar_i32: - if (const_args[2]) { - tcg_out_sari32(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); - } - break; case INDEX_op_rotl_i32: if (const_args[2]) { tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); @@ -3357,13 +3374,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_brcond2(s, args, const_args); break; - case INDEX_op_sar_i64: - if (const_args[2]) { - tcg_out_sari64(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); - } - break; case INDEX_op_rotl_i64: if (const_args[2]) { tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); @@ -4222,10 +4232,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_sar_i64: case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 8020cc0b3f..8cab07a392 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2144,6 +2144,27 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRAW : OPC_SRA; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRAIW : OPC_SRAI; + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_opc_imm(s, insn, a0, a1, a2 & mask); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2299,21 +2320,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_sar_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2); - } - break; - case INDEX_op_sar_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2); - } - break; - case INDEX_op_rotl_i32: if (c2) { tcg_out_opc_imm(s, OPC_RORIW, a0, a1, -a2 & 0x1f); @@ -2770,10 +2776,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_sar_i32: case INDEX_op_rotl_i32: case INDEX_op_rotr_i32: - case INDEX_op_sar_i64: case INDEX_op_rotl_i64: case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 0417bbef50..1cf4920276 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2445,6 +2445,36 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_sar_int(TCGContext *s, TCGType type, TCGReg dst, + TCGReg src, TCGReg v, tcg_target_long i) +{ + if (type != TCG_TYPE_I32) { + tcg_out_sh64(s, RSY_SRAG, dst, src, v, i); + } else if (dst == src) { + tcg_out_sh32(s, RS_SRA, dst, v, i); + } else { + tcg_out_sh64(s, RSY_SRAK, dst, src, v, i); + } +} + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_sar_int(s, type, a0, a1, a2, 0); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_sar_int(s, type, a0, a1, TCG_REG_NONE, a2); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + static void tgen_shl_int(TCGContext *s, TCGType type, TCGReg dst, TCGReg src, TCGReg v, tcg_target_long i) { @@ -2586,7 +2616,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - S390Opcode op, op2; TCGArg a0, a1, a2; switch (opc) { @@ -2634,28 +2663,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - do_shift32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (a0 == a1) { - if (const_args[2]) { - tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2); - } else { - tcg_out_sh32(s, op, a0, a2, 0); - } - } else { - /* Using tcg_out_sh64 here for the format; it is a 32-bit shift. */ - if (const_args[2]) { - tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2); - } else { - tcg_out_sh64(s, op2, a0, a1, a2, 0); - } - } - break; - case INDEX_op_sar_i32: - op = RS_SRA; - op2 = RSY_SRAK; - goto do_shift32; - case INDEX_op_rotl_i32: /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */ if (const_args[2]) { @@ -2799,17 +2806,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]); break; - do_shift64: - if (const_args[2]) { - tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, op, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_sar_i64: - op = RSY_SRAG; - goto do_shift64; - case INDEX_op_rotl_i64: if (const_args[2]) { tcg_out_sh64(s, RSY_RLLG, args[0], args[1], @@ -3394,7 +3390,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_sar_i64: case INDEX_op_rotl_i32: case INDEX_op_rotl_i64: case INDEX_op_rotr_i32: @@ -3409,9 +3404,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_clz_i64: return C_O1_I2(r, r, rI); - case INDEX_op_sar_i32: - return C_O1_I2(r, r, ri); - case INDEX_op_brcond_i32: return C_O0_I2(r, ri); case INDEX_op_brcond_i64: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index f679fa04ea..42d81c1e6c 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1484,6 +1484,27 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SRA : SHIFT_SRAX; + tcg_out_arith(s, a0, a1, a2, insn); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SRA : SHIFT_SRAX; + uint32_t mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_arithi(s, a0, a1, a2 & mask, insn); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1629,13 +1650,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_st32_i64: tcg_out_ldst(s, a0, a1, a2, STW); break; - do_shift32: - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); - break; - case INDEX_op_sar_i32: - c = SHIFT_SRA; - goto do_shift32; case INDEX_op_brcond_i32: tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); @@ -1693,13 +1707,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_st_i64: tcg_out_ldst(s, a0, a1, a2, STX); break; - do_shift64: - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); - break; - case INDEX_op_sar_i64: - c = SHIFT_SRAX; - goto do_shift64; case INDEX_op_brcond_i64: tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); @@ -1783,8 +1790,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: case INDEX_op_negsetcond_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index 939bbe86e9..ffe9efbf79 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1042,6 +1042,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), + OUTOP(INDEX_op_sar_i32, TCGOutOpBinary, outop_sar), + OUTOP(INDEX_op_sar_i64, TCGOutOpBinary, outop_sar), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), @@ -2264,7 +2266,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_sar_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: case INDEX_op_deposit_i32: @@ -2314,7 +2315,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st16_i64: case INDEX_op_st32_i64: case INDEX_op_st_i64: - case INDEX_op_sar_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extract_i64: @@ -5421,6 +5421,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_orc: case INDEX_op_rems: case INDEX_op_remu: + case INDEX_op_sar_i32: + case INDEX_op_sar_i64: case INDEX_op_shl: case INDEX_op_shr: case INDEX_op_xor: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index c0dbe873f1..f50a2d6574 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -79,8 +79,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: case INDEX_op_rotl_i32: case INDEX_op_rotl_i64: case INDEX_op_rotr_i32: @@ -774,6 +772,21 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type < TCG_TYPE_REG) { + tcg_out_ext32s(s, TCG_REG_TMP, a1); + a1 = TCG_REG_TMP; + } + tcg_out_op_rrr(s, glue(INDEX_op_sar_i,TCG_TARGET_REG_BITS), a0, a1, a2); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sar, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { From 3949f365eb6e7c934831c65c67b729562846ede9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 08:05:18 -0800 Subject: [PATCH 062/161] tcg: Merge INDEX_op_sar_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 4 ++-- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 12 ++++++------ tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 6 ++---- tcg/tci.c | 12 ++++-------- tcg/tci/tcg-target.c.inc | 3 +-- 7 files changed, 18 insertions(+), 26 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index f9fd4b0087..be82fed41a 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -389,10 +389,10 @@ Shifts/Rotates - | *t0* = *t1* >> *t2* (unsigned) | Unspecified behavior for negative or out-of-range shifts. - * - sar_i32/i64 *t0*, *t1*, *t2* + * - sar *t0*, *t1*, *t2* - | *t0* = *t1* >> *t2* (signed) - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. * - rotl_i32/i64 *t0*, *t1*, *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 35e0be8f80..cb8c134e94 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -58,6 +58,7 @@ DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(rems, 1, 2, 0, TCG_OPF_INT) DEF(remu, 1, 2, 0, TCG_OPF_INT) +DEF(sar, 1, 2, 0, TCG_OPF_INT) DEF(shl, 1, 2, 0, TCG_OPF_INT) DEF(shr, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) @@ -76,7 +77,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* shifts/rotates */ -DEF(sar_i32, 1, 2, 0, 0) DEF(rotl_i32, 1, 2, 0, 0) DEF(rotr_i32, 1, 2, 0, 0) DEF(deposit_i32, 1, 2, 2, 0) @@ -115,7 +115,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* shifts/rotates */ -DEF(sar_i64, 1, 2, 0, 0) DEF(rotl_i64, 1, 2, 0, 0) DEF(rotr_i64, 1, 2, 0, 0) DEF(deposit_i64, 1, 2, 2, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 43db079693..f94be19b72 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -458,10 +458,10 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, } return (uint64_t)x >> (y & 63); - case INDEX_op_sar_i32: - return (int32_t)x >> (y & 31); - - case INDEX_op_sar_i64: + case INDEX_op_sar: + if (type == TCG_TYPE_I32) { + return (int32_t)x >> (y & 31); + } return (int64_t)x >> (y & 63); case INDEX_op_rotr_i32: @@ -2600,7 +2600,7 @@ static bool fold_shift(OptContext *ctx, TCGOp *op) } switch (op->opc) { - CASE_OP_32_64(sar): + case INDEX_op_sar: /* * Arithmetic right shift will not reduce the number of * input sign repetitions. @@ -3027,7 +3027,7 @@ void tcg_optimize(TCGContext *s) break; CASE_OP_32_64(rotl): CASE_OP_32_64(rotr): - CASE_OP_32_64(sar): + case INDEX_op_sar: case INDEX_op_shl: case INDEX_op_shr: done = fold_shift(&ctx, op); diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index ef8cf5a1ac..43848ebc4f 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -511,7 +511,7 @@ void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_sar, ret, arg1, arg2); } void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -1624,7 +1624,7 @@ void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_sar, ret, arg1, arg2); } else { gen_helper_sar_i64(ret, arg1, arg2); } diff --git a/tcg/tcg.c b/tcg/tcg.c index ffe9efbf79..8f67107190 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1042,8 +1042,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), - OUTOP(INDEX_op_sar_i32, TCGOutOpBinary, outop_sar), - OUTOP(INDEX_op_sar_i64, TCGOutOpBinary, outop_sar), + OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), @@ -5421,8 +5420,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_orc: case INDEX_op_rems: case INDEX_op_remu: - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: + case INDEX_op_sar: case INDEX_op_shl: case INDEX_op_shr: case INDEX_op_xor: diff --git a/tcg/tci.c b/tcg/tci.c index 376b1b1ece..2a2f216898 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -625,9 +625,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] >> (regs[r2] % TCG_TARGET_REG_BITS); break; - case INDEX_op_sar_i32: + case INDEX_op_sar: tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = (int32_t)regs[r1] >> (regs[r2] & 31); + regs[r0] = ((tcg_target_long)regs[r1] + >> (regs[r2] % TCG_TARGET_REG_BITS)); break; #if TCG_TARGET_HAS_rot_i32 case INDEX_op_rotl_i32: @@ -787,10 +788,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Shift/rotate operations (64 bit). */ - case INDEX_op_sar_i64: - tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = (int64_t)regs[r1] >> (regs[r2] & 63); - break; #if TCG_TARGET_HAS_rot_i64 case INDEX_op_rotl_i64: tci_args_rrr(insn, &r0, &r1, &r2); @@ -1073,12 +1070,11 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_orc: case INDEX_op_rems: case INDEX_op_remu: + case INDEX_op_sar: case INDEX_op_shl: case INDEX_op_shr: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: case INDEX_op_rotl_i32: case INDEX_op_rotl_i64: case INDEX_op_rotr_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index f50a2d6574..feaa13dff0 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -779,7 +779,7 @@ static void tgen_sar(TCGContext *s, TCGType type, tcg_out_ext32s(s, TCG_REG_TMP, a1); a1 = TCG_REG_TMP; } - tcg_out_op_rrr(s, glue(INDEX_op_sar_i,TCG_TARGET_REG_BITS), a0, a1, a2); + tcg_out_op_rrr(s, INDEX_op_sar, a0, a1, a2); } static const TCGOutOpBinary outop_sar = { @@ -897,7 +897,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(sar) CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */ CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */ From 8726c7d79967c740f7d5a963ac2d855354805cd2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 09:53:38 -0800 Subject: [PATCH 063/161] tcg: Do not require both rotr and rotl from the backend Many host architectures do not implement both rotate right and rotate left and require the compiler to negate the shift count to rotate the opposite direction. We have been requiring the backend to perform this transformation. Do this during opcode expansion so that the next patch can drop support where possible in the backend. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 98 +++++++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 43848ebc4f..8c8b9d179b 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -829,15 +829,18 @@ void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rot_i32) { + if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotr_i32, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_neg_i32(t0, arg2); + tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, t0); + tcg_temp_free_i32(t0); } else { - TCGv_i32 t0, t1; - - t0 = tcg_temp_ebb_new_i32(); - t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); tcg_gen_shl_i32(t0, arg1, arg2); - tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_neg_i32(t1, arg2); tcg_gen_shr_i32(t1, arg1, t1); tcg_gen_or_i32(ret, t0, t1); tcg_temp_free_i32(t0); @@ -851,12 +854,15 @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(ret, arg1); - } else if (TCG_TARGET_HAS_rot_i32) { - tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2)); + } else if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_constant_i32(arg2); + tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, t0); + } else if (tcg_op_supported(INDEX_op_rotr_i32, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_constant_i32(32 - arg2); + tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, t0); } else { - TCGv_i32 t0, t1; - t0 = tcg_temp_ebb_new_i32(); - t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); tcg_gen_shli_i32(t0, arg1, arg2); tcg_gen_shri_i32(t1, arg1, 32 - arg2); tcg_gen_or_i32(ret, t0, t1); @@ -867,15 +873,18 @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rot_i32) { + if (tcg_op_supported(INDEX_op_rotr_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_neg_i32(t0, arg2); + tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, t0); + tcg_temp_free_i32(t0); } else { - TCGv_i32 t0, t1; - - t0 = tcg_temp_ebb_new_i32(); - t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); tcg_gen_shr_i32(t0, arg1, arg2); - tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_neg_i32(t1, arg2); tcg_gen_shl_i32(t1, arg1, t1); tcg_gen_or_i32(ret, t0, t1); tcg_temp_free_i32(t0); @@ -886,12 +895,7 @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i32(ret, arg1); - } else { - tcg_gen_rotli_i32(ret, arg1, 32 - arg2); - } + tcg_gen_rotli_i32(ret, arg1, -arg2 & 31); } void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, @@ -2437,14 +2441,18 @@ void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1) void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rot_i64) { + if (tcg_op_supported(INDEX_op_rotl_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl_i64, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + tcg_gen_neg_i64(t0, arg2); + tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, t0); + tcg_temp_free_i64(t0); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_ebb_new_i64(); - t1 = tcg_temp_ebb_new_i64(); + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); tcg_gen_shl_i64(t0, arg1, arg2); - tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_neg_i64(t1, arg2); tcg_gen_shr_i64(t1, arg1, t1); tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); @@ -2458,12 +2466,15 @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(ret, arg1); - } else if (TCG_TARGET_HAS_rot_i64) { - tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2)); + } else if (tcg_op_supported(INDEX_op_rotl_i64, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_constant_i64(arg2); + tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, t0); + } else if (tcg_op_supported(INDEX_op_rotr_i64, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_constant_i64(64 - arg2); + tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, t0); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_ebb_new_i64(); - t1 = tcg_temp_ebb_new_i64(); + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); tcg_gen_shli_i64(t0, arg1, arg2); tcg_gen_shri_i64(t1, arg1, 64 - arg2); tcg_gen_or_i64(ret, t0, t1); @@ -2474,14 +2485,18 @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rot_i64) { + if (tcg_op_supported(INDEX_op_rotr_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl_i64, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + tcg_gen_neg_i64(t0, arg2); + tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, t0); + tcg_temp_free_i64(t0); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_ebb_new_i64(); - t1 = tcg_temp_ebb_new_i64(); + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); tcg_gen_shr_i64(t0, arg1, arg2); - tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_neg_i64(t1, arg2); tcg_gen_shl_i64(t1, arg1, t1); tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); @@ -2492,12 +2507,7 @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 64); - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i64(ret, arg1); - } else { - tcg_gen_rotli_i64(ret, arg1, 64 - arg2); - } + tcg_gen_rotli_i64(ret, arg1, -arg2 & 63); } void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, From 03568c0d539581c6e86263d2fd7396f5a1e25a6b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 10:22:53 -0800 Subject: [PATCH 064/161] tcg: Convert rotl, rotr to TCGOutOpBinary For aarch64, arm, loongarch64, mips, we can drop rotl. For ppc, s390x we can drop rotr. Only x86, riscv, tci have both rotl and rotr. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 - tcg/aarch64/tcg-target.c.inc | 62 +++++++++--------------- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 44 ++++++++--------- tcg/i386/tcg-target-has.h | 2 - tcg/i386/tcg-target.c.inc | 62 ++++++++++++++++-------- tcg/loongarch64/tcg-target-has.h | 2 - tcg/loongarch64/tcg-target.c.inc | 70 ++++++++++++--------------- tcg/mips/tcg-target-has.h | 2 - tcg/mips/tcg-target.c.inc | 75 +++++++++++++---------------- tcg/ppc/tcg-target-has.h | 2 - tcg/ppc/tcg-target.c.inc | 70 ++++++++++++--------------- tcg/riscv/tcg-target-has.h | 2 - tcg/riscv/tcg-target.c.inc | 83 ++++++++++++++++++-------------- tcg/s390x/tcg-target-has.h | 2 - tcg/s390x/tcg-target.c.inc | 72 +++++++++++---------------- tcg/sparc64/tcg-target-has.h | 2 - tcg/sparc64/tcg-target.c.inc | 8 +++ tcg/tcg-has.h | 1 - tcg/tcg.c | 14 +++--- tcg/tci.c | 12 ++--- tcg/tci/tcg-target-has.h | 2 - tcg/tci/tcg-target-opc.h.inc | 2 + tcg/tci/tcg-target.c.inc | 34 ++++++++++--- 24 files changed, 306 insertions(+), 322 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 1fdff25d05..fa79cbc1f0 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -15,7 +15,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 @@ -31,7 +30,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 0 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 90bdbf8387..00fca43840 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1347,20 +1347,6 @@ static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); } -static inline void tcg_out_rotr(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_extr(s, ext, rd, rn, rn, m & max); -} - -static inline void tcg_out_rotl(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_extr(s, ext, rd, rn, rn, -m & max); -} - static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn, unsigned lsb, unsigned width) { @@ -2277,6 +2263,29 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, RORV, type, a0, a1, a2); +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int max = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_extr(s, type, a0, a1, a1, a2 & max); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + static void tgen_sar(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2451,25 +2460,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_rotr_i64: - case INDEX_op_rotr_i32: - if (c2) { - tcg_out_rotr(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); - } - break; - - case INDEX_op_rotl_i64: - case INDEX_op_rotl_i32: - if (c2) { - tcg_out_rotl(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2); - tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0); - } - break; - case INDEX_op_clz_i64: case INDEX_op_clz_i32: tcg_out_cltz(s, ext, a0, a1, a2, c2, false); @@ -3099,12 +3089,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); - case INDEX_op_clz_i32: case INDEX_op_ctz_i32: case INDEX_op_clz_i64: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 32d73d3443..12ffbcda2b 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -26,7 +26,6 @@ extern bool use_neon_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions #define TCG_TARGET_HAS_ctpop_i32 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 058677650b..462f0ec08d 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1963,6 +1963,28 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_REG_ROR(a2)); +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_IMM_ROR(a2 & 0x1f)); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + static void tgen_sar(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2171,24 +2193,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_muls2_i32: tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; - case INDEX_op_rotr_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]); - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c); - break; - - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], - ((0x20 - args[2]) & 0x1f) ? - SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : - SHIFT_IMM_LSL(0)); - } else { - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20); - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], - SHIFT_REG_ROR(TCG_REG_TMP)); - } - break; case INDEX_op_ctz_i32: tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0); @@ -2342,10 +2346,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i32: return C_O2_I2(r, r, r, r); - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - return C_O1_I2(r, r, ri); - case INDEX_op_brcond_i32: return C_O0_I2(r, rIN); case INDEX_op_deposit_i32: diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index aee6066579..a7199463df 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -26,7 +26,6 @@ #define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) /* optional instructions */ -#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_clz_i32 1 @@ -42,7 +41,6 @@ #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 1e81455461..dd35bba57f 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2744,6 +2744,46 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_ROL, a0); +} + +static void tgen_rotli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_shifti(s, SHIFT_ROL + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, 0, ci), + .out_rrr = tgen_rotl, + .out_rri = tgen_rotli, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_ROR, a0); +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_shifti(s, SHIFT_ROR + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, 0, ci), + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + static TCGConstraintSetIndex cset_shift(TCGType type, unsigned flags) { return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); @@ -2901,7 +2941,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; - int c, const_a2, rexw; + int const_a2, rexw; #if TCG_TARGET_REG_BITS == 64 # define OP_32_64(x) \ @@ -2976,20 +3016,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(rotl): - c = SHIFT_ROL; - goto gen_shift; - OP_32_64(rotr): - c = SHIFT_ROR; - goto gen_shift; - gen_shift: - if (const_a2) { - tcg_out_shifti(s, c + rexw, a0, a2); - } else { - tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0); - } - break; - OP_32_64(ctz): tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]); break; @@ -3824,12 +3850,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: - return C_O1_I2(r, 0, ci); - case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(r, reT); diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 5dfc69ae6a..303134390a 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -11,7 +11,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_negsetcond_i32 0 -#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 @@ -26,7 +25,6 @@ /* 64-bit operations */ #define TCG_TARGET_HAS_negsetcond_i64 0 -#define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_bswap16_i64 1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index aae0f03505..26cf982780 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1489,6 +1489,36 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_rotr_w(s, a0, a1, a2); + } else { + tcg_out_opc_rotr_d(s, a0, a1, a2); + } +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f); + } +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + static void tgen_sar(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1738,40 +1768,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); break; - case INDEX_op_rotl_i32: - /* transform into equivalent rotr/rotri */ - if (c2) { - tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f); - } else { - tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); - tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0); - } - break; - case INDEX_op_rotl_i64: - /* transform into equivalent rotr/rotri */ - if (c2) { - tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f); - } else { - tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); - tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0); - } - break; - - case INDEX_op_rotr_i32: - if (c2) { - tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_rotr_w(s, a0, a1, a2); - } - break; - case INDEX_op_rotr_i64: - if (c2) { - tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_rotr_d(s, a0, a1, a2); - } - break; - case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: tcg_out_setcond(s, args[3], a0, a1, a2, c2); @@ -2402,12 +2398,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); - case INDEX_op_clz_i32: case INDEX_op_clz_i64: case INDEX_op_ctz_i32: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index ab6a134796..880eb084eb 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -60,7 +60,6 @@ extern bool use_mips32r2_instructions; /* optional instructions detected at runtime */ #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 @@ -71,7 +70,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 16c3d59c19..fb9fe0c40e 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1908,6 +1908,39 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_NotImplemented, +}; + +static TCGConstraintSetIndex cset_rotr(TCGType type, unsigned flags) +{ + return use_mips32r2_instructions ? C_O1_I2(r, r, ri) : C_NotImplemented; +} + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_ROTRV : OPC_DROTRV; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sa(s, OPC_ROTR, a0, a1, a2); + } else { + tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_rotr, + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + static void tgen_sar(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2032,14 +2065,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - MIPSInsn i1, i2; + MIPSInsn i1; TCGArg a0, a1, a2; - int c2; a0 = args[0]; a1 = args[1]; a2 = args[2]; - c2 = const_args[2]; switch (opc) { case INDEX_op_goto_ptr: @@ -2134,39 +2165,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_dsra(s, a0, a1, 32); break; - case INDEX_op_rotr_i32: - i1 = OPC_ROTRV, i2 = OPC_ROTR; - if (c2) { - tcg_out_opc_sa(s, i2, a0, a1, a2); - break; - } - do_shiftv: - tcg_out_opc_reg(s, i1, a0, a2, a1); - break; - case INDEX_op_rotl_i32: - if (c2) { - tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2); - } else { - tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2); - tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); - } - break; - case INDEX_op_rotr_i64: - if (c2) { - tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, a2); - break; - } - i1 = OPC_DROTRV; - goto do_shiftv; - case INDEX_op_rotl_i64: - if (c2) { - tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, 64 - a2); - } else { - tcg_out_opc_reg(s, OPC_DSUBU, TCG_TMP0, TCG_REG_ZERO, a2); - tcg_out_opc_reg(s, OPC_DROTRV, a0, TCG_TMP0, a1); - } - break; - case INDEX_op_clz_i32: tcg_out_clz(s, OPC_CLZ, OPC_CLZ_R6, 32, a0, a1, a2); break; @@ -2331,11 +2329,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i64: case INDEX_op_mulu2_i64: return C_O2_I2(r, r, r, r); - case INDEX_op_rotr_i32: - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i64: - case INDEX_op_rotl_i64: - return C_O1_I2(r, r, ri); case INDEX_op_clz_i32: case INDEX_op_clz_i64: return C_O1_I2(r, r, rzW); diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 37e88a3193..71c02d88b9 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -17,7 +17,6 @@ #define have_vsx (cpuinfo & CPUINFO_VSX) /* optional instructions */ -#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_clz_i32 1 @@ -33,7 +32,6 @@ #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 24e8f675bb..687b66af54 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3122,6 +3122,36 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31)); + } else { + tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0)); + } +} + +static void tgen_rotli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31); + } else { + tcg_out_rld(s, RLDICL, a0, a1, a2, 0); + } +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_rotl, + .out_rri = tgen_rotli, +}; + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sar(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3344,24 +3374,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); - } else { - tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) - | MB(0) | ME(31)); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); - tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) - | MB(0) | ME(31)); - } - break; - case INDEX_op_brcond_i32: tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], arg_label(args[3]), TCG_TYPE_I32); @@ -3374,22 +3386,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_brcond2(s, args, const_args); break; - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); - } else { - tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); - tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); - } - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; @@ -4232,12 +4228,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); - case INDEX_op_clz_i32: case INDEX_op_ctz_i32: case INDEX_op_clz_i64: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index b3c6899887..c7745a6462 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -11,7 +11,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -25,7 +24,6 @@ #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_negsetcond_i64 1 -#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 8cab07a392..4dd892d98d 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2144,6 +2144,53 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static TCGConstraintSetIndex cset_rot(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_O1_I2(r, r, ri) : C_NotImplemented; +} + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_RORW : OPC_ROR; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_RORIW : OPC_RORI; + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_opc_imm(s, insn, a0, a1, a2 & mask); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_rot, + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_ROLW : OPC_ROL; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_rotli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_rotri(s, type, a0, a1, -a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_rot, + .out_rrr = tgen_rotl, + .out_rri = tgen_rotli, +}; + static void tgen_sar(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2320,36 +2367,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_rotl_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_RORIW, a0, a1, -a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_ROLW, a0, a1, a2); - } - break; - case INDEX_op_rotl_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_RORI, a0, a1, -a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_ROL, a0, a1, a2); - } - break; - - case INDEX_op_rotr_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_RORIW, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_RORW, a0, a1, a2); - } - break; - case INDEX_op_rotr_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_RORI, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_ROR, a0, a1, a2); - } - break; - case INDEX_op_bswap64_i64: tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); break; @@ -2776,12 +2793,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); - case INDEX_op_clz_i32: case INDEX_op_clz_i64: case INDEX_op_ctz_i32: diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index d61cc7a144..eaddf7005e 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -29,7 +29,6 @@ extern uint64_t s390_facilities[3]; ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1) /* optional instructions */ -#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_clz_i32 0 @@ -44,7 +43,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 1cf4920276..76180dabcb 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2445,6 +2445,35 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static void tgen_rotl_int(TCGContext *s, TCGType type, TCGReg dst, + TCGReg src, TCGReg v, tcg_target_long i) +{ + S390Opcode insn = type == TCG_TYPE_I32 ? RSY_RLL : RSY_RLLG; + tcg_out_sh64(s, insn, dst, src, v, i); +} + +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_rotl_int(s, type, a0, a1, a2, 0); +} + +static void tgen_rotli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_rotl_int(s, type, a0, a1, TCG_REG_NONE, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_rotl, + .out_rri = tgen_rotli, +}; + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sar_int(TCGContext *s, TCGType type, TCGReg dst, TCGReg src, TCGReg v, tcg_target_long i) { @@ -2663,24 +2692,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_rotl_i32: - /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */ - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], - TCG_REG_NONE, (32 - args[2]) & 31); - } else { - tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); - tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0); - } - break; - case INDEX_op_bswap16_i32: a0 = args[0], a1 = args[1], a2 = args[2]; tcg_out_insn(s, RRE, LRVR, a0, a1); @@ -2806,26 +2817,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]); break; - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, (64 - args[2]) & 63); - } else { - /* We can use the smaller 32-bit negate because only the - low 6 bits are examined for the rotate. */ - tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0); - } - break; - case INDEX_op_add2_i64: if (const_args[4]) { if ((int64_t)args[4] >= 0) { @@ -3390,11 +3381,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); case INDEX_op_setcond_i32: case INDEX_op_negsetcond_i32: case INDEX_op_setcond_i64: diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 42de99efbf..1dd86c363d 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -14,7 +14,6 @@ extern bool use_vis3_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_rot_i32 0 #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_clz_i32 0 @@ -29,7 +28,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_rot_i64 0 #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 42d81c1e6c..57b26ae33b 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1484,6 +1484,14 @@ static const TCGOutOpBinary outop_remu = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_sar(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 0bb829be36..7bfa55adb1 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -12,7 +12,6 @@ #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. */ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_rot_i64 0 #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 diff --git a/tcg/tcg.c b/tcg/tcg.c index 8f67107190..40a3e44b7c 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1042,6 +1042,10 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), + OUTOP(INDEX_op_rotl_i32, TCGOutOpBinary, outop_rotl), + OUTOP(INDEX_op_rotl_i64, TCGOutOpBinary, outop_rotl), + OUTOP(INDEX_op_rotr_i32, TCGOutOpBinary, outop_rotr), + OUTOP(INDEX_op_rotr_i64, TCGOutOpBinary, outop_rotr), OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), @@ -2272,9 +2276,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i32: return TCG_TARGET_HAS_negsetcond_i32; - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - return TCG_TARGET_HAS_rot_i32; case INDEX_op_extract2_i32: return TCG_TARGET_HAS_extract2_i32; case INDEX_op_add2_i32: @@ -2323,9 +2324,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return TCG_TARGET_HAS_negsetcond_i64; - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return TCG_TARGET_HAS_rot_i64; case INDEX_op_extract2_i64: return TCG_TARGET_HAS_extract2_i64; case INDEX_op_extrl_i64_i32: @@ -5420,6 +5418,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_orc: case INDEX_op_rems: case INDEX_op_remu: + case INDEX_op_rotl_i32: + case INDEX_op_rotl_i64: + case INDEX_op_rotr_i32: + case INDEX_op_rotr_i64: case INDEX_op_sar: case INDEX_op_shl: case INDEX_op_shr: diff --git a/tcg/tci.c b/tcg/tci.c index 2a2f216898..0fb13ff61d 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -630,16 +630,14 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = ((tcg_target_long)regs[r1] >> (regs[r2] % TCG_TARGET_REG_BITS)); break; -#if TCG_TARGET_HAS_rot_i32 - case INDEX_op_rotl_i32: + case INDEX_op_tci_rotl32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = rol32(regs[r1], regs[r2] & 31); break; - case INDEX_op_rotr_i32: + case INDEX_op_tci_rotr32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror32(regs[r1], regs[r2] & 31); break; -#endif case INDEX_op_deposit_i32: tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); regs[r0] = deposit32(regs[r1], pos, len, regs[r2]); @@ -788,7 +786,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Shift/rotate operations (64 bit). */ -#if TCG_TARGET_HAS_rot_i64 case INDEX_op_rotl_i64: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = rol64(regs[r1], regs[r2] & 63); @@ -797,7 +794,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror64(regs[r1], regs[r2] & 63); break; -#endif case INDEX_op_deposit_i64: tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); @@ -1075,9 +1071,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_shr: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_rotl_i32: case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: case INDEX_op_rotr_i64: case INDEX_op_clz_i32: case INDEX_op_clz_i64: @@ -1087,6 +1081,8 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_tci_divu32: case INDEX_op_tci_rems32: case INDEX_op_tci_remu32: + case INDEX_op_tci_rotl32: + case INDEX_op_tci_rotr32: tci_args_rrr(insn, &r0, &r1, &r2); info->fprintf_func(info->stream, "%-12s %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2)); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index bd51b9346d..04d341a8d2 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -13,7 +13,6 @@ #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -27,7 +26,6 @@ #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 1 -#define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_muls2_i64 1 #define TCG_TARGET_HAS_add2_i32 1 diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index 82d2a38cae..cff215490a 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -6,3 +6,5 @@ DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_remu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotl32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotr32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index feaa13dff0..0a2da3ba47 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -79,10 +79,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: case INDEX_op_deposit_i32: @@ -772,6 +768,34 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rotl32 + : INDEX_op_rotl_i64); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotl, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rotr32 + : INDEX_op_rotr_i64); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotr, +}; + static void tgen_sar(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -897,8 +921,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ - CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */ CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */ CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */ tcg_out_op_rrr(s, opc, args[0], args[1], args[2]); From 005a87e148dc20f59835b328336240759703d63d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 10:42:16 -0800 Subject: [PATCH 065/161] tcg: Merge INDEX_op_rot{l,r}_{i32,i64} Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 8 +++---- include/tcg/tcg-opc.h | 6 ++--- tcg/optimize.c | 20 ++++++++--------- tcg/tcg-op.c | 48 ++++++++++++++++++++-------------------- tcg/tcg.c | 12 ++++------ tcg/tci.c | 8 +++---- tcg/tci/tcg-target.c.inc | 4 ++-- 7 files changed, 50 insertions(+), 56 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index be82fed41a..c3a6499d01 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -394,15 +394,15 @@ Shifts/Rotates - | *t0* = *t1* >> *t2* (signed) | Unspecified behavior for negative or out-of-range shifts. - * - rotl_i32/i64 *t0*, *t1*, *t2* + * - rotl *t0*, *t1*, *t2* - | Rotation of *t2* bits to the left - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - rotr_i32/i64 *t0*, *t1*, *t2* + * - rotr *t0*, *t1*, *t2* - | Rotation of *t2* bits to the right. - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. Misc diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index cb8c134e94..25fd93eb28 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -58,6 +58,8 @@ DEF(or, 1, 2, 0, TCG_OPF_INT) DEF(orc, 1, 2, 0, TCG_OPF_INT) DEF(rems, 1, 2, 0, TCG_OPF_INT) DEF(remu, 1, 2, 0, TCG_OPF_INT) +DEF(rotl, 1, 2, 0, TCG_OPF_INT) +DEF(rotr, 1, 2, 0, TCG_OPF_INT) DEF(sar, 1, 2, 0, TCG_OPF_INT) DEF(shl, 1, 2, 0, TCG_OPF_INT) DEF(shr, 1, 2, 0, TCG_OPF_INT) @@ -77,8 +79,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* shifts/rotates */ -DEF(rotl_i32, 1, 2, 0, 0) -DEF(rotr_i32, 1, 2, 0, 0) DEF(deposit_i32, 1, 2, 2, 0) DEF(extract_i32, 1, 1, 2, 0) DEF(sextract_i32, 1, 1, 2, 0) @@ -115,8 +115,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* shifts/rotates */ -DEF(rotl_i64, 1, 2, 0, 0) -DEF(rotr_i64, 1, 2, 0, 0) DEF(deposit_i64, 1, 2, 2, 0) DEF(extract_i64, 1, 1, 2, 0) DEF(sextract_i64, 1, 1, 2, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index f94be19b72..97a566a617 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -464,16 +464,16 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, } return (int64_t)x >> (y & 63); - case INDEX_op_rotr_i32: - return ror32(x, y & 31); - - case INDEX_op_rotr_i64: + case INDEX_op_rotr: + if (type == TCG_TYPE_I32) { + return ror32(x, y & 31); + } return ror64(x, y & 63); - case INDEX_op_rotl_i32: - return rol32(x, y & 31); - - case INDEX_op_rotl_i64: + case INDEX_op_rotl: + if (type == TCG_TYPE_I32) { + return rol32(x, y & 31); + } return rol64(x, y & 63); case INDEX_op_not: @@ -3025,8 +3025,8 @@ void tcg_optimize(TCGContext *s) case INDEX_op_remu: done = fold_remainder(&ctx, op); break; - CASE_OP_32_64(rotl): - CASE_OP_32_64(rotr): + case INDEX_op_rotl: + case INDEX_op_rotr: case INDEX_op_sar: case INDEX_op_shl: case INDEX_op_shr: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 8c8b9d179b..1989d8d12c 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -829,12 +829,12 @@ void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_rotr_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_neg_i32(t0, arg2); - tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, t0); + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, t0); tcg_temp_free_i32(t0); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -854,12 +854,12 @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(ret, arg1); - } else if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) { + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_constant_i32(arg2); - tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, t0); - } else if (tcg_op_supported(INDEX_op_rotr_i32, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, t0); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_constant_i32(32 - arg2); - tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, t0); + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, t0); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); @@ -873,12 +873,12 @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_rotr_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_neg_i32(t0, arg2); - tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, t0); + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, t0); tcg_temp_free_i32(t0); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -2441,12 +2441,12 @@ void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1) void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (tcg_op_supported(INDEX_op_rotl_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_rotl_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_neg_i64(t0, arg2); - tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, t0); + tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, t0); tcg_temp_free_i64(t0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); @@ -2466,12 +2466,12 @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(ret, arg1); - } else if (tcg_op_supported(INDEX_op_rotl_i64, TCG_TYPE_I64, 0)) { + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_constant_i64(arg2); - tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, t0); - } else if (tcg_op_supported(INDEX_op_rotr_i64, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, t0); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_constant_i64(64 - arg2); - tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, t0); + tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, t0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -2485,12 +2485,12 @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (tcg_op_supported(INDEX_op_rotr_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_rotl_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_neg_i64(t0, arg2); - tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, t0); + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, t0); tcg_temp_free_i64(t0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 40a3e44b7c..182f19e5f0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1042,10 +1042,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), - OUTOP(INDEX_op_rotl_i32, TCGOutOpBinary, outop_rotl), - OUTOP(INDEX_op_rotl_i64, TCGOutOpBinary, outop_rotl), - OUTOP(INDEX_op_rotr_i32, TCGOutOpBinary, outop_rotr), - OUTOP(INDEX_op_rotr_i64, TCGOutOpBinary, outop_rotr), + OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl), + OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr), OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), @@ -5418,10 +5416,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_orc: case INDEX_op_rems: case INDEX_op_remu: - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: + case INDEX_op_rotl: + case INDEX_op_rotr: case INDEX_op_sar: case INDEX_op_shl: case INDEX_op_shr: diff --git a/tcg/tci.c b/tcg/tci.c index 0fb13ff61d..b1ee14e65f 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -786,11 +786,11 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Shift/rotate operations (64 bit). */ - case INDEX_op_rotl_i64: + case INDEX_op_rotl: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = rol64(regs[r1], regs[r2] & 63); break; - case INDEX_op_rotr_i64: + case INDEX_op_rotr: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror64(regs[r1], regs[r2] & 63); break; @@ -1066,13 +1066,13 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_orc: case INDEX_op_rems: case INDEX_op_remu: + case INDEX_op_rotl: + case INDEX_op_rotr: case INDEX_op_sar: case INDEX_op_shl: case INDEX_op_shr: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: case INDEX_op_clz_i32: case INDEX_op_clz_i64: case INDEX_op_ctz_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 0a2da3ba47..0d15547c9f 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -773,7 +773,7 @@ static void tgen_rotl(TCGContext *s, TCGType type, { TCGOpcode opc = (type == TCG_TYPE_I32 ? INDEX_op_tci_rotl32 - : INDEX_op_rotl_i64); + : INDEX_op_rotl); tcg_out_op_rrr(s, opc, a0, a1, a2); } @@ -787,7 +787,7 @@ static void tgen_rotr(TCGContext *s, TCGType type, { TCGOpcode opc = (type == TCG_TYPE_I32 ? INDEX_op_tci_rotr32 - : INDEX_op_rotr_i64); + : INDEX_op_rotr); tcg_out_op_rrr(s, opc, a0, a1, a2); } From 8b915879b0fdd05afab41f0ca156113811ebac38 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 14:16:04 -0800 Subject: [PATCH 066/161] tcg: Convert clz to TCGOutOpBinary Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 - tcg/aarch64/tcg-target.c.inc | 83 +++++++++++++----------- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 47 +++++++++----- tcg/i386/tcg-target-has.h | 2 - tcg/i386/tcg-target.c.inc | 72 +++++++++++---------- tcg/loongarch64/tcg-target-has.h | 2 - tcg/loongarch64/tcg-target.c.inc | 36 ++++++++--- tcg/mips/tcg-target-has.h | 2 - tcg/mips/tcg-target.c.inc | 86 +++++++++++++----------- tcg/ppc/tcg-target-has.h | 2 - tcg/ppc/tcg-target.c.inc | 30 ++++++--- tcg/riscv/tcg-target-has.h | 2 - tcg/riscv/tcg-target.c.inc | 34 +++++++--- tcg/s390x/tcg-target-has.h | 2 - tcg/s390x/tcg-target.c.inc | 75 +++++++++++++-------- tcg/sparc64/tcg-target-has.h | 2 - tcg/sparc64/tcg-target.c.inc | 4 ++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 108 ++++++++++++++++--------------- tcg/tcg.c | 8 +-- tcg/tci.c | 8 +-- tcg/tci/tcg-target-has.h | 2 - tcg/tci/tcg-target-opc.h.inc | 1 + tcg/tci/tcg-target.c.inc | 17 ++++- 25 files changed, 365 insertions(+), 264 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index fa79cbc1f0..8c839d8949 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -15,7 +15,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_extract2_i32 1 @@ -30,7 +29,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_extract2_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 00fca43840..3bd8231117 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1618,37 +1618,6 @@ static inline void tcg_out_mb(TCGContext *s, TCGArg a0) tcg_out32(s, sync[a0 & TCG_MO_ALL]); } -static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, - TCGReg a0, TCGArg b, bool const_b, bool is_ctz) -{ - TCGReg a1 = a0; - if (is_ctz) { - a1 = TCG_REG_TMP0; - tcg_out_insn(s, 3507, RBIT, ext, a1, a0); - } - if (const_b && b == (ext ? 64 : 32)) { - tcg_out_insn(s, 3507, CLZ, ext, d, a1); - } else { - AArch64Insn sel = I3506_CSEL; - - tcg_out_cmp(s, ext, TCG_COND_NE, a0, 0, 1); - tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1); - - if (const_b) { - if (b == -1) { - b = TCG_REG_XZR; - sel = I3506_CSINV; - } else if (b == 0) { - b = TCG_REG_XZR; - } else { - tcg_out_movi(s, ext, d, b); - b = d; - } - } - tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE); - } -} - typedef struct { TCGReg base; TCGReg index; @@ -2121,6 +2090,45 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true); + tcg_out_insn(s, 3507, CLZ, type, TCG_REG_TMP0, a1); + tcg_out_insn(s, 3506, CSEL, type, a0, TCG_REG_TMP0, a2, TCG_COND_NE); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 == (type == TCG_TYPE_I32 ? 32 : 64)) { + tcg_out_insn(s, 3507, CLZ, type, a0, a1); + return; + } + + tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true); + tcg_out_insn(s, 3507, CLZ, type, a0, a1); + + switch (a2) { + case -1: + tcg_out_insn(s, 3506, CSINV, type, a0, a0, TCG_REG_XZR, TCG_COND_NE); + break; + case 0: + tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_XZR, TCG_COND_NE); + break; + default: + tcg_out_movi(s, type, TCG_REG_TMP0, a2); + tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_TMP0, TCG_COND_NE); + break; + } +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, rAL), + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2460,13 +2468,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_clz_i64: - case INDEX_op_clz_i32: - tcg_out_cltz(s, ext, a0, a1, a2, c2, false); - break; case INDEX_op_ctz_i64: case INDEX_op_ctz_i32: - tcg_out_cltz(s, ext, a0, a1, a2, c2, true); + tcg_out_insn(s, 3507, RBIT, ext, TCG_REG_TMP0, a1); + if (c2) { + tgen_clzi(s, ext, a0, TCG_REG_TMP0, a2); + } else { + tgen_clz(s, ext, a0, TCG_REG_TMP0, a2); + } break; case INDEX_op_brcond_i32: @@ -3089,9 +3098,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_clz_i32: case INDEX_op_ctz_i32: - case INDEX_op_clz_i64: case INDEX_op_ctz_i64: return C_O1_I2(r, r, rAL); diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 12ffbcda2b..fceec2f0ca 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -26,7 +26,6 @@ extern bool use_neon_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_extract2_i32 1 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 462f0ec08d..681eb5aba1 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1862,6 +1862,32 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0); + tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0); + tcg_out_mov_reg(s, COND_EQ, a0, a2); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 == 32) { + tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0); + } else { + tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0); + tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0); + tcg_out_movi32(s, COND_EQ, a0, a2); + } +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, rIK), + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + static TCGConstraintSetIndex cset_idiv(TCGType type, unsigned flags) { return use_idiv_instructions ? C_O1_I2(r, r, r) : C_NotImplemented; @@ -2196,23 +2222,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_ctz_i32: tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0); - a1 = TCG_REG_TMP; - goto do_clz; - - case INDEX_op_clz_i32: - a1 = args[1]; - do_clz: - a0 = args[0]; - a2 = args[2]; - c = const_args[2]; - if (c && a2 == 32) { - tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0); - break; - } - tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0); - tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0); - if (c || a0 != a2) { - tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c); + if (const_args[2]) { + tgen_clzi(s, TCG_TYPE_I32, args[0], TCG_REG_TMP, args[2]); + } else { + tgen_clz(s, TCG_TYPE_I32, args[0], TCG_REG_TMP, args[2]); } break; diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index a7199463df..2277872ff3 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -28,7 +28,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 have_popcnt #define TCG_TARGET_HAS_extract2_i32 1 @@ -44,7 +43,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 have_popcnt #define TCG_TARGET_HAS_extract2_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index dd35bba57f..0edd4cbc07 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1869,32 +1869,6 @@ static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, } } -static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, - TCGArg arg2, bool const_a2) -{ - if (have_lzcnt) { - tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1); - if (const_a2) { - tcg_debug_assert(arg2 == (rexw ? 64 : 32)); - } else { - tcg_debug_assert(dest != arg2); - tcg_out_cmov(s, JCC_JB, rexw, dest, arg2); - } - } else { - tcg_debug_assert(!const_a2); - tcg_debug_assert(dest != arg1); - tcg_debug_assert(dest != arg2); - - /* Recall that the output of BSR is the index not the count. */ - tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1); - tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0); - - /* Since we have destroyed the flags from BSR, we have to re-test. */ - int jcc = tcg_out_cmp(s, TCG_COND_EQ, arg1, 0, 1, rexw); - tcg_out_cmov(s, jcc, rexw, dest, arg2); - } -} - static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) { intptr_t disp = tcg_pcrel_diff(s, dest) - 5; @@ -2633,6 +2607,45 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + int jcc; + + if (have_lzcnt) { + tcg_out_modrm(s, OPC_LZCNT + rexw, a0, a1); + jcc = JCC_JB; + } else { + /* Recall that the output of BSR is the index not the count. */ + tcg_out_modrm(s, OPC_BSR + rexw, a0, a1); + tgen_arithi(s, ARITH_XOR + rexw, a0, rexw ? 63 : 31, 0); + + /* Since we have destroyed the flags from BSR, we have to re-test. */ + jcc = tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, rexw); + } + tcg_out_cmov(s, jcc, rexw, a0, a2); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_LZCNT + rexw, a0, a1); +} + +static TCGConstraintSetIndex cset_clz(TCGType type, unsigned flags) +{ + return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clz, + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + static const TCGOutOpBinary outop_divs = { .base.static_constraint = C_NotImplemented, }; @@ -3019,9 +3032,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(ctz): tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]); break; - OP_32_64(clz): - tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]); - break; OP_32_64(ctpop): tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); break; @@ -3907,10 +3917,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ctz_i64: return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: - return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, L); diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 303134390a..2eba2132b8 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -18,7 +18,6 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -30,7 +29,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_add2_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 26cf982780..332ce6c86b 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1328,6 +1328,33 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* a2 is constrained to exactly the type width. */ + if (type == TCG_TYPE_I32) { + tcg_out_opc_clz_w(s, a0, a1); + } else { + tcg_out_opc_clz_d(s, a0, a1); + } +} + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_clzi(s, type, TCG_REG_TMP0, a1, /* ignored */ 0); + /* a0 = a1 ? REG_TMP0 : a2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); + tcg_out_opc_masknez(s, a0, a2, a1); + tcg_out_opc_or(s, a0, a0, TCG_REG_TMP0); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, rW), + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1754,13 +1781,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_revb_d(s, a0, a1); break; - case INDEX_op_clz_i32: - tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true); - break; - case INDEX_op_clz_i64: - tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false); - break; - case INDEX_op_ctz_i32: tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true); break; @@ -2398,8 +2418,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: return C_O1_I2(r, r, rW); diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 880eb084eb..c27ca7e543 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -60,7 +60,6 @@ extern bool use_mips32r2_instructions; /* optional instructions detected at runtime */ #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -70,7 +69,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 #endif diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index fb9fe0c40e..5052d6481c 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1563,33 +1563,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) tcg_out32(s, sync[a0 & TCG_MO_ALL]); } -static void tcg_out_clz(TCGContext *s, MIPSInsn opcv2, MIPSInsn opcv6, - int width, TCGReg a0, TCGReg a1, TCGArg a2) -{ - if (use_mips32r6_instructions) { - if (a2 == width) { - tcg_out_opc_reg(s, opcv6, a0, a1, 0); - } else { - tcg_out_opc_reg(s, opcv6, TCG_TMP0, a1, 0); - tcg_out_movcond(s, TCG_COND_EQ, a0, a1, 0, a2, TCG_TMP0); - } - } else { - if (a2 == width) { - tcg_out_opc_reg(s, opcv2, a0, a1, a1); - } else if (a0 == a2) { - tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1); - tcg_out_opc_reg(s, OPC_MOVN, a0, TCG_TMP0, a1); - } else if (a0 != a1) { - tcg_out_opc_reg(s, opcv2, a0, a1, a1); - tcg_out_opc_reg(s, OPC_MOVZ, a0, a2, a1); - } else { - tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1); - tcg_out_opc_reg(s, OPC_MOVZ, TCG_TMP0, a2, a1); - tcg_out_mov(s, TCG_TYPE_REG, a0, TCG_TMP0); - } - } -} - static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { TCGReg base = TCG_REG_ZERO; @@ -1712,6 +1685,55 @@ static const TCGOutOpBinary outop_andc = { .base.static_constraint = C_NotImplemented, }; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + MIPSInsn opcv6 = type == TCG_TYPE_I32 ? OPC_CLZ_R6 : OPC_DCLZ_R6; + tcg_out_opc_reg(s, opcv6, TCG_TMP0, a1, 0); + tcg_out_movcond(s, TCG_COND_EQ, a0, a1, 0, a2, TCG_TMP0); + } else { + MIPSInsn opcv2 = type == TCG_TYPE_I32 ? OPC_CLZ : OPC_DCLZ; + if (a0 == a2) { + tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1); + tcg_out_opc_reg(s, OPC_MOVN, a0, TCG_TMP0, a1); + } else if (a0 != a1) { + tcg_out_opc_reg(s, opcv2, a0, a1, a1); + tcg_out_opc_reg(s, OPC_MOVZ, a0, a2, a1); + } else { + tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1); + tcg_out_opc_reg(s, OPC_MOVZ, TCG_TMP0, a2, a1); + tcg_out_mov(s, type, a0, TCG_TMP0); + } + } +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 == 0) { + tgen_clz(s, type, a0, a1, TCG_REG_ZERO); + } else if (use_mips32r6_instructions) { + MIPSInsn opcv6 = type == TCG_TYPE_I32 ? OPC_CLZ_R6 : OPC_DCLZ_R6; + tcg_out_opc_reg(s, opcv6, a0, a1, 0); + } else { + MIPSInsn opcv2 = type == TCG_TYPE_I32 ? OPC_CLZ : OPC_DCLZ; + tcg_out_opc_reg(s, opcv2, a0, a1, a1); + } +} + +static TCGConstraintSetIndex cset_clz(TCGType type, unsigned flags) +{ + return use_mips32r2_instructions ? C_O1_I2(r, r, rzW) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clz, + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2165,13 +2187,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_dsra(s, a0, a1, 32); break; - case INDEX_op_clz_i32: - tcg_out_clz(s, OPC_CLZ, OPC_CLZ_R6, 32, a0, a1, a2); - break; - case INDEX_op_clz_i64: - tcg_out_clz(s, OPC_DCLZ, OPC_DCLZ_R6, 64, a0, a1, a2); - break; - case INDEX_op_deposit_i32: tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); break; @@ -2329,9 +2344,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_muls2_i64: case INDEX_op_mulu2_i64: return C_O2_I2(r, r, r, r); - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: - return C_O1_I2(r, r, rzW); case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 71c02d88b9..cd7346011b 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -19,7 +19,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 have_isa_3_00 #define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06 #define TCG_TARGET_HAS_extract2_i32 0 @@ -35,7 +34,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 have_isa_3_00 #define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 687b66af54..518cf1e9ef 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2954,6 +2954,26 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD; + tcg_out_cntxz(s, type, insn, a0, a1, a2, false); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD; + tcg_out_cntxz(s, type, insn, a0, a1, a2, true); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, rZW), + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3350,10 +3370,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; - case INDEX_op_clz_i32: - tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], - args[2], const_args[2]); - break; case INDEX_op_ctz_i32: tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], args[2], const_args[2]); @@ -3362,10 +3378,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); break; - case INDEX_op_clz_i64: - tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1], - args[2], const_args[2]); - break; case INDEX_op_ctz_i64: tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], args[2], const_args[2]); @@ -4228,9 +4240,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_clz_i32: case INDEX_op_ctz_i32: - case INDEX_op_clz_i64: case INDEX_op_ctz_i64: return C_O1_I2(r, r, rZW); diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index c7745a6462..41e287130d 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -18,7 +18,6 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -29,7 +28,6 @@ #define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 4dd892d98d..77eef02db5 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1997,6 +1997,32 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CLZW : OPC_CLZ; + tcg_out_cltz(s, type, insn, a0, a1, a2, false); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CLZW : OPC_CLZ; + tcg_out_cltz(s, type, insn, a0, a1, a2, true); +} + +static TCGConstraintSetIndex cset_clzctz(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_N1_I2(r, r, rM) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clzctz, + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2398,12 +2424,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0); break; - case INDEX_op_clz_i32: - tcg_out_cltz(s, TCG_TYPE_I32, OPC_CLZW, a0, a1, a2, c2); - break; - case INDEX_op_clz_i64: - tcg_out_cltz(s, TCG_TYPE_I64, OPC_CLZ, a0, a1, a2, c2); - break; case INDEX_op_ctz_i32: tcg_out_cltz(s, TCG_TYPE_I32, OPC_CTZW, a0, a1, a2, c2); break; @@ -2793,8 +2813,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: return C_N1_I2(r, r, rM); diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index eaddf7005e..85a4f23e95 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -31,7 +31,6 @@ extern uint64_t s390_facilities[3]; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 @@ -46,7 +45,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 76180dabcb..adfe403bef 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1514,27 +1514,6 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc); } -static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1, - TCGArg a2, int a2const) -{ - /* Since this sets both R and R+1, we have no choice but to store the - result into R0, allowing R1 == TCG_TMP0 to be clobbered as well. */ - QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1); - tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1); - - if (a2const && a2 == 64) { - tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0); - return; - } - - /* - * Conditions from FLOGR are: - * 2 -> one bit found - * 8 -> no one bit found - */ - tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2); -} - static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) { /* With MIE3, and bit 0 of m4 set, we get the complete result. */ @@ -2242,6 +2221,53 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_clz_int(TCGContext *s, TCGReg dest, TCGReg a1, + TCGArg a2, int a2const) +{ + /* + * Since this sets both R and R+1, we have no choice but to store the + * result into R0, allowing R1 == TCG_TMP0 to be clobbered as well. + */ + QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1); + tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1); + + if (a2const && a2 == 64) { + tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0); + return; + } + + /* + * Conditions from FLOGR are: + * 2 -> one bit found + * 8 -> no one bit found + */ + tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2); +} + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_clz_int(s, a0, a1, a2, false); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_clz_int(s, a0, a1, a2, true); +} + +static TCGConstraintSetIndex cset_clz(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_I64 ? C_O1_I2(r, r, rI) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clz, + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + static const TCGOutOpBinary outop_divs = { .base.static_constraint = C_NotImplemented, }; @@ -2884,10 +2910,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tgen_sextract(s, args[0], args[1], args[2], args[3]); break; - case INDEX_op_clz_i64: - tgen_clz(s, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_ctpop_i32: tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]); break; @@ -3387,9 +3409,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_clz_i64: - return C_O1_I2(r, r, rI); - case INDEX_op_brcond_i32: return C_O0_I2(r, ri); case INDEX_op_brcond_i64: diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 1dd86c363d..21fa0f3663 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -16,7 +16,6 @@ extern bool use_vis3_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 @@ -31,7 +30,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 57b26ae33b..a4fb41764b 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1318,6 +1318,10 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divs_rJ(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGArg a2, bool c2) { diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 7bfa55adb1..27d6ec7636 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -15,7 +15,6 @@ #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 1989d8d12c..e1e57ff3f8 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -723,9 +723,9 @@ void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_clz_i32) { + if (tcg_op_supported(INDEX_op_clz_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_clz_i64) { + } else if (tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t1, arg1); @@ -748,9 +748,13 @@ void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { + TCGv_i32 z, t; + if (TCG_TARGET_HAS_ctz_i32) { tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_ctz_i64) { + return; + } + if (TCG_TARGET_HAS_ctz_i64) { TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t1, arg1); @@ -759,29 +763,28 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_extrl_i64_i32(ret, t1); tcg_temp_free_i64(t1); tcg_temp_free_i64(t2); - } else if (TCG_TARGET_HAS_ctpop_i32 - || TCG_TARGET_HAS_ctpop_i64 - || TCG_TARGET_HAS_clz_i32 - || TCG_TARGET_HAS_clz_i64) { - TCGv_i32 z, t = tcg_temp_ebb_new_i32(); - - if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) { - tcg_gen_subi_i32(t, arg1, 1); - tcg_gen_andc_i32(t, t, arg1); - tcg_gen_ctpop_i32(t, t); - } else { - /* Since all non-x86 hosts have clz(0) == 32, don't fight it. */ - tcg_gen_neg_i32(t, arg1); - tcg_gen_and_i32(t, t, arg1); - tcg_gen_clzi_i32(t, t, 32); - tcg_gen_xori_i32(t, t, 31); - } - z = tcg_constant_i32(0); - tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t); - tcg_temp_free_i32(t); + return; + } + if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) { + t = tcg_temp_ebb_new_i32(); + tcg_gen_subi_i32(t, arg1, 1); + tcg_gen_andc_i32(t, t, arg1); + tcg_gen_ctpop_i32(t, t); + } else if (tcg_op_supported(INDEX_op_clz_i32, TCG_TYPE_I32, 0) || + tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { + t = tcg_temp_ebb_new_i32(); + tcg_gen_neg_i32(t, arg1); + tcg_gen_and_i32(t, t, arg1); + tcg_gen_clzi_i32(t, t, 32); + tcg_gen_xori_i32(t, t, 31); } else { gen_helper_ctz_i32(ret, arg1, arg2); + return; } + + z = tcg_constant_i32(0); + tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i32(t); } void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) @@ -800,7 +803,8 @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_clz_i32) { + if (tcg_op_supported(INDEX_op_clz_i32, TCG_TYPE_I32, 0) || + tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t, arg, 31); tcg_gen_xor_i32(t, t, arg); @@ -2336,7 +2340,7 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_clz_i64) { + if (tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2); } else { gen_helper_clz_i64(ret, arg1, arg2); @@ -2346,8 +2350,8 @@ void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { if (TCG_TARGET_REG_BITS == 32 - && TCG_TARGET_HAS_clz_i32 - && arg2 <= 0xffffffffu) { + && arg2 <= 0xffffffffu + && tcg_op_supported(INDEX_op_clz_i32, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32); tcg_gen_addi_i32(t, t, 32); @@ -2361,45 +2365,47 @@ void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { + TCGv_i64 z, t; + if (TCG_TARGET_HAS_ctz_i64) { tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) { - TCGv_i64 z, t = tcg_temp_ebb_new_i64(); - - if (TCG_TARGET_HAS_ctpop_i64) { - tcg_gen_subi_i64(t, arg1, 1); - tcg_gen_andc_i64(t, t, arg1); - tcg_gen_ctpop_i64(t, t); - } else { - /* Since all non-x86 hosts have clz(0) == 64, don't fight it. */ - tcg_gen_neg_i64(t, arg1); - tcg_gen_and_i64(t, t, arg1); - tcg_gen_clzi_i64(t, t, 64); - tcg_gen_xori_i64(t, t, 63); - } - z = tcg_constant_i64(0); - tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t); - tcg_temp_free_i64(t); - tcg_temp_free_i64(z); + return; + } + if (TCG_TARGET_HAS_ctpop_i64) { + t = tcg_temp_ebb_new_i64(); + tcg_gen_subi_i64(t, arg1, 1); + tcg_gen_andc_i64(t, t, arg1); + tcg_gen_ctpop_i64(t, t); + } else if (tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { + t = tcg_temp_ebb_new_i64(); + tcg_gen_neg_i64(t, arg1); + tcg_gen_and_i64(t, t, arg1); + tcg_gen_clzi_i64(t, t, 64); + tcg_gen_xori_i64(t, t, 63); } else { gen_helper_ctz_i64(ret, arg1, arg2); + return; } + + z = tcg_constant_i64(0); + tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i64(t); } void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { if (TCG_TARGET_REG_BITS == 32 - && TCG_TARGET_HAS_ctz_i32 - && arg2 <= 0xffffffffu) { + && arg2 <= 0xffffffffu + && tcg_op_supported(INDEX_op_ctz_i32, TCG_TYPE_I32, 0)) { TCGv_i32 t32 = tcg_temp_ebb_new_i32(); tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32); tcg_gen_addi_i32(t32, t32, 32); tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); tcg_temp_free_i32(t32); - } else if (!TCG_TARGET_HAS_ctz_i64 - && TCG_TARGET_HAS_ctpop_i64 - && arg2 == 64) { + } else if (arg2 == 64 + && !tcg_op_supported(INDEX_op_ctz_i64, TCG_TYPE_I64, 0) + && TCG_TARGET_HAS_ctpop_i64) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_subi_i64(t, arg1, 1); @@ -2413,7 +2419,7 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) { + if (tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t, arg, 63); tcg_gen_xor_i64(t, t, arg); diff --git a/tcg/tcg.c b/tcg/tcg.c index 182f19e5f0..e04d3adcec 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1026,6 +1026,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_clz_i32, TCGOutOpBinary, outop_clz), + OUTOP(INDEX_op_clz_i64, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), @@ -2288,8 +2290,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap16_i32; case INDEX_op_bswap32_i32: return TCG_TARGET_HAS_bswap32_i32; - case INDEX_op_clz_i32: - return TCG_TARGET_HAS_clz_i32; case INDEX_op_ctz_i32: return TCG_TARGET_HAS_ctz_i32; case INDEX_op_ctpop_i32: @@ -2333,8 +2333,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap32_i64; case INDEX_op_bswap64_i64: return TCG_TARGET_HAS_bswap64_i64; - case INDEX_op_clz_i64: - return TCG_TARGET_HAS_clz_i64; case INDEX_op_ctz_i64: return TCG_TARGET_HAS_ctz_i64; case INDEX_op_ctpop_i64: @@ -5404,6 +5402,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: + case INDEX_op_clz_i32: + case INDEX_op_clz_i64: case INDEX_op_divs: case INDEX_op_divu: case INDEX_op_eqv: diff --git a/tcg/tci.c b/tcg/tci.c index b1ee14e65f..11b11ce642 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -594,13 +594,11 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2]; break; -#if TCG_TARGET_HAS_clz_i32 - case INDEX_op_clz_i32: + case INDEX_op_tci_clz32: tci_args_rrr(insn, &r0, &r1, &r2); tmp32 = regs[r1]; regs[r0] = tmp32 ? clz32(tmp32) : regs[r2]; break; -#endif #if TCG_TARGET_HAS_ctz_i32 case INDEX_op_ctz_i32: tci_args_rrr(insn, &r0, &r1, &r2); @@ -735,12 +733,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2]; break; -#if TCG_TARGET_HAS_clz_i64 case INDEX_op_clz_i64: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; break; -#endif #if TCG_TARGET_HAS_ctz_i64 case INDEX_op_ctz_i64: tci_args_rrr(insn, &r0, &r1, &r2); @@ -1073,10 +1069,10 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_shr: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_clz_i32: case INDEX_op_clz_i64: case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: + case INDEX_op_tci_clz32: case INDEX_op_tci_divs32: case INDEX_op_tci_divu32: case INDEX_op_tci_rems32: diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 04d341a8d2..ae1f724702 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -10,7 +10,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 @@ -23,7 +22,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index cff215490a..04774ca9c4 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -2,6 +2,7 @@ /* These opcodes for use between the tci generator and interpreter. */ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_clz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 0d15547c9f..ee7e6f15eb 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -83,8 +83,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i64: case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: return C_O1_I2(r, r, r); @@ -630,6 +628,20 @@ static const TCGOutOpBinary outop_andc = { .out_rrr = tgen_andc, }; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_clz32 + : INDEX_op_clz_i64); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_clz, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -921,7 +933,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */ CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */ tcg_out_op_rrr(s, opc, args[0], args[1], args[2]); break; From 5a5bb0a5a0b879c8f110c6a9bde9146181ef840c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 16:12:46 -0800 Subject: [PATCH 067/161] tcg: Merge INDEX_op_clz_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 10 +++++----- tcg/tcg-op.c | 22 ++++++++++------------ tcg/tcg.c | 6 ++---- tcg/tci.c | 4 ++-- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 22 insertions(+), 27 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index c3a6499d01..22f0432988 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -358,7 +358,7 @@ Logical - | *t0* = *t1* | ~\ *t2* - * - clz_i32/i64 *t0*, *t1*, *t2* + * - clz *t0*, *t1*, *t2* - | *t0* = *t1* ? clz(*t1*) : *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 25fd93eb28..ad1d193ef4 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -42,6 +42,7 @@ DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) +DEF(clz, 1, 2, 0, TCG_OPF_INT) DEF(divs, 1, 2, 0, TCG_OPF_INT) DEF(divs2, 2, 3, 0, TCG_OPF_INT) DEF(divu, 1, 2, 0, TCG_OPF_INT) @@ -95,7 +96,6 @@ DEF(setcond2_i32, 1, 4, 1, 0) DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) -DEF(clz_i32, 1, 2, 0, 0) DEF(ctz_i32, 1, 2, 0, 0) DEF(ctpop_i32, 1, 1, 0, 0) @@ -130,7 +130,6 @@ DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(bswap16_i64, 1, 1, 1, 0) DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) -DEF(clz_i64, 1, 2, 0, 0) DEF(ctz_i64, 1, 2, 0, 0) DEF(ctpop_i64, 1, 1, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 97a566a617..d8d0e728aa 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -503,10 +503,10 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, case INDEX_op_nor_vec: return ~(x | y); - case INDEX_op_clz_i32: - return (uint32_t)x ? clz32(x) : y; - - case INDEX_op_clz_i64: + case INDEX_op_clz: + if (type == TCG_TYPE_I32) { + return (uint32_t)x ? clz32(x) : y; + } return x ? clz64(x) : y; case INDEX_op_ctz_i32: @@ -2898,7 +2898,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_bswap64_i64: done = fold_bswap(&ctx, op); break; - CASE_OP_32_64(clz): + case INDEX_op_clz: CASE_OP_32_64(ctz): done = fold_count_zeros(&ctx, op); break; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index e1e57ff3f8..76e9efc655 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -723,9 +723,9 @@ void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_clz_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2); - } else if (tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_clz, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t1, arg1); @@ -770,8 +770,7 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_subi_i32(t, arg1, 1); tcg_gen_andc_i32(t, t, arg1); tcg_gen_ctpop_i32(t, t); - } else if (tcg_op_supported(INDEX_op_clz_i32, TCG_TYPE_I32, 0) || - tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { + } else if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_REG, 0)) { t = tcg_temp_ebb_new_i32(); tcg_gen_neg_i32(t, arg1); tcg_gen_and_i32(t, t, arg1); @@ -803,8 +802,7 @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (tcg_op_supported(INDEX_op_clz_i32, TCG_TYPE_I32, 0) || - tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_REG, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t, arg, 31); tcg_gen_xor_i32(t, t, arg); @@ -2340,8 +2338,8 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_clz, ret, arg1, arg2); } else { gen_helper_clz_i64(ret, arg1, arg2); } @@ -2351,7 +2349,7 @@ void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { if (TCG_TARGET_REG_BITS == 32 && arg2 <= 0xffffffffu - && tcg_op_supported(INDEX_op_clz_i32, TCG_TYPE_I32, 0)) { + && tcg_op_supported(INDEX_op_clz, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32); tcg_gen_addi_i32(t, t, 32); @@ -2376,7 +2374,7 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_subi_i64(t, arg1, 1); tcg_gen_andc_i64(t, t, arg1); tcg_gen_ctpop_i64(t, t); - } else if (tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { + } else if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { t = tcg_temp_ebb_new_i64(); tcg_gen_neg_i64(t, arg1); tcg_gen_and_i64(t, t, arg1); @@ -2419,7 +2417,7 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (tcg_op_supported(INDEX_op_clz_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t, arg, 63); tcg_gen_xor_i64(t, t, arg); diff --git a/tcg/tcg.c b/tcg/tcg.c index e04d3adcec..71b0721fb5 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1026,8 +1026,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), - OUTOP(INDEX_op_clz_i32, TCGOutOpBinary, outop_clz), - OUTOP(INDEX_op_clz_i64, TCGOutOpBinary, outop_clz), + OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), @@ -5402,8 +5401,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: + case INDEX_op_clz: case INDEX_op_divs: case INDEX_op_divu: case INDEX_op_eqv: diff --git a/tcg/tci.c b/tcg/tci.c index 11b11ce642..7c2f2a524b 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -733,7 +733,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2]; break; - case INDEX_op_clz_i64: + case INDEX_op_clz: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; break; @@ -1052,6 +1052,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: + case INDEX_op_clz: case INDEX_op_divs: case INDEX_op_divu: case INDEX_op_eqv: @@ -1069,7 +1070,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_shr: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_clz_i64: case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: case INDEX_op_tci_clz32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index ee7e6f15eb..0fd1f5510a 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -633,7 +633,7 @@ static void tgen_clz(TCGContext *s, TCGType type, { TCGOpcode opc = (type == TCG_TYPE_I32 ? INDEX_op_tci_clz32 - : INDEX_op_clz_i64); + : INDEX_op_clz); tcg_out_op_rrr(s, opc, a0, a1, a2); } From e3fcde59c92fb421789890c145d5fffdd3d1672d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 17:02:13 -0800 Subject: [PATCH 068/161] tcg: Convert ctz to TCGOutOpBinary Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 34 ++++++++++-------- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 39 ++++++++++++++------- tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 60 +++++++++++++++++++------------- tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 60 ++++++++++++++------------------ tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 4 +++ tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 39 ++++++++++++++------- tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 32 +++++++++++------ tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 4 +++ tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 4 +++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 9 ++--- tcg/tcg.c | 8 ++--- tcg/tci.c | 8 ++--- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target-opc.h.inc | 1 + tcg/tci/tcg-target.c.inc | 20 +++++++---- 25 files changed, 193 insertions(+), 149 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 8c839d8949..478d59676e 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -15,7 +15,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 @@ -29,7 +28,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 3bd8231117..8441c5f4bf 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2129,6 +2129,26 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1); + tgen_clz(s, type, a0, TCG_REG_TMP0, a2); +} + +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1); + tgen_clzi(s, type, a0, TCG_REG_TMP0, a2); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_O1_I2(r, r, rAL), + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2468,16 +2488,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_ctz_i64: - case INDEX_op_ctz_i32: - tcg_out_insn(s, 3507, RBIT, ext, TCG_REG_TMP0, a1); - if (c2) { - tgen_clzi(s, ext, a0, TCG_REG_TMP0, a2); - } else { - tgen_clz(s, ext, a0, TCG_REG_TMP0, a2); - } - break; - case INDEX_op_brcond_i32: a1 = (int32_t)a1; /* FALLTHRU */ @@ -3098,10 +3108,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rC); - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return C_O1_I2(r, r, rAL); - case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(r, rC); diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index fceec2f0ca..1485a52c21 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -26,7 +26,6 @@ extern bool use_neon_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 681eb5aba1..c05f21c82c 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1888,6 +1888,32 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, a1, 0); + tgen_clz(s, TCG_TYPE_I32, a0, TCG_REG_TMP, a2); +} + +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, a1, 0); + tgen_clzi(s, TCG_TYPE_I32, a0, TCG_REG_TMP, a2); +} + +static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags) +{ + return use_armv7_instructions ? C_O1_I2(r, r, rIK) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctz, + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; + static TCGConstraintSetIndex cset_idiv(TCGType type, unsigned flags) { return use_idiv_instructions ? C_O1_I2(r, r, r) : C_NotImplemented; @@ -2220,15 +2246,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; - case INDEX_op_ctz_i32: - tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0); - if (const_args[2]) { - tgen_clzi(s, TCG_TYPE_I32, args[0], TCG_REG_TMP, args[2]); - } else { - tgen_clz(s, TCG_TYPE_I32, args[0], TCG_REG_TMP, args[2]); - } - break; - case INDEX_op_brcond_i32: c = tcg_out_cmp(s, args[2], args[0], args[1], const_args[1]); tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[3])); @@ -2351,10 +2368,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i32: return C_O1_I2(r, r, rIN); - case INDEX_op_clz_i32: - case INDEX_op_ctz_i32: - return C_O1_I2(r, r, rIK); - case INDEX_op_mulu2_i32: case INDEX_op_muls2_i32: return C_O2_I2(r, r, r, r); diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 2277872ff3..b8a0a5c619 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -28,7 +28,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 have_popcnt #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 @@ -43,7 +42,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 have_popcnt #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 0edd4cbc07..f7d0b93af0 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1851,24 +1851,6 @@ static void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond, tcg_out_cmov(s, jcc, rexw, dest, v1); } -static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, - TCGArg arg2, bool const_a2) -{ - if (have_bmi1) { - tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1); - if (const_a2) { - tcg_debug_assert(arg2 == (rexw ? 64 : 32)); - } else { - tcg_debug_assert(dest != arg2); - tcg_out_cmov(s, JCC_JB, rexw, dest, arg2); - } - } else { - tcg_debug_assert(dest != arg2); - tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1); - tcg_out_cmov(s, JCC_JE, rexw, dest, arg2); - } -} - static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) { intptr_t disp = tcg_pcrel_diff(s, dest) - 5; @@ -2646,6 +2628,41 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + int jcc; + + if (have_bmi1) { + tcg_out_modrm(s, OPC_TZCNT + rexw, a0, a1); + jcc = JCC_JB; + } else { + tcg_out_modrm(s, OPC_BSF + rexw, a0, a1); + jcc = JCC_JE; + } + tcg_out_cmov(s, jcc, rexw, a0, a2); +} + +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_TZCNT + rexw, a0, a1); +} + +static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags) +{ + return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctz, + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; + static const TCGOutOpBinary outop_divs = { .base.static_constraint = C_NotImplemented, }; @@ -3029,9 +3046,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(ctz): - tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]); - break; OP_32_64(ctpop): tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); break; @@ -3913,10 +3927,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub2_i64: return C_N1_O1_I4(r, r, 0, 1, re, re); - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, L); diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 2eba2132b8..f87d05efc6 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -18,7 +18,6 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -29,7 +28,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 332ce6c86b..14f3ed1f5c 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -546,28 +546,6 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_ext32s(s, ret, arg); } -static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, - TCGReg a0, TCGReg a1, TCGReg a2, - bool c2, bool is_32bit) -{ - if (c2) { - /* - * Fast path: semantics already satisfied due to constraint and - * insn behavior, single instruction is enough. - */ - tcg_debug_assert(a2 == (is_32bit ? 32 : 64)); - /* all clz/ctz insns belong to DJ-format */ - tcg_out32(s, encode_dj_insn(opc, a0, a1)); - return; - } - - tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1)); - /* a0 = a1 ? REG_TMP0 : a2 */ - tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); - tcg_out_opc_masknez(s, a0, a2, a1); - tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); -} - #define SETCOND_INV TCG_TARGET_NB_REGS #define SETCOND_NEZ (SETCOND_INV << 1) #define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) @@ -1355,6 +1333,33 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* a2 is constrained to exactly the type width. */ + if (type == TCG_TYPE_I32) { + tcg_out_opc_ctz_w(s, a0, a1); + } else { + tcg_out_opc_ctz_d(s, a0, a1); + } +} + +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_ctzi(s, type, TCG_REG_TMP0, a1, /* ignored */ 0); + /* a0 = a1 ? REG_TMP0 : a2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); + tcg_out_opc_masknez(s, a0, a2, a1); + tcg_out_opc_or(s, a0, a0, TCG_REG_TMP0); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_O1_I2(r, r, rW), + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1781,13 +1786,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_revb_d(s, a0, a1); break; - case INDEX_op_ctz_i32: - tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true); - break; - case INDEX_op_ctz_i64: - tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); - break; - case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: tcg_out_setcond(s, args[3], a0, a1, a2, c2); @@ -2418,10 +2416,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return C_O1_I2(r, r, rW); - case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: /* Must deposit into the same register as input */ diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index c27ca7e543..ca33c9b745 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -60,7 +60,6 @@ extern bool use_mips32r2_instructions; /* optional instructions detected at runtime */ #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -69,7 +68,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 #endif diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 5052d6481c..e8720b63ed 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1734,6 +1734,10 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index cd7346011b..2b381b99a2 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -19,7 +19,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00 #define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 1 @@ -34,7 +33,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00 #define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 518cf1e9ef..2cdabcf610 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2974,6 +2974,32 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD; + tcg_out_cntxz(s, type, insn, a0, a1, a2, false); +} + +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD; + tcg_out_cntxz(s, type, insn, a0, a1, a2, true); +} + +static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags) +{ + return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctz, + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; + static void tgen_eqv(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3370,18 +3396,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; - case INDEX_op_ctz_i32: - tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], - args[2], const_args[2]); - break; case INDEX_op_ctpop_i32: tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); break; - - case INDEX_op_ctz_i64: - tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], - args[2], const_args[2]); - break; case INDEX_op_ctpop_i64: tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); break; @@ -4240,10 +4257,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return C_O1_I2(r, r, rZW); - case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(r, rC); diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 41e287130d..385a6736c0 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -18,7 +18,6 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -28,7 +27,6 @@ #define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 77eef02db5..1ceb1aeb1c 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2023,6 +2023,27 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CTZW : OPC_CTZ; + tcg_out_cltz(s, type, insn, a0, a1, a2, false); +} + +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CTZW : OPC_CTZ; + tcg_out_cltz(s, type, insn, a0, a1, a2, true); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clzctz, + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2424,13 +2445,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0); break; - case INDEX_op_ctz_i32: - tcg_out_cltz(s, TCG_TYPE_I32, OPC_CTZW, a0, a1, a2, c2); - break; - case INDEX_op_ctz_i64: - tcg_out_cltz(s, TCG_TYPE_I64, OPC_CTZ, a0, a1, a2, c2); - break; - case INDEX_op_add2_i32: tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], const_args[4], const_args[5], false, true); @@ -2813,10 +2827,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i64: return C_O1_I2(r, r, rI); - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return C_N1_I2(r, r, rM); - case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(rz, rz); diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 85a4f23e95..0794394fea 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -31,7 +31,6 @@ extern uint64_t s390_facilities[3]; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 1 @@ -45,7 +44,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index adfe403bef..374136ed14 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2268,6 +2268,10 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_divs = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 21fa0f3663..56262640ff 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -16,7 +16,6 @@ extern bool use_vis3_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 1 @@ -30,7 +29,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 1 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index a4fb41764b..a9257b8b93 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1322,6 +1322,10 @@ static const TCGOutOpBinary outop_clz = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_divs_rJ(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGArg a2, bool c2) { diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 27d6ec7636..6bba845944 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -15,7 +15,6 @@ #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 76e9efc655..b117a59f05 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -750,11 +750,11 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { TCGv_i32 z, t; - if (TCG_TARGET_HAS_ctz_i32) { + if (tcg_op_supported(INDEX_op_ctz_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2); return; } - if (TCG_TARGET_HAS_ctz_i64) { + if (tcg_op_supported(INDEX_op_ctz_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t1, arg1); @@ -788,7 +788,8 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) { - if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) { + if (!tcg_op_supported(INDEX_op_ctz_i32, TCG_TYPE_I32, 0) + && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_subi_i32(t, arg1, 1); @@ -2365,7 +2366,7 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { TCGv_i64 z, t; - if (TCG_TARGET_HAS_ctz_i64) { + if (tcg_op_supported(INDEX_op_ctz_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2); return; } diff --git a/tcg/tcg.c b/tcg/tcg.c index 71b0721fb5..3f610e3f83 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1027,6 +1027,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), + OUTOP(INDEX_op_ctz_i32, TCGOutOpBinary, outop_ctz), + OUTOP(INDEX_op_ctz_i64, TCGOutOpBinary, outop_ctz), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), @@ -2289,8 +2291,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap16_i32; case INDEX_op_bswap32_i32: return TCG_TARGET_HAS_bswap32_i32; - case INDEX_op_ctz_i32: - return TCG_TARGET_HAS_ctz_i32; case INDEX_op_ctpop_i32: return TCG_TARGET_HAS_ctpop_i32; @@ -2332,8 +2332,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap32_i64; case INDEX_op_bswap64_i64: return TCG_TARGET_HAS_bswap64_i64; - case INDEX_op_ctz_i64: - return TCG_TARGET_HAS_ctz_i64; case INDEX_op_ctpop_i64: return TCG_TARGET_HAS_ctpop_i64; case INDEX_op_add2_i64: @@ -5402,6 +5400,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_clz: + case INDEX_op_ctz_i32: + case INDEX_op_ctz_i64: case INDEX_op_divs: case INDEX_op_divu: case INDEX_op_eqv: diff --git a/tcg/tci.c b/tcg/tci.c index 7c2f2a524b..b505944b10 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -599,13 +599,11 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tmp32 = regs[r1]; regs[r0] = tmp32 ? clz32(tmp32) : regs[r2]; break; -#if TCG_TARGET_HAS_ctz_i32 - case INDEX_op_ctz_i32: + case INDEX_op_tci_ctz32: tci_args_rrr(insn, &r0, &r1, &r2); tmp32 = regs[r1]; regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2]; break; -#endif #if TCG_TARGET_HAS_ctpop_i32 case INDEX_op_ctpop_i32: tci_args_rr(insn, &r0, &r1); @@ -737,12 +735,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; break; -#if TCG_TARGET_HAS_ctz_i64 case INDEX_op_ctz_i64: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; break; -#endif #if TCG_TARGET_HAS_ctpop_i64 case INDEX_op_ctpop_i64: tci_args_rr(insn, &r0, &r1); @@ -1070,8 +1066,8 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_shr: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: + case INDEX_op_tci_ctz32: case INDEX_op_tci_clz32: case INDEX_op_tci_divs32: case INDEX_op_tci_divu32: diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index ae1f724702..daa6db4799 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -10,7 +10,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 @@ -22,7 +21,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_muls2_i64 1 diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index 04774ca9c4..2bb346f4c8 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -3,6 +3,7 @@ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_clz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_ctz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 0fd1f5510a..47bdec5f44 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -83,8 +83,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i64: case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: return C_O1_I2(r, r, r); case INDEX_op_brcond_i32: @@ -642,6 +640,20 @@ static const TCGOutOpBinary outop_clz = { .out_rrr = tgen_clz, }; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_ctz32 + : INDEX_op_ctz_i64); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_ctz, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -933,10 +945,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */ - tcg_out_op_rrr(s, opc, args[0], args[1], args[2]); - break; - CASE_32_64(deposit) tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], args[3], args[4]); break; From c96447d838d67db509cde1a190132e14b8672055 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 17:07:01 -0800 Subject: [PATCH 069/161] tcg: Merge INDEX_op_ctz_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 10 +++++----- tcg/tcg-op.c | 16 ++++++++-------- tcg/tcg.c | 6 ++---- tcg/tci.c | 4 ++-- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 20 insertions(+), 23 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 22f0432988..92344b8786 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -362,7 +362,7 @@ Logical - | *t0* = *t1* ? clz(*t1*) : *t2* - * - ctz_i32/i64 *t0*, *t1*, *t2* + * - ctz *t0*, *t1*, *t2* - | *t0* = *t1* ? ctz(*t1*) : *t2* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index ad1d193ef4..4dfd8708a5 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -43,6 +43,7 @@ DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(clz, 1, 2, 0, TCG_OPF_INT) +DEF(ctz, 1, 2, 0, TCG_OPF_INT) DEF(divs, 1, 2, 0, TCG_OPF_INT) DEF(divs2, 2, 3, 0, TCG_OPF_INT) DEF(divu, 1, 2, 0, TCG_OPF_INT) @@ -96,7 +97,6 @@ DEF(setcond2_i32, 1, 4, 1, 0) DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) -DEF(ctz_i32, 1, 2, 0, 0) DEF(ctpop_i32, 1, 1, 0, 0) DEF(setcond_i64, 1, 2, 1, 0) @@ -130,7 +130,6 @@ DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(bswap16_i64, 1, 1, 1, 0) DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) -DEF(ctz_i64, 1, 2, 0, 0) DEF(ctpop_i64, 1, 1, 0, 0) DEF(add2_i64, 2, 4, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index d8d0e728aa..af4e76e81b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -509,10 +509,10 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, } return x ? clz64(x) : y; - case INDEX_op_ctz_i32: - return (uint32_t)x ? ctz32(x) : y; - - case INDEX_op_ctz_i64: + case INDEX_op_ctz: + if (type == TCG_TYPE_I32) { + return (uint32_t)x ? ctz32(x) : y; + } return x ? ctz64(x) : y; case INDEX_op_ctpop_i32: @@ -2899,7 +2899,7 @@ void tcg_optimize(TCGContext *s) done = fold_bswap(&ctx, op); break; case INDEX_op_clz: - CASE_OP_32_64(ctz): + case INDEX_op_ctz: done = fold_count_zeros(&ctx, op); break; CASE_OP_32_64(ctpop): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index b117a59f05..7bf7de1213 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -750,11 +750,11 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { TCGv_i32 z, t; - if (tcg_op_supported(INDEX_op_ctz_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_ctz, ret, arg1, arg2); return; } - if (tcg_op_supported(INDEX_op_ctz_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0)) { TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t1, arg1); @@ -788,7 +788,7 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) { - if (!tcg_op_supported(INDEX_op_ctz_i32, TCG_TYPE_I32, 0) + if (!tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0) && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i32 t = tcg_temp_ebb_new_i32(); @@ -2366,8 +2366,8 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { TCGv_i64 z, t; - if (tcg_op_supported(INDEX_op_ctz_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_ctz, ret, arg1, arg2); return; } if (TCG_TARGET_HAS_ctpop_i64) { @@ -2395,7 +2395,7 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { if (TCG_TARGET_REG_BITS == 32 && arg2 <= 0xffffffffu - && tcg_op_supported(INDEX_op_ctz_i32, TCG_TYPE_I32, 0)) { + && tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0)) { TCGv_i32 t32 = tcg_temp_ebb_new_i32(); tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32); tcg_gen_addi_i32(t32, t32, 32); @@ -2403,7 +2403,7 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) tcg_gen_movi_i32(TCGV_HIGH(ret), 0); tcg_temp_free_i32(t32); } else if (arg2 == 64 - && !tcg_op_supported(INDEX_op_ctz_i64, TCG_TYPE_I64, 0) + && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0) && TCG_TARGET_HAS_ctpop_i64) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i64 t = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 3f610e3f83..18b28a670e 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1027,8 +1027,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), - OUTOP(INDEX_op_ctz_i32, TCGOutOpBinary, outop_ctz), - OUTOP(INDEX_op_ctz_i64, TCGOutOpBinary, outop_ctz), + OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), @@ -5400,8 +5399,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_clz: - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: + case INDEX_op_ctz: case INDEX_op_divs: case INDEX_op_divu: case INDEX_op_eqv: diff --git a/tcg/tci.c b/tcg/tci.c index b505944b10..550f2014a8 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -735,7 +735,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; break; - case INDEX_op_ctz_i64: + case INDEX_op_ctz: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; break; @@ -1049,6 +1049,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_and: case INDEX_op_andc: case INDEX_op_clz: + case INDEX_op_ctz: case INDEX_op_divs: case INDEX_op_divu: case INDEX_op_eqv: @@ -1066,7 +1067,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_shr: case INDEX_op_sub: case INDEX_op_xor: - case INDEX_op_ctz_i64: case INDEX_op_tci_ctz32: case INDEX_op_tci_clz32: case INDEX_op_tci_divs32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 47bdec5f44..d8d45e2c4b 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -645,7 +645,7 @@ static void tgen_ctz(TCGContext *s, TCGType type, { TCGOpcode opc = (type == TCG_TYPE_I32 ? INDEX_op_tci_ctz32 - : INDEX_op_ctz_i64); + : INDEX_op_ctz); tcg_out_op_rrr(s, opc, a0, a1, a2); } From f8fa1dae3db5493617ff42e8ccfd797058df243e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 17:56:01 -0800 Subject: [PATCH 070/161] tcg: Convert ctpop to TCGOutOpUnary Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 - tcg/aarch64/tcg-target.c.inc | 4 ++ tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 4 ++ tcg/i386/tcg-target-has.h | 2 - tcg/i386/tcg-target.c.inc | 23 ++++++++--- tcg/loongarch64/tcg-target-has.h | 2 - tcg/loongarch64/tcg-target.c.inc | 4 ++ tcg/mips/tcg-target-has.h | 2 - tcg/mips/tcg-target.c.inc | 4 ++ tcg/ppc/tcg-target-has.h | 2 - tcg/ppc/tcg-target.c.inc | 26 ++++++++----- tcg/riscv/tcg-target-has.h | 2 - tcg/riscv/tcg-target.c.inc | 26 ++++++++----- tcg/s390x/tcg-target-has.h | 2 - tcg/s390x/tcg-target.c.inc | 66 +++++++++++++++----------------- tcg/sparc64/tcg-target-has.h | 2 - tcg/sparc64/tcg-target.c.inc | 4 ++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 37 ++++++++++-------- tcg/tcg.c | 8 ++-- tcg/tci.c | 19 ++++----- tcg/tci/tcg-target-has.h | 2 - tcg/tci/tcg-target.c.inc | 19 +++++++-- 24 files changed, 151 insertions(+), 113 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 478d59676e..4f1840f44e 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -15,7 +15,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 @@ -28,7 +27,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 8441c5f4bf..0f01fa8c20 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2129,6 +2129,10 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_ctz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 1485a52c21..1cf3911613 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -26,7 +26,6 @@ extern bool use_neon_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index c05f21c82c..e109c65965 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1888,6 +1888,10 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_ctz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index b8a0a5c619..a71f8c7370 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -28,7 +28,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 have_popcnt #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 @@ -42,7 +41,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 have_popcnt #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index f7d0b93af0..318a30ebe0 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2628,6 +2628,23 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return have_popcnt ? C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; + static void tgen_ctz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3046,10 +3063,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(ctpop): - tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); - break; - OP_32_64(brcond): tcg_out_brcond(s, rexw, a2, a0, a1, const_args[1], arg_label(args[3]), 0); @@ -3893,8 +3906,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: return C_O1_I1(r, r); case INDEX_op_extract2_i32: diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index f87d05efc6..33a1cf2326 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -18,7 +18,6 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ @@ -28,7 +27,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 14f3ed1f5c..4ef7c6b945 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1333,6 +1333,10 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_ctzi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index ca33c9b745..470aa16452 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -60,7 +60,6 @@ extern bool use_mips32r2_instructions; /* optional instructions detected at runtime */ #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -68,7 +67,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index e8720b63ed..a94c965046 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1734,6 +1734,10 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_ctz = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 2b381b99a2..f071435d98 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -19,7 +19,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 @@ -33,7 +32,6 @@ #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 2cdabcf610..ab56c623c7 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2974,6 +2974,23 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD; + tcg_out32(s, insn | SAB(a1, a0, 0)); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; + static void tgen_ctz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3396,13 +3413,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; - case INDEX_op_ctpop_i32: - tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); - break; - case INDEX_op_ctpop_i64: - tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); - break; - case INDEX_op_brcond_i32: tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], arg_label(args[3]), TCG_TYPE_I32); @@ -4226,7 +4236,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_ctpop_i32: case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: @@ -4238,7 +4247,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_ctpop_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_bswap16_i64: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 385a6736c0..a3b634570b 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -18,7 +18,6 @@ #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_negsetcond_i64 1 @@ -27,7 +26,6 @@ #define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 1ceb1aeb1c..a5cd18c99e 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2023,6 +2023,23 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CPOPW : OPC_CPOP; + tcg_out_opc_imm(s, insn, a0, a1, 0); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; + static void tgen_ctz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2438,13 +2455,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_ctpop_i32: - tcg_out_opc_imm(s, OPC_CPOPW, a0, a1, 0); - break; - case INDEX_op_ctpop_i64: - tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0); - break; - case INDEX_op_add2_i32: tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], const_args[4], const_args[5], false, true); @@ -2808,8 +2818,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 0794394fea..87f117ce58 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -31,7 +31,6 @@ extern uint64_t s390_facilities[3]; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 @@ -44,7 +43,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 374136ed14..71adb0964d 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1514,32 +1514,6 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc); } -static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - /* With MIE3, and bit 0 of m4 set, we get the complete result. */ - if (HAVE_FACILITY(MISC_INSN_EXT3)) { - if (type == TCG_TYPE_I32) { - tcg_out_ext32u(s, dest, src); - src = dest; - } - tcg_out_insn(s, RRFc, POPCNT, dest, src, 8); - return; - } - - /* Without MIE3, each byte gets the count of bits for the byte. */ - tcg_out_insn(s, RRFc, POPCNT, dest, src, 0); - - /* Multiply to sum each byte at the top of the word. */ - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, MSFI, dest, 0x01010101); - tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull); - tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0); - tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56); - } -} - static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, int ofs, int len, int z) { @@ -2268,6 +2242,37 @@ static const TCGOutOpBinary outop_clz = { .out_rri = tgen_clzi, }; +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + /* With MIE3, and bit 0 of m4 set, we get the complete result. */ + if (HAVE_FACILITY(MISC_INSN_EXT3)) { + if (type == TCG_TYPE_I32) { + tcg_out_ext32u(s, dest, src); + src = dest; + } + tcg_out_insn(s, RRFc, POPCNT, dest, src, 8); + return; + } + + /* Without MIE3, each byte gets the count of bits for the byte. */ + tcg_out_insn(s, RRFc, POPCNT, dest, src, 0); + + /* Multiply to sum each byte at the top of the word. */ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, MSFI, dest, 0x01010101); + tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull); + tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0); + tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56); + } +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_ctpop, +}; + static const TCGOutOpBinary outop_ctz = { .base.static_constraint = C_NotImplemented, }; @@ -2914,13 +2919,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tgen_sextract(s, args[0], args[1], args[2], args[3]); break; - case INDEX_op_ctpop_i32: - tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_ctpop_i64: - tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_mb: /* The host memory model is quite strong, we simply need to serialize the instruction stream. */ @@ -3429,8 +3427,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: return C_O1_I1(r, r); case INDEX_op_qemu_ld_i32: diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 56262640ff..40e54e1543 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -16,7 +16,6 @@ extern bool use_vis3_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 @@ -29,7 +28,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index a9257b8b93..43ca23f593 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1322,6 +1322,10 @@ static const TCGOutOpBinary outop_clz = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_NotImplemented, +}; + static const TCGOutOpBinary outop_ctz = { .base.static_constraint = C_NotImplemented, }; diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 6bba845944..97f4e83303 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -15,7 +15,6 @@ #define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_add2_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 7bf7de1213..db0e79059b 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -765,7 +765,8 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_temp_free_i64(t2); return; } - if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) { + if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0) || + tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { t = tcg_temp_ebb_new_i32(); tcg_gen_subi_i32(t, arg1, 1); tcg_gen_andc_i32(t, t, arg1); @@ -788,8 +789,9 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) { - if (!tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0) - && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) { + if (arg2 == 32 + && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0) + && tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_subi_i32(t, arg1, 1); @@ -817,9 +819,9 @@ void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) { - if (TCG_TARGET_HAS_ctpop_i32) { + if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) { tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1); - } else if (TCG_TARGET_HAS_ctpop_i64) { + } else if (tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t, arg1); tcg_gen_ctpop_i64(t, t); @@ -2370,7 +2372,7 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op3_i64(INDEX_op_ctz, ret, arg1, arg2); return; } - if (TCG_TARGET_HAS_ctpop_i64) { + if (tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { t = tcg_temp_ebb_new_i64(); tcg_gen_subi_i64(t, arg1, 1); tcg_gen_andc_i64(t, t, arg1); @@ -2404,7 +2406,7 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) tcg_temp_free_i32(t32); } else if (arg2 == 64 && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0) - && TCG_TARGET_HAS_ctpop_i64) { + && tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_subi_i64(t, arg1, 1); @@ -2432,16 +2434,21 @@ void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1) { - if (TCG_TARGET_HAS_ctpop_i64) { - tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1); - } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) { - tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); - tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); - tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (TCG_TARGET_REG_BITS == 64) { + if (tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { + tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1); + return; + } } else { - gen_helper_ctpop_i64(ret, arg1); + if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) { + tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); + tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); + tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + return; + } } + gen_helper_ctpop_i64(ret, arg1); } void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) diff --git a/tcg/tcg.c b/tcg/tcg.c index 18b28a670e..94997d8610 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1027,6 +1027,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), + OUTOP(INDEX_op_ctpop_i32, TCGOutOpUnary, outop_ctpop), + OUTOP(INDEX_op_ctpop_i64, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), @@ -2290,8 +2292,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap16_i32; case INDEX_op_bswap32_i32: return TCG_TARGET_HAS_bswap32_i32; - case INDEX_op_ctpop_i32: - return TCG_TARGET_HAS_ctpop_i32; case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: @@ -2331,8 +2331,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_bswap32_i64; case INDEX_op_bswap64_i64: return TCG_TARGET_HAS_bswap64_i64; - case INDEX_op_ctpop_i64: - return TCG_TARGET_HAS_ctpop_i64; case INDEX_op_add2_i64: return TCG_TARGET_HAS_add2_i64; case INDEX_op_sub2_i64: @@ -5449,6 +5447,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_ctpop_i32: + case INDEX_op_ctpop_i64: case INDEX_op_neg: case INDEX_op_not: { diff --git a/tcg/tci.c b/tcg/tci.c index 550f2014a8..8bcf48b251 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -26,6 +26,8 @@ #include +#define ctpop_tr glue(ctpop, TCG_TARGET_REG_BITS) + /* * Enable TCI assertions only when debugging TCG (and without NDEBUG defined). * Without assertions, the interpreter runs much faster. @@ -575,6 +577,11 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = ~regs[r1]; break; + case INDEX_op_ctpop_i32: + case INDEX_op_ctpop_i64: + tci_args_rr(insn, &r0, &r1); + regs[r0] = ctpop_tr(regs[r1]); + break; /* Arithmetic operations (32 bit). */ @@ -604,12 +611,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tmp32 = regs[r1]; regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2]; break; -#if TCG_TARGET_HAS_ctpop_i32 - case INDEX_op_ctpop_i32: - tci_args_rr(insn, &r0, &r1); - regs[r0] = ctpop32(regs[r1]); - break; -#endif /* Shift/rotate operations. */ @@ -739,12 +740,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; break; -#if TCG_TARGET_HAS_ctpop_i64 - case INDEX_op_ctpop_i64: - tci_args_rr(insn, &r0, &r1); - regs[r0] = ctpop64(regs[r1]); - break; -#endif #if TCG_TARGET_HAS_mulu2_i64 case INDEX_op_mulu2_i64: tci_args_rrrr(insn, &r0, &r1, &r2, &r3); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index daa6db4799..774fb149fc 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -10,7 +10,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -21,7 +20,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_muls2_i64 1 #define TCG_TARGET_HAS_add2_i32 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d8d45e2c4b..a931369a80 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -66,8 +66,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: @@ -883,6 +881,22 @@ static const TCGOutOpBinary outop_xor = { .out_rrr = tgen_xor, }; +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, glue(INDEX_op_ctpop_i,TCG_TARGET_REG_BITS), a0, a1); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tcg_out_op_rr(s, INDEX_op_neg, a0, a1); @@ -961,7 +975,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3])); break; - CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */ case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ tcg_out_op_rr(s, opc, args[0], args[1]); From 97218ae918b1504a63623130f3dc8f4b423b5f1b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 18:37:43 -0800 Subject: [PATCH 071/161] tcg: Merge INDEX_op_ctpop_{i32,i64} Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 6 +++--- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 9 +++------ tcg/tcg-op.c | 21 ++++++++++----------- tcg/tcg.c | 6 ++---- tcg/tci.c | 6 ++---- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 22 insertions(+), 31 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 92344b8786..fb7764e3c0 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -366,12 +366,12 @@ Logical - | *t0* = *t1* ? ctz(*t1*) : *t2* - * - ctpop_i32/i64 *t0*, *t1* + * - ctpop *t0*, *t1* - | *t0* = number of bits set in *t1* | - | With *ctpop* short for "count population", matching - | the function name used in ``include/qemu/host-utils.h``. + | The name *ctpop* is short for "count population", and matches + the function name used in ``include/qemu/host-utils.h``. Shifts/Rotates diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 4dfd8708a5..f4ccde074b 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -43,6 +43,7 @@ DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(clz, 1, 2, 0, TCG_OPF_INT) +DEF(ctpop, 1, 1, 0, TCG_OPF_INT) DEF(ctz, 1, 2, 0, TCG_OPF_INT) DEF(divs, 1, 2, 0, TCG_OPF_INT) DEF(divs2, 2, 3, 0, TCG_OPF_INT) @@ -97,7 +98,6 @@ DEF(setcond2_i32, 1, 4, 1, 0) DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) -DEF(ctpop_i32, 1, 1, 0, 0) DEF(setcond_i64, 1, 2, 1, 0) DEF(negsetcond_i64, 1, 2, 1, 0) @@ -130,7 +130,6 @@ DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(bswap16_i64, 1, 1, 1, 0) DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) -DEF(ctpop_i64, 1, 1, 0, 0) DEF(add2_i64, 2, 4, 0, 0) DEF(sub2_i64, 2, 4, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index af4e76e81b..bf625f770c 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -515,11 +515,8 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, } return x ? ctz64(x) : y; - case INDEX_op_ctpop_i32: - return ctpop32(x); - - case INDEX_op_ctpop_i64: - return ctpop64(x); + case INDEX_op_ctpop: + return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); CASE_OP_32_64(bswap16): x = bswap16(x); @@ -2902,7 +2899,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_ctz: done = fold_count_zeros(&ctx, op); break; - CASE_OP_32_64(ctpop): + case INDEX_op_ctpop: done = fold_ctpop(&ctx, op); break; CASE_OP_32_64(deposit): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index db0e79059b..0eeec47b83 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -765,8 +765,7 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_temp_free_i64(t2); return; } - if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0) || - tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_REG, 0)) { t = tcg_temp_ebb_new_i32(); tcg_gen_subi_i32(t, arg1, 1); tcg_gen_andc_i32(t, t, arg1); @@ -791,7 +790,7 @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) { if (arg2 == 32 && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0) - && tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) { + && tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_REG, 0)) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_subi_i32(t, arg1, 1); @@ -819,9 +818,9 @@ void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) { - if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1); - } else if (tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I32, 0)) { + tcg_gen_op2_i32(INDEX_op_ctpop, ret, arg1); + } else if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t, arg1); tcg_gen_ctpop_i64(t, t); @@ -2372,7 +2371,7 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op3_i64(INDEX_op_ctz, ret, arg1, arg2); return; } - if (tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { t = tcg_temp_ebb_new_i64(); tcg_gen_subi_i64(t, arg1, 1); tcg_gen_andc_i64(t, t, arg1); @@ -2406,7 +2405,7 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) tcg_temp_free_i32(t32); } else if (arg2 == 64 && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0) - && tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { + && tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_subi_i64(t, arg1, 1); @@ -2435,12 +2434,12 @@ void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1) { if (TCG_TARGET_REG_BITS == 64) { - if (tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1); + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { + tcg_gen_op2_i64(INDEX_op_ctpop, ret, arg1); return; } } else { - if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I32, 0)) { tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); diff --git a/tcg/tcg.c b/tcg/tcg.c index 94997d8610..5b6af803b2 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1027,8 +1027,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), - OUTOP(INDEX_op_ctpop_i32, TCGOutOpUnary, outop_ctpop), - OUTOP(INDEX_op_ctpop_i64, TCGOutOpUnary, outop_ctpop), + OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), @@ -5447,8 +5446,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: + case INDEX_op_ctpop: case INDEX_op_neg: case INDEX_op_not: { diff --git a/tcg/tci.c b/tcg/tci.c index 8bcf48b251..d58a94ff28 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -577,8 +577,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = ~regs[r1]; break; - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: + case INDEX_op_ctpop: tci_args_rr(insn, &r0, &r1); regs[r0] = ctpop_tr(regs[r1]); break; @@ -1023,6 +1022,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), str_r(r1), s2); break; + case INDEX_op_ctpop: case INDEX_op_mov: case INDEX_op_neg: case INDEX_op_not: @@ -1033,8 +1033,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: tci_args_rr(insn, &r0, &r1); info->fprintf_func(info->stream, "%-12s %s, %s", op_name, str_r(r0), str_r(r1)); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index a931369a80..1d696a087e 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -883,7 +883,7 @@ static const TCGOutOpBinary outop_xor = { static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { - tcg_out_op_rr(s, glue(INDEX_op_ctpop_i,TCG_TARGET_REG_BITS), a0, a1); + tcg_out_op_rr(s, INDEX_op_ctpop, a0, a1); } static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) From 5641afdf9b496933596fd0bd5fa9cad0033405d4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 8 Jan 2025 21:52:03 -0800 Subject: [PATCH 072/161] tcg: Convert muls2 to TCGOutOpMul2 Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 4 ++++ tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 25 +++++++++++++------------ tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 17 ++++++++++++----- tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 4 ++++ tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 28 ++++++++++++++++++++-------- tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 4 ++++ tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 4 ++++ tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 27 ++++++++++++++++++++------- tcg/sparc64/tcg-target-con-set.h | 1 + tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 29 ++++++++++++++++++++++++----- tcg/tcg-has.h | 1 - tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 26 ++++++++++++++++++++++---- tcg/tci.c | 23 ++++++++++------------- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 21 ++++++++++++++++++--- 25 files changed, 158 insertions(+), 79 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 4f1840f44e..c351db223d 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -32,7 +31,6 @@ #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 /* * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load, diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 0f01fa8c20..0996c6234b 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2205,6 +2205,10 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_NotImplemented, +}; + static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) { return type == TCG_TYPE_I64 ? C_O1_I2(r, r, r) : C_NotImplemented; diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 1cf3911613..e1f19ffbc9 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -29,7 +29,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index e109c65965..8c0bc78be3 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -929,14 +929,6 @@ static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0, (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); } -static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0, - TCGReg rd1, TCGReg rn, TCGReg rm) -{ - /* smull */ - tcg_out32(s, (cond << 28) | 0x00c00090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); -} - static void tcg_out_ext8s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn) { /* sxtb */ @@ -1973,6 +1965,19 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg rd0, TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* smull */ + tcg_out32(s, (COND_AL << 28) | 0x00c00090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_O2_I2(r, r, r, r), + .out_rrrr = tgen_muls2, +}; + static const TCGOutOpBinary outop_mulsh = { .base.static_constraint = C_NotImplemented, }; @@ -2246,9 +2251,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_mulu2_i32: tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; - case INDEX_op_muls2_i32: - tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); - break; case INDEX_op_brcond_i32: c = tcg_out_cmp(s, args[2], args[0], args[1], const_args[1]); @@ -2373,7 +2375,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O1_I2(r, r, rIN); case INDEX_op_mulu2_i32: - case INDEX_op_muls2_i32: return C_O2_I2(r, r, r, r); case INDEX_op_brcond_i32: diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index a71f8c7370..d63b3a3a89 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -33,7 +33,6 @@ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ @@ -46,7 +45,6 @@ #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muls2_i64 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #else #define TCG_TARGET_HAS_qemu_st8_i32 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 318a30ebe0..43d63cab5c 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2743,6 +2743,18 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, a3); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_O2_I2(a, d, a, r), + .out_rrrr = tgen_muls2, +}; + static const TCGOutOpBinary outop_mulsh = { .base.static_constraint = C_NotImplemented, }; @@ -3136,9 +3148,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, OP_32_64(mulu2): tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); break; - OP_32_64(muls2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); - break; OP_32_64(add2): if (const_args[4]) { tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1); @@ -3928,8 +3937,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_mulu2_i32: case INDEX_op_mulu2_i64: - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: return C_O2_I2(a, d, a, r); case INDEX_op_add2_i32: diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 33a1cf2326..491ebf0d06 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -15,7 +15,6 @@ #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -30,7 +29,6 @@ #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 4ef7c6b945..95a0614e6e 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1421,6 +1421,10 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_mulsh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 470aa16452..fd0b674402 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -40,7 +40,6 @@ extern bool use_mips32r2_instructions; /* optional instructions */ #define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 @@ -52,7 +51,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index a94c965046..a1f9efb18b 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1823,6 +1823,26 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MULT : OPC_DMULT; + tcg_out_opc_reg(s, insn, 0, a2, a3); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); +} + +static TCGConstraintSetIndex cset_mul2(TCGType type, unsigned flags) +{ + return use_mips32r6_instructions ? C_NotImplemented : C_O2_I2(r, r, r, r); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_muls2, +}; + static void tgen_mulsh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2161,15 +2181,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_muls2_i32: - i1 = OPC_MULT; - goto do_hilo2; case INDEX_op_mulu2_i32: i1 = OPC_MULTU; goto do_hilo2; - case INDEX_op_muls2_i64: - i1 = OPC_DMULT; - goto do_hilo2; case INDEX_op_mulu2_i64: i1 = OPC_DMULTU; do_hilo2: @@ -2347,9 +2361,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rz); - case INDEX_op_muls2_i32: case INDEX_op_mulu2_i32: - case INDEX_op_muls2_i64: case INDEX_op_mulu2_i64: return C_O2_I2(r, r, r, r); diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index f071435d98..e711aa0731 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -22,7 +22,6 @@ #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -37,7 +36,6 @@ #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 \ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index ab56c623c7..d4e34e3e7d 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3079,6 +3079,10 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_mulsh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index a3b634570b..7e260da61e 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -15,7 +15,6 @@ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -29,7 +28,6 @@ #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index a5cd18c99e..316621b285 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2117,6 +2117,10 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_NotImplemented, +}; + static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) { return type == TCG_TYPE_I32 ? C_NotImplemented : C_O1_I2(r, r, r); diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 87f117ce58..52a76fc0b5 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -36,7 +36,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -48,7 +47,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2) #define TCG_TARGET_HAS_qemu_ldst_i128 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 71adb0964d..71f0eb40f8 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2391,6 +2391,26 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_debug_assert((a1 & 1) == 0); + tcg_debug_assert(a0 == a1 + 1); + tcg_out_insn(s, RRFa, MGRK, a1, a2, a3); +} + +static TCGConstraintSetIndex cset_muls2(TCGType type, unsigned flags) +{ + return (type == TCG_TYPE_I64 && HAVE_FACILITY(MISC_INSN_EXT2) + ? C_O2_I2(o, m, r, r) : C_NotImplemented); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_muls2, + .out_rrrr = tgen_muls2, +}; + static const TCGOutOpBinary outop_mulsh = { .base.static_constraint = C_NotImplemented, }; @@ -2846,11 +2866,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_debug_assert(args[0] == args[1] + 1); tcg_out_insn(s, RRE, MLGR, args[1], args[3]); break; - case INDEX_op_muls2_i64: - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]); - break; case INDEX_op_add2_i64: if (const_args[4]) { @@ -3451,8 +3466,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_mulu2_i64: return C_O2_I2(o, m, 0, r); - case INDEX_op_muls2_i64: - return C_O2_I2(o, m, r, r); case INDEX_op_add2_i32: case INDEX_op_sub2_i32: diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index d90ba11443..d2ea184fa2 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -17,5 +17,6 @@ C_O1_I2(r, r, r) C_O1_I2(r, r, rJ) C_O1_I2(r, rz, rJ) C_O1_I4(r, rz, rJ, rI, 0) +C_O2_I2(r, r, r, r) C_O2_I2(r, r, rz, rJ) C_O2_I4(r, r, rz, rz, rJ, rJ) diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 40e54e1543..dea0941cac 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -21,7 +21,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 @@ -33,7 +32,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 43ca23f593..be2072c027 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1429,6 +1429,30 @@ static const TCGOutOpBinary outop_mul = { .out_rri = tgen_muli, }; +/* + * The 32-bit multiply insns produce a full 64-bit result. + * Supporting 32-bit mul[us]2 opcodes avoids sign/zero-extensions + * before the actual multiply; we only need extract the high part + * into the separate operand. + */ +static TCGConstraintSetIndex cset_mul2(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_I32 ? C_O2_I2(r, r, r, r) : C_NotImplemented; +} + +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_out_arith(s, a0, a2, a3, ARITH_SMUL); + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_muls2, +}; + static const TCGOutOpBinary outop_mulsh = { .base.static_constraint = C_NotImplemented, }; @@ -1696,10 +1720,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; case INDEX_op_mulu2_i32: c = ARITH_UMUL; - goto do_mul2; - case INDEX_op_muls2_i32: - c = ARITH_SMUL; - do_mul2: /* The 32-bit multiply insns produce a full 64-bit result. */ tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); @@ -1828,7 +1848,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub2_i64: return C_O2_I4(r, r, rz, rz, rJ, rJ); case INDEX_op_mulu2_i32: - case INDEX_op_muls2_i32: return C_O2_I2(r, r, rz, rJ); default: diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 97f4e83303..ac387b2544 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 /* Turn some undef macros into true macros. */ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 0eeec47b83..8a0846a8d2 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1162,7 +1162,7 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_muls2_i32) { + if (tcg_op_supported(INDEX_op_muls2_i32, TCG_TYPE_I32, 0)) { tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); @@ -2880,7 +2880,7 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_muls2_i64) { + if (tcg_op_supported(INDEX_op_muls2_i64, TCG_TYPE_I64, 0)) { tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 5b6af803b2..b1efc44725 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -992,6 +992,12 @@ typedef struct TCGOutOpDivRem { TCGReg a0, TCGReg a1, TCGReg a4); } TCGOutOpDivRem; +typedef struct TCGOutOpMul2 { + TCGOutOp base; + void (*out_rrrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3); +} TCGOutOpMul2; + typedef struct TCGOutOpUnary { TCGOutOp base; void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); @@ -1035,6 +1041,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), + OUTOP(INDEX_op_muls2_i32, TCGOutOpMul2, outop_muls2), + OUTOP(INDEX_op_muls2_i64, TCGOutOpMul2, outop_muls2), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), @@ -2285,8 +2293,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_sub2_i32; case INDEX_op_mulu2_i32: return TCG_TARGET_HAS_mulu2_i32; - case INDEX_op_muls2_i32: - return TCG_TARGET_HAS_muls2_i32; case INDEX_op_bswap16_i32: return TCG_TARGET_HAS_bswap16_i32; case INDEX_op_bswap32_i32: @@ -2336,8 +2342,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_sub2_i64; case INDEX_op_mulu2_i64: return TCG_TARGET_HAS_mulu2_i64; - case INDEX_op_muls2_i64: - return TCG_TARGET_HAS_muls2_i64; case INDEX_op_mov_vec: case INDEX_op_dup_vec: @@ -5473,6 +5477,20 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_muls2_i32: + case INDEX_op_muls2_i64: + { + const TCGOutOpMul2 *out = + container_of(all_outop[op->opc], TCGOutOpMul2, base); + + tcg_debug_assert(!const_args[2]); + tcg_debug_assert(!const_args[3]); + out->out_rrrr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } + break; + + default: if (def->flags & TCG_OPF_VECTOR) { tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, diff --git a/tcg/tci.c b/tcg/tci.c index d58a94ff28..51cbb5760a 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -581,6 +581,16 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = ctpop_tr(regs[r1]); break; + case INDEX_op_muls2_i32: + case INDEX_op_muls2_i64: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); +#if TCG_TARGET_REG_BITS == 32 + tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3]; + tci_write_reg64(regs, r1, r0, tmp64); +#else + muls64(®s[r0], ®s[r1], regs[r2], regs[r3]); +#endif + break; /* Arithmetic operations (32 bit). */ @@ -675,13 +685,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_write_reg64(regs, r1, r0, tmp64); break; #endif -#if TCG_TARGET_HAS_muls2_i32 - case INDEX_op_muls2_i32: - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3]; - tci_write_reg64(regs, r1, r0, tmp64); - break; -#endif #if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 CASE_32_64(bswap16) tci_args_rr(insn, &r0, &r1); @@ -745,12 +748,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, mulu64(®s[r0], ®s[r1], regs[r2], regs[r3]); break; #endif -#if TCG_TARGET_HAS_muls2_i64 - case INDEX_op_muls2_i64: - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - muls64(®s[r0], ®s[r1], regs[r2], regs[r3]); - break; -#endif #if TCG_TARGET_HAS_add2_i64 case INDEX_op_add2_i64: tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 774fb149fc..a3d04b0ee2 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -11,7 +11,6 @@ #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 0 -#define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -21,7 +20,6 @@ #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_negsetcond_i64 0 -#define TCG_TARGET_HAS_muls2_i64 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 1d696a087e..f568d4edb9 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -100,8 +100,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_mulu2_i32: case INDEX_op_mulu2_i64: - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: return C_O2_I2(r, r, r, r); case INDEX_op_movcond_i32: @@ -710,6 +708,24 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static TCGConstraintSetIndex cset_mul2(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O2_I2(r, r, r, r) : C_NotImplemented; +} + +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_out_op_rrrr(s, glue(INDEX_op_muls2_i,TCG_TARGET_REG_BITS), + a0, a1, a2, a3); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_muls2, +}; + static const TCGOutOpBinary outop_mulsh = { .base.static_constraint = C_NotImplemented, }; @@ -1009,7 +1025,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, #endif CASE_32_64(mulu2) - CASE_32_64(muls2) tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]); break; From bfe964809bf6ce951b2e674929d7b730c754e298 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 9 Jan 2025 07:24:32 -0800 Subject: [PATCH 073/161] tcg: Merge INDEX_op_muls2_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 17 +++++++++-------- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 9 +++------ tcg/tci.c | 6 ++---- tcg/tci/tcg-target.c.inc | 3 +-- 7 files changed, 21 insertions(+), 27 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index fb7764e3c0..0394767291 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -604,7 +604,7 @@ Multiword arithmetic support - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full double-word product *t0*. The latter is returned in two single-word outputs. - * - muls2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + * - muls2 *t0_low*, *t0_high*, *t1*, *t2* - | Similar to mulu2, except the two inputs *t1* and *t2* are signed. diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index f4ccde074b..a45b22ca1a 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -51,6 +51,7 @@ DEF(divu, 1, 2, 0, TCG_OPF_INT) DEF(divu2, 2, 3, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) +DEF(muls2, 2, 2, 0, TCG_OPF_INT) DEF(mulsh, 1, 2, 0, TCG_OPF_INT) DEF(muluh, 1, 2, 0, TCG_OPF_INT) DEF(nand, 1, 2, 0, TCG_OPF_INT) @@ -92,7 +93,6 @@ DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(add2_i32, 2, 4, 0, 0) DEF(sub2_i32, 2, 4, 0, 0) DEF(mulu2_i32, 2, 2, 0, 0) -DEF(muls2_i32, 2, 2, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) @@ -134,7 +134,6 @@ DEF(bswap64_i64, 1, 1, 1, 0) DEF(add2_i64, 2, 4, 0, 0) DEF(sub2_i64, 2, 4, 0, 0) DEF(mulu2_i64, 2, 2, 0, 0) -DEF(muls2_i64, 2, 2, 0, 0) #define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) diff --git a/tcg/optimize.c b/tcg/optimize.c index bf625f770c..756f681e88 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2074,16 +2074,17 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) h = (int32_t)(l >> 32); l = (int32_t)l; break; - case INDEX_op_muls2_i32: - l = (int64_t)(int32_t)a * (int32_t)b; - h = l >> 32; - l = (int32_t)l; - break; case INDEX_op_mulu2_i64: mulu64(&l, &h, a, b); break; - case INDEX_op_muls2_i64: - muls64(&l, &h, a, b); + case INDEX_op_muls2: + if (ctx->type == TCG_TYPE_I32) { + l = (int64_t)(int32_t)a * (int32_t)b; + h = l >> 32; + l = (int32_t)l; + } else { + muls64(&l, &h, a, b); + } break; default: g_assert_not_reached(); @@ -2973,7 +2974,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_muluh: done = fold_mul_highpart(&ctx, op); break; - CASE_OP_32_64(muls2): + case INDEX_op_muls2: CASE_OP_32_64(mulu2): done = fold_multiply2(&ctx, op); break; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 8a0846a8d2..0f48484dfe 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1162,8 +1162,8 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_muls2_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); + if (tcg_op_supported(INDEX_op_muls2, TCG_TYPE_I32, 0)) { + tcg_gen_op4_i32(INDEX_op_muls2, rl, rh, arg1, arg2); } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); @@ -2880,8 +2880,8 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { - if (tcg_op_supported(INDEX_op_muls2_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); + if (tcg_op_supported(INDEX_op_muls2, TCG_TYPE_I64, 0)) { + tcg_gen_op4_i64(INDEX_op_muls2, rl, rh, arg1, arg2); } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); diff --git a/tcg/tcg.c b/tcg/tcg.c index b1efc44725..5b22c75d2e 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1041,8 +1041,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), - OUTOP(INDEX_op_muls2_i32, TCGOutOpMul2, outop_muls2), - OUTOP(INDEX_op_muls2_i64, TCGOutOpMul2, outop_muls2), + OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), @@ -4008,8 +4007,7 @@ liveness_pass_1(TCGContext *s) } goto do_not_remove; - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: + case INDEX_op_muls2: opc_new = INDEX_op_mul; opc_new2 = INDEX_op_mulsh; goto do_mul2; @@ -5477,8 +5475,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: + case INDEX_op_muls2: { const TCGOutOpMul2 *out = container_of(all_outop[op->opc], TCGOutOpMul2, base); diff --git a/tcg/tci.c b/tcg/tci.c index 51cbb5760a..708ded34c7 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -581,8 +581,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = ctpop_tr(regs[r1]); break; - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: + case INDEX_op_muls2: tci_args_rrrr(insn, &r0, &r1, &r2, &r3); #if TCG_TARGET_REG_BITS == 32 tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3]; @@ -1095,10 +1094,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) str_r(r3), str_r(r4), str_c(c)); break; + case INDEX_op_muls2: case INDEX_op_mulu2_i32: case INDEX_op_mulu2_i64: - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: tci_args_rrrr(insn, &r0, &r1, &r2, &r3); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index f568d4edb9..aa3ce929b4 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -716,8 +716,7 @@ static TCGConstraintSetIndex cset_mul2(TCGType type, unsigned flags) static void tgen_muls2(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) { - tcg_out_op_rrrr(s, glue(INDEX_op_muls2_i,TCG_TARGET_REG_BITS), - a0, a1, a2, a3); + tcg_out_op_rrrr(s, INDEX_op_muls2, a0, a1, a2, a3); } static const TCGOutOpMul2 outop_muls2 = { From d37bc370fcad08698e4b6de99184361a2cf71ac0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 9 Jan 2025 08:59:52 -0800 Subject: [PATCH 074/161] tcg: Convert mulu2 to TCGOutOpMul2 Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 4 ++++ tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 27 +++++++++++++-------------- tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 19 ++++++++++++------- tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 4 ++++ tcg/mips/tcg-target-has.h | 2 -- tcg/mips/tcg-target.c.inc | 29 +++++++++++++++-------------- tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 4 ++++ tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 4 ++++ tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 31 +++++++++++++++++++++---------- tcg/sparc64/tcg-target-con-set.h | 1 - tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 23 ++++++++++++++--------- tcg/tcg-has.h | 1 - tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 8 ++++---- tcg/tci.c | 23 ++++++++++------------- tcg/tci/tcg-target-has.h | 4 ---- tcg/tci/tcg-target.c.inc | 21 +++++++++++++-------- 25 files changed, 122 insertions(+), 106 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index c351db223d..0c370d7dda 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -30,7 +29,6 @@ #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 /* * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load, diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 0996c6234b..46ad91f40e 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2226,6 +2226,10 @@ static const TCGOutOpBinary outop_mulsh = { .out_rrr = tgen_mulsh, }; +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index e1f19ffbc9..ccbc39a23e 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -28,7 +28,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 8c0bc78be3..55e9f66340 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -921,14 +921,6 @@ static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc, } } -static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0, - TCGReg rd1, TCGReg rn, TCGReg rm) -{ - /* umull */ - tcg_out32(s, (cond << 28) | 0x00800090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); -} - static void tcg_out_ext8s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn) { /* sxtb */ @@ -1982,6 +1974,19 @@ static const TCGOutOpBinary outop_mulsh = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg rd0, TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* umull */ + tcg_out32(s, (COND_AL << 28) | 0x00800090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_O2_I2(r, r, r, r), + .out_rrrr = tgen_mulu2, +}; + static const TCGOutOpBinary outop_muluh = { .base.static_constraint = C_NotImplemented, }; @@ -2248,9 +2253,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } tcg_out_mov_reg(s, COND_AL, args[0], a0); break; - case INDEX_op_mulu2_i32: - tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); - break; case INDEX_op_brcond_i32: c = tcg_out_cmp(s, args[2], args[0], args[1], const_args[1]); @@ -2374,9 +2376,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond_i32: return C_O1_I2(r, r, rIN); - case INDEX_op_mulu2_i32: - return C_O2_I2(r, r, r, r); - case INDEX_op_brcond_i32: return C_O0_I2(r, rIN); case INDEX_op_deposit_i32: diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index d63b3a3a89..c92a049fd7 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -32,7 +32,6 @@ #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ @@ -44,7 +43,6 @@ #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #else #define TCG_TARGET_HAS_qemu_st8_i32 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 43d63cab5c..d1b37c4388 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2763,6 +2763,18 @@ static const TCGOutOpBinary outop_muluh = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, a3); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_O2_I2(a, d, a, r), + .out_rrrr = tgen_mulu2, +}; + static const TCGOutOpBinary outop_nand = { .base.static_constraint = C_NotImplemented, }; @@ -3145,9 +3157,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I128); break; - OP_32_64(mulu2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); - break; OP_32_64(add2): if (const_args[4]) { tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1); @@ -3935,10 +3944,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_movcond_i64: return C_O1_I4(r, r, reT, r, 0); - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: - return C_O2_I2(a, d, a, r); - case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 491ebf0d06..12a721b4da 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -14,7 +14,6 @@ #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -28,7 +27,6 @@ #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 -#define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 95a0614e6e..fa9da82df8 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1440,6 +1440,10 @@ static const TCGOutOpBinary outop_mulsh = { .out_rrr = tgen_mulsh, }; +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index fd0b674402..05701fd228 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -39,7 +39,6 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_negsetcond_i32 0 @@ -50,7 +49,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 -#define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 #define TCG_TARGET_HAS_negsetcond_i64 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index a1f9efb18b..6a97264c7c 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1861,6 +1861,21 @@ static const TCGOutOpBinary outop_mulsh = { .out_rrr = tgen_mulsh, }; +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MULTU : OPC_DMULTU; + tcg_out_opc_reg(s, insn, 0, a2, a3); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_mulu2, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2181,17 +2196,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_mulu2_i32: - i1 = OPC_MULTU; - goto do_hilo2; - case INDEX_op_mulu2_i64: - i1 = OPC_DMULTU; - do_hilo2: - tcg_out_opc_reg(s, i1, 0, a2, args[3]); - tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); - break; - case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: tcg_out_bswap16(s, a0, a1, a2); @@ -2361,9 +2365,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: return C_O1_I2(r, rz, rz); - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: - return C_O2_I2(r, r, r, r); case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index e711aa0731..5cc059fe9a 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -21,7 +21,6 @@ #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -35,7 +34,6 @@ #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 \ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index d4e34e3e7d..f2cb45029f 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3095,6 +3095,10 @@ static const TCGOutOpBinary outop_mulsh = { .out_rrr = tgen_mulsh, }; +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 7e260da61e..9b86b8bf48 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -14,7 +14,6 @@ #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -27,7 +26,6 @@ #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 316621b285..071be449f6 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2138,6 +2138,10 @@ static const TCGOutOpBinary outop_mulsh = { .out_rrr = tgen_mulsh, }; +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 52a76fc0b5..894a9f64e0 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -35,7 +35,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -46,7 +45,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_qemu_ldst_i128 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 71f0eb40f8..18b83d5899 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2415,6 +2415,27 @@ static const TCGOutOpBinary outop_mulsh = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_debug_assert(a0 == a2); + tcg_debug_assert((a1 & 1) == 0); + tcg_debug_assert(a0 == a1 + 1); + tcg_out_insn(s, RRE, MLGR, a1, a3); +} + +static TCGConstraintSetIndex cset_mulu2(TCGType type, unsigned flags) +{ + return (type == TCG_TYPE_I64 && HAVE_FACILITY(MISC_INSN_EXT2) + ? C_O2_I2(o, m, 0, r) : C_NotImplemented); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulu2, + .out_rrrr = tgen_mulu2, +}; + static const TCGOutOpBinary outop_muluh = { .base.static_constraint = C_NotImplemented, }; @@ -2860,13 +2881,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); break; - case INDEX_op_mulu2_i64: - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRE, MLGR, args[1], args[3]); - break; - case INDEX_op_add2_i64: if (const_args[4]) { if ((int64_t)args[4] >= 0) { @@ -3464,9 +3478,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_movcond_i64: return C_O1_I4(r, r, rC, rI, r); - case INDEX_op_mulu2_i64: - return C_O2_I2(o, m, 0, r); - case INDEX_op_add2_i32: case INDEX_op_sub2_i32: return C_N1_O1_I4(r, r, 0, 1, ri, r); diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index d2ea184fa2..85dcfbc375 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -18,5 +18,4 @@ C_O1_I2(r, r, rJ) C_O1_I2(r, rz, rJ) C_O1_I4(r, rz, rJ, rI, 0) C_O2_I2(r, r, r, r) -C_O2_I2(r, r, rz, rJ) C_O2_I4(r, r, rz, rz, rJ, rJ) diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index dea0941cac..258c978b5e 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -20,7 +20,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 @@ -31,7 +30,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index be2072c027..41c4e77466 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1457,6 +1457,19 @@ static const TCGOutOpBinary outop_mulsh = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_out_arith(s, a0, a2, a3, ARITH_UMUL); + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_mulu2, +}; + static void tgen_muluh(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1646,7 +1659,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; - int c, c2; + int c2; /* Hoist the loads of the most common arguments. */ a0 = args[0]; @@ -1718,12 +1731,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, args[4], const_args[4], args[5], const_args[5], ARITH_SUBCC, ARITH_SUBC); break; - case INDEX_op_mulu2_i32: - c = ARITH_UMUL; - /* The 32-bit multiply insns produce a full 64-bit result. */ - tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); - tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); - break; case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); @@ -1847,8 +1854,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub2_i32: case INDEX_op_sub2_i64: return C_O2_I4(r, r, rz, rz, rJ, rJ); - case INDEX_op_mulu2_i32: - return C_O2_I2(r, r, rz, rJ); default: return C_NotImplemented; diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index ac387b2544..d4fc7148b4 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 -#define TCG_TARGET_HAS_mulu2_i64 0 /* Turn some undef macros into true macros. */ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 0f48484dfe..a4d976242a 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1138,7 +1138,7 @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_mulu2_i32) { + if (tcg_op_supported(INDEX_op_mulu2_i32, TCG_TYPE_I32, 0)) { tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); @@ -1156,7 +1156,7 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } else { - qemu_build_not_reached(); + g_assert_not_reached(); } } @@ -2861,7 +2861,7 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_mulu2_i64) { + if (tcg_op_supported(INDEX_op_mulu2_i64, TCG_TYPE_I64, 0)) { tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); @@ -2888,7 +2888,7 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op3_i64(INDEX_op_mulsh, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); - } else if (TCG_TARGET_HAS_mulu2_i64 || + } else if (tcg_op_supported(INDEX_op_mulu2_i64, TCG_TYPE_I64, 0) || tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 5b22c75d2e..d38b55d04f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1043,6 +1043,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), + OUTOP(INDEX_op_mulu2_i32, TCGOutOpMul2, outop_mulu2), + OUTOP(INDEX_op_mulu2_i64, TCGOutOpMul2, outop_mulu2), OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), @@ -2290,8 +2292,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_add2_i32; case INDEX_op_sub2_i32: return TCG_TARGET_HAS_sub2_i32; - case INDEX_op_mulu2_i32: - return TCG_TARGET_HAS_mulu2_i32; case INDEX_op_bswap16_i32: return TCG_TARGET_HAS_bswap16_i32; case INDEX_op_bswap32_i32: @@ -2339,8 +2339,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_add2_i64; case INDEX_op_sub2_i64: return TCG_TARGET_HAS_sub2_i64; - case INDEX_op_mulu2_i64: - return TCG_TARGET_HAS_mulu2_i64; case INDEX_op_mov_vec: case INDEX_op_dup_vec: @@ -5476,6 +5474,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_muls2: + case INDEX_op_mulu2_i32: + case INDEX_op_mulu2_i64: { const TCGOutOpMul2 *out = container_of(all_outop[op->opc], TCGOutOpMul2, base); diff --git a/tcg/tci.c b/tcg/tci.c index 708ded34c7..5c8c62c0ef 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -588,6 +588,16 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_write_reg64(regs, r1, r0, tmp64); #else muls64(®s[r0], ®s[r1], regs[r2], regs[r3]); +#endif + break; + case INDEX_op_mulu2_i32: + case INDEX_op_mulu2_i64: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); +#if TCG_TARGET_REG_BITS == 32 + tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3]; + tci_write_reg64(regs, r1, r0, tmp64); +#else + mulu64(®s[r0], ®s[r1], regs[r2], regs[r3]); #endif break; @@ -677,13 +687,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_write_reg64(regs, r1, r0, T1 - T2); break; #endif -#if TCG_TARGET_HAS_mulu2_i32 - case INDEX_op_mulu2_i32: - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3]; - tci_write_reg64(regs, r1, r0, tmp64); - break; -#endif #if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 CASE_32_64(bswap16) tci_args_rr(insn, &r0, &r1); @@ -741,12 +744,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; break; -#if TCG_TARGET_HAS_mulu2_i64 - case INDEX_op_mulu2_i64: - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - mulu64(®s[r0], ®s[r1], regs[r2], regs[r3]); - break; -#endif #if TCG_TARGET_HAS_add2_i64 case INDEX_op_add2_i64: tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index a3d04b0ee2..2402889bec 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -22,12 +22,8 @@ #define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 1 -#else -#define TCG_TARGET_HAS_mulu2_i32 1 #endif /* TCG_TARGET_REG_BITS == 64 */ #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index aa3ce929b4..4bce206f80 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -98,10 +98,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O0_I4(r, r, r, r); #endif - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: - return C_O2_I2(r, r, r, r); - case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: case INDEX_op_setcond2_i32: @@ -729,6 +725,19 @@ static const TCGOutOpBinary outop_mulsh = { .base.static_constraint = C_NotImplemented, }; +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_out_op_rrrr(s, glue(INDEX_op_mulu2_i,TCG_TARGET_REG_BITS), + a0, a1, a2, a3); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_mulu2, +}; + static const TCGOutOpBinary outop_muluh = { .base.static_constraint = C_NotImplemented, }; @@ -1023,10 +1032,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; #endif - CASE_32_64(mulu2) - tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]); - break; - case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i64: if (TCG_TARGET_REG_BITS == 32) { From d776198cd31d1578c4b0239dc80cb2841e86f2f8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 9 Jan 2025 09:11:53 -0800 Subject: [PATCH 075/161] tcg: Merge INDEX_op_mulu2_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 17 +++++++++-------- tcg/tcg-op.c | 10 +++++----- tcg/tcg.c | 9 +++------ tcg/tci.c | 6 ++---- tcg/tci/tcg-target.c.inc | 3 +-- 7 files changed, 22 insertions(+), 28 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 0394767291..592e002971 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -599,7 +599,7 @@ Multiword arithmetic support formed from two single-word arguments, and the double-word output *t0* is returned in two single-word outputs. - * - mulu2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + * - mulu2 *t0_low*, *t0_high*, *t1*, *t2* - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full double-word product *t0*. The latter is returned in two single-word outputs. diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index a45b22ca1a..287bdf3473 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -53,6 +53,7 @@ DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) DEF(muls2, 2, 2, 0, TCG_OPF_INT) DEF(mulsh, 1, 2, 0, TCG_OPF_INT) +DEF(mulu2, 2, 2, 0, TCG_OPF_INT) DEF(muluh, 1, 2, 0, TCG_OPF_INT) DEF(nand, 1, 2, 0, TCG_OPF_INT) DEF(neg, 1, 1, 0, TCG_OPF_INT) @@ -92,7 +93,6 @@ DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(add2_i32, 2, 4, 0, 0) DEF(sub2_i32, 2, 4, 0, 0) -DEF(mulu2_i32, 2, 2, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) @@ -133,7 +133,6 @@ DEF(bswap64_i64, 1, 1, 1, 0) DEF(add2_i64, 2, 4, 0, 0) DEF(sub2_i64, 2, 4, 0, 0) -DEF(mulu2_i64, 2, 2, 0, 0) #define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) diff --git a/tcg/optimize.c b/tcg/optimize.c index 756f681e88..14d943cf97 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2069,13 +2069,14 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) TCGOp *op2; switch (op->opc) { - case INDEX_op_mulu2_i32: - l = (uint64_t)(uint32_t)a * (uint32_t)b; - h = (int32_t)(l >> 32); - l = (int32_t)l; - break; - case INDEX_op_mulu2_i64: - mulu64(&l, &h, a, b); + case INDEX_op_mulu2: + if (ctx->type == TCG_TYPE_I32) { + l = (uint64_t)(uint32_t)a * (uint32_t)b; + h = (int32_t)(l >> 32); + l = (int32_t)l; + } else { + mulu64(&l, &h, a, b); + } break; case INDEX_op_muls2: if (ctx->type == TCG_TYPE_I32) { @@ -2975,7 +2976,7 @@ void tcg_optimize(TCGContext *s) done = fold_mul_highpart(&ctx, op); break; case INDEX_op_muls2: - CASE_OP_32_64(mulu2): + case INDEX_op_mulu2: done = fold_multiply2(&ctx, op); break; case INDEX_op_nand: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index a4d976242a..22af3b12bc 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1138,8 +1138,8 @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { - if (tcg_op_supported(INDEX_op_mulu2_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); + if (tcg_op_supported(INDEX_op_mulu2, TCG_TYPE_I32, 0)) { + tcg_gen_op4_i32(INDEX_op_mulu2, rl, rh, arg1, arg2); } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); @@ -2861,8 +2861,8 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { - if (tcg_op_supported(INDEX_op_mulu2_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); + if (tcg_op_supported(INDEX_op_mulu2, TCG_TYPE_I64, 0)) { + tcg_gen_op4_i64(INDEX_op_mulu2, rl, rh, arg1, arg2); } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); @@ -2888,7 +2888,7 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op3_i64(INDEX_op_mulsh, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); - } else if (tcg_op_supported(INDEX_op_mulu2_i64, TCG_TYPE_I64, 0) || + } else if (tcg_op_supported(INDEX_op_mulu2, TCG_TYPE_I64, 0) || tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index d38b55d04f..685408f0f9 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1043,8 +1043,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), - OUTOP(INDEX_op_mulu2_i32, TCGOutOpMul2, outop_mulu2), - OUTOP(INDEX_op_mulu2_i64, TCGOutOpMul2, outop_mulu2), + OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2), OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), @@ -4009,8 +4008,7 @@ liveness_pass_1(TCGContext *s) opc_new = INDEX_op_mul; opc_new2 = INDEX_op_mulsh; goto do_mul2; - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: + case INDEX_op_mulu2: opc_new = INDEX_op_mul; opc_new2 = INDEX_op_muluh; do_mul2: @@ -5474,8 +5472,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_muls2: - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: + case INDEX_op_mulu2: { const TCGOutOpMul2 *out = container_of(all_outop[op->opc], TCGOutOpMul2, base); diff --git a/tcg/tci.c b/tcg/tci.c index 5c8c62c0ef..569b5c7ed0 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -590,8 +590,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, muls64(®s[r0], ®s[r1], regs[r2], regs[r3]); #endif break; - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: + case INDEX_op_mulu2: tci_args_rrrr(insn, &r0, &r1, &r2, &r3); #if TCG_TARGET_REG_BITS == 32 tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3]; @@ -1092,8 +1091,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) break; case INDEX_op_muls2: - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: + case INDEX_op_mulu2: tci_args_rrrr(insn, &r0, &r1, &r2, &r3); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 4bce206f80..563529e055 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -728,8 +728,7 @@ static const TCGOutOpBinary outop_mulsh = { static void tgen_mulu2(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) { - tcg_out_op_rrrr(s, glue(INDEX_op_mulu2_i,TCG_TARGET_REG_BITS), - a0, a1, a2, a3); + tcg_out_op_rrrr(s, INDEX_op_mulu2, a0, a1, a2, a3); } static const TCGOutOpMul2 outop_mulu2 = { From 84f57d50ac25a24870de6d19f5b588083a4899e2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 9 Jan 2025 20:22:55 +0000 Subject: [PATCH 076/161] tcg/loongarch64: Support negsetcond Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target-con-set.h | 2 -- tcg/loongarch64/tcg-target-has.h | 4 ++-- tcg/loongarch64/tcg-target.c.inc | 34 ++++++++++++++++++++++------ 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index da84e4d49c..c145d4ab66 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -29,8 +29,6 @@ C_O1_I2(r, r, rJ) C_O1_I2(r, r, rU) C_O1_I2(r, r, rW) C_O1_I2(r, 0, rz) -C_O1_I2(r, rz, ri) -C_O1_I2(r, rz, rJ) C_O1_I2(w, w, w) C_O1_I2(w, w, wM) C_O1_I2(w, w, wA) diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 12a721b4da..e9bb913961 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -10,7 +10,7 @@ #include "host/cpuinfo.h" /* optional instructions */ -#define TCG_TARGET_HAS_negsetcond_i32 0 +#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 @@ -19,7 +19,7 @@ #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ -#define TCG_TARGET_HAS_negsetcond_i64 0 +#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_bswap16_i64 1 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index fa9da82df8..e7f97aaa5e 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -646,14 +646,29 @@ static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, } static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg arg1, tcg_target_long arg2, bool c2) + TCGReg arg1, tcg_target_long arg2, + bool c2, bool neg) { int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; - if (tmpflags != ret) { - TCGReg tmp = tmpflags & ~SETCOND_FLAGS; - + if (neg) { + /* If intermediate result is zero/non-zero: test != 0. */ + if (tmpflags & SETCOND_NEZ) { + tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); + tmp = ret; + } + /* Produce the 0/-1 result. */ + if (tmpflags & SETCOND_INV) { + tcg_out_opc_addi_d(s, ret, tmp, -1); + } else { + tcg_out_opc_sub_d(s, ret, TCG_REG_ZERO, tmp); + } + } else { switch (tmpflags & SETCOND_FLAGS) { + case 0: + tcg_debug_assert(tmp == ret); + break; case SETCOND_INV: /* Intermediate result is boolean: simply invert. */ tcg_out_opc_xori(s, ret, tmp, 1); @@ -1800,7 +1815,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2, c2); + tcg_out_setcond(s, args[3], a0, a1, a2, c2, false); + break; + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: + tcg_out_setcond(s, args[3], a0, a1, a2, c2, true); break; case INDEX_op_movcond_i32: @@ -2434,9 +2453,10 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O1_I2(r, 0, rz); case INDEX_op_setcond_i32: - return C_O1_I2(r, rz, ri); case INDEX_op_setcond_i64: - return C_O1_I2(r, rz, rJ); + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: + return C_O1_I2(r, r, rJ); case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: From ce27066de1bf9e44486834bfeb0aa44022f98ab9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 9 Jan 2025 12:36:32 -0800 Subject: [PATCH 077/161] tcg/mips: Support negsetcond Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/mips/tcg-target-has.h | 4 ++-- tcg/mips/tcg-target.c.inc | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 05701fd228..c77d4296cf 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -41,7 +41,7 @@ extern bool use_mips32r2_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 0 +#define TCG_TARGET_HAS_negsetcond_i32 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 @@ -51,7 +51,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_negsetcond_i64 0 +#define TCG_TARGET_HAS_negsetcond_i64 1 #endif /* optional instructions detected at runtime */ diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 6a97264c7c..759f152711 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -959,6 +959,25 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, tcg_out_setcond_end(s, ret, tmpflags); } +static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) +{ + int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2); + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; + + /* If intermediate result is zero/non-zero: test != 0. */ + if (tmpflags & SETCOND_NEZ) { + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp); + tmp = ret; + } + /* Produce the 0/-1 result. */ + if (tmpflags & SETCOND_INV) { + tcg_out_opc_imm(s, OPC_ADDIU, ret, tmp, -1); + } else { + tcg_out_opc_reg(s, OPC_SUBU, ret, TCG_REG_ZERO, tmp); + } +} + static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, TCGReg arg2, TCGLabel *l) { @@ -2270,6 +2289,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_setcond_i64: tcg_out_setcond(s, args[3], a0, a1, a2); break; + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: + tcg_out_negsetcond(s, args[3], a0, a1, a2); + break; case INDEX_op_setcond2_i32: tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); break; @@ -2364,6 +2387,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: return C_O1_I2(r, rz, rz); case INDEX_op_deposit_i32: From 9204be8dec1a52e943185df392377d8853decf93 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 9 Jan 2025 12:42:13 -0800 Subject: [PATCH 078/161] tcg/tci: Support negsetcond MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tci/tcg-target-has.h | 4 ++-- tcg/tci/tcg-target.c.inc | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 2402889bec..7787347e05 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -10,7 +10,7 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_negsetcond_i32 0 +#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -19,7 +19,7 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 0 +#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 563529e055..2eb323b5c5 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -79,6 +79,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: return C_O1_I2(r, r, r); @@ -966,6 +968,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, args[3], args[4], args[5]); break; + case INDEX_op_negsetcond_i32: + tcg_out_op_rrrc(s, INDEX_op_setcond_i32, + args[0], args[1], args[2], args[3]); + tcg_out_op_rr(s, INDEX_op_neg, args[0], args[0]); + break; + case INDEX_op_negsetcond_i64: + tcg_out_op_rrrc(s, INDEX_op_setcond_i64, + args[0], args[1], args[2], args[3]); + tcg_out_op_rr(s, INDEX_op_neg, args[0], args[0]); + break; + CASE_32_64(ld8u) CASE_32_64(ld8s) CASE_32_64(ld16u) From f791458932a21f8d99694af412304abfbce83765 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 9 Jan 2025 12:48:21 -0800 Subject: [PATCH 079/161] tcg: Remove TCG_TARGET_HAS_negsetcond_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All targets now provide negsetcond, so remove the conditional. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/arm/tcg-target-has.h | 1 - tcg/i386/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target-has.h | 2 -- tcg/mips/tcg-target-has.h | 2 -- tcg/optimize.c | 24 +++++++++--------------- tcg/ppc/tcg-target-has.h | 2 -- tcg/riscv/tcg-target-has.h | 2 -- tcg/s390x/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target-has.h | 2 -- tcg/tcg-has.h | 1 - tcg/tcg-op.c | 12 +++--------- tcg/tcg.c | 6 ++---- tcg/tci/tcg-target-has.h | 2 -- 14 files changed, 14 insertions(+), 48 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 0c370d7dda..22a574e703 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -16,7 +16,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_extr_i64_i32 0 @@ -26,7 +25,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 -#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index ccbc39a23e..bfa3be8028 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -27,7 +27,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index c92a049fd7..aaf8764cc9 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -29,7 +29,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -40,7 +39,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 -#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index e9bb913961..90f0a131ae 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -10,7 +10,6 @@ #include "host/cpuinfo.h" /* optional instructions */ -#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 @@ -19,7 +18,6 @@ #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ -#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_bswap16_i64 1 diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index c77d4296cf..c6cecba28b 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -41,7 +41,6 @@ extern bool use_mips32r2_instructions; /* optional instructions */ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 @@ -51,7 +50,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_negsetcond_i64 1 #endif /* optional instructions detected at runtime */ diff --git a/tcg/optimize.c b/tcg/optimize.c index 14d943cf97..0affde323b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1996,23 +1996,19 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) if (ti_is_const(tt) && ti_is_const(ft)) { uint64_t tv = ti_const_val(tt); uint64_t fv = ti_const_val(ft); - TCGOpcode opc, negopc = 0; + TCGOpcode opc, negopc; TCGCond cond = op->args[5]; switch (ctx->type) { case TCG_TYPE_I32: opc = INDEX_op_setcond_i32; - if (TCG_TARGET_HAS_negsetcond_i32) { - negopc = INDEX_op_negsetcond_i32; - } + negopc = INDEX_op_negsetcond_i32; tv = (int32_t)tv; fv = (int32_t)fv; break; case TCG_TYPE_I64: opc = INDEX_op_setcond_i64; - if (TCG_TARGET_HAS_negsetcond_i64) { - negopc = INDEX_op_negsetcond_i64; - } + negopc = INDEX_op_negsetcond_i64; break; default: g_assert_not_reached(); @@ -2024,14 +2020,12 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) } else if (fv == 1 && tv == 0) { op->opc = opc; op->args[3] = tcg_invert_cond(cond); - } else if (negopc) { - if (tv == -1 && fv == 0) { - op->opc = negopc; - op->args[3] = cond; - } else if (fv == -1 && tv == 0) { - op->opc = negopc; - op->args[3] = tcg_invert_cond(cond); - } + } else if (tv == -1 && fv == 0) { + op->opc = negopc; + op->args[3] = cond; + } else if (fv == -1 && tv == 0) { + op->opc = negopc; + op->args[3] = tcg_invert_cond(cond); } } diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 5cc059fe9a..5c4fc2bc34 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -20,7 +20,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -31,7 +30,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #endif diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 9b86b8bf48..e18b5cb8ec 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -10,7 +10,6 @@ #include "host/cpuinfo.h" /* optional instructions */ -#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -18,7 +17,6 @@ #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB) diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 894a9f64e0..41cd8a1d0d 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -32,7 +32,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_extr_i64_i32 0 @@ -42,7 +41,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 258c978b5e..6ed27b8fcc 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -17,7 +17,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -27,7 +26,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index d4fc7148b4..315dfd05aa 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -16,7 +16,6 @@ #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 /* Turn some undef macros into true macros. */ diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 22af3b12bc..413b68352d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -569,11 +569,8 @@ void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret, tcg_gen_movi_i32(ret, -1); } else if (cond == TCG_COND_NEVER) { tcg_gen_movi_i32(ret, 0); - } else if (TCG_TARGET_HAS_negsetcond_i32) { - tcg_gen_op4i_i32(INDEX_op_negsetcond_i32, ret, arg1, arg2, cond); } else { - tcg_gen_setcond_i32(cond, ret, arg1, arg2); - tcg_gen_neg_i32(ret, ret); + tcg_gen_op4i_i32(INDEX_op_negsetcond_i32, ret, arg1, arg2, cond); } } @@ -1950,17 +1947,14 @@ void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret, tcg_gen_movi_i64(ret, -1); } else if (cond == TCG_COND_NEVER) { tcg_gen_movi_i64(ret, 0); - } else if (TCG_TARGET_HAS_negsetcond_i64) { + } else if (TCG_TARGET_REG_BITS == 64) { tcg_gen_op4i_i64(INDEX_op_negsetcond_i64, ret, arg1, arg2, cond); - } else if (TCG_TARGET_REG_BITS == 32) { + } else { tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2), cond); tcg_gen_neg_i32(TCGV_LOW(ret), TCGV_LOW(ret)); tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_LOW(ret)); - } else { - tcg_gen_setcond_i64(cond, ret, arg1, arg2); - tcg_gen_neg_i64(ret, ret); } } diff --git a/tcg/tcg.c b/tcg/tcg.c index 685408f0f9..38a329b876 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2268,6 +2268,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return has_type; case INDEX_op_setcond_i32: + case INDEX_op_negsetcond_i32: case INDEX_op_brcond_i32: case INDEX_op_movcond_i32: case INDEX_op_ld8u_i32: @@ -2283,8 +2284,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i32: return true; - case INDEX_op_negsetcond_i32: - return TCG_TARGET_HAS_negsetcond_i32; case INDEX_op_extract2_i32: return TCG_TARGET_HAS_extract2_i32; case INDEX_op_add2_i32: @@ -2301,6 +2300,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_REG_BITS == 32; case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i64: case INDEX_op_brcond_i64: case INDEX_op_movcond_i64: case INDEX_op_ld8u_i64: @@ -2321,8 +2321,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i64: return TCG_TARGET_REG_BITS == 64; - case INDEX_op_negsetcond_i64: - return TCG_TARGET_HAS_negsetcond_i64; case INDEX_op_extract2_i64: return TCG_TARGET_HAS_extract2_i64; case INDEX_op_extrl_i64_i32: diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 7787347e05..f45a0688f9 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -10,7 +10,6 @@ #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_negsetcond_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -19,7 +18,6 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_add2_i64 1 From 5a7b38c8ca056f1ffbb488c1b7c97636b1f3b22b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 09:12:06 -0800 Subject: [PATCH 080/161] tcg: Convert setcond, negsetcond to TCGOutOpSetcond Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 121 ++++++++++++++++++++----------- tcg/arm/tcg-target.c.inc | 117 +++++++++++++++++++++--------- tcg/i386/tcg-target.c.inc | 57 +++++++++++---- tcg/loongarch64/tcg-target.c.inc | 51 +++++++++---- tcg/mips/tcg-target-con-set.h | 2 +- tcg/mips/tcg-target.c.inc | 39 +++++----- tcg/ppc/tcg-target.c.inc | 61 ++++++++++------ tcg/riscv/tcg-target.c.inc | 52 +++++++++---- tcg/s390x/tcg-target.c.inc | 64 +++++++++------- tcg/sparc64/tcg-target-con-set.h | 1 - tcg/sparc64/tcg-target.c.inc | 69 +++++++++++++----- tcg/tcg.c | 31 ++++++++ tcg/tci/tcg-target.c.inc | 49 +++++++------ 13 files changed, 477 insertions(+), 237 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 46ad91f40e..2524e73ff4 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1356,25 +1356,37 @@ static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, tcg_out_bfm(s, ext, rd, rn, a, b); } +static void tgen_cmp(TCGContext *s, TCGType ext, TCGCond cond, + TCGReg a, TCGReg b) +{ + if (is_tst_cond(cond)) { + tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b); + } else { + tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); + } +} + +static void tgen_cmpi(TCGContext *s, TCGType ext, TCGCond cond, + TCGReg a, tcg_target_long b) +{ + if (is_tst_cond(cond)) { + tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b); + } else if (b >= 0) { + tcg_debug_assert(is_aimm(b)); + tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); + } else { + tcg_debug_assert(is_aimm(-b)); + tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); + } +} + static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a, tcg_target_long b, bool const_b) { - if (is_tst_cond(cond)) { - if (!const_b) { - tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b); - } else { - tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b); - } + if (const_b) { + tgen_cmpi(s, ext, cond, a, b); } else { - if (!const_b) { - tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); - } else if (b >= 0) { - tcg_debug_assert(is_aimm(b)); - tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); - } else { - tcg_debug_assert(is_aimm(-b)); - tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); - } + tgen_cmp(s, ext, cond, a, b); } } @@ -2433,6 +2445,59 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_cset(TCGContext *s, TCGCond cond, TCGReg ret) +{ + /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ + tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, ret, TCG_REG_XZR, + TCG_REG_XZR, tcg_invert_cond(cond)); +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_cmp(s, type, cond, a1, a2); + tgen_cset(s, cond, a0); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_cmpi(s, type, cond, a1, a2); + tgen_cset(s, cond, a0); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_csetm(TCGContext *s, TCGType ext, TCGCond cond, TCGReg ret) +{ + /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */ + tcg_out_insn(s, 3506, CSINV, ext, ret, TCG_REG_XZR, + TCG_REG_XZR, tcg_invert_cond(cond)); +} + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_cmp(s, type, cond, a1, a2); + tgen_csetm(s, type, cond, a0); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_cmpi(s, type, cond, a1, a2); + tgen_csetm(s, type, cond, a0); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2507,26 +2572,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); break; - case INDEX_op_setcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_setcond_i64: - tcg_out_cmp(s, ext, args[3], a1, a2, c2); - /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ - tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, - TCG_REG_XZR, tcg_invert_cond(args[3])); - break; - - case INDEX_op_negsetcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_negsetcond_i64: - tcg_out_cmp(s, ext, args[3], a1, a2, c2); - /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */ - tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR, - TCG_REG_XZR, tcg_invert_cond(args[3])); - break; - case INDEX_op_movcond_i32: a2 = (int32_t)a2; /* FALLTHRU */ @@ -3114,12 +3159,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rC); - case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(r, rC); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 55e9f66340..0f2a029f6d 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1210,31 +1210,48 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) } } -static TCGCond tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg a, - TCGArg b, int b_const) +static TCGCond tgen_cmp(TCGContext *s, TCGCond cond, TCGReg a, TCGReg b) { + if (is_tst_cond(cond)) { + tcg_out_dat_reg(s, COND_AL, ARITH_TST, 0, a, b, SHIFT_IMM_LSL(0)); + return tcg_tst_eqne_cond(cond); + } + tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, a, b, SHIFT_IMM_LSL(0)); + return cond; +} + +static TCGCond tgen_cmpi(TCGContext *s, TCGCond cond, TCGReg a, TCGArg b) +{ + int imm12; + if (!is_tst_cond(cond)) { - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a, b, b_const); + tcg_out_dat_IN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a, b); return cond; } - cond = tcg_tst_eqne_cond(cond); - if (b_const) { - int imm12 = encode_imm(b); - - /* - * The compare constraints allow rIN, but TST does not support N. - * Be prepared to load the constant into a scratch register. - */ - if (imm12 >= 0) { - tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, a, imm12); - return cond; - } + /* + * The compare constraints allow rIN, but TST does not support N. + * Be prepared to load the constant into a scratch register. + */ + imm12 = encode_imm(b); + if (imm12 >= 0) { + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, a, imm12); + } else { tcg_out_movi32(s, COND_AL, TCG_REG_TMP, b); - b = TCG_REG_TMP; + tcg_out_dat_reg(s, COND_AL, ARITH_TST, 0, + a, TCG_REG_TMP, SHIFT_IMM_LSL(0)); + } + return tcg_tst_eqne_cond(cond); +} + +static TCGCond tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg a, + TCGArg b, int b_const) +{ + if (b_const) { + return tgen_cmpi(s, cond, a, b); + } else { + return tgen_cmp(s, cond, a, b); } - tcg_out_dat_reg(s, COND_AL, ARITH_TST, 0, a, b, SHIFT_IMM_LSL(0)); - return cond; } static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args, @@ -2164,6 +2181,52 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void finish_setcond(TCGContext *s, TCGCond cond, TCGReg ret, bool neg) +{ + tcg_out_movi32(s, tcg_cond_to_arm_cond[tcg_invert_cond(cond)], ret, 0); + tcg_out_movi32(s, tcg_cond_to_arm_cond[cond], ret, neg ? -1 : 1); +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + cond = tgen_cmp(s, cond, a1, a2); + finish_setcond(s, cond, a0, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + cond = tgen_cmpi(s, cond, a1, a2); + finish_setcond(s, cond, a0, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rIN), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + cond = tgen_cmp(s, cond, a1, a2); + finish_setcond(s, cond, a0, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + cond = tgen_cmpi(s, cond, a1, a2); + finish_setcond(s, cond, a0, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rIN), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2258,20 +2321,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, c = tcg_out_cmp(s, args[2], args[0], args[1], const_args[1]); tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[3])); break; - case INDEX_op_setcond_i32: - c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], - ARITH_MOV, args[0], 0, 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)], - ARITH_MOV, args[0], 0, 0); - break; - case INDEX_op_negsetcond_i32: - c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], - ARITH_MVN, args[0], 0, 0); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)], - ARITH_MOV, args[0], 0, 0); - break; case INDEX_op_brcond2_i32: c = tcg_out_cmp2(s, args, const_args); @@ -2372,10 +2421,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i32: return C_O0_I2(r, r); - case INDEX_op_setcond_i32: - case INDEX_op_negsetcond_i32: - return C_O1_I2(r, r, rIN); - case INDEX_op_brcond_i32: return C_O0_I2(r, rIN); case INDEX_op_deposit_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index d1b37c4388..d3a3f1f7fb 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1679,10 +1679,11 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, } #endif -static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, - TCGArg dest, TCGArg arg1, TCGArg arg2, - int const_arg2, bool neg) +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGArg arg2, + bool const_arg2, bool neg) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; int cmp_rexw = rexw; bool inv = false; bool cleared; @@ -1757,7 +1758,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, case TCG_COND_LT: /* If arg2 is 0, extract the sign bit. */ if (const_arg2 && arg2 == 0) { - tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, dest, arg1); + tcg_out_mov(s, type, dest, arg1); if (inv) { tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest); } @@ -1793,6 +1794,42 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, } } +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(q, r, reT), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(q, r, reT), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + #if TCG_TARGET_REG_BITS == 32 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, const int *const_args) @@ -3091,12 +3128,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_brcond(s, rexw, a2, a0, a1, const_args[1], arg_label(args[3]), 0); break; - OP_32_64(setcond): - tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, false); - break; - OP_32_64(negsetcond): - tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, true); - break; OP_32_64(movcond): tcg_out_movcond(s, rexw, args[5], a0, a1, a2, const_a2, args[3]); break; @@ -3934,12 +3965,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i64: return C_O1_I2(q, 0, qi); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(q, r, reT); - case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: return C_O1_I4(r, r, reT, r, 0); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index e7f97aaa5e..87e8b843f8 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -687,6 +687,42 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, false, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, tcg_target_long c2, bool const2, TCGReg v1, TCGReg v2) @@ -1813,15 +1849,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_revb_d(s, a0, a1); break; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2, c2, false); - break; - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2, c2, true); - break; - case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]); @@ -2452,12 +2479,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) /* Must deposit into the same register as input */ return C_O1_I2(r, 0, rz); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rJ); - case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: return C_O1_I4(r, rz, rJ, rz, rz); diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index 248bc95d9b..67dfab2aed 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -23,8 +23,8 @@ C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) C_O1_I2(r, r, rIK) C_O1_I2(r, r, rJ) +C_O1_I2(r, r, rz) C_O1_I2(r, r, rzW) -C_O1_I2(r, rz, rz) C_O1_I4(r, rz, rz, rz, 0) C_O1_I4(r, rz, rz, rz, rz) C_O2_I1(r, r, r) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 759f152711..51b3ea4bb0 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -952,15 +952,20 @@ static void tcg_out_setcond_end(TCGContext *s, TCGReg ret, int tmpflags) } } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg arg1, TCGReg arg2) +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg arg1, TCGReg arg2) { int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2); tcg_out_setcond_end(s, ret, tmpflags); } -static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg arg1, TCGReg arg2) +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rz), + .out_rrr = tgen_setcond, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg arg1, TCGReg arg2) { int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2); TCGReg tmp = tmpflags & ~SETCOND_FLAGS; @@ -978,6 +983,11 @@ static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rz), + .out_rrr = tgen_negsetcond, +}; + static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, TCGReg arg2, TCGLabel *l) { @@ -1041,10 +1051,11 @@ static int tcg_out_setcond2_int(TCGContext *s, TCGCond cond, TCGReg ret, break; default: - tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP0, ah, bh); - tcg_out_setcond(s, tcg_unsigned_cond(cond), TCG_TMP1, al, bl); + tgen_setcond(s, TCG_TYPE_I32, TCG_COND_EQ, TCG_TMP0, ah, bh); + tgen_setcond(s, TCG_TYPE_I32, tcg_unsigned_cond(cond), + TCG_TMP1, al, bl); tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP0); - tcg_out_setcond(s, tcg_high_cond(cond), TCG_TMP0, ah, bh); + tgen_setcond(s, TCG_TYPE_I32, tcg_high_cond(cond), TCG_TMP0, ah, bh); tcg_out_opc_reg(s, OPC_OR, ret, TCG_TMP0, TCG_TMP1); break; } @@ -2285,14 +2296,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); break; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2); - break; - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - tcg_out_negsetcond(s, args[3], a0, a1, a2); - break; case INDEX_op_setcond2_i32: tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); break; @@ -2385,12 +2388,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, rz, rz); - case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: return C_O1_I2(r, 0, rz); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index f2cb45029f..0a66351124 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1926,8 +1926,8 @@ static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, } static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg arg0, TCGArg arg1, TCGArg arg2, - int const_arg2, bool neg) + TCGReg arg0, TCGReg arg1, TCGArg arg2, + bool const_arg2, bool neg) { int sh; bool inv; @@ -2072,6 +2072,42 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, } } +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd) { tcg_out32(s, tcg_to_bc[cond] | bd); @@ -3465,22 +3501,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; - case INDEX_op_setcond_i32: - tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2], false); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2], false); - break; - case INDEX_op_negsetcond_i32: - tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2], true); - break; - case INDEX_op_negsetcond_i64: - tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2], true); - break; case INDEX_op_setcond2_i32: tcg_out_setcond2(s, args, const_args); break; @@ -4276,11 +4296,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(r, rC); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rC); case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: return C_O1_I4(r, r, rC, rZ, rZ); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 071be449f6..05114b5c5f 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1325,6 +1325,24 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, true); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg arg1, tcg_target_long arg2, bool c2) { @@ -1363,6 +1381,24 @@ static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_negsetcond(s, cond, dest, arg1, arg2, false); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_negsetcond(s, cond, dest, arg1, arg2, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne, int val1, bool c_val1, int val2, bool c_val2) @@ -2485,16 +2521,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); break; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2, c2); - break; - - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - tcg_out_negsetcond(s, args[3], a0, a1, a2, c2); - break; - case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: tcg_out_movcond(s, args[5], a0, a1, a2, c2, @@ -2837,12 +2863,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rI); - case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(rz, rz); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 18b83d5899..3c04b87109 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1370,9 +1370,9 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc); } -static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGReg dest, TCGReg c1, TCGArg c2, - bool c2const, bool neg) +static void tgen_setcond_int(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg c1, TCGArg c2, + bool c2const, bool neg) { int cc; @@ -1464,6 +1464,42 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc); } +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond_int(s, type, cond, dest, arg1, arg2, false, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tgen_setcond_int(s, type, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond_int(s, type, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tgen_setcond_int(s, type, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest, TCGArg v3, int v3const, TCGReg v4, int cc, int inv_cc) @@ -2825,14 +2861,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tgen_brcond(s, TCG_TYPE_I32, args[2], args[0], args[1], const_args[1], arg_label(args[3])); break; - case INDEX_op_setcond_i32: - tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], - args[2], const_args[2], false); - break; - case INDEX_op_negsetcond_i32: - tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], - args[2], const_args[2], true); - break; case INDEX_op_movcond_i32: tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], const_args[2], args[3], const_args[3], args[4]); @@ -2910,14 +2938,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], args[1], const_args[1], arg_label(args[3])); break; - case INDEX_op_setcond_i64: - tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], - args[2], const_args[2], false); - break; - case INDEX_op_negsetcond_i64: - tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], - args[2], const_args[2], true); - break; case INDEX_op_movcond_i64: tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], const_args[2], args[3], const_args[3], args[4]); @@ -3434,12 +3454,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_setcond_i32: - case INDEX_op_negsetcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rC); - case INDEX_op_brcond_i32: return C_O0_I2(r, ri); case INDEX_op_brcond_i64: diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index 85dcfbc375..ca7bbf0a2f 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -15,7 +15,6 @@ C_O0_I2(rz, rJ) C_O1_I1(r, r) C_O1_I2(r, r, r) C_O1_I2(r, r, rJ) -C_O1_I2(r, rz, rJ) C_O1_I4(r, rz, rJ, rI, 0) C_O2_I2(r, r, r, r) C_O2_I4(r, r, rz, rz, rJ, rJ) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 41c4e77466..dcbe6a8f47 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -714,7 +714,7 @@ static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, } static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, bool neg) + TCGReg c1, int32_t c2, bool c2const, bool neg) { /* For 32-bit comparisons, we can play games with ADDC/SUBC. */ switch (cond) { @@ -788,7 +788,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, } static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, bool neg) + TCGReg c1, int32_t c2, bool c2const, bool neg) { int rcond; @@ -822,6 +822,53 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, + TCGArg c2, bool c2const, bool neg) +{ + if (type == TCG_TYPE_I32) { + tcg_out_setcond_i32(s, cond, ret, c1, c2, c2const, neg); + } else { + tcg_out_setcond_i64(s, cond, ret, c1, c2, c2const, neg); + } +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, TCGReg ah, int32_t bl, int blconst, int32_t bh, int bhconst, int opl, int oph) @@ -1711,12 +1758,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_brcond_i32: tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); break; - case INDEX_op_setcond_i32: - tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2, false); - break; - case INDEX_op_negsetcond_i32: - tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2, true); - break; case INDEX_op_movcond_i32: tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); break; @@ -1758,12 +1799,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_brcond_i64: tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); break; - case INDEX_op_setcond_i64: - tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2, false); - break; - case INDEX_op_negsetcond_i64: - tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2, true); - break; case INDEX_op_movcond_i64: tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); break; @@ -1837,12 +1872,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, rz, rJ); - case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(rz, rJ); diff --git a/tcg/tcg.c b/tcg/tcg.c index 38a329b876..90e82e7ed0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1003,6 +1003,14 @@ typedef struct TCGOutOpUnary { void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); } TCGOutOpUnary; +typedef struct TCGOutOpSetcond { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg a1, tcg_target_long a2); +} TCGOutOpSetcond; + typedef struct TCGOutOpSubtract { TCGOutOp base; void (*out_rrr)(TCGContext *s, TCGType type, @@ -1047,6 +1055,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), + OUTOP(INDEX_op_negsetcond_i32, TCGOutOpSetcond, outop_negsetcond), + OUTOP(INDEX_op_negsetcond_i64, TCGOutOpSetcond, outop_negsetcond), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), @@ -1056,6 +1066,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl), OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr), OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), + OUTOP(INDEX_op_setcond_i32, TCGOutOpSetcond, outop_setcond), + OUTOP(INDEX_op_setcond_i64, TCGOutOpSetcond, outop_setcond), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), @@ -5482,6 +5494,25 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: + case INDEX_op_negsetcond_i32: + case INDEX_op_negsetcond_i64: + { + const TCGOutOpSetcond *out = + container_of(all_outop[op->opc], TCGOutOpSetcond, base); + TCGCond cond = new_args[3]; + + tcg_debug_assert(!const_args[1]); + if (const_args[2]) { + out->out_rri(s, type, cond, + new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, cond, + new_args[0], new_args[1], new_args[2]); + } + } + break; default: if (def->flags & TCG_OPF_VECTOR) { diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 2eb323b5c5..1b75aba698 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -77,10 +77,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: return C_O1_I2(r, r, r); @@ -942,6 +938,32 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_setcond_i32 + : INDEX_op_setcond_i64); + tcg_out_op_rrrc(s, opc, dest, arg1, arg2, cond); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_setcond, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond(s, type, cond, dest, arg1, arg2); + tgen_neg(s, type, dest, dest); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_negsetcond, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -958,27 +980,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_l(s, opc, arg_label(args[0])); break; - CASE_32_64(setcond) - tcg_out_op_rrrc(s, opc, args[0], args[1], args[2], args[3]); - break; - CASE_32_64(movcond) case INDEX_op_setcond2_i32: tcg_out_op_rrrrrc(s, opc, args[0], args[1], args[2], args[3], args[4], args[5]); break; - case INDEX_op_negsetcond_i32: - tcg_out_op_rrrc(s, INDEX_op_setcond_i32, - args[0], args[1], args[2], args[3]); - tcg_out_op_rr(s, INDEX_op_neg, args[0], args[0]); - break; - case INDEX_op_negsetcond_i64: - tcg_out_op_rrrc(s, INDEX_op_setcond_i64, - args[0], args[1], args[2], args[3]); - tcg_out_op_rr(s, INDEX_op_neg, args[0], args[0]); - break; - CASE_32_64(ld8u) CASE_32_64(ld8s) CASE_32_64(ld16u) @@ -1005,9 +1012,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; CASE_32_64(brcond) - tcg_out_op_rrrc(s, (opc == INDEX_op_brcond_i32 - ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64), - TCG_REG_TMP, args[0], args[1], args[2]); + tgen_setcond(s, type, args[2], TCG_REG_TMP, args[0], args[1]); tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3])); break; From a363e1e179445102d7940e92d394d6c00c126f13 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 09:26:44 -0800 Subject: [PATCH 081/161] tcg: Merge INDEX_op_{neg}setcond_{i32,i64}` Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 4 ++-- include/tcg/tcg-opc.h | 6 ++---- target/sh4/translate.c | 6 +++--- tcg/optimize.c | 32 ++++++++------------------------ tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 30 ++++++++++-------------------- tcg/tci.c | 14 +++++++------- tcg/tci/tcg-target-opc.h.inc | 1 + tcg/tci/tcg-target.c.inc | 4 ++-- 9 files changed, 39 insertions(+), 66 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 592e002971..d3283265cd 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -499,13 +499,13 @@ Conditional moves .. list-table:: - * - setcond_i32/i64 *dest*, *t1*, *t2*, *cond* + * - setcond *dest*, *t1*, *t2*, *cond* - | *dest* = (*t1* *cond* *t2*) | | Set *dest* to 1 if (*t1* *cond* *t2*) is true, otherwise set to 0. - * - negsetcond_i32/i64 *dest*, *t1*, *t2*, *cond* + * - negsetcond *dest*, *t1*, *t2*, *cond* - | *dest* = -(*t1* *cond* *t2*) | diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 287bdf3473..f40bb5796a 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -57,6 +57,7 @@ DEF(mulu2, 2, 2, 0, TCG_OPF_INT) DEF(muluh, 1, 2, 0, TCG_OPF_INT) DEF(nand, 1, 2, 0, TCG_OPF_INT) DEF(neg, 1, 1, 0, TCG_OPF_INT) +DEF(negsetcond, 1, 2, 1, TCG_OPF_INT) DEF(nor, 1, 2, 0, TCG_OPF_INT) DEF(not, 1, 1, 0, TCG_OPF_INT) DEF(or, 1, 2, 0, TCG_OPF_INT) @@ -66,13 +67,12 @@ DEF(remu, 1, 2, 0, TCG_OPF_INT) DEF(rotl, 1, 2, 0, TCG_OPF_INT) DEF(rotr, 1, 2, 0, TCG_OPF_INT) DEF(sar, 1, 2, 0, TCG_OPF_INT) +DEF(setcond, 1, 2, 1, TCG_OPF_INT) DEF(shl, 1, 2, 0, TCG_OPF_INT) DEF(shr, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) -DEF(setcond_i32, 1, 2, 1, 0) -DEF(negsetcond_i32, 1, 2, 1, 0) DEF(movcond_i32, 1, 4, 1, 0) /* load/store */ DEF(ld8u_i32, 1, 1, 1, 0) @@ -99,8 +99,6 @@ DEF(setcond2_i32, 1, 4, 1, 0) DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) -DEF(setcond_i64, 1, 2, 1, 0) -DEF(negsetcond_i64, 1, 2, 1, 0) DEF(movcond_i64, 1, 4, 1, 0) /* load/store */ DEF(ld8u_i64, 1, 1, 1, 0) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 8248648c0c..712a57fb54 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1995,7 +1995,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) if ((ld_dst == B11_8) + (ld_dst == B7_4) != 1 || mv_src >= 0) { goto fail; } - op_opc = INDEX_op_setcond_i32; /* placeholder */ + op_opc = INDEX_op_setcond; /* placeholder */ op_src = (ld_dst == B11_8 ? B7_4 : B11_8); op_arg = REG(op_src); @@ -2030,7 +2030,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) if (ld_dst != B11_8 || ld_dst != B7_4 || mv_src >= 0) { goto fail; } - op_opc = INDEX_op_setcond_i32; + op_opc = INDEX_op_setcond; op_arg = tcg_constant_i32(0); NEXT_INSN; @@ -2147,7 +2147,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_setcond_i32: + case INDEX_op_setcond: if (st_src == ld_dst) { goto fail; } diff --git a/tcg/optimize.c b/tcg/optimize.c index 0affde323b..e53dbd4290 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1996,35 +1996,19 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) if (ti_is_const(tt) && ti_is_const(ft)) { uint64_t tv = ti_const_val(tt); uint64_t fv = ti_const_val(ft); - TCGOpcode opc, negopc; TCGCond cond = op->args[5]; - switch (ctx->type) { - case TCG_TYPE_I32: - opc = INDEX_op_setcond_i32; - negopc = INDEX_op_negsetcond_i32; - tv = (int32_t)tv; - fv = (int32_t)fv; - break; - case TCG_TYPE_I64: - opc = INDEX_op_setcond_i64; - negopc = INDEX_op_negsetcond_i64; - break; - default: - g_assert_not_reached(); - } - if (tv == 1 && fv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; op->args[3] = cond; } else if (fv == 1 && tv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; op->args[3] = tcg_invert_cond(cond); } else if (tv == -1 && fv == 0) { - op->opc = negopc; + op->opc = INDEX_op_negsetcond; op->args[3] = cond; } else if (fv == -1 && tv == 0) { - op->opc = negopc; + op->opc = INDEX_op_negsetcond; op->args[3] = tcg_invert_cond(cond); } } @@ -2526,14 +2510,14 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) do_setcond_low: op->args[2] = op->args[3]; op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; + op->opc = INDEX_op_setcond; return fold_setcond(ctx, op); do_setcond_high: op->args[1] = op->args[2]; op->args[2] = op->args[4]; op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; + op->opc = INDEX_op_setcond; return fold_setcond(ctx, op); } @@ -3025,10 +3009,10 @@ void tcg_optimize(TCGContext *s) case INDEX_op_shr: done = fold_shift(&ctx, op); break; - CASE_OP_32_64(setcond): + case INDEX_op_setcond: done = fold_setcond(&ctx, op); break; - CASE_OP_32_64(negsetcond): + case INDEX_op_negsetcond: done = fold_negsetcond(&ctx, op); break; case INDEX_op_setcond2_i32: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 413b68352d..477dfc25b7 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -552,7 +552,7 @@ void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, } else if (cond == TCG_COND_NEVER) { tcg_gen_movi_i32(ret, 0); } else { - tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond); + tcg_gen_op4i_i32(INDEX_op_setcond, ret, arg1, arg2, cond); } } @@ -570,7 +570,7 @@ void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret, } else if (cond == TCG_COND_NEVER) { tcg_gen_movi_i32(ret, 0); } else { - tcg_gen_op4i_i32(INDEX_op_negsetcond_i32, ret, arg1, arg2, cond); + tcg_gen_op4i_i32(INDEX_op_negsetcond, ret, arg1, arg2, cond); } } @@ -1911,7 +1911,7 @@ void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, TCGV_LOW(arg2), TCGV_HIGH(arg2), cond); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } else { - tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond); + tcg_gen_op4i_i64(INDEX_op_setcond, ret, arg1, arg2, cond); } } } @@ -1948,7 +1948,7 @@ void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret, } else if (cond == TCG_COND_NEVER) { tcg_gen_movi_i64(ret, 0); } else if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op4i_i64(INDEX_op_negsetcond_i64, ret, arg1, arg2, cond); + tcg_gen_op4i_i64(INDEX_op_negsetcond, ret, arg1, arg2, cond); } else { tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), diff --git a/tcg/tcg.c b/tcg/tcg.c index 90e82e7ed0..49fbf1f561 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1055,8 +1055,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), - OUTOP(INDEX_op_negsetcond_i32, TCGOutOpSetcond, outop_negsetcond), - OUTOP(INDEX_op_negsetcond_i64, TCGOutOpSetcond, outop_negsetcond), + OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond), OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), @@ -1066,8 +1065,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl), OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr), OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), - OUTOP(INDEX_op_setcond_i32, TCGOutOpSetcond, outop_setcond), - OUTOP(INDEX_op_setcond_i64, TCGOutOpSetcond, outop_setcond), + OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), @@ -2275,12 +2273,12 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_add: case INDEX_op_and: case INDEX_op_mov: + case INDEX_op_negsetcond: case INDEX_op_or: + case INDEX_op_setcond: case INDEX_op_xor: return has_type; - case INDEX_op_setcond_i32: - case INDEX_op_negsetcond_i32: case INDEX_op_brcond_i32: case INDEX_op_movcond_i32: case INDEX_op_ld8u_i32: @@ -2311,8 +2309,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i64: case INDEX_op_brcond_i64: case INDEX_op_movcond_i64: case INDEX_op_ld8u_i64: @@ -2864,14 +2860,12 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) } switch (c) { case INDEX_op_brcond_i32: - case INDEX_op_setcond_i32: - case INDEX_op_negsetcond_i32: + case INDEX_op_setcond: + case INDEX_op_negsetcond: case INDEX_op_movcond_i32: case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: case INDEX_op_brcond_i64: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i64: case INDEX_op_movcond_i64: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: @@ -5068,10 +5062,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_brcond_i64: op_cond = op->args[2]; break; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: + case INDEX_op_setcond: + case INDEX_op_negsetcond: case INDEX_op_cmp_vec: op_cond = op->args[3]; break; @@ -5494,10 +5486,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: + case INDEX_op_setcond: + case INDEX_op_negsetcond: { const TCGOutOpSetcond *out = container_of(all_outop[op->opc], TCGOutOpSetcond, base); diff --git a/tcg/tci.c b/tcg/tci.c index 569b5c7ed0..d97ca1fade 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -438,10 +438,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_l(insn, tb_ptr, &ptr); tb_ptr = ptr; continue; - case INDEX_op_setcond_i32: - tci_args_rrrc(insn, &r0, &r1, &r2, &condition); - regs[r0] = tci_compare32(regs[r1], regs[r2], condition); - break; case INDEX_op_movcond_i32: tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); tmp32 = tci_compare32(regs[r1], regs[r2], condition); @@ -455,7 +451,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = tci_compare64(T1, T2, condition); break; #elif TCG_TARGET_REG_BITS == 64 - case INDEX_op_setcond_i64: + case INDEX_op_setcond: tci_args_rrrc(insn, &r0, &r1, &r2, &condition); regs[r0] = tci_compare64(regs[r1], regs[r2], condition); break; @@ -628,6 +624,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tmp32 = regs[r1]; regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2]; break; + case INDEX_op_tci_setcond32: + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); + regs[r0] = tci_compare32(regs[r1], regs[r2], condition); + break; /* Shift/rotate operations. */ @@ -971,8 +971,8 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), ptr); break; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: + case INDEX_op_setcond: + case INDEX_op_tci_setcond32: tci_args_rrrc(insn, &r0, &r1, &r2, &c); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2), str_c(c)); diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index 2bb346f4c8..27b4574e4f 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -10,3 +10,4 @@ DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_remu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rotl32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rotr32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 1b75aba698..d49c767de5 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -942,8 +942,8 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg arg1, TCGReg arg2) { TCGOpcode opc = (type == TCG_TYPE_I32 - ? INDEX_op_setcond_i32 - : INDEX_op_setcond_i64); + ? INDEX_op_tci_setcond32 + : INDEX_op_setcond); tcg_out_op_rrrc(s, opc, dest, arg1, arg2, cond); } From 99ac4706b35a2c16e2bf4437e966fc39c41ed67d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 11:40:06 -0800 Subject: [PATCH 082/161] tcg: Convert brcond to TCGOutOpBrcond Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 39 +++++++++++++++------------- tcg/arm/tcg-target.c.inc | 27 ++++++++++++++----- tcg/i386/tcg-target.c.inc | 28 ++++++++++++++------ tcg/loongarch64/tcg-target-con-set.h | 2 +- tcg/loongarch64/tcg-target.c.inc | 18 +++++-------- tcg/mips/tcg-target-con-set.h | 4 +-- tcg/mips/tcg-target.c.inc | 20 +++++++------- tcg/ppc/tcg-target.c.inc | 31 +++++++++++----------- tcg/riscv/tcg-target-con-set.h | 2 +- tcg/riscv/tcg-target.c.inc | 18 +++++-------- tcg/s390x/tcg-target.c.inc | 31 ++++++++++++---------- tcg/sparc64/tcg-target-con-set.h | 2 +- tcg/sparc64/tcg-target.c.inc | 38 ++++++++++++++++++++------- tcg/tcg.c | 26 +++++++++++++++++++ tcg/tci.c | 9 ++----- tcg/tci/tcg-target.c.inc | 20 +++++++------- 16 files changed, 190 insertions(+), 125 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 2524e73ff4..e3d8e9090f 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1424,8 +1424,16 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) } } -static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, - TCGArg b, bool b_const, TCGLabel *l) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, + TCGReg a, TCGReg b, TCGLabel *l) +{ + tgen_cmp(s, type, c, a, b); + tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); + tcg_out_insn(s, 3202, B_C, c, 0); +} + +static void tgen_brcondi(TCGContext *s, TCGType ext, TCGCond c, + TCGReg a, tcg_target_long b, TCGLabel *l) { int tbit = -1; bool need_cmp = true; @@ -1434,14 +1442,14 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, case TCG_COND_EQ: case TCG_COND_NE: /* cmp xN,0; b.ne L -> cbnz xN,L */ - if (b_const && b == 0) { + if (b == 0) { need_cmp = false; } break; case TCG_COND_LT: case TCG_COND_GE: /* cmp xN,0; b.mi L -> tbnz xN,63,L */ - if (b_const && b == 0) { + if (b == 0) { c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ); tbit = ext ? 63 : 31; need_cmp = false; @@ -1450,14 +1458,14 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, case TCG_COND_TSTEQ: case TCG_COND_TSTNE: /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */ - if (b_const && b == UINT32_MAX) { + if (b == UINT32_MAX) { c = tcg_tst_eqne_cond(c); ext = TCG_TYPE_I32; need_cmp = false; break; } /* tst xN,1< tbnz xN,B,L */ - if (b_const && is_power_of_2(b)) { + if (is_power_of_2(b)) { tbit = ctz64(b); need_cmp = false; } @@ -1467,7 +1475,7 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, } if (need_cmp) { - tcg_out_cmp(s, ext, c, a, b, b_const); + tgen_cmpi(s, ext, c, a, b); tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); tcg_out_insn(s, 3202, B_C, c, 0); return; @@ -1500,6 +1508,12 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, } } +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rC), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, TCGReg rd, TCGReg rn) { @@ -2565,13 +2579,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_brcond_i32: - a1 = (int32_t)a1; - /* FALLTHRU */ - case INDEX_op_brcond_i64: - tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - case INDEX_op_movcond_i32: a2 = (int32_t)a2; /* FALLTHRU */ @@ -3159,10 +3166,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(r, rC); - case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: return C_O1_I4(r, r, rC, rz, rz); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 0f2a029f6d..4c7537cbeb 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2181,6 +2181,26 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGLabel *l) +{ + cond = tgen_cmp(s, cond, a0, a1); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[cond], l); +} + +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, tcg_target_long a1, TCGLabel *l) +{ + cond = tgen_cmpi(s, cond, a0, a1); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[cond], l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rIN), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + static void finish_setcond(TCGContext *s, TCGCond cond, TCGReg ret, bool neg) { tcg_out_movi32(s, tcg_cond_to_arm_cond[tcg_invert_cond(cond)], ret, 0); @@ -2317,11 +2337,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mov_reg(s, COND_AL, args[0], a0); break; - case INDEX_op_brcond_i32: - c = tcg_out_cmp(s, args[2], args[0], args[1], const_args[1]); - tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[3])); - break; - case INDEX_op_brcond2_i32: c = tcg_out_cmp2(s, args, const_args); tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5])); @@ -2421,8 +2436,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i32: return C_O0_I2(r, r); - case INDEX_op_brcond_i32: - return C_O0_I2(r, rIN); case INDEX_op_deposit_i32: return C_O1_I2(r, 0, rZ); case INDEX_op_extract2_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index d3a3f1f7fb..d2eff3b617 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1642,6 +1642,26 @@ static void tcg_out_brcond(TCGContext *s, int rexw, TCGCond cond, tcg_out_jxx(s, jcc, label, small); } +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *label) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_brcond(s, rexw, cond, arg1, arg2, false, label, false); +} + +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, tcg_target_long arg2, TCGLabel *label) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_brcond(s, rexw, cond, arg1, arg2, true, label, false); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, reT), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + #if TCG_TARGET_REG_BITS == 32 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, const int *const_args, bool small) @@ -3124,10 +3144,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(brcond): - tcg_out_brcond(s, rexw, a2, a0, a1, const_args[1], - arg_label(args[3]), 0); - break; OP_32_64(movcond): tcg_out_movcond(s, rexw, args[5], a0, a1, a2, const_a2, args[3]); break; @@ -3936,10 +3952,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(r, reT); - case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i32: diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index c145d4ab66..dfe55c6fe8 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -16,7 +16,7 @@ */ C_O0_I1(r) C_O0_I2(rz, r) -C_O0_I2(rz, rz) +C_O0_I2(r, rz) C_O0_I2(w, r) C_O0_I3(r, r, r) C_O1_I1(r, r) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 87e8b843f8..53bba07c49 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -767,8 +767,8 @@ static const struct { [TCG_COND_GTU] = { OPC_BGTU, false } }; -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, - TCGReg arg2, TCGLabel *l) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *l) { LoongArchInsn op = tcg_brcond_to_loongarch[cond].op; @@ -785,6 +785,11 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0)); } +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rz), + .out_rr = tgen_brcond, +}; + static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) { TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; @@ -1771,11 +1776,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_b(s, 0); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); - break; - case INDEX_op_extrh_i64_i32: tcg_out_opc_srai_d(s, a0, a1, 32); break; @@ -2441,10 +2441,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(r, r, r); - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(rz, rz); - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index 67dfab2aed..a80630a8b4 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -10,12 +10,10 @@ * tcg-target-con-str.h; the constraint combination is inclusive or. */ C_O0_I1(r) +C_O0_I2(r, rz) C_O0_I2(rz, r) -C_O0_I2(rz, rz) -C_O0_I3(rz, r, r) C_O0_I3(rz, rz, r) C_O0_I4(rz, rz, rz, rz) -C_O0_I4(rz, rz, r, r) C_O1_I1(r, r) C_O1_I2(r, 0, rz) C_O1_I2(r, r, r) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 51b3ea4bb0..a942905dc4 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -988,8 +988,8 @@ static const TCGOutOpSetcond outop_negsetcond = { .out_rrr = tgen_negsetcond, }; -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, - TCGReg arg2, TCGLabel *l) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *l) { static const MIPSInsn b_zero[16] = { [TCG_COND_LT] = OPC_BLTZ, @@ -1034,6 +1034,11 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_nop(s); } +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rz), + .out_rr = tgen_brcond, +}; + static int tcg_out_setcond2_int(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) { @@ -2178,8 +2183,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; case INDEX_op_br: - tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, - arg_label(a0)); + tgen_brcond(s, TCG_TYPE_I32, TCG_COND_EQ, + TCG_REG_ZERO, TCG_REG_ZERO, arg_label(a0)); break; case INDEX_op_ld8u_i32: @@ -2283,10 +2288,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); - break; case INDEX_op_brcond2_i32: tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); break; @@ -2391,9 +2392,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: return C_O1_I2(r, 0, rz); - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(rz, rz); case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: return (use_mips32r6_instructions diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 0a66351124..819abdc906 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2124,14 +2124,26 @@ static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l) tcg_out_bc(s, cond, bd); } -static void tcg_out_brcond(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *l, TCGType type) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *l) { - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type); + tcg_out_cmp(s, cond, arg1, arg2, false, 0, type); tcg_out_bc_lab(s, cond, l); } +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, tcg_target_long arg2, TCGLabel *l) +{ + tcg_out_cmp(s, cond, arg1, arg2, true, 0, type); + tcg_out_bc_lab(s, cond, l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rC), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, TCGArg v2, bool const_c2) @@ -3457,14 +3469,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; - case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), TCG_TYPE_I32); - break; - case INDEX_op_brcond_i64: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), TCG_TYPE_I64); - break; case INDEX_op_brcond2_i32: tcg_out_brcond2(s, args, const_args); break; @@ -4293,9 +4297,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(r, rC); case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: return C_O1_I4(r, r, rC, rZ, rZ); diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index f0d3cb81bd..5ff2c2db60 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -11,7 +11,7 @@ */ C_O0_I1(r) C_O0_I2(rz, r) -C_O0_I2(rz, rz) +C_O0_I2(r, rz) C_O1_I1(r, r) C_O1_I2(r, r, r) C_O1_I2(r, r, ri) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 05114b5c5f..1d7194e883 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1184,8 +1184,8 @@ static const struct { [TCG_COND_GTU] = { OPC_BLTU, true } }; -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, - TCGReg arg2, TCGLabel *l) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *l) { RISCVInsn op = tcg_brcond_to_riscv[cond].op; @@ -1201,6 +1201,11 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_opc_branch(s, op, arg1, arg2, 0); } +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rz), + .out_rr = tgen_brcond, +}; + #define SETCOND_INV TCG_TARGET_NB_REGS #define SETCOND_NEZ (SETCOND_INV << 1) #define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) @@ -2516,11 +2521,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const_args[4], const_args[5], true, false); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); - break; - case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: tcg_out_movcond(s, args[5], a0, a1, a2, c2, @@ -2863,10 +2863,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(rz, rz); - case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: return C_O1_I4(r, r, rI, rM, rM); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 3c04b87109..d3650636aa 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1693,6 +1693,24 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, tgen_branch(s, cc, l); } +static void tgen_brcondr(TCGContext *s, TCGType type, TCGCond c, + TCGReg a0, TCGReg a1, TCGLabel *l) +{ + tgen_brcond(s, type, c, a0, a1, false, l); +} + +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond c, + TCGReg a0, tcg_target_long a1, TCGLabel *l) +{ + tgen_brcond(s, type, c, a0, a1, true, l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rC), + .out_rr = tgen_brcondr, + .out_ri = tgen_brcondi, +}; + static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest) { ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; @@ -2857,10 +2875,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); break; - case INDEX_op_brcond_i32: - tgen_brcond(s, TCG_TYPE_I32, args[2], args[0], - args[1], const_args[1], arg_label(args[3])); - break; case INDEX_op_movcond_i32: tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], const_args[2], args[3], const_args[3], args[4]); @@ -2934,10 +2948,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); break; - case INDEX_op_brcond_i64: - tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], - args[1], const_args[1], arg_label(args[3])); - break; case INDEX_op_movcond_i64: tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], const_args[2], args[3], const_args[3], args[4]); @@ -3454,11 +3464,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_brcond_i32: - return C_O0_I2(r, ri); - case INDEX_op_brcond_i64: - return C_O0_I2(r, rC); - case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i32: diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index ca7bbf0a2f..9f66e52ec6 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -11,7 +11,7 @@ */ C_O0_I1(r) C_O0_I2(rz, r) -C_O0_I2(rz, rJ) +C_O0_I2(r, rJ) C_O1_I1(r, r) C_O1_I2(r, r, r) C_O1_I2(r, r, rJ) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index dcbe6a8f47..68f38b7d71 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -822,6 +822,35 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tcg_out_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGArg arg2, bool const_arg2, + TCGLabel *l) +{ + if (type == TCG_TYPE_I32) { + tcg_out_brcond_i32(s, cond, arg1, arg2, const_arg2, l); + } else { + tcg_out_brcond_i64(s, cond, arg1, arg2, const_arg2, l); + } +} + +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *l) +{ + tcg_out_brcond(s, type, cond, arg1, arg2, false, l); +} + +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, tcg_target_long arg2, TCGLabel *l) +{ + tcg_out_brcond(s, type, cond, arg1, arg2, true, l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rJ), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg ret, TCGReg c1, TCGArg c2, bool c2const, bool neg) @@ -1755,9 +1784,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, a0, a1, a2, STW); break; - case INDEX_op_brcond_i32: - tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; case INDEX_op_movcond_i32: tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); break; @@ -1796,9 +1822,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, a0, a1, a2, STX); break; - case INDEX_op_brcond_i64: - tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; case INDEX_op_movcond_i64: tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); break; @@ -1872,9 +1895,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(rz, rJ); case INDEX_op_movcond_i32: case INDEX_op_movcond_i64: return C_O1_I4(r, rz, rJ, rI, 0); diff --git a/tcg/tcg.c b/tcg/tcg.c index 49fbf1f561..dbaa574cb5 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -986,6 +986,14 @@ typedef struct TCGOutOpBinary { TCGReg a0, TCGReg a1, tcg_target_long a2); } TCGOutOpBinary; +typedef struct TCGOutOpBrcond { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a1, TCGReg a2, TCGLabel *label); + void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a1, tcg_target_long a2, TCGLabel *label); +} TCGOutOpBrcond; + typedef struct TCGOutOpDivRem { TCGOutOp base; void (*out_rr01r)(TCGContext *s, TCGType type, @@ -1040,6 +1048,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_brcond_i32, TCGOutOpBrcond, outop_brcond), + OUTOP(INDEX_op_brcond_i64, TCGOutOpBrcond, outop_brcond), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), @@ -5486,6 +5496,22 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + { + const TCGOutOpBrcond *out = &outop_brcond; + TCGCond cond = new_args[2]; + TCGLabel *label = arg_label(new_args[3]); + + tcg_debug_assert(!const_args[0]); + if (const_args[1]) { + out->out_ri(s, type, cond, new_args[0], new_args[1], label); + } else { + out->out_rr(s, type, cond, new_args[0], new_args[1], label); + } + } + break; + case INDEX_op_setcond: case INDEX_op_negsetcond: { diff --git a/tcg/tci.c b/tcg/tci.c index d97ca1fade..d431cad6fd 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -665,8 +665,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = sextract32(regs[r1], pos, len); break; case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: tci_args_rl(insn, tb_ptr, &r0, &ptr); - if ((uint32_t)regs[r0]) { + if (regs[r0]) { tb_ptr = ptr; } break; @@ -784,12 +785,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrbb(insn, &r0, &r1, &pos, &len); regs[r0] = sextract64(regs[r1], pos, len); break; - case INDEX_op_brcond_i64: - tci_args_rl(insn, tb_ptr, &r0, &ptr); - if (regs[r0]) { - tb_ptr = ptr; - } - break; case INDEX_op_ext_i32_i64: tci_args_rr(insn, &r0, &r1); regs[r0] = (int32_t)regs[r1]; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d49c767de5..2c7fb5d75f 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -81,10 +81,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i64: return C_O1_I2(r, r, r); - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(r, r); - case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: @@ -964,6 +960,17 @@ static const TCGOutOpSetcond outop_negsetcond = { .out_rrr = tgen_negsetcond, }; +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg0, TCGReg arg1, TCGLabel *l) +{ + tgen_setcond(s, type, cond, TCG_REG_TMP, arg0, arg1); + tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, r), + .out_rr = tgen_brcond, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1011,11 +1018,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_rrbb(s, opc, args[0], args[1], args[2], args[3]); break; - CASE_32_64(brcond) - tgen_setcond(s, type, args[2], TCG_REG_TMP, args[0], args[1]); - tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3])); - break; - case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ tcg_out_op_rr(s, opc, args[0], args[1]); From b6d69fcefbd45ca33b896abfbc8e27e0f713bdf0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 11:49:22 -0800 Subject: [PATCH 083/161] tcg: Merge INDEX_op_brcond_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 4 +--- tcg/optimize.c | 6 +++--- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 24 ++++++++---------------- tcg/tci.c | 6 ++---- tcg/tci/tcg-target.c.inc | 4 ++-- 7 files changed, 19 insertions(+), 31 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index d3283265cd..18f02c5122 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -239,7 +239,7 @@ Jumps/Labels - | Jump to label. - * - brcond_i32/i64 *t0*, *t1*, *cond*, *label* + * - brcond *t0*, *t1*, *cond*, *label* - | Conditional jump if *t0* *cond* *t1* is true. *cond* can be: | diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index f40bb5796a..d40ca001c2 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -34,6 +34,7 @@ DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) +DEF(brcond, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | TCG_OPF_INT) DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT) @@ -89,8 +90,6 @@ DEF(extract_i32, 1, 1, 2, 0) DEF(sextract_i32, 1, 1, 2, 0) DEF(extract2_i32, 1, 2, 1, 0) -DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) - DEF(add2_i32, 2, 4, 0, 0) DEF(sub2_i32, 2, 4, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) @@ -124,7 +123,6 @@ DEF(extu_i32_i64, 1, 1, 0, 0) DEF(extrl_i64_i32, 1, 1, 0, 0) DEF(extrh_i64_i32, 1, 1, 0, 0) -DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(bswap16_i64, 1, 1, 1, 0) DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index e53dbd4290..d0cb4588ed 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1529,14 +1529,14 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) break; do_brcond_low: - op->opc = INDEX_op_brcond_i32; + op->opc = INDEX_op_brcond; op->args[1] = op->args[2]; op->args[2] = cond; op->args[3] = label; return fold_brcond(ctx, op); do_brcond_high: - op->opc = INDEX_op_brcond_i32; + op->opc = INDEX_op_brcond; op->args[0] = op->args[1]; op->args[1] = op->args[3]; op->args[2] = cond; @@ -2864,7 +2864,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_andc_vec: done = fold_andc(&ctx, op); break; - CASE_OP_32_64(brcond): + case INDEX_op_brcond: done = fold_brcond(&ctx, op); break; case INDEX_op_brcond2_i32: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 477dfc25b7..041ca95f0d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -529,7 +529,7 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l) if (cond == TCG_COND_ALWAYS) { tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { - TCGOp *op = tcg_gen_op4ii_i32(INDEX_op_brcond_i32, + TCGOp *op = tcg_gen_op4ii_i32(INDEX_op_brcond, arg1, arg2, cond, label_arg(l)); add_as_label_use(l, op); } @@ -1874,7 +1874,7 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l) TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2), cond, label_arg(l)); } else { - op = tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, + op = tcg_gen_op4ii_i64(INDEX_op_brcond, arg1, arg2, cond, label_arg(l)); } add_as_label_use(l, op); diff --git a/tcg/tcg.c b/tcg/tcg.c index dbaa574cb5..4dc6995d00 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1048,8 +1048,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), - OUTOP(INDEX_op_brcond_i32, TCGOutOpBrcond, outop_brcond), - OUTOP(INDEX_op_brcond_i64, TCGOutOpBrcond, outop_brcond), + OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), @@ -2282,6 +2281,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_add: case INDEX_op_and: + case INDEX_op_brcond: case INDEX_op_mov: case INDEX_op_negsetcond: case INDEX_op_or: @@ -2289,7 +2289,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_xor: return has_type; - case INDEX_op_brcond_i32: case INDEX_op_movcond_i32: case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: @@ -2319,7 +2318,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; - case INDEX_op_brcond_i64: case INDEX_op_movcond_i64: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i64: @@ -2869,13 +2867,12 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) op->args[k++])); } switch (c) { - case INDEX_op_brcond_i32: + case INDEX_op_brcond: case INDEX_op_setcond: case INDEX_op_negsetcond: case INDEX_op_movcond_i32: case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: - case INDEX_op_brcond_i64: case INDEX_op_movcond_i64: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: @@ -2961,8 +2958,7 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) switch (c) { case INDEX_op_set_label: case INDEX_op_br: - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: case INDEX_op_brcond2_i32: col += ne_fprintf(f, "%s$L%d", k ? "," : "", arg_label(op->args[k])->id); @@ -3417,8 +3413,7 @@ void tcg_op_remove(TCGContext *s, TCGOp *op) case INDEX_op_br: remove_label_use(op, 0); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: remove_label_use(op, 3); break; case INDEX_op_brcond2_i32: @@ -3519,8 +3514,7 @@ static void move_label_uses(TCGLabel *to, TCGLabel *from) case INDEX_op_br: op->args[0] = label_arg(to); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: op->args[3] = label_arg(to); break; case INDEX_op_brcond2_i32: @@ -5068,8 +5062,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) o_allocated_regs = s->reserved_regs; switch (op->opc) { - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: op_cond = op->args[2]; break; case INDEX_op_setcond: @@ -5496,8 +5489,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: { const TCGOutOpBrcond *out = &outop_brcond; TCGCond cond = new_args[2]; diff --git a/tcg/tci.c b/tcg/tci.c index d431cad6fd..4c5dc16ecb 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -664,8 +664,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrbb(insn, &r0, &r1, &pos, &len); regs[r0] = sextract32(regs[r1], pos, len); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: tci_args_rl(insn, tb_ptr, &r0, &ptr); if (regs[r0]) { tb_ptr = ptr; @@ -959,8 +958,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) info->fprintf_func(info->stream, "%-12s %d, %p", op_name, len, ptr); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: tci_args_rl(insn, tb_ptr, &r0, &ptr); info->fprintf_func(info->stream, "%-12s %s, 0, ne, %p", op_name, str_r(r0), ptr); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 2c7fb5d75f..18628b957a 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -964,7 +964,7 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg arg0, TCGReg arg1, TCGLabel *l) { tgen_setcond(s, type, cond, TCG_REG_TMP, arg0, arg1); - tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, l); + tcg_out_op_rl(s, INDEX_op_brcond, TCG_REG_TMP, l); } static const TCGOutOpBrcond outop_brcond = { @@ -1047,7 +1047,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_brcond2_i32: tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP, args[0], args[1], args[2], args[3], args[4]); - tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5])); + tcg_out_op_rl(s, INDEX_op_brcond, TCG_REG_TMP, arg_label(args[5])); break; #endif From 1f406e46785e60e274a9c581d6fd07e05a6ff4e0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 13:29:39 -0800 Subject: [PATCH 084/161] tcg: Convert movcond to TCGOutOpMovcond Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 26 +++++++++++++------------- tcg/arm/tcg-target.c.inc | 24 ++++++++++++++---------- tcg/i386/tcg-target.c.inc | 23 +++++++++++------------ tcg/loongarch64/tcg-target-con-set.h | 2 +- tcg/loongarch64/tcg-target.c.inc | 23 +++++++++-------------- tcg/mips/tcg-target-con-set.h | 3 ++- tcg/mips/tcg-target.c.inc | 25 ++++++++++++------------- tcg/ppc/tcg-target.c.inc | 24 ++++++++---------------- tcg/riscv/tcg-target.c.inc | 26 ++++++++++---------------- tcg/s390x/tcg-target-con-set.h | 1 - tcg/s390x/tcg-target.c.inc | 26 ++++++++------------------ tcg/sparc64/tcg-target-con-set.h | 2 +- tcg/sparc64/tcg-target.c.inc | 28 ++++++++++++++++------------ tcg/tcg.c | 23 +++++++++++++++++++++++ tcg/tci.c | 12 ++++++------ tcg/tci/tcg-target-opc.h.inc | 1 + tcg/tci/tcg-target.c.inc | 18 +++++++++++++++--- 17 files changed, 150 insertions(+), 137 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index e3d8e9090f..ee45e7e244 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2513,6 +2513,19 @@ static const TCGOutOpSetcond outop_negsetcond = { .out_rri = tgen_negsetcondi, }; +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool const_vf) +{ + tcg_out_cmp(s, type, cond, c1, c2, const_c2); + tcg_out_insn(s, 3506, CSEL, type, ret, vt, vf, cond); +} + +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rC, rz, rz), + .out = tgen_movcond, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2521,7 +2534,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, TCGArg a0 = args[0]; TCGArg a1 = args[1]; TCGArg a2 = args[2]; - int c2 = const_args[2]; switch (opc) { case INDEX_op_goto_ptr: @@ -2579,14 +2591,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); break; - case INDEX_op_movcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_movcond_i64: - tcg_out_cmp(s, ext, args[5], a1, a2, c2); - tcg_out_insn(s, 3506, CSEL, ext, a0, args[3], args[4], args[5]); - break; - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: tcg_out_qemu_ld(s, a0, a1, a2, ext); @@ -3166,10 +3170,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rC, rz, rz); - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 4c7537cbeb..3d864c1c1e 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2247,6 +2247,20 @@ static const TCGOutOpSetcond outop_negsetcond = { .out_rri = tgen_negsetcondi, }; +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf) +{ + cond = tcg_out_cmp(s, cond, c1, c2, const_c2); + tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[cond], ARITH_MOV, ARITH_MVN, + ret, 0, vt, const_vt); +} + +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rIN, rIK, 0), + .out = tgen_movcond, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2288,14 +2302,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); break; - case INDEX_op_movcond_i32: - /* Constraints mean that v2 is always in the same register as dest, - * so we only need to do "if condition passed, move v1 to dest". - */ - c = tcg_out_cmp(s, args[5], args[1], args[2], const_args[2]); - tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[c], ARITH_MOV, - ARITH_MVN, args[0], 0, args[3], const_args[3]); - break; case INDEX_op_add2_i32: a0 = args[0], a1 = args[1], a2 = args[2]; a3 = args[3], a4 = args[4], a5 = args[5]; @@ -2440,8 +2446,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O1_I2(r, 0, rZ); case INDEX_op_extract2_i32: return C_O1_I2(r, rZ, rZ); - case INDEX_op_movcond_i32: - return C_O1_I4(r, r, rIN, rIK, 0); case INDEX_op_add2_i32: return C_O2_I4(r, r, r, r, rIN, rIK); case INDEX_op_sub2_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index d2eff3b617..ae3a53a18a 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1900,14 +1900,21 @@ static void tcg_out_cmov(TCGContext *s, int jcc, int rexw, tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1); } -static void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond, - TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, - TCGReg v1) +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, + TCGArg vf, bool consf_vf) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; int jcc = tcg_out_cmp(s, cond, c1, c2, const_c2, rexw); - tcg_out_cmov(s, jcc, rexw, dest, v1); + tcg_out_cmov(s, jcc, rexw, dest, vt); } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, reT, r, 0), + .out = tgen_movcond, +}; + static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) { intptr_t disp = tcg_pcrel_diff(s, dest) - 5; @@ -3144,10 +3151,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(movcond): - tcg_out_movcond(s, rexw, args[5], a0, a1, a2, const_a2, args[3]); - break; - OP_32_64(bswap16): if (a2 & TCG_BSWAP_OS) { /* Output must be sign-extended. */ @@ -3977,10 +3980,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i64: return C_O1_I2(q, 0, qi); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, reT, r, 0); - case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index dfe55c6fe8..fd731c0c0f 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -33,5 +33,5 @@ C_O1_I2(w, w, w) C_O1_I2(w, w, wM) C_O1_I2(w, w, wA) C_O1_I3(w, w, w, w) -C_O1_I4(r, rz, rJ, rz, rz) +C_O1_I4(r, r, rJ, rz, rz) C_N2_I1(r, r, r) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 53bba07c49..c731096c64 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -723,11 +723,11 @@ static const TCGOutOpSetcond outop_negsetcond = { .out_rri = tgen_negsetcondi, }; -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, tcg_target_long c2, bool const2, - TCGReg v1, TCGReg v2) +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg v1, bool const_v1, TCGArg v2, bool const_v2) { - int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2); + int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const_c2); TCGReg t; /* Standardize the test below to t != 0. */ @@ -747,6 +747,11 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rJ, rz, rz), + .out = tgen_movcond, +}; + /* * Branch helpers */ @@ -1759,7 +1764,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a1 = args[1]; TCGArg a2 = args[2]; TCGArg a3 = args[3]; - int c2 = const_args[2]; switch (opc) { case INDEX_op_mb: @@ -1849,11 +1853,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_revb_d(s, a0, a1); break; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]); - break; - case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); @@ -2475,10 +2474,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) /* Must deposit into the same register as input */ return C_O1_I2(r, 0, rz); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, rz, rJ, rz, rz); - case INDEX_op_ld_vec: case INDEX_op_dupm_vec: case INDEX_op_dup_vec: diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index a80630a8b4..f5e4852b56 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -23,7 +23,8 @@ C_O1_I2(r, r, rIK) C_O1_I2(r, r, rJ) C_O1_I2(r, r, rz) C_O1_I2(r, r, rzW) -C_O1_I4(r, rz, rz, rz, 0) +C_O1_I4(r, r, rz, rz, 0) +C_O1_I4(r, r, rz, rz, rz) C_O1_I4(r, rz, rz, rz, rz) C_O2_I1(r, r, r) C_O2_I2(r, r, r, r) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index a942905dc4..3ce71a1c8d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1086,8 +1086,9 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, tcg_out_nop(s); } -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg v1, bool const_v1, TCGArg v2, bool const_v2) { int tmpflags; bool eqz; @@ -1133,6 +1134,13 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = (use_mips32r6_instructions + ? C_O1_I4(r, r, rz, rz, rz) + : C_O1_I4(r, r, rz, rz, 0)), + .out = tgen_movcond, +}; + static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) { /* @@ -1726,7 +1734,8 @@ static void tgen_clz(TCGContext *s, TCGType type, if (use_mips32r6_instructions) { MIPSInsn opcv6 = type == TCG_TYPE_I32 ? OPC_CLZ_R6 : OPC_DCLZ_R6; tcg_out_opc_reg(s, opcv6, TCG_TMP0, a1, 0); - tcg_out_movcond(s, TCG_COND_EQ, a0, a1, 0, a2, TCG_TMP0); + tgen_movcond(s, TCG_TYPE_REG, TCG_COND_EQ, a0, a1, a2, false, + TCG_TMP0, false, TCG_REG_ZERO, false); } else { MIPSInsn opcv2 = type == TCG_TYPE_I32 ? OPC_CLZ : OPC_DCLZ; if (a0 == a2) { @@ -2292,11 +2301,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); break; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); - break; - case INDEX_op_setcond2_i32: tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); break; @@ -2392,11 +2396,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: return C_O1_I2(r, 0, rz); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return (use_mips32r6_instructions - ? C_O1_I4(r, rz, rz, rz, rz) - : C_O1_I4(r, rz, rz, rz, 0)); case INDEX_op_add2_i32: case INDEX_op_sub2_i32: return C_O2_I4(r, r, rz, rz, rN, rN); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 819abdc906..339b3a0904 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2144,9 +2144,9 @@ static const TCGOutOpBrcond outop_brcond = { .out_ri = tgen_brcondi, }; -static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, - TCGArg v2, bool const_c2) +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg v1, bool const_v1, TCGArg v2, bool const_v2) { /* If for some reason both inputs are zero, don't produce bad code. */ if (v1 == 0 && v2 == 0) { @@ -2192,6 +2192,11 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, } } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ), + .out = tgen_movcond, +}; + static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2) { @@ -3578,15 +3583,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_movcond_i32: - tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; - #if TCG_TARGET_REG_BITS == 64 case INDEX_op_add2_i64: #else @@ -4297,10 +4293,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rC, rZ, rZ); - case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: return C_O1_I2(r, 0, rZ); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 1d7194e883..8d106d7f28 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1501,10 +1501,10 @@ static void tcg_out_movcond_br2(TCGContext *s, TCGCond cond, TCGReg ret, tcg_out_mov(s, TCG_TYPE_REG, ret, tmp); } -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg cmp1, int cmp2, bool c_cmp2, - TCGReg val1, bool c_val1, - TCGReg val2, bool c_val2) +static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg cmp1, TCGArg cmp2, bool c_cmp2, + TCGArg val1, bool c_val1, + TCGArg val2, bool c_val2) { int tmpflags; TCGReg t; @@ -1531,6 +1531,11 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rI, rM, rM), + .out = tcg_out_movcond, +}; + static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn, TCGReg ret, TCGReg src1, int src2, bool c_src2) { @@ -1542,7 +1547,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn, * Note that constraints put 'ret' in a new register, so the * computation above did not clobber either 'src1' or 'src2'. */ - tcg_out_movcond(s, TCG_COND_EQ, ret, src1, 0, true, + tcg_out_movcond(s, type, TCG_COND_EQ, ret, src1, 0, true, src2, c_src2, ret, false); } } @@ -2425,7 +2430,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a0 = args[0]; TCGArg a1 = args[1]; TCGArg a2 = args[2]; - int c2 = const_args[2]; switch (opc) { case INDEX_op_goto_ptr: @@ -2521,12 +2525,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const_args[4], const_args[5], true, false); break; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - tcg_out_movcond(s, args[5], a0, a1, a2, c2, - args[3], const_args[3], args[4], const_args[4]); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); break; @@ -2863,10 +2861,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rI, rM, rM); - case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 86af067965..78f06e3e52 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -38,7 +38,6 @@ C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) C_O1_I4(v, v, v, vZ, v) C_O1_I4(v, v, v, vZM, v) -C_O1_I4(r, r, ri, rI, r) C_O1_I4(r, r, rC, rI, r) C_O2_I1(o, m, r) C_O2_I2(o, m, 0, r) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index d3650636aa..fbf39ca529 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1540,9 +1540,9 @@ static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest, tcg_out_insn(s, RRFc, LOCGR, dest, src, cc); } -static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, - TCGReg c1, TCGArg c2, int c2const, - TCGArg v3, int v3const, TCGReg v4) +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, + TCGReg dest, TCGReg c1, TCGArg c2, bool c2const, + TCGArg v3, bool v3const, TCGArg v4, bool v4const) { int cc, inv_cc; @@ -1550,6 +1550,11 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc); } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rC, rI, r), + .out = tgen_movcond, +}; + static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, int ofs, int len, int z) { @@ -2875,11 +2880,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); break; - case INDEX_op_movcond_i32: - tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3], args[4]); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32); break; @@ -2948,11 +2948,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); break; - case INDEX_op_movcond_i64: - tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3], args[4]); - break; - OP_32_64(deposit): a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[1]) { @@ -3492,11 +3487,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i64: return C_O1_I2(r, rZ, r); - case INDEX_op_movcond_i32: - return C_O1_I4(r, r, ri, rI, r); - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rC, rI, r); - case INDEX_op_add2_i32: case INDEX_op_sub2_i32: return C_N1_O1_I4(r, r, 0, 1, ri, r); diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index 9f66e52ec6..8cec396173 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -15,6 +15,6 @@ C_O0_I2(r, rJ) C_O1_I1(r, r) C_O1_I2(r, r, r) C_O1_I2(r, r, rJ) -C_O1_I4(r, rz, rJ, rI, 0) +C_O1_I4(r, r, rJ, rI, 0) C_O2_I2(r, r, r, r) C_O2_I4(r, r, rz, rz, rJ, rJ) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 68f38b7d71..d99b9e42ce 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -898,6 +898,22 @@ static const TCGOutOpSetcond outop_negsetcond = { .out_rri = tgen_negsetcondi, }; +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool c2const, + TCGArg v1, bool v1const, TCGArg v2, bool v2consf) +{ + if (type == TCG_TYPE_I32) { + tcg_out_movcond_i32(s, cond, ret, c1, c2, c2const, v1, v1const); + } else { + tcg_out_movcond_i64(s, cond, ret, c1, c2, c2const, v1, v1const); + } +} + +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rJ, rI, 0), + .out = tgen_movcond, +}; + static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, TCGReg ah, int32_t bl, int blconst, int32_t bh, int bhconst, int opl, int oph) @@ -1735,13 +1751,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; - int c2; /* Hoist the loads of the most common arguments. */ a0 = args[0]; a1 = args[1]; a2 = args[2]; - c2 = const_args[2]; switch (opc) { case INDEX_op_goto_ptr: @@ -1784,10 +1798,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, a0, a1, a2, STW); break; - case INDEX_op_movcond_i32: - tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; - case INDEX_op_add2_i32: tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], args[4], const_args[4], args[5], const_args[5], @@ -1822,9 +1832,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, a0, a1, a2, STX); break; - case INDEX_op_movcond_i64: - tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; case INDEX_op_add2_i64: tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], const_args[4], args[5], const_args[5], false); @@ -1895,9 +1902,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, rz, rJ, rI, 0); case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index 4dc6995d00..ba81a67e28 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1000,6 +1000,13 @@ typedef struct TCGOutOpDivRem { TCGReg a0, TCGReg a1, TCGReg a4); } TCGOutOpDivRem; +typedef struct TCGOutOpMovcond { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf); +} TCGOutOpMovcond; + typedef struct TCGOutOpMul2 { TCGOutOp base; void (*out_rrrr)(TCGContext *s, TCGType type, @@ -1057,6 +1064,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), + OUTOP(INDEX_op_movcond_i32, TCGOutOpMovcond, outop_movcond), + OUTOP(INDEX_op_movcond_i64, TCGOutOpMovcond, outop_movcond), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -5504,6 +5513,20 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + { + const TCGOutOpMovcond *out = &outop_movcond; + TCGCond cond = new_args[5]; + + tcg_debug_assert(!const_args[1]); + out->out(s, type, cond, new_args[0], + new_args[1], new_args[2], const_args[2], + new_args[3], const_args[3], + new_args[4], const_args[4]); + } + break; + case INDEX_op_setcond: case INDEX_op_negsetcond: { diff --git a/tcg/tci.c b/tcg/tci.c index 4c5dc16ecb..aef0023dc6 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -438,11 +438,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_l(insn, tb_ptr, &ptr); tb_ptr = ptr; continue; - case INDEX_op_movcond_i32: - tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); - tmp32 = tci_compare32(regs[r1], regs[r2], condition); - regs[r0] = regs[tmp32 ? r3 : r4]; - break; #if TCG_TARGET_REG_BITS == 32 case INDEX_op_setcond2_i32: tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); @@ -628,6 +623,11 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrrc(insn, &r0, &r1, &r2, &condition); regs[r0] = tci_compare32(regs[r1], regs[r2], condition); break; + case INDEX_op_tci_movcond32: + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); + tmp32 = tci_compare32(regs[r1], regs[r2], condition); + regs[r0] = regs[tmp32 ? r3 : r4]; + break; /* Shift/rotate operations. */ @@ -1074,7 +1074,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), str_r(r1), pos, len); break; - case INDEX_op_movcond_i32: + case INDEX_op_tci_movcond32: case INDEX_op_movcond_i64: case INDEX_op_setcond2_i32: tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c); diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index 27b4574e4f..672d9b7323 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -11,3 +11,4 @@ DEF(tci_remu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rotl32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rotr32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_movcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 18628b957a..79f9219187 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -92,8 +92,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O0_I4(r, r, r, r); #endif - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: case INDEX_op_setcond2_i32: return C_O1_I4(r, r, r, r, r); @@ -972,6 +970,21 @@ static const TCGOutOpBrcond outop_brcond = { .out_rr = tgen_brcond, }; +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_movcond32 + : INDEX_op_movcond_i64); + tcg_out_op_rrrrrc(s, opc, ret, c1, c2, vt, vf, cond); +} + +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, r, r, r), + .out = tgen_movcond, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -987,7 +1000,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_l(s, opc, arg_label(args[0])); break; - CASE_32_64(movcond) case INDEX_op_setcond2_i32: tcg_out_op_rrrrrc(s, opc, args[0], args[1], args[2], args[3], args[4], args[5]); From ea46c4bce8c8a8285e6715c1bac29f5b73f5062b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 13:41:25 -0800 Subject: [PATCH 085/161] tcg: Merge INDEX_op_movcond_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 2 +- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 15 +++++---------- tcg/tci.c | 4 ++-- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 13 insertions(+), 19 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 18f02c5122..26dc3bad49 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -511,7 +511,7 @@ Conditional moves | | Set *dest* to -1 if (*t1* *cond* *t2*) is true, otherwise set to 0. - * - movcond_i32/i64 *dest*, *c1*, *c2*, *v1*, *v2*, *cond* + * - movcond *dest*, *c1*, *c2*, *v1*, *v2*, *cond* - | *dest* = (*c1* *cond* *c2* ? *v1* : *v2*) | diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index d40ca001c2..5e085607d5 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -51,6 +51,7 @@ DEF(divs2, 2, 3, 0, TCG_OPF_INT) DEF(divu, 1, 2, 0, TCG_OPF_INT) DEF(divu2, 2, 3, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) +DEF(movcond, 1, 4, 1, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) DEF(muls2, 2, 2, 0, TCG_OPF_INT) DEF(mulsh, 1, 2, 0, TCG_OPF_INT) @@ -74,7 +75,6 @@ DEF(shr, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) -DEF(movcond_i32, 1, 4, 1, 0) /* load/store */ DEF(ld8u_i32, 1, 1, 1, 0) DEF(ld8s_i32, 1, 1, 1, 0) @@ -98,7 +98,6 @@ DEF(setcond2_i32, 1, 4, 1, 0) DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) -DEF(movcond_i64, 1, 4, 1, 0) /* load/store */ DEF(ld8u_i64, 1, 1, 1, 0) DEF(ld8s_i64, 1, 1, 1, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index d0cb4588ed..54606388cc 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2943,7 +2943,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_mov_vec: done = fold_mov(&ctx, op); break; - CASE_OP_32_64(movcond): + case INDEX_op_movcond: done = fold_movcond(&ctx, op); break; case INDEX_op_mul: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 041ca95f0d..3527952c66 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1095,7 +1095,7 @@ void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, } else if (cond == TCG_COND_NEVER) { tcg_gen_mov_i32(ret, v2); } else { - tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond); + tcg_gen_op6i_i32(INDEX_op_movcond, ret, c1, c2, v1, v2, cond); } } @@ -2799,7 +2799,7 @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, } else if (cond == TCG_COND_NEVER) { tcg_gen_mov_i64(ret, v2); } else if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond); + tcg_gen_op6i_i64(INDEX_op_movcond, ret, c1, c2, v1, v2, cond); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 zero = tcg_constant_i32(0); diff --git a/tcg/tcg.c b/tcg/tcg.c index ba81a67e28..3f57f6aafd 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1064,8 +1064,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), - OUTOP(INDEX_op_movcond_i32, TCGOutOpMovcond, outop_movcond), - OUTOP(INDEX_op_movcond_i64, TCGOutOpMovcond, outop_movcond), + OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), @@ -2292,13 +2291,13 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_and: case INDEX_op_brcond: case INDEX_op_mov: + case INDEX_op_movcond: case INDEX_op_negsetcond: case INDEX_op_or: case INDEX_op_setcond: case INDEX_op_xor: return has_type; - case INDEX_op_movcond_i32: case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: @@ -2327,7 +2326,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; - case INDEX_op_movcond_i64: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i64: case INDEX_op_ld16u_i64: @@ -2879,10 +2877,9 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) case INDEX_op_brcond: case INDEX_op_setcond: case INDEX_op_negsetcond: - case INDEX_op_movcond_i32: + case INDEX_op_movcond: case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: - case INDEX_op_movcond_i64: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: if (op->args[k] < ARRAY_SIZE(cond_name) @@ -5082,8 +5079,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_brcond2_i32: op_cond = op->args[4]; break; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: + case INDEX_op_movcond: case INDEX_op_setcond2_i32: case INDEX_op_cmpsel_vec: op_cond = op->args[5]; @@ -5513,8 +5509,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: + case INDEX_op_movcond: { const TCGOutOpMovcond *out = &outop_movcond; TCGCond cond = new_args[5]; diff --git a/tcg/tci.c b/tcg/tci.c index aef0023dc6..9c3f58242e 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -450,7 +450,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrrc(insn, &r0, &r1, &r2, &condition); regs[r0] = tci_compare64(regs[r1], regs[r2], condition); break; - case INDEX_op_movcond_i64: + case INDEX_op_movcond: tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); tmp32 = tci_compare64(regs[r1], regs[r2], condition); regs[r0] = regs[tmp32 ? r3 : r4]; @@ -1075,7 +1075,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) break; case INDEX_op_tci_movcond32: - case INDEX_op_movcond_i64: + case INDEX_op_movcond: case INDEX_op_setcond2_i32: tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 79f9219187..99a5744ab4 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -976,7 +976,7 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, { TCGOpcode opc = (type == TCG_TYPE_I32 ? INDEX_op_tci_movcond32 - : INDEX_op_movcond_i64); + : INDEX_op_movcond); tcg_out_op_rrrrrc(s, opc, ret, c1, c2, vt, vf, cond); } From 298b3b548656ea89ff6fd4251bc7319986b79c67 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 13:52:49 -0800 Subject: [PATCH 086/161] tcg/ppc: Drop fallback constant loading in tcg_out_cmp Use U and C constraints for brcond2 and setcond2, so that tcg_out_cmp2 automatically passes in-range constants to tcg_out_cmp. Tested-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target-con-set.h | 4 +-- tcg/ppc/tcg-target.c.inc | 49 ++++++++++++------------------------ 2 files changed, 18 insertions(+), 35 deletions(-) diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h index 77a1038d51..14cd217287 100644 --- a/tcg/ppc/tcg-target-con-set.h +++ b/tcg/ppc/tcg-target-con-set.h @@ -15,7 +15,7 @@ C_O0_I2(r, rC) C_O0_I2(v, r) C_O0_I3(r, r, r) C_O0_I3(o, m, r) -C_O0_I4(r, r, ri, ri) +C_O0_I4(r, r, rU, rC) C_O0_I4(r, r, r, r) C_O1_I1(r, r) C_O1_I1(v, r) @@ -34,7 +34,7 @@ C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) C_O1_I4(v, v, v, vZM, v) C_O1_I4(r, r, rC, rZ, rZ) -C_O1_I4(r, r, r, ri, ri) +C_O1_I4(r, r, r, rU, rC) C_O2_I1(r, r, r) C_N1O1_I1(o, m, r) C_O2_I2(r, r, r, r) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 339b3a0904..1782d05290 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1777,9 +1777,8 @@ static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2, } static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, - int const_arg2, int cr, TCGType type) + bool const_arg2, int cr, TCGType type) { - int imm; uint32_t op; tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); @@ -1796,18 +1795,15 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, case TCG_COND_EQ: case TCG_COND_NE: if (const_arg2) { - if ((int16_t) arg2 == arg2) { + if ((int16_t)arg2 == arg2) { op = CMPI; - imm = 1; - break; - } else if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; break; } + tcg_debug_assert((uint16_t)arg2 == arg2); + op = CMPLI; + break; } op = CMPL; - imm = 0; break; case TCG_COND_TSTEQ: @@ -1821,14 +1817,11 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, case TCG_COND_LE: case TCG_COND_GT: if (const_arg2) { - if ((int16_t) arg2 == arg2) { - op = CMPI; - imm = 1; - break; - } + tcg_debug_assert((int16_t)arg2 == arg2); + op = CMPI; + break; } op = CMP; - imm = 0; break; case TCG_COND_LTU: @@ -1836,30 +1829,20 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, case TCG_COND_LEU: case TCG_COND_GTU: if (const_arg2) { - if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; - break; - } + tcg_debug_assert((uint16_t)arg2 == arg2); + op = CMPLI; + break; } op = CMPL; - imm = 0; break; default: g_assert_not_reached(); } op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); - - if (imm) { - tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); - } else { - if (const_arg2) { - tcg_out_movi(s, type, TCG_REG_R0, arg2); - arg2 = TCG_REG_R0; - } - tcg_out32(s, op | RA(arg1) | RB(arg2)); - } + op |= RA(arg1); + op |= const_arg2 ? arg2 & 0xffff : RB(arg2); + tcg_out32(s, op); } static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, @@ -4297,9 +4280,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i64: return C_O1_I2(r, 0, rZ); case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, ri, ri); + return C_O0_I4(r, r, rU, rC); case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, ri, ri); + return C_O1_I4(r, r, r, rU, rC); case INDEX_op_add2_i64: case INDEX_op_add2_i32: return C_O2_I4(r, r, r, r, rI, rZM); From cd2d9b181a67517fb7bbe8f0c8a81e2284207241 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 14:52:41 -0800 Subject: [PATCH 087/161] tcg/arm: Expand arguments to tcg_out_cmp2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass explicit arguments instead of arrays. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 3d864c1c1e..cebd783285 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1254,17 +1254,9 @@ static TCGCond tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg a, } } -static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args, - const int *const_args) +static TCGCond tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, TCGArg bh, bool const_bh) { - TCGReg al = args[0]; - TCGReg ah = args[1]; - TCGArg bl = args[2]; - TCGArg bh = args[3]; - TCGCond cond = args[4]; - int const_bl = const_args[2]; - int const_bh = const_args[3]; - switch (cond) { case TCG_COND_EQ: case TCG_COND_NE: @@ -2344,11 +2336,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; case INDEX_op_brcond2_i32: - c = tcg_out_cmp2(s, args, const_args); + c = tcg_out_cmp2(s, args[4], args[0], args[1], args[2], const_args[2], + args[3], const_args[3]); tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5])); break; case INDEX_op_setcond2_i32: - c = tcg_out_cmp2(s, args + 1, const_args + 1); + c = tcg_out_cmp2(s, args[5], args[1], args[2], args[3], const_args[3], + args[4], const_args[4]); tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1); tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)], ARITH_MOV, args[0], 0, 0); From c1e84acb7a87c23494f706c7045956ffaff5435a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 16:41:26 -0800 Subject: [PATCH 088/161] tcg/ppc: Expand arguments to tcg_out_cmp2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c.inc | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 1782d05290..669c5eae4a 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2206,8 +2206,8 @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, } } -static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, - const int *const_args) +static void tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool blconst, TCGArg bh, bool bhconst) { static const struct { uint8_t bit1, bit2; } bits[] = { [TCG_COND_LT ] = { CR_LT, CR_LT }, @@ -2220,18 +2220,9 @@ static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, [TCG_COND_GEU] = { CR_GT, CR_LT }, }; - TCGCond cond = args[4], cond2; - TCGArg al, ah, bl, bh; - int blconst, bhconst; + TCGCond cond2; int op, bit1, bit2; - al = args[0]; - ah = args[1]; - bl = args[2]; - bh = args[3]; - blconst = const_args[2]; - bhconst = const_args[3]; - switch (cond) { case TCG_COND_EQ: op = CRAND; @@ -2286,7 +2277,8 @@ static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, const int *const_args) { - tcg_out_cmp2(s, args + 1, const_args + 1); + tcg_out_cmp2(s, args[5], args[1], args[2], args[3], const_args[3], + args[4], const_args[4]); tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0)); tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31); } @@ -2294,7 +2286,8 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, const int *const_args) { - tcg_out_cmp2(s, args, const_args); + tcg_out_cmp2(s, args[4], args[0], args[1], args[2], const_args[2], + args[3], const_args[3]); tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5])); } From f408df587a0459c0e44414d1c0c72a7926ce8f3c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 15:51:26 -0800 Subject: [PATCH 089/161] tcg: Convert brcond2_i32 to TCGOutOpBrcond2 Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 20 +++++++---- tcg/i386/tcg-target.c.inc | 62 ++++++++++++++++++----------------- tcg/mips/tcg-target-con-set.h | 2 +- tcg/mips/tcg-target.c.inc | 19 ++++++----- tcg/ppc/tcg-target.c.inc | 25 +++++++------- tcg/tcg.c | 30 +++++++++++++++++ tcg/tci/tcg-target.c.inc | 30 +++++++++-------- 7 files changed, 118 insertions(+), 70 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index cebd783285..1c42df1092 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2253,6 +2253,19 @@ static const TCGOutOpMovcond outop_movcond = { .out = tgen_movcond, }; +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, TCGArg bh, bool const_bh, + TCGLabel *l) +{ + cond = tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[cond], l); +} + +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, rI, rI), + .out = tgen_brcond2, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2335,11 +2348,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mov_reg(s, COND_AL, args[0], a0); break; - case INDEX_op_brcond2_i32: - c = tcg_out_cmp2(s, args[4], args[0], args[1], args[2], const_args[2], - args[3], const_args[3]); - tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5])); - break; case INDEX_op_setcond2_i32: c = tcg_out_cmp2(s, args[5], args[1], args[2], args[3], const_args[3], args[4], const_args[4]); @@ -2444,8 +2452,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O2_I4(r, r, r, r, rIN, rIK); case INDEX_op_sub2_i32: return C_O2_I4(r, r, rI, rI, rIN, rIK); - case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, rI, rI); case INDEX_op_setcond2_i32: return C_O1_I4(r, r, r, rI, rI); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index ae3a53a18a..b7708c945f 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1662,42 +1662,52 @@ static const TCGOutOpBrcond outop_brcond = { .out_ri = tgen_brcondi, }; -#if TCG_TARGET_REG_BITS == 32 -static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, - const int *const_args, bool small) +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, + TCGReg ah, TCGArg bl, bool blconst, + TCGArg bh, bool bhconst, + TCGLabel *label_this, bool small) { TCGLabel *label_next = gen_new_label(); - TCGLabel *label_this = arg_label(args[5]); - TCGCond cond = args[4]; switch (cond) { case TCG_COND_EQ: case TCG_COND_TSTEQ: tcg_out_brcond(s, 0, tcg_invert_cond(cond), - args[0], args[2], const_args[2], label_next, 1); - tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3], - label_this, small); + al, bl, blconst, label_next, true); + tcg_out_brcond(s, 0, cond, ah, bh, bhconst, label_this, small); break; case TCG_COND_NE: case TCG_COND_TSTNE: - tcg_out_brcond(s, 0, cond, args[0], args[2], const_args[2], - label_this, small); - tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, cond, al, bl, blconst, label_this, small); + tcg_out_brcond(s, 0, cond, ah, bh, bhconst, label_this, small); break; default: - tcg_out_brcond(s, 0, tcg_high_cond(cond), args[1], - args[3], const_args[3], label_this, small); + tcg_out_brcond(s, 0, tcg_high_cond(cond), + ah, bh, bhconst, label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond(s, 0, tcg_unsigned_cond(cond), args[0], - args[2], const_args[2], label_this, small); + tcg_out_brcond(s, 0, tcg_unsigned_cond(cond), + al, bl, blconst, label_this, small); break; } tcg_out_label(s, label_next); } + +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, + TCGReg ah, TCGArg bl, bool blconst, + TCGArg bh, bool bhconst, TCGLabel *l) +{ + tcg_out_brcond2(s, cond, al, ah, bl, blconst, bh, bhconst, l, false); +} + +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) #endif +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, ri, ri), + .out = tgen_brcond2, +}; static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg arg1, TCGArg arg2, @@ -1854,11 +1864,8 @@ static const TCGOutOpSetcond outop_negsetcond = { static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, const int *const_args) { - TCGArg new_args[6]; TCGLabel *label_true, *label_over; - memcpy(new_args, args+1, 5*sizeof(TCGArg)); - if (args[0] == args[1] || args[0] == args[2] || (!const_args[3] && args[0] == args[3]) || (!const_args[4] && args[0] == args[4])) { @@ -1867,8 +1874,8 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, label_true = gen_new_label(); label_over = gen_new_label(); - new_args[5] = label_arg(label_true); - tcg_out_brcond2(s, new_args, const_args+1, 1); + tcg_out_brcond2(s, args[5], args[1], args[2], args[3], const_args[3], + args[4], const_args[4], label_true, true); tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); tcg_out_jxx(s, JCC_JMP, label_over, 1); @@ -1884,9 +1891,10 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); label_over = gen_new_label(); - new_args[4] = tcg_invert_cond(new_args[4]); - new_args[5] = label_arg(label_over); - tcg_out_brcond2(s, new_args, const_args+1, 1); + tcg_out_brcond2(s, tcg_invert_cond(args[5]), args[1], args[2], + args[3], const_args[3], + args[4], const_args[4], label_over, true); + tgen_arithi(s, ARITH_ADD, args[0], 1, 0); tcg_out_label(s, label_over); @@ -3233,9 +3241,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; #if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args, 0); - break; case INDEX_op_setcond2_i32: tcg_out_setcond2(s, args, const_args); break; @@ -4007,9 +4012,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) tcg_debug_assert(TCG_TARGET_REG_BITS == 64); return C_O0_I3(L, L, L); - case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, ri, ri); - case INDEX_op_setcond2_i32: return C_O1_I4(r, r, r, ri, ri); diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index f5e4852b56..9d0ea73f4f 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -13,7 +13,7 @@ C_O0_I1(r) C_O0_I2(r, rz) C_O0_I2(rz, r) C_O0_I3(rz, rz, r) -C_O0_I4(rz, rz, rz, rz) +C_O0_I4(r, r, rz, rz) C_O1_I1(r, r) C_O1_I2(r, 0, rz) C_O1_I2(r, r, r) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 3ce71a1c8d..9a9b1bb09a 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1074,8 +1074,9 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, tcg_out_setcond_end(s, ret, tmpflags); } -static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, - TCGReg bl, TCGReg bh, TCGLabel *l) +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh, TCGLabel *l) { int tmpflags = tcg_out_setcond2_int(s, cond, TCG_TMP0, al, ah, bl, bh); TCGReg tmp = tmpflags & ~SETCOND_FLAGS; @@ -1086,6 +1087,14 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, tcg_out_nop(s); } +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, rz, rz), + .out = tgen_brcond2, +}; + static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, TCGArg v1, bool const_v1, TCGArg v2, bool const_v2) @@ -2297,10 +2306,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); - break; - case INDEX_op_setcond2_i32: tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); break; @@ -2401,8 +2406,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O2_I4(r, r, rz, rz, rN, rN); case INDEX_op_setcond2_i32: return C_O1_I4(r, rz, rz, rz, rz); - case INDEX_op_brcond2_i32: - return C_O0_I4(rz, rz, rz, rz); case INDEX_op_qemu_ld_i32: return C_O1_I1(r, r); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 669c5eae4a..cde8a55918 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2283,14 +2283,23 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31); } -static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, - const int *const_args) +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh, TCGLabel *l) { - tcg_out_cmp2(s, args[4], args[0], args[1], args[2], const_args[2], - args[3], const_args[3]); - tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5])); + assert(TCG_TARGET_REG_BITS == 32); + tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); + tcg_out_bc_lab(s, TCG_COND_EQ, l); } +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, rU, rC), + .out = tgen_brcond2, +}; + static void tcg_out_mb(TCGContext *s, TCGArg a0) { uint32_t insn; @@ -3450,10 +3459,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; @@ -4272,8 +4277,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: return C_O1_I2(r, 0, rZ); - case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, rU, rC); case INDEX_op_setcond2_i32: return C_O1_I4(r, r, r, rU, rC); case INDEX_op_add2_i64: diff --git a/tcg/tcg.c b/tcg/tcg.c index 3f57f6aafd..2a39ce3665 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -994,6 +994,13 @@ typedef struct TCGOutOpBrcond { TCGReg a1, tcg_target_long a2, TCGLabel *label); } TCGOutOpBrcond; +typedef struct TCGOutOpBrcond2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh, TCGLabel *l); +} TCGOutOpBrcond2; + typedef struct TCGOutOpDivRem { TCGOutOp base; void (*out_rr01r)(TCGContext *s, TCGType type, @@ -1087,6 +1094,10 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), + +#if TCG_TARGET_REG_BITS == 32 + OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2), +#endif }; #undef OUTOP @@ -5540,6 +5551,25 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + { + const TCGOutOpBrcond2 *out = &outop_brcond2; + TCGCond cond = new_args[4]; + TCGLabel *label = arg_label(new_args[5]); + + tcg_debug_assert(!const_args[0]); + tcg_debug_assert(!const_args[1]); + out->out(s, cond, new_args[0], new_args[1], + new_args[2], const_args[2], + new_args[3], const_args[3], label); + } + break; +#else + case INDEX_op_brcond2_i32: + g_assert_not_reached(); +#endif + default: if (def->flags & TCG_OPF_VECTOR) { tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 99a5744ab4..0fe365e2d4 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -87,11 +87,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub2_i64: return C_O2_I4(r, r, r, r, r, r); -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, r, r); -#endif - case INDEX_op_setcond2_i32: return C_O1_I4(r, r, r, r, r); @@ -985,6 +980,23 @@ static const TCGOutOpMovcond outop_movcond = { .out = tgen_movcond, }; +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh, TCGLabel *l) +{ + tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP, + al, ah, bl, bh, cond); + tcg_out_op_rl(s, INDEX_op_brcond, TCG_REG_TMP, l); +} + +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, r, r), + .out = tgen_brcond2, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1055,14 +1067,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, args[3], args[4], args[5]); break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP, - args[0], args[1], args[2], args[3], args[4]); - tcg_out_op_rl(s, INDEX_op_brcond, TCG_REG_TMP, arg_label(args[5])); - break; -#endif - case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i64: if (TCG_TARGET_REG_BITS == 32) { From e579c717cb1176d2cd8bd24d9b0e9c6a4b1a0d71 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 16:57:07 -0800 Subject: [PATCH 090/161] tcg: Convert setcond2_i32 to TCGOutOpSetcond2 Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 25 ++++++------ tcg/i386/tcg-target.c.inc | 71 +++++++++++++++++------------------ tcg/mips/tcg-target-con-set.h | 2 +- tcg/mips/tcg-target.c.inc | 20 ++++++---- tcg/ppc/tcg-target.c.inc | 25 ++++++------ tcg/tcg.c | 19 ++++++++++ tcg/tci/tcg-target.c.inc | 24 ++++++++---- 7 files changed, 110 insertions(+), 76 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 1c42df1092..8cd82b8baf 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2266,13 +2266,25 @@ static const TCGOutOpBrcond2 outop_brcond2 = { .out = tgen_brcond2, }; +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) +{ + cond = tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); + finish_setcond(s, cond, ret, false); +} + +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, rI, rI), + .out = tgen_setcond2, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2, a3, a4, a5; - int c; switch (opc) { case INDEX_op_goto_ptr: @@ -2348,14 +2360,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mov_reg(s, COND_AL, args[0], a0); break; - case INDEX_op_setcond2_i32: - c = tcg_out_cmp2(s, args[5], args[1], args[2], args[3], const_args[3], - args[4], const_args[4]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)], - ARITH_MOV, args[0], 0, 0); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; @@ -2452,9 +2456,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O2_I4(r, r, r, r, rIN, rIK); case INDEX_op_sub2_i32: return C_O2_I4(r, r, rI, rI, rIN, rIK); - case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, rI, rI); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, q); case INDEX_op_qemu_ld_i64: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index b7708c945f..6a42ffaf44 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1860,47 +1860,53 @@ static const TCGOutOpSetcond outop_negsetcond = { .out_rri = tgen_negsetcondi, }; -#if TCG_TARGET_REG_BITS == 32 -static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, - const int *const_args) +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) { - TCGLabel *label_true, *label_over; + TCGLabel *label_over = gen_new_label(); - if (args[0] == args[1] || args[0] == args[2] - || (!const_args[3] && args[0] == args[3]) - || (!const_args[4] && args[0] == args[4])) { - /* When the destination overlaps with one of the argument - registers, don't do anything tricky. */ - label_true = gen_new_label(); - label_over = gen_new_label(); + if (ret == al || ret == ah + || (!const_bl && ret == bl) + || (!const_bh && ret == bh)) { + /* + * When the destination overlaps with one of the argument + * registers, don't do anything tricky. + */ + TCGLabel *label_true = gen_new_label(); - tcg_out_brcond2(s, args[5], args[1], args[2], args[3], const_args[3], - args[4], const_args[4], label_true, true); + tcg_out_brcond2(s, cond, al, ah, bl, const_bl, + bh, const_bh, label_true, true); - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + tcg_out_movi(s, TCG_TYPE_I32, ret, 0); tcg_out_jxx(s, JCC_JMP, label_over, 1); tcg_out_label(s, label_true); - tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); - tcg_out_label(s, label_over); + tcg_out_movi(s, TCG_TYPE_I32, ret, 1); } else { - /* When the destination does not overlap one of the arguments, - clear the destination first, jump if cond false, and emit an - increment in the true case. This results in smaller code. */ + /* + * When the destination does not overlap one of the arguments, + * clear the destination first, jump if cond false, and emit an + * increment in the true case. This results in smaller code. + */ + tcg_out_movi(s, TCG_TYPE_I32, ret, 0); - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + tcg_out_brcond2(s, tcg_invert_cond(cond), al, ah, bl, const_bl, + bh, const_bh, label_over, true); - label_over = gen_new_label(); - tcg_out_brcond2(s, tcg_invert_cond(args[5]), args[1], args[2], - args[3], const_args[3], - args[4], const_args[4], label_over, true); - - - tgen_arithi(s, ARITH_ADD, args[0], 1, 0); - tcg_out_label(s, label_over); + tgen_arithi(s, ARITH_ADD, ret, 1, 0); } + tcg_out_label(s, label_over); } + +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) #endif +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, ri, ri), + .out = tgen_setcond2, +}; static void tcg_out_cmov(TCGContext *s, int jcc, int rexw, TCGReg dest, TCGReg v1) @@ -3240,11 +3246,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); - break; -#else /* TCG_TARGET_REG_BITS == 64 */ +#if TCG_TARGET_REG_BITS == 64 case INDEX_op_ld32s_i64: tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2); break; @@ -4012,9 +4014,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) tcg_debug_assert(TCG_TARGET_REG_BITS == 64); return C_O0_I3(L, L, L); - case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, ri, ri); - case INDEX_op_ld_vec: case INDEX_op_dupm_vec: return C_O1_I1(x, r); diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index 9d0ea73f4f..4e09c9a400 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -25,7 +25,7 @@ C_O1_I2(r, r, rz) C_O1_I2(r, r, rzW) C_O1_I4(r, r, rz, rz, 0) C_O1_I4(r, r, rz, rz, rz) -C_O1_I4(r, rz, rz, rz, rz) +C_O1_I4(r, r, r, rz, rz) C_O2_I1(r, r, r) C_O2_I2(r, r, r, r) C_O2_I4(r, r, rz, rz, rN, rN) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 9a9b1bb09a..e8ae65bccb 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1067,13 +1067,23 @@ static int tcg_out_setcond2_int(TCGContext *s, TCGCond cond, TCGReg ret, return ret | flags; } -static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) { int tmpflags = tcg_out_setcond2_int(s, cond, ret, al, ah, bl, bh); tcg_out_setcond_end(s, ret, tmpflags); } +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, rz, rz), + .out = tgen_setcond2, +}; + static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, TCGArg bl, bool const_bl, TCGArg bh, bool const_bh, TCGLabel *l) @@ -2306,10 +2316,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I32); break; @@ -2404,8 +2410,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_add2_i32: case INDEX_op_sub2_i32: return C_O2_I4(r, r, rz, rz, rN, rN); - case INDEX_op_setcond2_i32: - return C_O1_I4(r, rz, rz, rz, rz); case INDEX_op_qemu_ld_i32: return C_O1_I1(r, r); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index cde8a55918..4cdbf246d2 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2274,15 +2274,24 @@ static void tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, } } -static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, - const int *const_args) +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) { - tcg_out_cmp2(s, args[5], args[1], args[2], args[3], const_args[3], - args[4], const_args[4]); + tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0)); - tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31); + tcg_out_rlw(s, RLWINM, ret, TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31); } +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, rU, rC), + .out = tgen_setcond2, +}; + static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, TCGArg bl, bool const_bl, TCGArg bh, bool const_bh, TCGLabel *l) @@ -3491,10 +3500,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); - break; - case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: tcg_out_bswap16(s, args[0], args[1], args[2]); @@ -4277,8 +4282,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: return C_O1_I2(r, 0, rZ); - case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, rU, rC); case INDEX_op_add2_i64: case INDEX_op_add2_i32: return C_O2_I4(r, r, r, r, rI, rZM); diff --git a/tcg/tcg.c b/tcg/tcg.c index 2a39ce3665..735a7b95d4 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1033,6 +1033,12 @@ typedef struct TCGOutOpSetcond { TCGReg ret, TCGReg a1, tcg_target_long a2); } TCGOutOpSetcond; +typedef struct TCGOutOpSetcond2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, TCGArg bh, bool const_bh); +} TCGOutOpSetcond2; + typedef struct TCGOutOpSubtract { TCGOutOp base; void (*out_rrr)(TCGContext *s, TCGType type, @@ -1097,6 +1103,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { #if TCG_TARGET_REG_BITS == 32 OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2), + OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2), #endif }; @@ -5565,8 +5572,20 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) new_args[3], const_args[3], label); } break; + case INDEX_op_setcond2_i32: + { + const TCGOutOpSetcond2 *out = &outop_setcond2; + TCGCond cond = new_args[5]; + + tcg_debug_assert(!const_args[1]); + tcg_debug_assert(!const_args[2]); + out->out(s, cond, new_args[0], new_args[1], new_args[2], + new_args[3], const_args[3], new_args[4], const_args[4]); + } + break; #else case INDEX_op_brcond2_i32: + case INDEX_op_setcond2_i32: g_assert_not_reached(); #endif diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 0fe365e2d4..88dc7e24e3 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -87,9 +87,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_sub2_i64: return C_O2_I4(r, r, r, r, r, r); - case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, r, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, r); case INDEX_op_qemu_ld_i64: @@ -997,6 +994,22 @@ static const TCGOutOpBrcond2 outop_brcond2 = { .out = tgen_brcond2, }; +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) +{ + tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, ret, al, ah, bl, bh, cond); +} + +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, r, r), + .out = tgen_setcond2, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1012,11 +1025,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_l(s, opc, arg_label(args[0])); break; - case INDEX_op_setcond2_i32: - tcg_out_op_rrrrrc(s, opc, args[0], args[1], args[2], - args[3], args[4], args[5]); - break; - CASE_32_64(ld8u) CASE_32_64(ld8s) CASE_32_64(ld16u) From 5fa8e13872b0ecab7e1bc1f75c65281983df52e5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 18:32:08 -0800 Subject: [PATCH 091/161] tcg: Convert bswap16 to TCGOutOpBswap Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 - tcg/aarch64/tcg-target.c.inc | 30 +++++++------ tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 42 +++++++++--------- tcg/i386/tcg-target-has.h | 2 - tcg/i386/tcg-target.c.inc | 48 +++++++++++--------- tcg/loongarch64/tcg-target-has.h | 2 - tcg/loongarch64/tcg-target.c.inc | 28 +++++++----- tcg/mips/tcg-target-has.h | 2 - tcg/mips/tcg-target.c.inc | 74 +++++++++++++++---------------- tcg/ppc/tcg-target-has.h | 2 - tcg/ppc/tcg-target.c.inc | 76 ++++++++++++++++---------------- tcg/riscv/tcg-target-has.h | 2 - tcg/riscv/tcg-target.c.inc | 33 +++++++++----- tcg/s390x/tcg-target-has.h | 2 - tcg/s390x/tcg-target.c.inc | 40 ++++++++--------- tcg/sparc64/tcg-target-has.h | 2 - tcg/sparc64/tcg-target.c.inc | 4 ++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 4 +- tcg/tcg.c | 23 ++++++++-- tcg/tci.c | 2 - tcg/tci/tcg-target-has.h | 2 - tcg/tci/tcg-target.c.inc | 21 ++++++--- 24 files changed, 235 insertions(+), 210 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 22a574e703..4797409467 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -13,7 +13,6 @@ #define have_lse2 (cpuinfo & CPUINFO_LSE2) /* optional instructions */ -#define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_add2_i32 1 @@ -21,7 +20,6 @@ #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index ee45e7e244..03961b34aa 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2438,6 +2438,23 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); + if (flags & TCG_BSWAP_OS) { + /* Output must be sign-extended. */ + tcg_out_ext16s(s, type, a0, a0); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* Output must be zero-extended, but input isn't. */ + tcg_out_ext16u(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { @@ -2618,17 +2635,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, case INDEX_op_bswap32_i32: tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); break; - case INDEX_op_bswap16_i64: - case INDEX_op_bswap16_i32: - tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); - if (a2 & TCG_BSWAP_OS) { - /* Output must be sign-extended. */ - tcg_out_ext16s(s, ext, a0, a0); - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - /* Output must be zero-extended, but input isn't. */ - tcg_out_ext16u(s, a0, a0); - } - break; case INDEX_op_deposit_i64: case INDEX_op_deposit_i32: @@ -3148,9 +3154,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: - case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_ext_i32_i64: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index bfa3be8028..5972def558 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -24,7 +24,6 @@ extern bool use_neon_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 8cd82b8baf..6928f209d2 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -969,23 +969,6 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) g_assert_not_reached(); } -static void tcg_out_bswap16(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int flags) -{ - if (flags & TCG_BSWAP_OS) { - /* revsh */ - tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); - return; - } - - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd); - } -} - static void tcg_out_bswap32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) { /* rev */ @@ -2153,6 +2136,27 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg rd, TCGReg rn, unsigned flags) +{ + if (flags & TCG_BSWAP_OS) { + /* revsh */ + tcg_out32(s, 0x06ff0fb0 | (COND_AL << 28) | (rd << 12) | rn); + return; + } + + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (COND_AL << 28) | (rd << 12) | rn); + if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext16u(s, rd, rd); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_subfi(s, type, a0, 0, a1); @@ -2374,9 +2378,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); break; - case INDEX_op_bswap16_i32: - tcg_out_bswap16(s, COND_AL, args[0], args[1], args[2]); - break; case INDEX_op_bswap32_i32: tcg_out_bswap32(s, COND_AL, args[0], args[1]); break; @@ -2437,7 +2438,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index aaf8764cc9..fd44ed8168 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -26,7 +26,6 @@ #define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) /* optional instructions */ -#define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_add2_i32 1 @@ -35,7 +34,6 @@ #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 6a42ffaf44..c74a718cee 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3062,6 +3062,34 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + if (flags & TCG_BSWAP_OS) { + /* Output must be sign-extended. */ + if (rexw) { + tcg_out_bswap64(s, a0); + tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); + } else { + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SAR, a0, 16); + } + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* Output must be zero-extended, but input isn't. */ + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SHR, a0, 16); + } else { + tcg_out_rolw_8(s, a0); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_bswap16, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; @@ -3165,24 +3193,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(bswap16): - if (a2 & TCG_BSWAP_OS) { - /* Output must be sign-extended. */ - if (rexw) { - tcg_out_bswap64(s, a0); - tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); - } else { - tcg_out_bswap32(s, a0); - tcg_out_shifti(s, SHIFT_SAR, a0, 16); - } - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - /* Output must be zero-extended, but input isn't. */ - tcg_out_bswap32(s, a0); - tcg_out_shifti(s, SHIFT_SHR, a0, 16); - } else { - tcg_out_rolw_8(s, a0); - } - break; OP_32_64(bswap32): tcg_out_bswap32(s, a0); if (rexw && (a2 & TCG_BSWAP_OS)) { @@ -3962,8 +3972,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 90f0a131ae..11a93afd8b 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -13,14 +13,12 @@ #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_add2_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index c731096c64..8be6f69e3a 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1735,6 +1735,22 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_opc_revb_2h(s, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, TCG_TYPE_REG, a0, a0); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext16u(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_ZERO, a1); @@ -1826,16 +1842,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1); break; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - tcg_out_opc_revb_2h(s, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_ext16s(s, TCG_TYPE_REG, a0, a0); - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext16u(s, a0, a0); - } - break; - case INDEX_op_bswap32_i32: /* All 32-bit values are computed sign-extended in the register. */ a2 = TCG_BSWAP_OS; @@ -2448,8 +2454,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index c6cecba28b..6c967d9c9f 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -39,7 +39,6 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #if TCG_TARGET_REG_BITS == 64 @@ -57,7 +56,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index e8ae65bccb..258b49f9db 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -702,39 +702,6 @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, g_assert_not_reached(); } -static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags) -{ - /* ret and arg can't be register tmp0 */ - tcg_debug_assert(ret != TCG_TMP0); - tcg_debug_assert(arg != TCG_TMP0); - - /* With arg = abcd: */ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); /* badc */ - if (flags & TCG_BSWAP_OS) { - tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); /* ssdc */ - } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xffff); /* 00dc */ - } - return; - } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); /* 0abc */ - if (!(flags & TCG_BSWAP_IZ)) { - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); /* 000c */ - } - if (flags & TCG_BSWAP_OS) { - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); /* d000 */ - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); /* ssd0 */ - } else { - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); /* bcd0 */ - if (flags & TCG_BSWAP_OZ) { - tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); /* 00d0 */ - } - } - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); /* ssdc */ -} - static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) { if (!tcg_out_opc_jmp(s, OPC_JAL, sub)) { @@ -2168,6 +2135,41 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg, unsigned flags) +{ + /* With arg = abcd: */ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); /* badc */ + if (flags & TCG_BSWAP_OS) { + tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); /* ssdc */ + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xffff); /* 00dc */ + } + return; + } + + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); /* 0abc */ + if (!(flags & TCG_BSWAP_IZ)) { + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); /* 000c */ + } + if (flags & TCG_BSWAP_OS) { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); /* d000 */ + tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); /* ssd0 */ + } else { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); /* bcd0 */ + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); /* 00d0 */ + } + } + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); /* ssdc */ +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_ZERO, a1); @@ -2259,10 +2261,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - tcg_out_bswap16(s, a0, a1, a2); - break; case INDEX_op_bswap32_i32: tcg_out_bswap32(s, a0, a1, 0); break; @@ -2373,7 +2371,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: @@ -2384,7 +2381,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_ext_i32_i64: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 5c4fc2bc34..b73fca9789 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -17,7 +17,6 @@ #define have_vsx (cpuinfo & CPUINFO_VSX) /* optional instructions */ -#define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -26,7 +25,6 @@ #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 4cdbf246d2..3454254624 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1012,38 +1012,6 @@ static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm) tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2); } -static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags) -{ - TCGReg tmp = dst == src ? TCG_REG_R0 : dst; - - if (have_isa_3_10) { - tcg_out32(s, BRH | RA(dst) | RS(src)); - if (flags & TCG_BSWAP_OS) { - tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst); - } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext16u(s, dst, dst); - } - return; - } - - /* - * In the following, - * dep(a, b, m) -> (a & ~m) | (b & m) - * - * Begin with: src = xxxxabcd - */ - /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ - tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); - /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ - tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); - - if (flags & TCG_BSWAP_OS) { - tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp); - } else { - tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); - } -} - static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags) { TCGReg tmp = dst == src ? TCG_REG_R0 : dst; @@ -3378,6 +3346,44 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src, unsigned flags) +{ + TCGReg tmp = dst == src ? TCG_REG_R0 : dst; + + if (have_isa_3_10) { + tcg_out32(s, BRH | RA(dst) | RS(src)); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext16u(s, dst, dst); + } + return; + } + + /* + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = xxxxabcd + */ + /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ + tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); + /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ + tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); + + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp); + } else { + tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tcg_out32(s, NEG | RT(a0) | RA(a1)); @@ -3500,10 +3506,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - tcg_out_bswap16(s, args[0], args[1], args[2]); - break; case INDEX_op_bswap32_i32: tcg_out_bswap32(s, args[0], args[1], 0); break; @@ -4250,7 +4252,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: @@ -4263,7 +4264,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_extract_i64: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index e18b5cb8ec..85bb5cd591 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -13,13 +13,11 @@ #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 8d106d7f28..c6cd2100f8 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2402,6 +2402,28 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static TCGConstraintSetIndex cset_bswap(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_O1_I1(r, r) : C_NotImplemented; +} + +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48); + } else { + tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_bswap, + .out_rr = tgen_bswap16, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_ZERO, a1); @@ -2498,15 +2520,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32); } break; - case INDEX_op_bswap16_i64: - case INDEX_op_bswap16_i32: - tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); - if (a2 & TCG_BSWAP_OZ) { - tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48); - } else { - tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48); - } - break; case INDEX_op_add2_i32: tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], @@ -2845,9 +2858,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: - case INDEX_op_bswap16_i32: case INDEX_op_bswap32_i32: - case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: return C_O1_I1(r, r); diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 41cd8a1d0d..6cd92fa240 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -29,7 +29,6 @@ extern uint64_t s390_facilities[3]; ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1) /* optional instructions */ -#define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 @@ -37,7 +36,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index fbf39ca529..e90c03628a 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2741,6 +2741,25 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori_3, }; +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRE, LRVR, a0, a1); + tcg_out_sh32(s, (flags & TCG_BSWAP_OS ? RS_SRA : RS_SRL), + a0, TCG_REG_NONE, 16); + } else { + tcg_out_insn(s, RRE, LRVGR, a0, a1); + tcg_out_sh64(s, (flags & TCG_BSWAP_OS ? RSY_SRAG : RSY_SRLG), + a0, a0, TCG_REG_NONE, 48); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { if (type == TCG_TYPE_I32) { @@ -2827,25 +2846,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_bswap16_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - tcg_out_insn(s, RRE, LRVR, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16); - } else { - tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16); - } - break; - case INDEX_op_bswap16_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - tcg_out_insn(s, RRE, LRVGR, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48); - } else { - tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48); - } - break; - case INDEX_op_bswap32_i32: tcg_out_insn(s, RRE, LRVR, args[0], args[1]); break; @@ -3459,8 +3459,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 6ed27b8fcc..eb1e16c0e2 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -14,7 +14,6 @@ extern bool use_vis3_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_bswap16_i32 0 #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 @@ -22,7 +21,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index d99b9e42ce..5111f173e1 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1725,6 +1725,10 @@ static const TCGOutOpBinary outop_xor = { .out_rri = tgen_xori, }; +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_G0, a1); diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 315dfd05aa..3d1c805d59 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -12,7 +12,6 @@ #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. */ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap16_i64 0 #define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 3527952c66..c5b3bc8148 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1257,7 +1257,7 @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) /* Only one extension flag may be present. */ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); - if (TCG_TARGET_HAS_bswap16_i32) { + if (tcg_op_supported(INDEX_op_bswap16_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -2087,7 +2087,7 @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) } else { tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } - } else if (TCG_TARGET_HAS_bswap16_i64) { + } else if (tcg_op_supported(INDEX_op_bswap16_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 735a7b95d4..25834f40a0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1001,6 +1001,12 @@ typedef struct TCGOutOpBrcond2 { TCGArg bh, bool const_bh, TCGLabel *l); } TCGOutOpBrcond2; +typedef struct TCGOutOpBswap { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags); +} TCGOutOpBswap; + typedef struct TCGOutOpDivRem { TCGOutOp base; void (*out_rr01r)(TCGContext *s, TCGType type, @@ -1069,6 +1075,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond), + OUTOP(INDEX_op_bswap16_i32, TCGOutOpBswap, outop_bswap16), + OUTOP(INDEX_op_bswap16_i64, TCGOutOpBswap, outop_bswap16), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), @@ -2335,8 +2343,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_add2_i32; case INDEX_op_sub2_i32: return TCG_TARGET_HAS_sub2_i32; - case INDEX_op_bswap16_i32: - return TCG_TARGET_HAS_bswap16_i32; case INDEX_op_bswap32_i32: return TCG_TARGET_HAS_bswap32_i32; @@ -2367,8 +2373,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: return TCG_TARGET_HAS_extr_i64_i32; - case INDEX_op_bswap16_i64: - return TCG_TARGET_HAS_bswap16_i64; case INDEX_op_bswap32_i64: return TCG_TARGET_HAS_bswap32_i64; case INDEX_op_bswap64_i64: @@ -5485,6 +5489,17 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_bswap16_i32: + case INDEX_op_bswap16_i64: + { + const TCGOutOpBswap *out = + container_of(all_outop[op->opc], TCGOutOpBswap, base); + + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1], new_args[2]); + } + break; + case INDEX_op_divs2: case INDEX_op_divu2: { diff --git a/tcg/tci.c b/tcg/tci.c index 9c3f58242e..ae447e91bd 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -686,12 +686,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_write_reg64(regs, r1, r0, T1 - T2); break; #endif -#if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 CASE_32_64(bswap16) tci_args_rr(insn, &r0, &r1); regs[r0] = bswap16(regs[r1]); break; -#endif #if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64 CASE_32_64(bswap32) tci_args_rr(insn, &r0, &r1); diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index f45a0688f9..d7228246ab 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -7,14 +7,12 @@ #ifndef TCG_TARGET_HAS_H #define TCG_TARGET_HAS_H -#define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 88dc7e24e3..2a8ba07e37 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -57,8 +57,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: @@ -904,6 +902,20 @@ static const TCGOutOpUnary outop_ctpop = { .out_rr = tgen_ctpop, }; +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_op_rr(s, INDEX_op_bswap16_i32, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_sextract(s, TCG_TYPE_REG, a0, a0, 0, 16); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tcg_out_op_rr(s, INDEX_op_neg, a0, a1); @@ -1055,13 +1067,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_rr(s, opc, args[0], args[1]); break; - case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ - case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ - width = 16; - goto do_bswap; case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ width = 32; - do_bswap: /* The base tci bswaps zero-extend, and ignore high bits. */ tcg_out_op_rr(s, opc, args[0], args[1]); if (args[2] & TCG_BSWAP_OS) { From 0dd07ee1122abaf1adb4f1e00a8e0b89937f53bd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 18:51:16 -0800 Subject: [PATCH 092/161] tcg: Merge INDEX_op_bswap16_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 7 +++---- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 9 +++------ tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 15 insertions(+), 21 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 26dc3bad49..509cfe7db1 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -415,7 +415,7 @@ Misc - | *t0* = *t1* | Move *t1* to *t0*. - * - bswap16_i32/i64 *t0*, *t1*, *flags* + * - bswap16 *t0*, *t1*, *flags* - | 16 bit byte swap on the low bits of a 32/64 bit input. | diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 5e085607d5..acfbaa05b4 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -43,6 +43,7 @@ DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) +DEF(bswap16, 1, 1, 1, TCG_OPF_INT) DEF(clz, 1, 2, 0, TCG_OPF_INT) DEF(ctpop, 1, 1, 0, TCG_OPF_INT) DEF(ctz, 1, 2, 0, TCG_OPF_INT) @@ -95,7 +96,6 @@ DEF(sub2_i32, 2, 4, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) -DEF(bswap16_i32, 1, 1, 1, 0) DEF(bswap32_i32, 1, 1, 1, 0) /* load/store */ @@ -122,7 +122,6 @@ DEF(extu_i32_i64, 1, 1, 0, 0) DEF(extrl_i64_i32, 1, 1, 0, 0) DEF(extrh_i64_i32, 1, 1, 0, 0) -DEF(bswap16_i64, 1, 1, 1, 0) DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 54606388cc..1d535a9fae 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -518,7 +518,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, case INDEX_op_ctpop: return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); - CASE_OP_32_64(bswap16): + case INDEX_op_bswap16: x = bswap16(x); return y & TCG_BSWAP_OS ? (int16_t)x : x; @@ -1572,8 +1572,7 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op) z_mask = t1->z_mask; switch (op->opc) { - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: + case INDEX_op_bswap16: z_mask = bswap16(z_mask); sign = INT16_MIN; break; @@ -2870,7 +2869,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_brcond2_i32: done = fold_brcond2(&ctx, op); break; - CASE_OP_32_64(bswap16): + case INDEX_op_bswap16: CASE_OP_32_64(bswap32): case INDEX_op_bswap64_i64: done = fold_bswap(&ctx, op); diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index c5b3bc8148..917f52b04a 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1257,8 +1257,8 @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) /* Only one extension flag may be present. */ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); - if (tcg_op_supported(INDEX_op_bswap16_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags); + if (tcg_op_supported(INDEX_op_bswap16, TCG_TYPE_I32, 0)) { + tcg_gen_op3i_i32(INDEX_op_bswap16, ret, arg, flags); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); @@ -2087,8 +2087,8 @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) } else { tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } - } else if (tcg_op_supported(INDEX_op_bswap16_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags); + } else if (tcg_op_supported(INDEX_op_bswap16, TCG_TYPE_I64, 0)) { + tcg_gen_op3i_i64(INDEX_op_bswap16, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 25834f40a0..ae68ce88b7 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1075,8 +1075,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond), - OUTOP(INDEX_op_bswap16_i32, TCGOutOpBswap, outop_bswap16), - OUTOP(INDEX_op_bswap16_i64, TCGOutOpBswap, outop_bswap16), + OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), @@ -2941,8 +2940,7 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) i = 1; } break; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: + case INDEX_op_bswap16: case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: @@ -5489,8 +5487,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: + case INDEX_op_bswap16: { const TCGOutOpBswap *out = container_of(all_outop[op->opc], TCGOutOpBswap, base); diff --git a/tcg/tci.c b/tcg/tci.c index ae447e91bd..905ca154fc 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -686,7 +686,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_write_reg64(regs, r1, r0, T1 - T2); break; #endif - CASE_32_64(bswap16) + case INDEX_op_bswap16: tci_args_rr(insn, &r0, &r1); regs[r0] = bswap16(regs[r1]); break; @@ -1005,14 +1005,13 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), str_r(r1), s2); break; + case INDEX_op_bswap16: case INDEX_op_ctpop: case INDEX_op_mov: case INDEX_op_neg: case INDEX_op_not: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 2a8ba07e37..4d3d9569cc 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -905,7 +905,7 @@ static const TCGOutOpUnary outop_ctpop = { static void tgen_bswap16(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned flags) { - tcg_out_op_rr(s, INDEX_op_bswap16_i32, a0, a1); + tcg_out_op_rr(s, INDEX_op_bswap16, a0, a1); if (flags & TCG_BSWAP_OS) { tcg_out_sextract(s, TCG_TYPE_REG, a0, a0, 0, 16); } From d7b15a25a707f977e39af20c9f14a5ac4971c762 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 19:49:20 -0800 Subject: [PATCH 093/161] tcg: Convert bswap32 to TCGOutOpBswap Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 - tcg/aarch64/tcg-target.c.inc | 25 +++++----- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 23 ++++----- tcg/i386/tcg-target-has.h | 2 - tcg/i386/tcg-target.c.inc | 23 +++++---- tcg/loongarch64/tcg-target-has.h | 2 - tcg/loongarch64/tcg-target.c.inc | 33 +++++++------ tcg/mips/tcg-target-has.h | 2 - tcg/mips/tcg-target.c.inc | 54 ++++++++++---------- tcg/ppc/tcg-target-has.h | 2 - tcg/ppc/tcg-target.c.inc | 84 ++++++++++++++++---------------- tcg/riscv/tcg-target-has.h | 2 - tcg/riscv/tcg-target.c.inc | 30 +++++++----- tcg/s390x/tcg-target-has.h | 2 - tcg/s390x/tcg-target.c.inc | 31 ++++++------ tcg/sparc64/tcg-target-has.h | 2 - tcg/sparc64/tcg-target.c.inc | 4 ++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 4 +- tcg/tcg.c | 8 +-- tcg/tci.c | 2 - tcg/tci/tcg-target-has.h | 2 - tcg/tci/tcg-target.c.inc | 28 +++++------ 24 files changed, 182 insertions(+), 187 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 4797409467..7c3d3fc637 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -13,14 +13,12 @@ #define have_lse2 (cpuinfo & CPUINFO_LSE2) /* optional instructions */ -#define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 03961b34aa..a2e45ca5c8 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2456,6 +2456,20 @@ static const TCGOutOpBswap outop_bswap16 = { .out_rr = tgen_bswap16, }; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_XZR, a1); @@ -2626,15 +2640,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, case INDEX_op_bswap64_i64: tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); break; - case INDEX_op_bswap32_i64: - tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_ext32s(s, a0, a0); - } - break; - case INDEX_op_bswap32_i32: - tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); - break; case INDEX_op_deposit_i64: case INDEX_op_deposit_i32: @@ -3154,8 +3159,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 5972def558..c85b5da1e5 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -24,7 +24,6 @@ extern bool use_neon_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 6928f209d2..4ca23bb718 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -969,12 +969,6 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) g_assert_not_reached(); } -static void tcg_out_bswap32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) -{ - /* rev */ - tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); -} - static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd, TCGArg a1, int ofs, int len, bool const_a1) { @@ -2157,6 +2151,18 @@ static const TCGOutOpBswap outop_bswap16 = { .out_rr = tgen_bswap16, }; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg rd, TCGReg rn, unsigned flags) +{ + /* rev */ + tcg_out32(s, 0x06bf0f30 | (COND_AL << 28) | (rd << 12) | rn); +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_subfi(s, type, a0, 0, a1); @@ -2378,10 +2384,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); break; - case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, COND_AL, args[0], args[1]); - break; - case INDEX_op_deposit_i32: tcg_out_deposit(s, COND_AL, args[0], args[2], args[3], args[4], const_args[2]); @@ -2438,7 +2440,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: return C_O1_I1(r, r); diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index fd44ed8168..ca533ab5cf 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -26,7 +26,6 @@ #define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) /* optional instructions */ -#define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 @@ -34,7 +33,6 @@ #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index c74a718cee..6d90666ba7 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3090,6 +3090,20 @@ static const TCGOutOpBswap outop_bswap16 = { .out_rr = tgen_bswap16, }; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_bswap32(s, a0); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_bswap32, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; @@ -3193,13 +3207,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(bswap32): - tcg_out_bswap32(s, a0); - if (rexw && (a2 & TCG_BSWAP_OS)) { - tcg_out_ext32s(s, a0, a0); - } - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I32); break; @@ -3972,8 +3979,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_extrh_i64_i32: return C_O1_I1(r, 0); diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 11a93afd8b..e66df31954 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -13,13 +13,11 @@ #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 8be6f69e3a..d4bcb06a5f 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1751,6 +1751,24 @@ static const TCGOutOpBswap outop_bswap16 = { .out_rr = tgen_bswap16, }; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_opc_revb_2w(s, a0, a1); + + /* All 32-bit values are computed sign-extended in the register. */ + if (type == TCG_TYPE_I32 || (flags & TCG_BSWAP_OS)) { + tcg_out_ext32s(s, a0, a0); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext32u(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_ZERO, a1); @@ -1842,19 +1860,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1); break; - case INDEX_op_bswap32_i32: - /* All 32-bit values are computed sign-extended in the register. */ - a2 = TCG_BSWAP_OS; - /* fallthrough */ - case INDEX_op_bswap32_i64: - tcg_out_opc_revb_2w(s, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_ext32s(s, a0, a0); - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext32u(s, a0, a0); - } - break; - case INDEX_op_bswap64_i64: tcg_out_opc_revb_d(s, a0, a1); break; @@ -2454,8 +2459,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 6c967d9c9f..2391f5d8bf 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -39,7 +39,6 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_bswap32_i32 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 @@ -56,7 +55,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #endif diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 258b49f9db..ab8f8c9994 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -710,26 +710,6 @@ static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) } } -static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg, int flags) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); - tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); - if (flags & TCG_BSWAP_OZ) { - tcg_out_opc_bf(s, OPC_DEXT, ret, ret, 31, 0); - } - } else { - if (flags & TCG_BSWAP_OZ) { - tcg_out_bswap_subr(s, bswap32u_addr); - } else { - tcg_out_bswap_subr(s, bswap32_addr); - } - /* delay slot -- never omit the insn, like tcg_out_mov might. */ - tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); - tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); - } -} - static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg) { if (use_mips32r2_instructions) { @@ -2170,6 +2150,32 @@ static const TCGOutOpBswap outop_bswap16 = { .out_rr = tgen_bswap16, }; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg, unsigned flags) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_bf(s, OPC_DEXT, ret, ret, 31, 0); + } + } else { + if (flags & TCG_BSWAP_OZ) { + tcg_out_bswap_subr(s, bswap32u_addr); + } else { + tcg_out_bswap_subr(s, bswap32_addr); + } + /* delay slot -- never omit the insn, like tcg_out_mov might. */ + tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); + tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_ZERO, a1); @@ -2261,12 +2267,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, a0, a1, 0); - break; - case INDEX_op_bswap32_i64: - tcg_out_bswap32(s, a0, a1, a2); - break; case INDEX_op_bswap64_i64: tcg_out_bswap64(s, a0, a1); break; @@ -2371,7 +2371,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: case INDEX_op_ld8u_i64: @@ -2381,7 +2380,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index b73fca9789..ad0885d635 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -17,7 +17,6 @@ #define have_vsx (cpuinfo & CPUINFO_VSX) /* optional instructions */ -#define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -25,7 +24,6 @@ #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 3454254624..4527ed3eee 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1012,41 +1012,6 @@ static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm) tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2); } -static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags) -{ - TCGReg tmp = dst == src ? TCG_REG_R0 : dst; - - if (have_isa_3_10) { - tcg_out32(s, BRW | RA(dst) | RS(src)); - if (flags & TCG_BSWAP_OS) { - tcg_out_ext32s(s, dst, dst); - } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext32u(s, dst, dst); - } - return; - } - - /* - * Stolen from gcc's builtin_bswap32. - * In the following, - * dep(a, b, m) -> (a & ~m) | (b & m) - * - * Begin with: src = xxxxabcd - */ - /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ - tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); - /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ - tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); - /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ - tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); - - if (flags & TCG_BSWAP_OS) { - tcg_out_ext32s(s, dst, tmp); - } else { - tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); - } -} - static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src) { TCGReg t0 = dst == src ? TCG_REG_R0 : dst; @@ -3384,6 +3349,47 @@ static const TCGOutOpBswap outop_bswap16 = { .out_rr = tgen_bswap16, }; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src, unsigned flags) +{ + TCGReg tmp = dst == src ? TCG_REG_R0 : dst; + + if (have_isa_3_10) { + tcg_out32(s, BRW | RA(dst) | RS(src)); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, dst, dst); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext32u(s, dst, dst); + } + return; + } + + /* + * Stolen from gcc's builtin_bswap32. + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = xxxxabcd + */ + /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ + tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); + /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ + tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); + /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ + tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); + + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, dst, tmp); + } else { + tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tcg_out32(s, NEG | RT(a0) | RA(a1)); @@ -3506,12 +3512,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; - case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, args[0], args[1], 0); - break; - case INDEX_op_bswap32_i64: - tcg_out_bswap32(s, args[0], args[1], args[2]); - break; case INDEX_op_bswap64_i64: tcg_out_bswap64(s, args[0], args[1]); break; @@ -4252,7 +4252,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_bswap32_i32: case INDEX_op_extract_i32: case INDEX_op_sextract_i32: case INDEX_op_ld8u_i64: @@ -4264,7 +4263,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_extract_i64: case INDEX_op_sextract_i64: diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 85bb5cd591..fbe294474a 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -13,12 +13,10 @@ #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index c6cd2100f8..9b6ca54ae7 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2424,6 +2424,23 @@ static const TCGOutOpBswap outop_bswap16 = { .out_rr = tgen_bswap16, }; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32); + } else { + tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_bswap, + .out_rr = tgen_bswap32, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_ZERO, a1); @@ -2509,17 +2526,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_bswap64_i64: tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); break; - case INDEX_op_bswap32_i32: - a2 = 0; - /* fall through */ - case INDEX_op_bswap32_i64: - tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); - if (a2 & TCG_BSWAP_OZ) { - tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32); - } else { - tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32); - } - break; case INDEX_op_add2_i32: tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], @@ -2858,8 +2864,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: return C_O1_I1(r, r); diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 6cd92fa240..76cfe4f323 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -29,14 +29,12 @@ extern uint64_t s390_facilities[3]; ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1) /* optional instructions */ -#define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index e90c03628a..ed2da3f31d 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2760,6 +2760,22 @@ static const TCGOutOpBswap outop_bswap16 = { .out_rr = tgen_bswap16, }; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_insn(s, RRE, LRVR, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, a0, a0); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext32u(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { if (type == TCG_TYPE_I32) { @@ -2846,19 +2862,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_bswap32_i32: - tcg_out_insn(s, RRE, LRVR, args[0], args[1]); - break; - case INDEX_op_bswap32_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - tcg_out_insn(s, RRE, LRVR, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_ext32s(s, a0, a0); - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext32u(s, a0, a0); - } - break; - case INDEX_op_add2_i32: if (const_args[4]) { tcg_out_insn(s, RIL, ALFI, args[0], args[4]); @@ -3459,8 +3462,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index eb1e16c0e2..22837beca9 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -14,14 +14,12 @@ extern bool use_vis3_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 5111f173e1..cbe9c759ec 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1729,6 +1729,10 @@ static const TCGOutOpBswap outop_bswap16 = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_G0, a1); diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 3d1c805d59..4034c73cca 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -12,7 +12,6 @@ #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. */ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap32_i64 0 #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 917f52b04a..68e53a9c85 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1294,7 +1294,7 @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) */ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_bswap32_i32) { + if (tcg_op_supported(INDEX_op_bswap32_i32, TCG_TYPE_I32, 0)) { tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -2137,7 +2137,7 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) } else { tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } - } else if (TCG_TARGET_HAS_bswap32_i64) { + } else if (tcg_op_supported(INDEX_op_bswap32_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index ae68ce88b7..89ef2ef89c 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1076,6 +1076,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond), OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16), + OUTOP(INDEX_op_bswap32_i32, TCGOutOpBswap, outop_bswap32), + OUTOP(INDEX_op_bswap32_i64, TCGOutOpBswap, outop_bswap32), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), @@ -2342,8 +2344,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return TCG_TARGET_HAS_add2_i32; case INDEX_op_sub2_i32: return TCG_TARGET_HAS_sub2_i32; - case INDEX_op_bswap32_i32: - return TCG_TARGET_HAS_bswap32_i32; case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: @@ -2372,8 +2372,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: return TCG_TARGET_HAS_extr_i64_i32; - case INDEX_op_bswap32_i64: - return TCG_TARGET_HAS_bswap32_i64; case INDEX_op_bswap64_i64: return TCG_TARGET_HAS_bswap64_i64; case INDEX_op_add2_i64: @@ -5488,6 +5486,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_bswap16: + case INDEX_op_bswap32_i32: + case INDEX_op_bswap32_i64: { const TCGOutOpBswap *out = container_of(all_outop[op->opc], TCGOutOpBswap, base); diff --git a/tcg/tci.c b/tcg/tci.c index 905ca154fc..0cb89f3256 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -690,12 +690,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = bswap16(regs[r1]); break; -#if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64 CASE_32_64(bswap32) tci_args_rr(insn, &r0, &r1); regs[r0] = bswap32(regs[r1]); break; -#endif #if TCG_TARGET_REG_BITS == 64 /* Load/store operations (64 bit). */ diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index d7228246ab..c5c64f4f5d 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -7,13 +7,11 @@ #ifndef TCG_TARGET_HAS_H #define TCG_TARGET_HAS_H -#define TCG_TARGET_HAS_bswap32_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i32 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 4d3d9569cc..1b2f18e370 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -57,8 +57,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: case INDEX_op_extract_i32: case INDEX_op_extract_i64: @@ -916,6 +914,20 @@ static const TCGOutOpBswap outop_bswap16 = { .out_rr = tgen_bswap16, }; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_op_rr(s, INDEX_op_bswap32_i32, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_sextract(s, TCG_TYPE_REG, a0, a0, 0, 32); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tcg_out_op_rr(s, INDEX_op_neg, a0, a1); @@ -1026,8 +1038,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - int width; - switch (opc) { case INDEX_op_goto_ptr: tcg_out_op_r(s, opc, args[0]); @@ -1062,20 +1072,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_rrbb(s, opc, args[0], args[1], args[2], args[3]); break; - case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ tcg_out_op_rr(s, opc, args[0], args[1]); break; - case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ - width = 32; - /* The base tci bswaps zero-extend, and ignore high bits. */ - tcg_out_op_rr(s, opc, args[0], args[1]); - if (args[2] & TCG_BSWAP_OS) { - tcg_out_sextract(s, TCG_TYPE_REG, args[0], args[0], 0, width); - } - break; - CASE_32_64(add2) CASE_32_64(sub2) tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2], From 7498d882cbe39ae7df4315ea006830e640f0d47b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 19:53:51 -0800 Subject: [PATCH 094/161] tcg: Merge INDEX_op_bswap32_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 13 ++++++------- include/tcg/tcg-opc.h | 4 +--- tcg/optimize.c | 7 +++---- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 9 +++------ tcg/tci.c | 5 ++--- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 20 insertions(+), 28 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 509cfe7db1..e89ede54fa 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -425,16 +425,15 @@ Misc | | If neither ``TCG_BSWAP_OZ`` nor ``TCG_BSWAP_OS`` are set, then the bits of *t0* above bit 15 may contain any value. - * - bswap32_i64 *t0*, *t1*, *flags* + * - bswap32 *t0*, *t1*, *flags* - - | 32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, - except they apply from bit 31 instead of bit 15. + - | 32 bit byte swap. The flags are the same as for bswap16, except + they apply from bit 31 instead of bit 15. On TCG_TYPE_I32, the + flags should be zero. - * - bswap32_i32 *t0*, *t1*, *flags* + * - bswap64_i64 *t0*, *t1*, *flags* - bswap64_i64 *t0*, *t1*, *flags* - - - | 32/64 bit byte swap. The flags are ignored, but still present + - | 64 bit byte swap. The flags are ignored, but still present for consistency with the other bswap opcodes. * - discard_i32/i64 *t0* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index acfbaa05b4..296dffe99a 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -44,6 +44,7 @@ DEF(add, 1, 2, 0, TCG_OPF_INT) DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(bswap16, 1, 1, 1, TCG_OPF_INT) +DEF(bswap32, 1, 1, 1, TCG_OPF_INT) DEF(clz, 1, 2, 0, TCG_OPF_INT) DEF(ctpop, 1, 1, 0, TCG_OPF_INT) DEF(ctz, 1, 2, 0, TCG_OPF_INT) @@ -96,8 +97,6 @@ DEF(sub2_i32, 2, 4, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) -DEF(bswap32_i32, 1, 1, 1, 0) - /* load/store */ DEF(ld8u_i64, 1, 1, 1, 0) DEF(ld8s_i64, 1, 1, 1, 0) @@ -122,7 +121,6 @@ DEF(extu_i32_i64, 1, 1, 0, 0) DEF(extrl_i64_i32, 1, 1, 0, 0) DEF(extrh_i64_i32, 1, 1, 0, 0) -DEF(bswap32_i64, 1, 1, 1, 0) DEF(bswap64_i64, 1, 1, 1, 0) DEF(add2_i64, 2, 4, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 1d535a9fae..6fa968624d 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -522,7 +522,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, x = bswap16(x); return y & TCG_BSWAP_OS ? (int16_t)x : x; - CASE_OP_32_64(bswap32): + case INDEX_op_bswap32: x = bswap32(x); return y & TCG_BSWAP_OS ? (int32_t)x : x; @@ -1576,8 +1576,7 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op) z_mask = bswap16(z_mask); sign = INT16_MIN; break; - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: + case INDEX_op_bswap32: z_mask = bswap32(z_mask); sign = INT32_MIN; break; @@ -2870,7 +2869,7 @@ void tcg_optimize(TCGContext *s) done = fold_brcond2(&ctx, op); break; case INDEX_op_bswap16: - CASE_OP_32_64(bswap32): + case INDEX_op_bswap32: case INDEX_op_bswap64_i64: done = fold_bswap(&ctx, op); break; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 68e53a9c85..b1174f60cc 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1294,8 +1294,8 @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) */ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (tcg_op_supported(INDEX_op_bswap32_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0); + if (tcg_op_supported(INDEX_op_bswap32, TCG_TYPE_I32, 0)) { + tcg_gen_op3i_i32(INDEX_op_bswap32, ret, arg, 0); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); @@ -2137,8 +2137,8 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) } else { tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } - } else if (tcg_op_supported(INDEX_op_bswap32_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags); + } else if (tcg_op_supported(INDEX_op_bswap32, TCG_TYPE_I64, 0)) { + tcg_gen_op3i_i64(INDEX_op_bswap32, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 89ef2ef89c..571f15626c 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1076,8 +1076,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond), OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16), - OUTOP(INDEX_op_bswap32_i32, TCGOutOpBswap, outop_bswap32), - OUTOP(INDEX_op_bswap32_i64, TCGOutOpBswap, outop_bswap32), + OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32), OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), @@ -2939,8 +2938,7 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) } break; case INDEX_op_bswap16: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: + case INDEX_op_bswap32: case INDEX_op_bswap64_i64: { TCGArg flags = op->args[k]; @@ -5486,8 +5484,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_bswap16: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: + case INDEX_op_bswap32: { const TCGOutOpBswap *out = container_of(all_outop[op->opc], TCGOutOpBswap, base); diff --git a/tcg/tci.c b/tcg/tci.c index 0cb89f3256..f98c437100 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -690,7 +690,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = bswap16(regs[r1]); break; - CASE_32_64(bswap32) + case INDEX_op_bswap32: tci_args_rr(insn, &r0, &r1); regs[r0] = bswap32(regs[r1]); break; @@ -1004,14 +1004,13 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) break; case INDEX_op_bswap16: + case INDEX_op_bswap32: case INDEX_op_ctpop: case INDEX_op_mov: case INDEX_op_neg: case INDEX_op_not: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: tci_args_rr(insn, &r0, &r1); info->fprintf_func(info->stream, "%-12s %s, %s", diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 1b2f18e370..7478ada393 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -917,7 +917,7 @@ static const TCGOutOpBswap outop_bswap16 = { static void tgen_bswap32(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned flags) { - tcg_out_op_rr(s, INDEX_op_bswap32_i32, a0, a1); + tcg_out_op_rr(s, INDEX_op_bswap32, a0, a1); if (flags & TCG_BSWAP_OS) { tcg_out_sextract(s, TCG_TYPE_REG, a0, a0, 0, 32); } From 613b571c93db4cec7d0023b857c1b857af7a3324 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 21:46:38 -0800 Subject: [PATCH 095/161] tcg: Convert bswap64 to TCGOutOpUnary Use TCGOutOpUnary instead of TCGOutOpBswap because the flags are not used with this opcode; they are merely present for uniformity with the smaller bswaps. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 1 - tcg/aarch64/tcg-target.c.inc | 15 ++++-- tcg/arm/tcg-target.c.inc | 4 ++ tcg/i386/tcg-target-has.h | 1 - tcg/i386/tcg-target.c.inc | 16 ++++-- tcg/loongarch64/tcg-target-has.h | 1 - tcg/loongarch64/tcg-target.c.inc | 15 ++++-- tcg/mips/tcg-target-has.h | 1 - tcg/mips/tcg-target.c.inc | 37 ++++++++------ tcg/ppc/tcg-target-has.h | 1 - tcg/ppc/tcg-target.c.inc | 88 ++++++++++++++++---------------- tcg/riscv/tcg-target-has.h | 1 - tcg/riscv/tcg-target.c.inc | 16 ++++-- tcg/s390x/tcg-target-has.h | 1 - tcg/s390x/tcg-target.c.inc | 15 ++++-- tcg/sparc64/tcg-target-has.h | 1 - tcg/sparc64/tcg-target.c.inc | 4 ++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 2 +- tcg/tcg.c | 7 ++- tcg/tci.c | 2 - tcg/tci/tcg-target-has.h | 1 - tcg/tci/tcg-target.c.inc | 17 ++++-- 23 files changed, 144 insertions(+), 104 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 7c3d3fc637..82d8cd5965 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -19,7 +19,6 @@ #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index a2e45ca5c8..79c0e2e097 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2470,6 +2470,16 @@ static const TCGOutOpBswap outop_bswap32 = { .out_rr = tgen_bswap32, }; +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_XZR, a1); @@ -2637,10 +2647,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); break; - case INDEX_op_bswap64_i64: - tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); - break; - case INDEX_op_deposit_i64: case INDEX_op_deposit_i32: tcg_out_dep(s, ext, a0, a2, args[3], args[4]); @@ -3159,7 +3165,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_bswap64_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extract_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 4ca23bb718..3bbc28c63c 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2163,6 +2163,10 @@ static const TCGOutOpBswap outop_bswap32 = { .out_rr = tgen_bswap32, }; +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_subfi(s, type, a0, 0, a1); diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index ca533ab5cf..6b91b23fe8 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -33,7 +33,6 @@ #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 6d90666ba7..347e01c076 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3104,6 +3104,18 @@ static const TCGOutOpBswap outop_bswap32 = { .out_rr = tgen_bswap32, }; +#if TCG_TARGET_REG_BITS == 64 +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_bswap64(s, a0); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_bswap64, +}; +#endif + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; @@ -3279,9 +3291,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, a0); - break; case INDEX_op_extrh_i64_i32: tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32); break; @@ -3979,7 +3988,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_bswap64_i64: case INDEX_op_extrh_i64_i32: return C_O1_I1(r, 0); diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index e66df31954..10090102f7 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -18,7 +18,6 @@ /* 64-bit operations */ #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index d4bcb06a5f..f3b2f709d2 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1769,6 +1769,16 @@ static const TCGOutOpBswap outop_bswap32 = { .out_rr = tgen_bswap32, }; +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_opc_revb_d(s, a0, a1); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_ZERO, a1); @@ -1860,10 +1870,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1); break; - case INDEX_op_bswap64_i64: - tcg_out_opc_revb_d(s, a0, a1); - break; - case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); @@ -2459,7 +2465,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: - case INDEX_op_bswap64_i64: case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: case INDEX_op_ld8u_i32: diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 2391f5d8bf..24b00f1eec 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -55,7 +55,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #endif diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index ab8f8c9994..baaf0e416b 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -710,19 +710,6 @@ static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) } } -static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret); - } else { - tcg_out_bswap_subr(s, bswap64_addr); - /* delay slot -- never omit the insn, like tcg_out_mov might. */ - tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); - tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); - } -} - static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) { tcg_debug_assert(TCG_TARGET_REG_BITS == 64); @@ -2176,6 +2163,26 @@ static const TCGOutOpBswap outop_bswap32 = { .out_rr = tgen_bswap32, }; +#if TCG_TARGET_REG_BITS == 64 +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg); + tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret); + } else { + tcg_out_bswap_subr(s, bswap64_addr); + /* delay slot -- never omit the insn, like tcg_out_mov might. */ + tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); + tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); + } +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; +#endif /* TCG_TARGET_REG_BITS == 64 */ + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_ZERO, a1); @@ -2267,9 +2274,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, a0, a1); - break; case INDEX_op_extrh_i64_i32: tcg_out_dsra(s, a0, a1, 32); break; @@ -2380,7 +2384,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_bswap64_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index ad0885d635..bd9c3d92ed 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -24,7 +24,6 @@ #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 4527ed3eee..083137d211 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1012,44 +1012,6 @@ static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm) tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2); } -static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src) -{ - TCGReg t0 = dst == src ? TCG_REG_R0 : dst; - TCGReg t1 = dst == src ? dst : TCG_REG_R0; - - if (have_isa_3_10) { - tcg_out32(s, BRD | RA(dst) | RS(src)); - return; - } - - /* - * In the following, - * dep(a, b, m) -> (a & ~m) | (b & m) - * - * Begin with: src = abcdefgh - */ - /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ - tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); - /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ - tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); - /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ - tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); - - /* t0 = rol64(t0, 32) = hgfe0000 */ - tcg_out_rld(s, RLDICL, t0, t0, 32, 0); - /* t1 = rol64(src, 32) = efghabcd */ - tcg_out_rld(s, RLDICL, t1, src, 32, 0); - - /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ - tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); - /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ - tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); - /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ - tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); - - tcg_out_mov(s, TCG_TYPE_REG, dst, t0); -} - /* Emit a move into ret of arg, if it can be done in one insn. */ static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) { @@ -3390,6 +3352,51 @@ static const TCGOutOpBswap outop_bswap32 = { .out_rr = tgen_bswap32, }; +#if TCG_TARGET_REG_BITS == 64 +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) +{ + TCGReg t0 = dst == src ? TCG_REG_R0 : dst; + TCGReg t1 = dst == src ? dst : TCG_REG_R0; + + if (have_isa_3_10) { + tcg_out32(s, BRD | RA(dst) | RS(src)); + return; + } + + /* + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = abcdefgh + */ + /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ + tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); + /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ + tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); + /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ + tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); + + /* t0 = rol64(t0, 32) = hgfe0000 */ + tcg_out_rld(s, RLDICL, t0, t0, 32, 0); + /* t1 = rol64(src, 32) = efghabcd */ + tcg_out_rld(s, RLDICL, t1, src, 32, 0); + + /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ + tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); + /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ + tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); + /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ + tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); + + tcg_out_mov(s, TCG_TYPE_REG, dst, t0); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; +#endif /* TCG_TARGET_REG_BITS == 64 */ + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tcg_out32(s, NEG | RT(a0) | RA(a1)); @@ -3512,10 +3519,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; - case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, args[0], args[1]); - break; - case INDEX_op_deposit_i32: if (const_args[2]) { uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; @@ -4263,7 +4266,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap64_i64: case INDEX_op_extract_i64: case INDEX_op_sextract_i64: return C_O1_I1(r, r); diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index fbe294474a..88fadc2428 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -17,7 +17,6 @@ #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 9b6ca54ae7..00b097d171 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2441,6 +2441,17 @@ static const TCGOutOpBswap outop_bswap32 = { .out_rr = tgen_bswap32, }; +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_bswap, + .out_rr = tgen_bswap64, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_ZERO, a1); @@ -2523,10 +2534,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_bswap64_i64: - tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); - break; - case INDEX_op_add2_i32: tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], const_args[4], const_args[5], false, true); @@ -2864,7 +2871,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: - case INDEX_op_bswap64_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 76cfe4f323..95407f61cf 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -35,7 +35,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index ed2da3f31d..2ed288cfe0 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2776,6 +2776,16 @@ static const TCGOutOpBswap outop_bswap32 = { .out_rr = tgen_bswap32, }; +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_insn(s, RRE, LRVGR, a0, a1); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { if (type == TCG_TYPE_I32) { @@ -2922,10 +2932,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); break; - case INDEX_op_bswap64_i64: - tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); - break; - case INDEX_op_add2_i64: if (const_args[4]) { if ((int64_t)args[4] >= 0) { @@ -3462,7 +3468,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_bswap64_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extract_i32: diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 22837beca9..2ced6f7c1c 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -20,7 +20,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index cbe9c759ec..96ffba9af6 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1733,6 +1733,10 @@ static const TCGOutOpBswap outop_bswap32 = { .base.static_constraint = C_NotImplemented, }; +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tgen_sub(s, type, a0, TCG_REG_G0, a1); diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 4034c73cca..21bef070fe 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -12,7 +12,6 @@ #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. */ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index b1174f60cc..27e700161f 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2184,7 +2184,7 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) tcg_gen_mov_i32(TCGV_HIGH(ret), t0); tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); - } else if (TCG_TARGET_HAS_bswap64_i64) { + } else if (tcg_op_supported(INDEX_op_bswap64_i64, TCG_TYPE_I64, 0)) { tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 571f15626c..f2f2c0dd74 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1112,6 +1112,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { #if TCG_TARGET_REG_BITS == 32 OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2), OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2), +#else + OUTOP(INDEX_op_bswap64_i64, TCGOutOpUnary, outop_bswap64), #endif }; @@ -2371,8 +2373,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: return TCG_TARGET_HAS_extr_i64_i32; - case INDEX_op_bswap64_i64: - return TCG_TARGET_HAS_bswap64_i64; case INDEX_op_add2_i64: return TCG_TARGET_HAS_add2_i64; case INDEX_op_sub2_i64: @@ -5470,6 +5470,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_bswap64_i64: + assert(TCG_TARGET_REG_BITS == 64); + /* fall through */ case INDEX_op_ctpop: case INDEX_op_neg: case INDEX_op_not: diff --git a/tcg/tci.c b/tcg/tci.c index f98c437100..903f996f02 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -788,12 +788,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = (uint32_t)regs[r1]; break; -#if TCG_TARGET_HAS_bswap64_i64 case INDEX_op_bswap64_i64: tci_args_rr(insn, &r0, &r1); regs[r0] = bswap64(regs[r1]); break; -#endif #endif /* TCG_TARGET_REG_BITS == 64 */ /* QEMU specific operations. */ diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index c5c64f4f5d..90aa5c8bbb 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -12,7 +12,6 @@ #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 7478ada393..cbfe92adf3 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -57,7 +57,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap64_i64: case INDEX_op_extract_i32: case INDEX_op_extract_i64: case INDEX_op_sextract_i32: @@ -928,6 +927,18 @@ static const TCGOutOpBswap outop_bswap32 = { .out_rr = tgen_bswap32, }; +#if TCG_TARGET_REG_BITS == 64 +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, INDEX_op_bswap64_i64, a0, a1); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; +#endif + static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { tcg_out_op_rr(s, INDEX_op_neg, a0, a1); @@ -1072,10 +1083,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_rrbb(s, opc, args[0], args[1], args[2], args[3]); break; - case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ - tcg_out_op_rr(s, opc, args[0], args[1]); - break; - CASE_32_64(add2) CASE_32_64(sub2) tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2], From 3ad5d4ccb4bdebdff4e90957bb2b8a93e5e418e2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 10 Jan 2025 21:54:44 -0800 Subject: [PATCH 096/161] tcg: Rename INDEX_op_bswap64_i64 to INDEX_op_bswap64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even though bswap64 can only be used with TCG_TYPE_I64, rename the opcode to maintain uniformity. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 5 +++-- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 6 +++--- tcg/tcg-op.c | 4 ++-- tcg/tcg.c | 6 +++--- tcg/tci.c | 4 ++-- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index e89ede54fa..72a23d6ea2 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -431,10 +431,11 @@ Misc they apply from bit 31 instead of bit 15. On TCG_TYPE_I32, the flags should be zero. - * - bswap64_i64 *t0*, *t1*, *flags* + * - bswap64 *t0*, *t1*, *flags* - | 64 bit byte swap. The flags are ignored, but still present - for consistency with the other bswap opcodes. + for consistency with the other bswap opcodes. For future + compatibility, the flags should be zero. * - discard_i32/i64 *t0* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 296dffe99a..1d27b882fe 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -45,6 +45,7 @@ DEF(and, 1, 2, 0, TCG_OPF_INT) DEF(andc, 1, 2, 0, TCG_OPF_INT) DEF(bswap16, 1, 1, 1, TCG_OPF_INT) DEF(bswap32, 1, 1, 1, TCG_OPF_INT) +DEF(bswap64, 1, 1, 1, TCG_OPF_INT) DEF(clz, 1, 2, 0, TCG_OPF_INT) DEF(ctpop, 1, 1, 0, TCG_OPF_INT) DEF(ctz, 1, 2, 0, TCG_OPF_INT) @@ -121,8 +122,6 @@ DEF(extu_i32_i64, 1, 1, 0, 0) DEF(extrl_i64_i32, 1, 1, 0, 0) DEF(extrh_i64_i32, 1, 1, 0, 0) -DEF(bswap64_i64, 1, 1, 1, 0) - DEF(add2_i64, 2, 4, 0, 0) DEF(sub2_i64, 2, 4, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 6fa968624d..a860b62109 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -526,7 +526,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, x = bswap32(x); return y & TCG_BSWAP_OS ? (int32_t)x : x; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: return bswap64(x); case INDEX_op_ext_i32_i64: @@ -1580,7 +1580,7 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op) z_mask = bswap32(z_mask); sign = INT32_MIN; break; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: z_mask = bswap64(z_mask); sign = INT64_MIN; break; @@ -2870,7 +2870,7 @@ void tcg_optimize(TCGContext *s) break; case INDEX_op_bswap16: case INDEX_op_bswap32: - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: done = fold_bswap(&ctx, op); break; case INDEX_op_clz: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 27e700161f..ba062191ac 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2184,8 +2184,8 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) tcg_gen_mov_i32(TCGV_HIGH(ret), t0); tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); - } else if (tcg_op_supported(INDEX_op_bswap64_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0); + } else if (tcg_op_supported(INDEX_op_bswap64, TCG_TYPE_I64, 0)) { + tcg_gen_op3i_i64(INDEX_op_bswap64, ret, arg, 0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index f2f2c0dd74..1ba86dd515 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1113,7 +1113,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2), OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2), #else - OUTOP(INDEX_op_bswap64_i64, TCGOutOpUnary, outop_bswap64), + OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64), #endif }; @@ -2939,7 +2939,7 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) break; case INDEX_op_bswap16: case INDEX_op_bswap32: - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: { TCGArg flags = op->args[k]; const char *name = NULL; @@ -5470,7 +5470,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: assert(TCG_TARGET_REG_BITS == 64); /* fall through */ case INDEX_op_ctpop: diff --git a/tcg/tci.c b/tcg/tci.c index 903f996f02..30928c3412 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -788,7 +788,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = (uint32_t)regs[r1]; break; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: tci_args_rr(insn, &r0, &r1); regs[r0] = bswap64(regs[r1]); break; @@ -1009,7 +1009,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_not: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: tci_args_rr(insn, &r0, &r1); info->fprintf_func(info->stream, "%-12s %s, %s", op_name, str_r(r0), str_r(r1)); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index cbfe92adf3..4fc857ad35 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -930,7 +930,7 @@ static const TCGOutOpBswap outop_bswap32 = { #if TCG_TARGET_REG_BITS == 64 static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) { - tcg_out_op_rr(s, INDEX_op_bswap64_i64, a0, a1); + tcg_out_op_rr(s, INDEX_op_bswap64, a0, a1); } static const TCGOutOpUnary outop_bswap64 = { From 5a4d034f3cbc6d3bffc983b24a2746e9fe9b91cd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 11 Jan 2025 07:55:47 -0800 Subject: [PATCH 097/161] tcg: Convert extract to TCGOutOpExtract Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 28 +++++++----- tcg/arm/tcg-target.c.inc | 23 +++++----- tcg/i386/tcg-target.c.inc | 77 +++++++++++++++++--------------- tcg/loongarch64/tcg-target.c.inc | 33 +++++++------- tcg/mips/tcg-target.c.inc | 35 +++++++-------- tcg/ppc/tcg-target.c.inc | 35 +++++++-------- tcg/riscv/tcg-target.c.inc | 54 +++++++++++----------- tcg/s390x/tcg-target.c.inc | 14 +++--- tcg/sparc64/tcg-target.c.inc | 16 ++++--- tcg/tcg.c | 20 +++++++++ tcg/tci/tcg-target.c.inc | 8 ++-- 11 files changed, 191 insertions(+), 152 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 79c0e2e097..6c9d6094a2 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2567,6 +2567,22 @@ static const TCGOutOpMovcond outop_movcond = { .out = tgen_movcond, }; +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + uint64_t mask = MAKE_64BIT_MASK(0, len); + tcg_out_logicali(s, I3404_ANDI, type, a0, a1, mask); + } else { + tcg_out_ubfm(s, type, a0, a1, ofs, ofs + len - 1); + } +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2652,16 +2668,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_dep(s, ext, a0, a2, args[3], args[4]); break; - case INDEX_op_extract_i64: - case INDEX_op_extract_i32: - if (a2 == 0) { - uint64_t mask = MAKE_64BIT_MASK(0, args[3]); - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, mask); - } else { - tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); - } - break; - case INDEX_op_sextract_i64: case INDEX_op_sextract_i32: tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); @@ -3167,8 +3173,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: return C_O1_I1(r, r); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 3bbc28c63c..bc060b20f2 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -981,19 +981,19 @@ static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd, | (ofs << 7) | ((ofs + len - 1) << 16)); } -static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd, - TCGReg rn, int ofs, int len) +static void tgen_extract(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn, + unsigned ofs, unsigned len) { /* According to gcc, AND can be faster. */ if (ofs == 0 && len <= 8) { - tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, + tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, encode_imm_nofail((1 << len) - 1)); return; } if (use_armv7_instructions) { /* ubfx */ - tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn + tcg_out32(s, 0x07e00050 | (COND_AL << 28) | (rd << 12) | rn | (ofs << 7) | ((len - 1) << 16)); return; } @@ -1002,17 +1002,24 @@ static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd, switch (len) { case 8: /* uxtb */ - tcg_out32(s, 0x06ef0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn); + tcg_out32(s, 0x06ef0070 | (COND_AL << 28) | + (rd << 12) | (ofs << 7) | rn); break; case 16: /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn); + tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | + (rd << 12) | (ofs << 7) | rn); break; default: g_assert_not_reached(); } } +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, int ofs, int len) { @@ -2392,9 +2399,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_deposit(s, COND_AL, args[0], args[2], args[3], args[4], const_args[2]); break; - case INDEX_op_extract_i32: - tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]); - break; case INDEX_op_sextract_i32: tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]); break; @@ -2444,7 +2448,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_extract_i32: case INDEX_op_sextract_i32: return C_O1_I1(r, r); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 347e01c076..b26c93bdb1 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3138,6 +3138,47 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8u(s, a0, a1); + return; + case 16: + tcg_out_ext16u(s, a0, a1); + return; + case 32: + tcg_out_ext32u(s, a0, a1); + return; + } + } else if (TCG_TARGET_REG_BITS == 64 && ofs + len == 32) { + /* This is a 32-bit zero-extending right shift. */ + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + tcg_out_shifti(s, SHIFT_SHR, a0, ofs); + return; + } else if (ofs == 8 && len == 8) { + /* + * On the off-chance that we can use the high-byte registers. + * Otherwise we emit the same ext16 + shift pattern that we + * would have gotten from the normal tcg-op.c expansion. + */ + if (a1 < 4 && (TCG_TARGET_REG_BITS == 32 || a0 < 8)) { + tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4); + } else { + tcg_out_ext16u(s, a0, a1); + tcg_out_shifti(s, SHIFT_SHR, a0, 8); + } + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3328,40 +3369,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_extract_i64: - if (a2 + args[3] == 32) { - if (a2 == 0) { - tcg_out_ext32u(s, a0, a1); - break; - } - /* This is a 32-bit zero-extending right shift. */ - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tcg_out_shifti(s, SHIFT_SHR, a0, a2); - break; - } - /* FALLTHRU */ - case INDEX_op_extract_i32: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8u(s, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16u(s, a0, a1); - } else if (a2 == 8 && args[3] == 8) { - /* - * On the off-chance that we can use the high-byte registers. - * Otherwise we emit the same ext16 + shift pattern that we - * would have gotten from the normal tcg-op.c expansion. - */ - if (a1 < 4 && a0 < 8) { - tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4); - } else { - tcg_out_ext16u(s, a0, a1); - tcg_out_shifti(s, SHIFT_SHR, a0, 8); - } - } else { - g_assert_not_reached(); - } - break; - case INDEX_op_sextract_i64: if (a2 == 0 && args[3] == 8) { tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1); @@ -3994,8 +4001,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: return C_O1_I1(r, r); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index f3b2f709d2..f63d33e9ac 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1799,6 +1799,22 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0 && len <= 12) { + tcg_out_opc_andi(s, a0, a1, (1 << len) - 1); + } else if (type == TCG_TYPE_I32) { + tcg_out_opc_bstrpick_w(s, a0, a1, ofs, ofs + len - 1); + } else { + tcg_out_opc_bstrpick_d(s, a0, a1, ofs, ofs + len - 1); + } +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1828,21 +1844,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_srai_d(s, a0, a1, 32); break; - case INDEX_op_extract_i32: - if (a2 == 0 && args[3] <= 12) { - tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1); - } else { - tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1); - } - break; - case INDEX_op_extract_i64: - if (a2 == 0 && args[3] <= 12) { - tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1); - } else { - tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1); - } - break; - case INDEX_op_sextract_i64: if (a2 + args[3] == 32) { if (a2 == 0) { @@ -2461,8 +2462,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ext_i32_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: case INDEX_op_ld8s_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index baaf0e416b..dbb4b9355d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2203,6 +2203,23 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0 && len <= 16) { + tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << len) - 1); + } else if (type == TCG_TYPE_I32) { + tcg_out_opc_bf(s, OPC_EXT, a0, a1, len - 1, ofs); + } else { + tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, + a0, a1, len - 1, ofs); + } +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2286,22 +2303,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, args[3] + args[4] - 1, args[3]); break; - case INDEX_op_extract_i32: - if (a2 == 0 && args[3] <= 16) { - tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << args[3]) - 1); - } else { - tcg_out_opc_bf(s, OPC_EXT, a0, a1, args[3] - 1, a2); - } - break; - case INDEX_op_extract_i64: - if (a2 == 0 && args[3] <= 16) { - tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << args[3]) - 1); - } else { - tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, - a0, a1, args[3] - 1, a2); - } - break; - case INDEX_op_sextract_i64: if (a2 == 0 && args[3] == 32) { tcg_out_ext32s(s, a0, a1); @@ -2375,7 +2376,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_extract_i32: case INDEX_op_sextract_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i64: @@ -2388,7 +2388,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: - case INDEX_op_extract_i64: case INDEX_op_sextract_i64: return C_O1_I1(r, r); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 083137d211..a8558a47b7 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3417,6 +3417,23 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0 && len <= 16) { + tgen_andi(s, TCG_TYPE_I32, a0, a1, (1 << len) - 1); + } else if (type == TCG_TYPE_I32) { + tcg_out_rlw(s, RLWINM, a0, a1, 32 - ofs, 32 - len, 31); + } else { + tcg_out_rld(s, RLDICL, a0, a1, 64 - ofs, 64 - len); + } +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3538,22 +3555,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_extract_i32: - if (args[2] == 0 && args[3] <= 16) { - tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1)); - break; - } - tcg_out_rlw(s, RLWINM, args[0], args[1], - 32 - args[2], 32 - args[3], 31); - break; - case INDEX_op_extract_i64: - if (args[2] == 0 && args[3] <= 16) { - tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1)); - break; - } - tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]); - break; - case INDEX_op_sextract_i64: if (args[2] + args[3] == 32) { if (args[2] == 0) { @@ -4255,7 +4256,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_extract_i32: case INDEX_op_sextract_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i64: @@ -4266,7 +4266,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i64: case INDEX_op_sextract_i64: return C_O1_I1(r, r); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 00b097d171..85d978763c 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2472,6 +2472,34 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 16: + tcg_out_ext16u(s, a0, a1); + return; + case 32: + tcg_out_ext32u(s, a0, a1); + return; + } + } + if (ofs + len == 32) { + tgen_shli(s, TCG_TYPE_I32, a0, a1, ofs); + return; + } + if (len == 1) { + tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, ofs); + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2572,30 +2600,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mb(s, a0); break; - case INDEX_op_extract_i64: - if (a2 + args[3] == 32) { - if (a2 == 0) { - tcg_out_ext32u(s, a0, a1); - } else { - tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2); - } - break; - } - /* FALLTHRU */ - case INDEX_op_extract_i32: - switch (args[3]) { - case 1: - tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, a2); - break; - case 16: - tcg_debug_assert(a2 == 0); - tcg_out_ext16u(s, a0, a1); - break; - default: - g_assert_not_reached(); - } - break; - case INDEX_op_sextract_i64: if (a2 + args[3] == 32) { if (a2 == 0) { @@ -2867,8 +2871,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ext_i32_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: return C_O1_I1(r, r); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 2ed288cfe0..96e2dc0ad5 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1563,8 +1563,8 @@ static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, tcg_out_risbg(s, dest, src, msb, lsb, ofs, z); } -static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src, - int ofs, int len) +static void tgen_extract(TCGContext *s, TCGType type, TCGReg dest, + TCGReg src, unsigned ofs, unsigned len) { if (ofs == 0) { switch (len) { @@ -1582,6 +1582,11 @@ static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src, tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1); } +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + static void tgen_sextract(TCGContext *s, TCGReg dest, TCGReg src, int ofs, int len) { @@ -2975,9 +2980,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(extract): - tgen_extract(s, args[0], args[1], args[2], args[3]); - break; OP_32_64(sextract): tgen_sextract(s, args[0], args[1], args[2], args[3]); break; @@ -3470,8 +3472,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: return C_O1_I1(r, r); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 96ffba9af6..cba1dd009c 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1757,6 +1757,17 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + tcg_debug_assert(ofs + len == 32); + tcg_out_arithi(s, a0, a1, ofs, SHIFT_SRL); +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1857,10 +1868,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mb(s, a0); break; - case INDEX_op_extract_i64: - tcg_debug_assert(a2 + args[3] == 32); - tcg_out_arithi(s, a0, a1, a2, SHIFT_SRL); - break; case INDEX_op_sextract_i64: tcg_debug_assert(a2 + args[3] == 32); tcg_out_arithi(s, a0, a1, a2, SHIFT_SRA); @@ -1897,7 +1904,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i64: case INDEX_op_sextract_i64: case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: diff --git a/tcg/tcg.c b/tcg/tcg.c index 1ba86dd515..36c5e9c847 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1013,6 +1013,12 @@ typedef struct TCGOutOpDivRem { TCGReg a0, TCGReg a1, TCGReg a4); } TCGOutOpDivRem; +typedef struct TCGOutOpExtract { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len); +} TCGOutOpExtract; + typedef struct TCGOutOpMovcond { TCGOutOp base; void (*out)(TCGContext *s, TCGType type, TCGCond cond, @@ -1085,6 +1091,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), + OUTOP(INDEX_op_extract_i32, TCGOutOpExtract, outop_extract), + OUTOP(INDEX_op_extract_i64, TCGOutOpExtract, outop_extract), OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), @@ -5511,6 +5519,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_extract_i32: + case INDEX_op_extract_i64: + { + const TCGOutOpExtract *out = + container_of(all_outop[op->opc], TCGOutOpExtract, base); + + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } + break; + case INDEX_op_muls2: case INDEX_op_mulu2: { diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 4fc857ad35..d8cf5d237b 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -57,8 +57,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: return C_O1_I1(r, r); @@ -444,6 +442,11 @@ static void tcg_out_extract(TCGContext *s, TCGType type, TCGReg rd, tcg_out_op_rrbb(s, opc, rd, rs, pos, len); } +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tcg_out_extract, +}; + static void tcg_out_sextract(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs, unsigned pos, unsigned len) { @@ -1078,7 +1081,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], args[3], args[4]); break; - CASE_32_64(extract) /* Optional (TCG_TARGET_HAS_extract_*). */ CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */ tcg_out_op_rrbb(s, opc, args[0], args[1], args[2], args[3]); break; From 07d5d502f2b4a8eedda3c6bdfcab31dc36d1d1d5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 11 Jan 2025 09:01:46 -0800 Subject: [PATCH 098/161] tcg: Merge INDEX_op_extract_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 6 +++--- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 14 ++++---------- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 9 +++------ tcg/tci.c | 12 ++++-------- tcg/tci/tcg-target.c.inc | 5 +---- 7 files changed, 20 insertions(+), 37 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 72a23d6ea2..2843f88772 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -456,7 +456,7 @@ Misc | | *dest* = (*t1* & ~0x0f00) | ((*t2* << 8) & 0x0f00) - * - extract_i32/i64 *dest*, *t1*, *pos*, *len* + * - extract *dest*, *t1*, *pos*, *len* sextract_i32/i64 *dest*, *t1*, *pos*, *len* @@ -467,12 +467,12 @@ Misc to the left with zeros; for sextract_*, the result will be extended to the left with copies of the bitfield sign bit at *pos* + *len* - 1. | - | For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field + | For example, "sextract dest, t1, 8, 4" indicates a 4-bit field at bit 8. This operation would be equivalent to | | *dest* = (*t1* << 20) >> 28 | - | (using an arithmetic right shift). + | (using an arithmetic right shift) on TCG_TYPE_I32. * - extract2_i32/i64 *dest*, *t1*, *t2*, *pos* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 1d27b882fe..a8c304ca63 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -54,6 +54,7 @@ DEF(divs2, 2, 3, 0, TCG_OPF_INT) DEF(divu, 1, 2, 0, TCG_OPF_INT) DEF(divu2, 2, 3, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) +DEF(extract, 1, 1, 2, TCG_OPF_INT) DEF(movcond, 1, 4, 1, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) DEF(muls2, 2, 2, 0, TCG_OPF_INT) @@ -89,7 +90,6 @@ DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* shifts/rotates */ DEF(deposit_i32, 1, 2, 2, 0) -DEF(extract_i32, 1, 1, 2, 0) DEF(sextract_i32, 1, 1, 2, 0) DEF(extract2_i32, 1, 2, 1, 0) @@ -112,7 +112,6 @@ DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* shifts/rotates */ DEF(deposit_i64, 1, 2, 2, 0) -DEF(extract_i64, 1, 1, 2, 0) DEF(sextract_i64, 1, 1, 2, 0) DEF(extract2_i64, 1, 2, 1, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index a860b62109..fbfcbf23cd 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2317,7 +2317,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode uext_opc = 0, sext_opc = 0; + TCGOpcode sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; TCGOp *op2; @@ -2338,17 +2338,11 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) switch (ctx->type) { case TCG_TYPE_I32: - if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) { - uext_opc = INDEX_op_extract_i32; - } if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) { sext_opc = INDEX_op_sextract_i32; } break; case TCG_TYPE_I64: - if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) { - uext_opc = INDEX_op_extract_i64; - } if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) { sext_opc = INDEX_op_sextract_i64; } @@ -2367,8 +2361,8 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) op->args[2] = sh; op->args[3] = 1; return; - } else if (sh && uext_opc) { - op->opc = uext_opc; + } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_extract; op->args[1] = src1; op->args[2] = sh; op->args[3] = 1; @@ -2897,7 +2891,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_eqv_vec: done = fold_eqv(&ctx, op); break; - CASE_OP_32_64(extract): + case INDEX_op_extract: done = fold_extract(&ctx, op); break; CASE_OP_32_64(extract2): diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index ba062191ac..ddade73b7b 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -998,7 +998,7 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg, } if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) { - tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len); + tcg_gen_op4ii_i32(INDEX_op_extract, ret, arg, ofs, len); return; } if (ofs == 0) { @@ -1008,7 +1008,7 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg, /* Assume that zero-extension, if available, is cheaper than a shift. */ if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, ofs + len)) { - tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, 0, ofs + len); + tcg_gen_op4ii_i32(INDEX_op_extract, ret, arg, 0, ofs + len); tcg_gen_shri_i32(ret, ret, ofs); return; } @@ -2670,7 +2670,7 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, } if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) { - tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len); + tcg_gen_op4ii_i64(INDEX_op_extract, ret, arg, ofs, len); return; } if (ofs == 0) { @@ -2680,7 +2680,7 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, /* Assume that zero-extension, if available, is cheaper than a shift. */ if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, ofs + len)) { - tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, 0, ofs + len); + tcg_gen_op4ii_i64(INDEX_op_extract, ret, arg, 0, ofs + len); tcg_gen_shri_i64(ret, ret, ofs); return; } diff --git a/tcg/tcg.c b/tcg/tcg.c index 36c5e9c847..ce0d862b19 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1091,8 +1091,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), - OUTOP(INDEX_op_extract_i32, TCGOutOpExtract, outop_extract), - OUTOP(INDEX_op_extract_i64, TCGOutOpExtract, outop_extract), + OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract), OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), @@ -2326,6 +2325,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_add: case INDEX_op_and: case INDEX_op_brcond: + case INDEX_op_extract: case INDEX_op_mov: case INDEX_op_movcond: case INDEX_op_negsetcond: @@ -2342,7 +2342,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_extract_i32: case INDEX_op_sextract_i32: case INDEX_op_deposit_i32: return true; @@ -2371,7 +2370,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i64: case INDEX_op_sextract_i64: case INDEX_op_deposit_i64: return TCG_TARGET_REG_BITS == 64; @@ -5519,8 +5517,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: + case INDEX_op_extract: { const TCGOutOpExtract *out = container_of(all_outop[op->opc], TCGOutOpExtract, base); diff --git a/tcg/tci.c b/tcg/tci.c index 30928c3412..6345029802 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -27,6 +27,7 @@ #define ctpop_tr glue(ctpop, TCG_TARGET_REG_BITS) +#define extract_tr glue(extract, TCG_TARGET_REG_BITS) /* * Enable TCI assertions only when debugging TCG (and without NDEBUG defined). @@ -656,9 +657,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); regs[r0] = deposit32(regs[r1], pos, len, regs[r2]); break; - case INDEX_op_extract_i32: + case INDEX_op_extract: tci_args_rrbb(insn, &r0, &r1, &pos, &len); - regs[r0] = extract32(regs[r1], pos, len); + regs[r0] = extract_tr(regs[r1], pos, len); break; case INDEX_op_sextract_i32: tci_args_rrbb(insn, &r0, &r1, &pos, &len); @@ -772,10 +773,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); break; - case INDEX_op_extract_i64: - tci_args_rrbb(insn, &r0, &r1, &pos, &len); - regs[r0] = extract64(regs[r1], pos, len); - break; case INDEX_op_sextract_i64: tci_args_rrbb(insn, &r0, &r1, &pos, &len); regs[r0] = sextract64(regs[r1], pos, len); @@ -1057,8 +1054,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), str_r(r1), str_r(r2), pos, len); break; - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: + case INDEX_op_extract: case INDEX_op_sextract_i32: case INDEX_op_sextract_i64: tci_args_rrbb(insn, &r0, &r1, &pos, &len); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d8cf5d237b..ede11d9e70 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -436,10 +436,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, static void tcg_out_extract(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs, unsigned pos, unsigned len) { - TCGOpcode opc = type == TCG_TYPE_I32 ? - INDEX_op_extract_i32 : - INDEX_op_extract_i64; - tcg_out_op_rrbb(s, opc, rd, rs, pos, len); + tcg_out_op_rrbb(s, INDEX_op_extract, rd, rs, pos, len); } static const TCGOutOpExtract outop_extract = { From 05a1129e23bfed1fe799a169e0eca0f676fa5016 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 11:44:30 -0800 Subject: [PATCH 099/161] tcg: Convert sextract to TCGOutOpExtract Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 18 +++++---- tcg/arm/tcg-target.c.inc | 21 ++++++----- tcg/i386/tcg-target.c.inc | 63 ++++++++++++++++---------------- tcg/loongarch64/tcg-target.c.inc | 49 ++++++++++++++----------- tcg/mips/tcg-target.c.inc | 42 ++++++++++++--------- tcg/ppc/tcg-target.c.inc | 49 ++++++++++++++----------- tcg/riscv/tcg-target.c.inc | 49 ++++++++++++++----------- tcg/s390x/tcg-target.c.inc | 15 ++++---- tcg/sparc64/tcg-target.c.inc | 18 ++++++--- tcg/tcg.c | 4 ++ tcg/tci/tcg-target.c.inc | 11 +++--- 11 files changed, 188 insertions(+), 151 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 6c9d6094a2..00400f6ea7 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2583,6 +2583,17 @@ static const TCGOutOpExtract outop_extract = { .out_rr = tgen_extract, }; +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + tcg_out_sbfm(s, type, a0, a1, ofs, ofs + len - 1); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2668,11 +2679,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_dep(s, ext, a0, a2, args[3], args[4]); break; - case INDEX_op_sextract_i64: - case INDEX_op_sextract_i32: - tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); - break; - case INDEX_op_extract2_i64: case INDEX_op_extract2_i32: tcg_out_extr(s, ext, a0, a2, a1, args[3]); @@ -3173,8 +3179,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index bc060b20f2..aebe48679c 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1020,12 +1020,12 @@ static const TCGOutOpExtract outop_extract = { .out_rr = tgen_extract, }; -static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd, - TCGReg rn, int ofs, int len) +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn, + unsigned ofs, unsigned len) { if (use_armv7_instructions) { /* sbfx */ - tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn + tcg_out32(s, 0x07a00050 | (COND_AL << 28) | (rd << 12) | rn | (ofs << 7) | ((len - 1) << 16)); return; } @@ -1034,17 +1034,24 @@ static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd, switch (len) { case 8: /* sxtb */ - tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn); + tcg_out32(s, 0x06af0070 | (COND_AL << 28) | + (rd << 12) | (ofs << 7) | rn); break; case 16: /* sxth */ - tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn); + tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | + (rd << 12) | (ofs << 7) | rn); break; default: g_assert_not_reached(); } } +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tcg_out_ld32u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, int32_t offset) @@ -2399,9 +2406,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_deposit(s, COND_AL, args[0], args[2], args[3], args[4], const_args[2]); break; - case INDEX_op_sextract_i32: - tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]); - break; case INDEX_op_extract2_i32: /* ??? These optimization vs zero should be generic. */ /* ??? But we can't substitute 2 for 1 in the opcode stream yet. */ @@ -2448,7 +2452,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_sextract_i32: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index b26c93bdb1..6a5414ab3a 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3180,6 +3180,38 @@ static const TCGOutOpExtract outop_extract = { .out_rr = tgen_extract, }; +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: + tcg_out_ext32s(s, a0, a1); + return; + } + } else if (ofs == 8 && len == 8) { + if (type == TCG_TYPE_I32 && a1 < 4 && a0 < 8) { + tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4); + } else { + tcg_out_ext16s(s, type, a0, a1); + tgen_sari(s, type, a0, a0, 8); + } + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -3369,35 +3401,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_sextract_i64: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1); - } else if (a2 == 0 && args[3] == 32) { - tcg_out_ext32s(s, a0, a1); - } else { - g_assert_not_reached(); - } - break; - - case INDEX_op_sextract_i32: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1); - } else if (a2 == 8 && args[3] == 8) { - if (a1 < 4 && a0 < 8) { - tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4); - } else { - tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1); - tcg_out_shifti(s, SHIFT_SAR, a0, 8); - } - } else { - g_assert_not_reached(); - } - break; - OP_32_64(extract2): /* Note that SHRD outputs to the r/m operand. */ tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); @@ -4001,8 +4004,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: return C_O1_I1(r, r); case INDEX_op_extract2_i32: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index f63d33e9ac..2ea5234097 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1816,6 +1816,33 @@ static const TCGOutOpExtract outop_extract = { .out_rr = tgen_extract, }; +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: + tcg_out_ext32s(s, a0, a1); + return; + } + } else if (ofs + len == 32) { + tcg_out_opc_srai_w(s, a0, a1, ofs); + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1844,26 +1871,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_srai_d(s, a0, a1, 32); break; - case INDEX_op_sextract_i64: - if (a2 + args[3] == 32) { - if (a2 == 0) { - tcg_out_ext32s(s, a0, a1); - } else { - tcg_out_opc_srai_w(s, a0, a1, a2); - } - break; - } - /* FALLTHRU */ - case INDEX_op_sextract_i32: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1); - } else { - g_assert_not_reached(); - } - break; - case INDEX_op_deposit_i32: tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1); break; @@ -2462,8 +2469,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ext_i32_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: case INDEX_op_ld8u_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index dbb4b9355d..56c58bf82d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2221,6 +2221,30 @@ static const TCGOutOpExtract outop_extract = { .out_rr = tgen_extract, }; +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: + tcg_out_ext32s(s, a0, a1); + return; + } + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2303,22 +2327,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, args[3] + args[4] - 1, args[3]); break; - case INDEX_op_sextract_i64: - if (a2 == 0 && args[3] == 32) { - tcg_out_ext32s(s, a0, a1); - break; - } - /* FALLTHRU */ - case INDEX_op_sextract_i32: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1); - } else { - g_assert_not_reached(); - } - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I32); break; @@ -2376,7 +2384,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_sextract_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i64: case INDEX_op_ld16u_i64: @@ -2388,7 +2395,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: - case INDEX_op_sextract_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index a8558a47b7..3d1ffa9130 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3434,6 +3434,33 @@ static const TCGOutOpExtract outop_extract = { .out_rr = tgen_extract, }; +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: + tcg_out_ext32s(s, a0, a1); + return; + } + } else if (ofs + len == 32) { + tcg_out_sari32(s, a0, a1, ofs); + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3555,26 +3582,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_sextract_i64: - if (args[2] + args[3] == 32) { - if (args[2] == 0) { - tcg_out_ext32s(s, args[0], args[1]); - } else { - tcg_out_sari32(s, args[0], args[1], args[2]); - } - break; - } - /* FALLTHRU */ - case INDEX_op_sextract_i32: - if (args[2] == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_I32, args[0], args[1]); - } else if (args[2] == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_I32, args[0], args[1]); - } else { - g_assert_not_reached(); - } - break; - #if TCG_TARGET_REG_BITS == 64 case INDEX_op_add2_i64: #else @@ -4256,7 +4263,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld16u_i32: case INDEX_op_ld16s_i32: case INDEX_op_ld_i32: - case INDEX_op_sextract_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i64: case INDEX_op_ld16u_i64: @@ -4266,7 +4272,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_sextract_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 85d978763c..dc2b487844 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2501,6 +2501,33 @@ static const TCGOutOpExtract outop_extract = { .out_rr = tgen_extract, }; +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: + tcg_out_ext32s(s, a0, a1); + return; + } + } else if (ofs + len == 32) { + tgen_sari(s, TCG_TYPE_I32, a0, a1, ofs); + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2600,26 +2627,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mb(s, a0); break; - case INDEX_op_sextract_i64: - if (a2 + args[3] == 32) { - if (a2 == 0) { - tcg_out_ext32s(s, a0, a1); - } else { - tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2); - } - break; - } - /* FALLTHRU */ - case INDEX_op_sextract_i32: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1); - } else { - g_assert_not_reached(); - } - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ @@ -2871,8 +2878,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ext_i32_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 96e2dc0ad5..ab178bebc8 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1587,8 +1587,8 @@ static const TCGOutOpExtract outop_extract = { .out_rr = tgen_extract, }; -static void tgen_sextract(TCGContext *s, TCGReg dest, TCGReg src, - int ofs, int len) +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg dest, + TCGReg src, unsigned ofs, unsigned len) { if (ofs == 0) { switch (len) { @@ -1606,6 +1606,11 @@ static void tgen_sextract(TCGContext *s, TCGReg dest, TCGReg src, g_assert_not_reached(); } +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest) { ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; @@ -2980,10 +2985,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - OP_32_64(sextract): - tgen_sextract(s, args[0], args[1], args[2], args[3]); - break; - case INDEX_op_mb: /* The host memory model is quite strong, we simply need to serialize the instruction stream. */ @@ -3472,8 +3473,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: return C_O1_I1(r, r); case INDEX_op_qemu_ld_i32: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index cba1dd009c..0f2bec21e9 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1769,6 +1769,18 @@ static const TCGOutOpExtract outop_extract = { .out_rr = tgen_extract, }; +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + tcg_debug_assert(ofs + len == 32); + tcg_out_arithi(s, a0, a1, ofs, SHIFT_SRA); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1868,11 +1880,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_mb(s, a0); break; - case INDEX_op_sextract_i64: - tcg_debug_assert(a2 + args[3] == 32); - tcg_out_arithi(s, a0, a1, a2, SHIFT_SRA); - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ @@ -1904,7 +1911,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_sextract_i64: case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); diff --git a/tcg/tcg.c b/tcg/tcg.c index ce0d862b19..7f5fa25062 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1111,6 +1111,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr), OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond), + OUTOP(INDEX_op_sextract_i32, TCGOutOpExtract, outop_sextract), + OUTOP(INDEX_op_sextract_i64, TCGOutOpExtract, outop_sextract), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), @@ -5518,6 +5520,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_extract: + case INDEX_op_sextract_i32: + case INDEX_op_sextract_i64: { const TCGOutOpExtract *out = container_of(all_outop[op->opc], TCGOutOpExtract, base); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index ede11d9e70..e013321ac7 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -57,8 +57,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: @@ -453,6 +451,11 @@ static void tcg_out_sextract(TCGContext *s, TCGType type, TCGReg rd, tcg_out_op_rrbb(s, opc, rd, rs, pos, len); } +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tcg_out_sextract, +}; + static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { tcg_out_sextract(s, type, rd, rs, 0, 8); @@ -1078,10 +1081,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], args[3], args[4]); break; - CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */ - tcg_out_op_rrbb(s, opc, args[0], args[1], args[2], args[3]); - break; - CASE_32_64(add2) CASE_32_64(sub2) tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2], From fa361eefac24dcaa1d6dfbc433fce0652fdd8ba8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 11:50:09 -0800 Subject: [PATCH 100/161] tcg: Merge INDEX_op_sextract_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 2 +- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 22 +++------------------- tcg/tcg-op.c | 12 ++++++------ tcg/tcg.c | 9 +++------ tcg/tci.c | 12 ++++-------- tcg/tci/tcg-target.c.inc | 5 +---- 7 files changed, 19 insertions(+), 46 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 2843f88772..ca7550f68c 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -458,7 +458,7 @@ Misc * - extract *dest*, *t1*, *pos*, *len* - sextract_i32/i64 *dest*, *t1*, *pos*, *len* + sextract *dest*, *t1*, *pos*, *len* - | Extract a bitfield from *t1*, placing the result in *dest*. | diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index a8c304ca63..4ace1f85c4 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -74,6 +74,7 @@ DEF(rotl, 1, 2, 0, TCG_OPF_INT) DEF(rotr, 1, 2, 0, TCG_OPF_INT) DEF(sar, 1, 2, 0, TCG_OPF_INT) DEF(setcond, 1, 2, 1, TCG_OPF_INT) +DEF(sextract, 1, 1, 2, TCG_OPF_INT) DEF(shl, 1, 2, 0, TCG_OPF_INT) DEF(shr, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) @@ -90,7 +91,6 @@ DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* shifts/rotates */ DEF(deposit_i32, 1, 2, 2, 0) -DEF(sextract_i32, 1, 1, 2, 0) DEF(extract2_i32, 1, 2, 1, 0) DEF(add2_i32, 2, 4, 0, 0) @@ -112,7 +112,6 @@ DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* shifts/rotates */ DEF(deposit_i64, 1, 2, 2, 0) -DEF(sextract_i64, 1, 1, 2, 0) DEF(extract2_i64, 1, 2, 1, 0) /* size changing ops */ diff --git a/tcg/optimize.c b/tcg/optimize.c index fbfcbf23cd..d324cbf7fe 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2317,7 +2317,6 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; TCGOp *op2; @@ -2336,27 +2335,12 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } sh = ctz64(val); - switch (ctx->type) { - case TCG_TYPE_I32: - if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) { - sext_opc = INDEX_op_sextract_i32; - } - break; - case TCG_TYPE_I64: - if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) { - sext_opc = INDEX_op_sextract_i64; - } - break; - default: - g_assert_not_reached(); - } - ret = op->args[0]; src1 = op->args[1]; inv = cond == TCG_COND_TSTEQ; - if (sh && sext_opc && neg && !inv) { - op->opc = sext_opc; + if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_sextract; op->args[1] = src1; op->args[2] = sh; op->args[3] = 1; @@ -3019,7 +3003,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_bitsel_vec: done = fold_bitsel_vec(&ctx, op); break; - CASE_OP_32_64(sextract): + case INDEX_op_sextract: done = fold_sextract(&ctx, op); break; case INDEX_op_sub: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index ddade73b7b..d3f3c9d248 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1043,19 +1043,19 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, } if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) { - tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len); + tcg_gen_op4ii_i32(INDEX_op_sextract, ret, arg, ofs, len); return; } /* Assume that sign-extension, if available, is cheaper than a shift. */ if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, 0, ofs + len)) { - tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, 0, ofs + len); + tcg_gen_op4ii_i32(INDEX_op_sextract, ret, arg, 0, ofs + len); tcg_gen_sari_i32(ret, ret, ofs); return; } if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, 0, len)) { tcg_gen_shri_i32(ret, arg, ofs); - tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, ret, 0, len); + tcg_gen_op4ii_i32(INDEX_op_sextract, ret, ret, 0, len); return; } @@ -2747,19 +2747,19 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, } if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, ofs, len)) { - tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len); + tcg_gen_op4ii_i64(INDEX_op_sextract, ret, arg, ofs, len); return; } /* Assume that sign-extension, if available, is cheaper than a shift. */ if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, 0, ofs + len)) { - tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, 0, ofs + len); + tcg_gen_op4ii_i64(INDEX_op_sextract, ret, arg, 0, ofs + len); tcg_gen_sari_i64(ret, ret, ofs); return; } if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, 0, len)) { tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, ret, 0, len); + tcg_gen_op4ii_i64(INDEX_op_sextract, ret, ret, 0, len); return; } diff --git a/tcg/tcg.c b/tcg/tcg.c index 7f5fa25062..c7ce13cda0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1111,8 +1111,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr), OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond), - OUTOP(INDEX_op_sextract_i32, TCGOutOpExtract, outop_sextract), - OUTOP(INDEX_op_sextract_i64, TCGOutOpExtract, outop_sextract), + OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), @@ -2333,6 +2332,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_negsetcond: case INDEX_op_or: case INDEX_op_setcond: + case INDEX_op_sextract: case INDEX_op_xor: return has_type; @@ -2344,7 +2344,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_sextract_i32: case INDEX_op_deposit_i32: return true; @@ -2372,7 +2371,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_sextract_i64: case INDEX_op_deposit_i64: return TCG_TARGET_REG_BITS == 64; @@ -5520,8 +5518,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_extract: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: + case INDEX_op_sextract: { const TCGOutOpExtract *out = container_of(all_outop[op->opc], TCGOutOpExtract, base); diff --git a/tcg/tci.c b/tcg/tci.c index 6345029802..5a07d65db8 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -28,6 +28,7 @@ #define ctpop_tr glue(ctpop, TCG_TARGET_REG_BITS) #define extract_tr glue(extract, TCG_TARGET_REG_BITS) +#define sextract_tr glue(sextract, TCG_TARGET_REG_BITS) /* * Enable TCI assertions only when debugging TCG (and without NDEBUG defined). @@ -661,9 +662,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrbb(insn, &r0, &r1, &pos, &len); regs[r0] = extract_tr(regs[r1], pos, len); break; - case INDEX_op_sextract_i32: + case INDEX_op_sextract: tci_args_rrbb(insn, &r0, &r1, &pos, &len); - regs[r0] = sextract32(regs[r1], pos, len); + regs[r0] = sextract_tr(regs[r1], pos, len); break; case INDEX_op_brcond: tci_args_rl(insn, tb_ptr, &r0, &ptr); @@ -773,10 +774,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); break; - case INDEX_op_sextract_i64: - tci_args_rrbb(insn, &r0, &r1, &pos, &len); - regs[r0] = sextract64(regs[r1], pos, len); - break; case INDEX_op_ext_i32_i64: tci_args_rr(insn, &r0, &r1); regs[r0] = (int32_t)regs[r1]; @@ -1055,8 +1052,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) break; case INDEX_op_extract: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: + case INDEX_op_sextract: tci_args_rrbb(insn, &r0, &r1, &pos, &len); info->fprintf_func(info->stream, "%-12s %s,%s,%d,%d", op_name, str_r(r0), str_r(r1), pos, len); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index e013321ac7..9ba108ef8d 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -445,10 +445,7 @@ static const TCGOutOpExtract outop_extract = { static void tcg_out_sextract(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs, unsigned pos, unsigned len) { - TCGOpcode opc = type == TCG_TYPE_I32 ? - INDEX_op_sextract_i32 : - INDEX_op_sextract_i64; - tcg_out_op_rrbb(s, opc, rd, rs, pos, len); + tcg_out_op_rrbb(s, INDEX_op_sextract, rd, rs, pos, len); } static const TCGOutOpExtract outop_sextract = { From b7b7347fe391f134c7ea616c0593d3ea835d5eea Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 12:22:45 -0800 Subject: [PATCH 101/161] tcg: Convert ext_i32_i64 to TCGOutOpUnary Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 2 -- tcg/i386/tcg-target.c.inc | 2 -- tcg/loongarch64/tcg-target.c.inc | 2 -- tcg/mips/tcg-target.c.inc | 2 -- tcg/ppc/tcg-target.c.inc | 2 -- tcg/riscv/tcg-target.c.inc | 2 -- tcg/s390x/tcg-target.c.inc | 2 -- tcg/sparc64/tcg-target.c.inc | 2 -- tcg/tcg.c | 22 +++++++++++++++++++--- tcg/tci/tcg-target.c.inc | 2 -- 10 files changed, 19 insertions(+), 21 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 00400f6ea7..68f7a1cec2 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2710,7 +2710,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -3177,7 +3176,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: return C_O1_I1(r, r); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 6a5414ab3a..14b912beb7 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3413,7 +3413,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -4001,7 +4000,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrh_i64_i32: return C_O1_I1(r, 0); - case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: return C_O1_I1(r, r); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 2ea5234097..2a9c7fc10a 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1943,7 +1943,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -2468,7 +2467,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: - case INDEX_op_ext_i32_i64: case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: case INDEX_op_ld8u_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 56c58bf82d..e992a468eb 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2364,7 +2364,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -2391,7 +2390,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 3d1ffa9130..fea767573c 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3640,7 +3640,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -4270,7 +4269,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: return C_O1_I1(r, r); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index dc2b487844..e5fe15c338 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2630,7 +2630,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -2877,7 +2876,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: - case INDEX_op_ext_i32_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index ab178bebc8..5c5a38c2c8 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2997,7 +2997,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: @@ -3471,7 +3470,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: return C_O1_I1(r, r); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 0f2bec21e9..e93ef8e7f2 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1883,7 +1883,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: default: g_assert_not_reached(); @@ -1909,7 +1908,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: diff --git a/tcg/tcg.c b/tcg/tcg.c index c7ce13cda0..6bce097eac 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1068,6 +1068,23 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - < MIN_TLB_MASK_TABLE_OFS); #endif +#if TCG_TARGET_REG_BITS == 64 +/* + * We require these functions for slow-path function calls. + * Adapt them generically for opcode output. + */ + +static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_exts_i32_i64(s, a0, a1); +} + +static const TCGOutOpUnary outop_exts_i32_i64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_exts_i32_i64, +}; +#endif + /* * Register V as the TCGOutOp for O. * This verifies that V is of type T, otherwise give a nice compiler error. @@ -1122,6 +1139,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2), #else OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64), + OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64), #endif }; @@ -5412,9 +5430,6 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* emit instruction */ TCGType type = TCGOP_TYPE(op); switch (op->opc) { - case INDEX_op_ext_i32_i64: - tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); - break; case INDEX_op_extu_i32_i64: tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); break; @@ -5477,6 +5492,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_bswap64: + case INDEX_op_ext_i32_i64: assert(TCG_TARGET_REG_BITS == 64); /* fall through */ case INDEX_op_ctpop: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 9ba108ef8d..ecff90404f 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -55,7 +55,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: return C_O1_I1(r, r); @@ -1109,7 +1108,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext_i32_i64: /* Always emitted via tcg_reg_alloc_op. */ case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: From c1ad25de11ccf47a650c860f490864dcf7fe7675 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 12:34:45 -0800 Subject: [PATCH 102/161] tcg: Convert extu_i32_i64 to TCGOutOpUnary Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 2 -- tcg/i386/tcg-target.c.inc | 2 -- tcg/loongarch64/tcg-target.c.inc | 2 -- tcg/mips/tcg-target.c.inc | 2 -- tcg/ppc/tcg-target.c.inc | 2 -- tcg/riscv/tcg-target.c.inc | 2 -- tcg/s390x/tcg-target.c.inc | 4 ---- tcg/sparc64/tcg-target.c.inc | 2 -- tcg/tcg.c | 15 ++++++++++++--- tcg/tci/tcg-target.c.inc | 2 -- 10 files changed, 12 insertions(+), 23 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 68f7a1cec2..44314f6a0f 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2710,7 +2710,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); @@ -3176,7 +3175,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_extu_i32_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 14b912beb7..8371cfaf5a 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3413,7 +3413,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); @@ -4000,7 +3999,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrh_i64_i32: return C_O1_I1(r, 0); - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: return C_O1_I1(r, r); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 2a9c7fc10a..60356d5dfd 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1943,7 +1943,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); @@ -2464,7 +2463,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(r, r, r); - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ld8s_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index e992a468eb..b6b7070fbb 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2364,7 +2364,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); @@ -2390,7 +2389,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: return C_O1_I1(r, r); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index fea767573c..e1767f1d6c 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3640,7 +3640,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); @@ -4269,7 +4268,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_extu_i32_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index e5fe15c338..48d4325097 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2630,7 +2630,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); @@ -2873,7 +2872,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: return C_O1_I1(r, r); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 5c5a38c2c8..d81b8fb8f4 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2997,7 +2997,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); @@ -3470,9 +3469,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_extu_i32_i64: - return C_O1_I1(r, r); - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index e93ef8e7f2..d52907f7e3 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1883,7 +1883,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extu_i32_i64: default: g_assert_not_reached(); } @@ -1908,7 +1907,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_extu_i32_i64: case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); diff --git a/tcg/tcg.c b/tcg/tcg.c index 6bce097eac..0b2dc17600 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1083,6 +1083,16 @@ static const TCGOutOpUnary outop_exts_i32_i64 = { .base.static_constraint = C_O1_I1(r, r), .out_rr = tgen_exts_i32_i64, }; + +static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_extu_i32_i64(s, a0, a1); +} + +static const TCGOutOpUnary outop_extu_i32_i64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extu_i32_i64, +}; #endif /* @@ -1140,6 +1150,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { #else OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64), OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64), + OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64), #endif }; @@ -5430,9 +5441,6 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* emit instruction */ TCGType type = TCGOP_TYPE(op); switch (op->opc) { - case INDEX_op_extu_i32_i64: - tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); - break; case INDEX_op_extrl_i64_i32: tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); break; @@ -5493,6 +5501,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_bswap64: case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: assert(TCG_TARGET_REG_BITS == 64); /* fall through */ case INDEX_op_ctpop: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index ecff90404f..3cf2913acd 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -55,7 +55,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32u_i64: case INDEX_op_ld32s_i64: case INDEX_op_ld_i64: - case INDEX_op_extu_i32_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: @@ -1108,7 +1107,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); From 1e6fec9dd90756b22331a3c4c6859c51ad3a2c3e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 12:57:13 -0800 Subject: [PATCH 103/161] tcg: Convert extrl_i64_i32 to TCGOutOpUnary Drop the cast from TCGv_i64 to TCGv_i32 in tcg_gen_extrl_i64_i32 an emit extrl_i64_i32 unconditionally. Move that special case to tcg_gen_code when we find out if the output is live or dead. In this way even hosts that canonicalize truncations can make use of a store directly from the 64-bit host register. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 1 - tcg/i386/tcg-target.c.inc | 4 ---- tcg/loongarch64/tcg-target.c.inc | 2 -- tcg/mips/tcg-target.c.inc | 2 -- tcg/ppc/tcg-target.c.inc | 1 - tcg/riscv/tcg-target.c.inc | 2 -- tcg/s390x/tcg-target.c.inc | 1 - tcg/tcg-op.c | 4 +--- tcg/tcg.c | 35 +++++++++++++++++++++++++++----- tcg/tci/tcg-target.c.inc | 1 - 10 files changed, 31 insertions(+), 22 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 44314f6a0f..8abc5f26da 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2710,7 +2710,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); } diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 8371cfaf5a..9bae60d3b6 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3413,7 +3413,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); } @@ -3999,9 +3998,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrh_i64_i32: return C_O1_I1(r, 0); - case INDEX_op_extrl_i64_i32: - return C_O1_I1(r, r); - case INDEX_op_extract2_i32: case INDEX_op_extract2_i64: return C_O1_I2(r, 0, r); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 60356d5dfd..fae1a58c94 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1943,7 +1943,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); } @@ -2463,7 +2462,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(r, r, r); - case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index b6b7070fbb..095eb8f672 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2364,7 +2364,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); } @@ -2389,7 +2388,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: return C_O1_I1(r, r); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index e1767f1d6c..bb03efe055 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3640,7 +3640,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); } diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 48d4325097..76ad2df410 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2630,7 +2630,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); } @@ -2872,7 +2871,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: return C_O1_I1(r, r); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index d81b8fb8f4..1ea041c75f 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2997,7 +2997,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); } diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index d3f3c9d248..7ecd1f6c8f 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2962,11 +2962,9 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_mov_i32(ret, TCGV_LOW(arg)); - } else if (TCG_TARGET_HAS_extr_i64_i32) { + } else { tcg_gen_op2(INDEX_op_extrl_i64_i32, TCG_TYPE_I32, tcgv_i32_arg(ret), tcgv_i64_arg(arg)); - } else { - tcg_gen_mov_i32(ret, (TCGv_i32)arg); } } diff --git a/tcg/tcg.c b/tcg/tcg.c index 0b2dc17600..8fcaf54d32 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1093,6 +1093,16 @@ static const TCGOutOpUnary outop_extu_i32_i64 = { .base.static_constraint = C_O1_I1(r, r), .out_rr = tgen_extu_i32_i64, }; + +static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_extrl_i64_i32(s, a0, a1); +} + +static const TCGOutOpUnary outop_extrl_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL, +}; #endif /* @@ -1151,6 +1161,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64), OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64), OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64), + OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32), #endif }; @@ -2400,12 +2411,12 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: case INDEX_op_deposit_i64: return TCG_TARGET_REG_BITS == 64; case INDEX_op_extract2_i64: return TCG_TARGET_HAS_extract2_i64; - case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: return TCG_TARGET_HAS_extr_i64_i32; case INDEX_op_add2_i64: @@ -5441,10 +5452,6 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* emit instruction */ TCGType type = TCGOP_TYPE(op); switch (op->opc) { - case INDEX_op_extrl_i64_i32: - tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); - break; - case INDEX_op_add: case INDEX_op_and: case INDEX_op_andc: @@ -5502,6 +5509,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_bswap64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: assert(TCG_TARGET_REG_BITS == 64); /* fall through */ case INDEX_op_ctpop: @@ -6660,6 +6668,22 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) TCGOpcode opc = op->opc; switch (opc) { + case INDEX_op_extrl_i64_i32: + assert(TCG_TARGET_REG_BITS == 64); + /* + * If TCG_TYPE_I32 is represented in some canonical form, + * e.g. zero or sign-extended, then emit as a unary op. + * Otherwise we can treat this as a plain move. + * If the output dies, treat this as a plain move, because + * this will be implemented with a store. + */ + if (TCG_TARGET_HAS_extr_i64_i32) { + TCGLifeData arg_life = op->life; + if (!IS_DEAD_ARG(0)) { + goto do_default; + } + } + /* fall through */ case INDEX_op_mov: case INDEX_op_mov_vec: tcg_reg_alloc_mov(s, op); @@ -6702,6 +6726,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) } /* fall through */ default: + do_default: /* Sanity check that we've not introduced any unhandled opcodes. */ tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), TCGOP_FLAGS(op))); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 3cf2913acd..e9b46d5e66 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -1107,7 +1107,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_extrl_i64_i32: default: g_assert_not_reached(); } From b3b139766424d022dc3455b711837dc6acf0724b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 13:37:28 -0800 Subject: [PATCH 104/161] tcg: Convert extrh_i64_i32 to TCGOutOpUnary At the same time, make extrh_i64_i32 mandatory. This closes a hole in which move arguments could be cast between TCGv_i32 and TCGv_i64. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 10 ++++++++++ tcg/i386/tcg-target.c.inc | 20 +++++++++++++------- tcg/loongarch64/tcg-target.c.inc | 15 ++++++++++----- tcg/mips/tcg-target.c.inc | 17 ++++++++++++----- tcg/ppc/tcg-target.c.inc | 12 ++++++++++++ tcg/riscv/tcg-target.c.inc | 15 ++++++++++----- tcg/s390x/tcg-target.c.inc | 10 ++++++++++ tcg/sparc64/tcg-target.c.inc | 10 ++++++++++ tcg/tcg-op.c | 7 +------ tcg/tcg.c | 5 +++-- tcg/tci/tcg-target.c.inc | 12 ++++++++++++ 11 files changed, 103 insertions(+), 30 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 8abc5f26da..4ea1aebc5e 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2220,6 +2220,16 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_ubfm(s, TCG_TYPE_I64, a0, a1, 32, 63); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; + static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 9bae60d3b6..63c9aae26e 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2794,6 +2794,18 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +#if TCG_TARGET_REG_BITS == 64 +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_extrh_i64_i32, +}; +#endif /* TCG_TARGET_REG_BITS == 64 */ + static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3212,6 +3224,7 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tgen_sextract, }; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -3363,10 +3376,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2); } break; - - case INDEX_op_extrh_i64_i32: - tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32); - break; #endif OP_32_64(deposit): @@ -3995,9 +4004,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_extrh_i64_i32: - return C_O1_I1(r, 0); - case INDEX_op_extract2_i32: case INDEX_op_extract2_i64: return C_O1_I2(r, 0, r); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index fae1a58c94..1062eb1883 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1467,6 +1467,16 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_opc_srai_d(s, a0, a1, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; + static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1867,10 +1877,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_b(s, 0); break; - case INDEX_op_extrh_i64_i32: - tcg_out_opc_srai_d(s, a0, a1, 32); - break; - case INDEX_op_deposit_i32: tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1); break; @@ -2462,7 +2468,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(r, r, r); - case INDEX_op_extrh_i64_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: case INDEX_op_ld8u_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 095eb8f672..ad0482902d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1793,6 +1793,18 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +#if TCG_TARGET_REG_BITS == 64 +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_dsra(s, a0, a1, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; +#endif + static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2315,10 +2327,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_extrh_i64_i32: - tcg_out_dsra(s, a0, a1, 32); - break; - case INDEX_op_deposit_i32: tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); break; @@ -2388,7 +2396,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_extrh_i64_i32: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index bb03efe055..ba6d7556f7 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2965,6 +2965,18 @@ static void tgen_eqv(TCGContext *s, TCGType type, tcg_out32(s, EQV | SAB(a1, a0, a2)); } +#if TCG_TARGET_REG_BITS == 64 +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_shri64(s, a0, a1, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; +#endif + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 76ad2df410..46b4e1167c 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2151,6 +2151,16 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; + static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2619,10 +2629,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); break; - case INDEX_op_extrh_i64_i32: - tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -2871,7 +2877,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ld32s_i64: case INDEX_op_ld32u_i64: case INDEX_op_ld_i64: - case INDEX_op_extrh_i64_i32: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 1ea041c75f..3b3749efd3 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2409,6 +2409,16 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_sh64(s, RSY_SRLG, a0, a1, TCG_REG_NONE, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; + static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index d52907f7e3..c1cce7c196 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1501,6 +1501,16 @@ static const TCGOutOpBinary outop_eqv = { .base.static_constraint = C_NotImplemented, }; +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; + static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 7ecd1f6c8f..b88f411ece 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2972,14 +2972,9 @@ void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_mov_i32(ret, TCGV_HIGH(arg)); - } else if (TCG_TARGET_HAS_extr_i64_i32) { + } else { tcg_gen_op2(INDEX_op_extrh_i64_i32, TCG_TYPE_I32, tcgv_i32_arg(ret), tcgv_i64_arg(arg)); - } else { - TCGv_i64 t = tcg_temp_ebb_new_i64(); - tcg_gen_shri_i64(t, arg, 32); - tcg_gen_mov_i32(ret, (TCGv_i32)t); - tcg_temp_free_i64(t); } } diff --git a/tcg/tcg.c b/tcg/tcg.c index 8fcaf54d32..ce583b824c 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1162,6 +1162,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64), OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64), OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32), + OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32), #endif }; @@ -2412,13 +2413,12 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: case INDEX_op_deposit_i64: return TCG_TARGET_REG_BITS == 64; case INDEX_op_extract2_i64: return TCG_TARGET_HAS_extract2_i64; - case INDEX_op_extrh_i64_i32: - return TCG_TARGET_HAS_extr_i64_i32; case INDEX_op_add2_i64: return TCG_TARGET_HAS_add2_i64; case INDEX_op_sub2_i64: @@ -5510,6 +5510,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: assert(TCG_TARGET_REG_BITS == 64); /* fall through */ case INDEX_op_ctpop: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index e9b46d5e66..d84d01e098 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -670,6 +670,18 @@ static const TCGOutOpBinary outop_eqv = { .out_rrr = tgen_eqv, }; +#if TCG_TARGET_REG_BITS == 64 +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_extract(s, TCG_TYPE_I64, a0, a1, 32, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; +#endif + static void tgen_mul(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { From cf4905c03135f1181e86c618426f8d6c703b38c0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 20:29:41 -0800 Subject: [PATCH 105/161] tcg: Convert deposit to TCGOutOpDeposit Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 30 +++++-------- tcg/arm/tcg-target.c.inc | 29 ++++++------ tcg/i386/tcg-target.c.inc | 76 ++++++++++++++++---------------- tcg/loongarch64/tcg-target.c.inc | 27 +++++++----- tcg/mips/tcg-target.c.inc | 27 +++++++----- tcg/ppc/tcg-target.c.inc | 44 +++++++++--------- tcg/riscv/tcg-target.c.inc | 4 ++ tcg/s390x/tcg-target.c.inc | 60 +++++++++++++------------ tcg/sparc64/tcg-target.c.inc | 4 ++ tcg/tcg.c | 33 ++++++++++++++ tcg/tci.c | 8 ++-- tcg/tci/tcg-target.c.inc | 19 ++++---- 12 files changed, 206 insertions(+), 155 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 4ea1aebc5e..62b045c222 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1347,15 +1347,6 @@ static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); } -static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned lsb, unsigned width) -{ - unsigned size = ext ? 64 : 32; - unsigned a = (size - lsb) & (size - 1); - unsigned b = width - 1; - tcg_out_bfm(s, ext, rd, rn, a, b); -} - static void tgen_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a, TCGReg b) { @@ -2577,6 +2568,18 @@ static const TCGOutOpMovcond outop_movcond = { .out = tgen_movcond, }; +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_bfm(s, type, a0, a2, -ofs & mask, len - 1); +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, 0, rz), + .out_rrr = tgen_deposit, +}; + static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len) { @@ -2684,11 +2687,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); break; - case INDEX_op_deposit_i64: - case INDEX_op_deposit_i32: - tcg_out_dep(s, ext, a0, a2, args[3], args[4]); - break; - case INDEX_op_extract2_i64: case INDEX_op_extract2_i32: tcg_out_extr(s, ext, a0, a2, a1, args[3]); @@ -3206,10 +3204,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(rz, rz, r); - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(r, 0, rz); - case INDEX_op_extract2_i32: case INDEX_op_extract2_i64: return C_O1_I2(r, rz, rz); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index aebe48679c..2bf6bfe274 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -969,18 +969,27 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) g_assert_not_reached(); } -static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd, - TCGArg a1, int ofs, int len, bool const_a1) +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) { - if (const_a1) { - /* bfi becomes bfc with rn == 15. */ - a1 = 15; - } /* bfi/bfc */ - tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1 + tcg_out32(s, 0x07c00010 | (COND_AL << 28) | (a0 << 12) | a1 | (ofs << 7) | ((ofs + len - 1) << 16)); } +static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len) +{ + /* bfi becomes bfc with rn == 15. */ + tgen_deposit(s, type, a0, a1, 15, ofs, len); +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, 0, rZ), + .out_rrr = tgen_deposit, + .out_rri = tgen_depositi, +}; + static void tgen_extract(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn, unsigned ofs, unsigned len) { @@ -2402,10 +2411,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); break; - case INDEX_op_deposit_i32: - tcg_out_deposit(s, COND_AL, args[0], args[2], - args[3], args[4], const_args[2]); - break; case INDEX_op_extract2_i32: /* ??? These optimization vs zero should be generic. */ /* ??? But we can't substitute 2 for 1 in the opcode stream yet. */ @@ -2459,8 +2464,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i32: return C_O0_I2(r, r); - case INDEX_op_deposit_i32: - return C_O1_I2(r, 0, rZ); case INDEX_op_extract2_i32: return C_O1_I2(r, rZ, rZ); case INDEX_op_add2_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 63c9aae26e..1dd9741f45 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3150,6 +3150,43 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + if (ofs == 0 && len == 8) { + tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0); + } else if (ofs == 0 && len == 16) { + tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0); + } else if (TCG_TARGET_REG_BITS == 32 && ofs == 8 && len == 8) { + tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4); + } else { + g_assert_not_reached(); + } +} + +static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len) +{ + if (ofs == 0 && len == 8) { + tcg_out_opc(s, OPC_MOVB_Ib | P_REXB_RM | LOWREGMASK(a0), 0, a0, 0); + tcg_out8(s, a2); + } else if (ofs == 0 && len == 16) { + tcg_out_opc(s, OPC_MOVL_Iv | P_DATA16 | LOWREGMASK(a0), 0, a0, 0); + tcg_out16(s, a2); + } else if (TCG_TARGET_REG_BITS == 32 && ofs == 8 && len == 8) { + tcg_out8(s, OPC_MOVB_Ib + a0 + 4); + tcg_out8(s, a2); + } else { + g_assert_not_reached(); + } +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(q, 0, qi), + .out_rrr = tgen_deposit, + .out_rri = tgen_depositi, +}; + static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len) { @@ -3230,7 +3267,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; - int const_a2, rexw; + int rexw; #if TCG_TARGET_REG_BITS == 64 # define OP_32_64(x) \ @@ -3245,7 +3282,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a0 = args[0]; a1 = args[1]; a2 = args[2]; - const_a2 = const_args[2]; rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; switch (opc) { @@ -3378,38 +3414,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; #endif - OP_32_64(deposit): - if (args[3] == 0 && args[4] == 8) { - /* load bits 0..7 */ - if (const_a2) { - tcg_out_opc(s, OPC_MOVB_Ib | P_REXB_RM | LOWREGMASK(a0), - 0, a0, 0); - tcg_out8(s, a2); - } else { - tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0); - } - } else if (TCG_TARGET_REG_BITS == 32 && args[3] == 8 && args[4] == 8) { - /* load bits 8..15 */ - if (const_a2) { - tcg_out8(s, OPC_MOVB_Ib + a0 + 4); - tcg_out8(s, a2); - } else { - tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4); - } - } else if (args[3] == 0 && args[4] == 16) { - /* load bits 0..15 */ - if (const_a2) { - tcg_out_opc(s, OPC_MOVL_Iv | P_DATA16 | LOWREGMASK(a0), - 0, a0, 0); - tcg_out16(s, a2); - } else { - tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0); - } - } else { - g_assert_not_reached(); - } - break; - OP_32_64(extract2): /* Note that SHRD outputs to the r/m operand. */ tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); @@ -4008,10 +4012,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extract2_i64: return C_O1_I2(r, 0, r); - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(q, 0, qi); - case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 1062eb1883..0b64efc078 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1809,6 +1809,21 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_bstrins_w(s, a0, a2, ofs, ofs + len - 1); + } else { + tcg_out_opc_bstrins_d(s, a0, a2, ofs, ofs + len - 1); + } +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, 0, rz), + .out_rrr = tgen_deposit, +}; + static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len) { @@ -1877,13 +1892,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_b(s, 0); break; - case INDEX_op_deposit_i32: - tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1); - break; - case INDEX_op_deposit_i64: - tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1); - break; - case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); @@ -2484,11 +2492,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - /* Must deposit into the same register as input */ - return C_O1_I2(r, 0, rz); - case INDEX_op_ld_vec: case INDEX_op_dupm_vec: case INDEX_op_dup_vec: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index ad0482902d..cd648ab1df 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2215,6 +2215,22 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_bf(s, OPC_INS, a0, a2, ofs + len - 1, ofs); + } else { + tcg_out_opc_bf64(s, OPC_DINS, OPC_DINSM, OPC_DINSU, a0, a2, + ofs + len - 1, ofs); + } +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, 0, rz), + .out_rrr = tgen_deposit, +}; + static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len) { @@ -2327,14 +2343,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, i1, a0, a1, a2); break; - case INDEX_op_deposit_i32: - tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); - break; - case INDEX_op_deposit_i64: - tcg_out_opc_bf64(s, OPC_DINS, OPC_DINSM, OPC_DINSU, a0, a2, - args[3] + args[4] - 1, args[3]); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I32); break; @@ -2407,9 +2415,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(r, 0, rz); case INDEX_op_add2_i32: case INDEX_op_sub2_i32: return C_O2_I4(r, r, rz, rz, rN, rN); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index ba6d7556f7..fc92a4896d 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3429,6 +3429,28 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + if (type == TCG_TYPE_I32) { + tcg_out_rlw(s, RLWIMI, a0, a2, ofs, 32 - ofs - len, 31 - ofs); + } else { + tcg_out_rld(s, RLDIMI, a0, a2, ofs, 64 - ofs - len); + } +} + +static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len) +{ + tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len)); +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, 0, rZ), + .out_rrr = tgen_deposit, + .out_rri = tgen_depositi, +}; + static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len) { @@ -3575,25 +3597,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; - case INDEX_op_deposit_i32: - if (const_args[2]) { - uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi32(s, args[0], args[0], ~mask); - } else { - tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], - 32 - args[3] - args[4], 31 - args[3]); - } - break; - case INDEX_op_deposit_i64: - if (const_args[2]) { - uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi64(s, args[0], args[0], ~mask); - } else { - tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], - 64 - args[3] - args[4]); - } - break; - #if TCG_TARGET_REG_BITS == 64 case INDEX_op_add2_i64: #else @@ -4290,9 +4293,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(r, 0, rZ); case INDEX_op_add2_i64: case INDEX_op_add2_i32: return C_O2_I4(r, r, r, r, rI, rZM); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 46b4e1167c..371e0c24c8 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2482,6 +2482,10 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len) { diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 3b3749efd3..d72393315d 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1555,14 +1555,40 @@ static const TCGOutOpMovcond outop_movcond = { .out = tgen_movcond, }; -static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, - int ofs, int len, int z) +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) { - int lsb = (63 - ofs); - int msb = lsb - (len - 1); - tcg_out_risbg(s, dest, src, msb, lsb, ofs, z); + unsigned lsb = (63 - ofs); + unsigned msb = lsb - (len - 1); + + /* + * Since we can't support "0Z" as a constraint, we allow a1 in + * any register. Fix things up as if a matching constraint. + */ + if (a0 != a1) { + if (a0 == a2) { + tcg_out_mov(s, type, TCG_TMP0, a2); + a2 = TCG_TMP0; + } + tcg_out_mov(s, type, a0, a1); + } + tcg_out_risbg(s, a0, a2, msb, lsb, ofs, false); } +static void tgen_depositz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a2, + unsigned ofs, unsigned len) +{ + unsigned lsb = (63 - ofs); + unsigned msb = lsb - (len - 1); + tcg_out_risbg(s, a0, a2, msb, lsb, ofs, true); +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, rZ, r), + .out_rrr = tgen_deposit, + .out_rzr = tgen_depositz, +}; + static void tgen_extract(TCGContext *s, TCGType type, TCGReg dest, TCGReg src, unsigned ofs, unsigned len) { @@ -2845,7 +2871,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0, a1, a2; + TCGArg a0; switch (opc) { case INDEX_op_goto_ptr: @@ -2977,24 +3003,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); break; - OP_32_64(deposit): - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - tgen_deposit(s, a0, a2, args[3], args[4], 1); - } else { - /* Since we can't support "0Z" as a constraint, we allow a1 in - any register. Fix things up as if a matching constraint. */ - if (a0 != a1) { - if (a0 == a2) { - tcg_out_mov(s, type, TCG_TMP0, a2); - a2 = TCG_TMP0; - } - tcg_out_mov(s, type, a0, a1); - } - tgen_deposit(s, a0, a2, args[3], args[4], 0); - } - break; - case INDEX_op_mb: /* The host memory model is quite strong, we simply need to serialize the instruction stream. */ @@ -3489,10 +3497,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(o, m, r); - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(r, rZ, r); - case INDEX_op_add2_i32: case INDEX_op_sub2_i32: return C_N1_O1_I4(r, r, 0, 1, ri, r); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index c1cce7c196..741de260e9 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1767,6 +1767,10 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, unsigned ofs, unsigned len) { diff --git a/tcg/tcg.c b/tcg/tcg.c index ce583b824c..b9e9454654 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1007,6 +1007,16 @@ typedef struct TCGOutOpBswap { TCGReg a0, TCGReg a1, unsigned flags); } TCGOutOpBswap; +typedef struct TCGOutOpDeposit { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len); + void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len); + void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0, + TCGReg a2, unsigned ofs, unsigned len); +} TCGOutOpDeposit; + typedef struct TCGOutOpDivRem { TCGOutOp base; void (*out_rr01r)(TCGContext *s, TCGType type, @@ -1123,6 +1133,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), + OUTOP(INDEX_op_deposit_i32, TCGOutOpDeposit, outop_deposit), + OUTOP(INDEX_op_deposit_i64, TCGOutOpDeposit, outop_deposit), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), @@ -5537,6 +5549,27 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_deposit_i32: + case INDEX_op_deposit_i64: + { + const TCGOutOpDeposit *out = &outop_deposit; + + if (const_args[2]) { + tcg_debug_assert(!const_args[1]); + out->out_rri(s, type, new_args[0], new_args[1], + new_args[2], new_args[3], new_args[4]); + } else if (const_args[1]) { + tcg_debug_assert(new_args[1] == 0); + tcg_debug_assert(!const_args[2]); + out->out_rzr(s, type, new_args[0], new_args[2], + new_args[3], new_args[4]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3], new_args[4]); + } + } + break; + case INDEX_op_divs2: case INDEX_op_divu2: { diff --git a/tcg/tci.c b/tcg/tci.c index 5a07d65db8..595416a192 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -27,6 +27,7 @@ #define ctpop_tr glue(ctpop, TCG_TARGET_REG_BITS) +#define deposit_tr glue(deposit, TCG_TARGET_REG_BITS) #define extract_tr glue(extract, TCG_TARGET_REG_BITS) #define sextract_tr glue(sextract, TCG_TARGET_REG_BITS) @@ -655,8 +656,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, regs[r0] = ror32(regs[r1], regs[r2] & 31); break; case INDEX_op_deposit_i32: + case INDEX_op_deposit_i64: tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); - regs[r0] = deposit32(regs[r1], pos, len, regs[r2]); + regs[r0] = deposit_tr(regs[r1], pos, len, regs[r2]); break; case INDEX_op_extract: tci_args_rrbb(insn, &r0, &r1, &pos, &len); @@ -770,10 +772,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror64(regs[r1], regs[r2] & 63); break; - case INDEX_op_deposit_i64: - tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); - regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); - break; case INDEX_op_ext_i32_i64: tci_args_rr(insn, &r0, &r1); regs[r0] = (int32_t)regs[r1]; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d84d01e098..566c2fb0d0 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -66,10 +66,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(r, r, r); - case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: @@ -623,6 +619,17 @@ static const TCGOutOpBinary outop_ctz = { .out_rrr = tgen_ctz, }; +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + tcg_out_op_rrrbb(s, INDEX_op_deposit_i64, a0, a1, a2, ofs, len); +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_deposit, +}; + static void tgen_divs(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1084,10 +1091,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(deposit) - tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], args[3], args[4]); - break; - CASE_32_64(add2) CASE_32_64(sub2) tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2], From a5a8dd33aad1ada9b05968e8b93033bdbdfdbca2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 20:42:13 -0800 Subject: [PATCH 106/161] tcg/aarch64: Improve deposit Use ANDI for deposit 0 into a register. Use UBFIZ, aka UBFM, for deposit register into 0. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-con-set.h | 2 +- tcg/aarch64/tcg-target.c.inc | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h index 1281e5efc0..2eda499cd3 100644 --- a/tcg/aarch64/tcg-target-con-set.h +++ b/tcg/aarch64/tcg-target-con-set.h @@ -18,7 +18,6 @@ C_O1_I1(r, r) C_O1_I1(w, r) C_O1_I1(w, w) C_O1_I1(w, wr) -C_O1_I2(r, 0, rz) C_O1_I2(r, r, r) C_O1_I2(r, r, rA) C_O1_I2(r, r, rAL) @@ -26,6 +25,7 @@ C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rL) C_O1_I2(r, rz, rz) +C_O1_I2(r, rZ, rZ) C_O1_I2(w, 0, w) C_O1_I2(w, w, w) C_O1_I2(w, w, wN) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 62b045c222..dee4afcce1 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2572,12 +2572,39 @@ static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, unsigned ofs, unsigned len) { unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + + /* + * Since we can't support "0Z" as a constraint, we allow a1 in + * any register. Fix things up as if a matching constraint. + */ + if (a0 != a1) { + if (a0 == a2) { + tcg_out_mov(s, type, TCG_REG_TMP0, a2); + a2 = TCG_REG_TMP0; + } + tcg_out_mov(s, type, a0, a1); + } tcg_out_bfm(s, type, a0, a2, -ofs & mask, len - 1); } +static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len) +{ + tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len)); +} + +static void tgen_depositz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a2, + unsigned ofs, unsigned len) +{ + int max = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_ubfm(s, type, a0, a2, -ofs & max, len - 1); +} + static const TCGOutOpDeposit outop_deposit = { - .base.static_constraint = C_O1_I2(r, 0, rz), + .base.static_constraint = C_O1_I2(r, rZ, rZ), .out_rrr = tgen_deposit, + .out_rri = tgen_depositi, + .out_rzr = tgen_depositz, }; static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, From 4d137ff819bae33d045f13bb9186e3a2c71cb7e4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 20:48:57 -0800 Subject: [PATCH 107/161] tcg: Merge INDEX_op_deposit_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 6 ++++-- include/tcg/tcg-opc.h | 3 +-- tcg/optimize.c | 2 +- tcg/tcg-op.c | 8 ++++---- tcg/tcg.c | 9 +++------ tcg/tci.c | 6 ++---- tcg/tci/tcg-target.c.inc | 2 +- 7 files changed, 16 insertions(+), 20 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index ca7550f68c..aea8a897bd 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -442,7 +442,7 @@ Misc - | Indicate that the value of *t0* won't be used later. It is useful to force dead code elimination. - * - deposit_i32/i64 *dest*, *t1*, *t2*, *pos*, *len* + * - deposit *dest*, *t1*, *t2*, *pos*, *len* - | Deposit *t2* as a bitfield into *t1*, placing the result in *dest*. | @@ -451,10 +451,12 @@ Misc | *len* - the length of the bitfield | *pos* - the position of the first bit, counting from the LSB | - | For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field + | For example, "deposit dest, t1, t2, 8, 4" indicates a 4-bit field at bit 8. This operation would be equivalent to | | *dest* = (*t1* & ~0x0f00) | ((*t2* << 8) & 0x0f00) + | + | on TCG_TYPE_I32. * - extract *dest*, *t1*, *pos*, *len* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 4ace1f85c4..c6848b3c63 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -49,6 +49,7 @@ DEF(bswap64, 1, 1, 1, TCG_OPF_INT) DEF(clz, 1, 2, 0, TCG_OPF_INT) DEF(ctpop, 1, 1, 0, TCG_OPF_INT) DEF(ctz, 1, 2, 0, TCG_OPF_INT) +DEF(deposit, 1, 2, 2, TCG_OPF_INT) DEF(divs, 1, 2, 0, TCG_OPF_INT) DEF(divs2, 2, 3, 0, TCG_OPF_INT) DEF(divu, 1, 2, 0, TCG_OPF_INT) @@ -90,7 +91,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) /* shifts/rotates */ -DEF(deposit_i32, 1, 2, 2, 0) DEF(extract2_i32, 1, 2, 1, 0) DEF(add2_i32, 2, 4, 0, 0) @@ -111,7 +111,6 @@ DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) /* shifts/rotates */ -DEF(deposit_i64, 1, 2, 2, 0) DEF(extract2_i64, 1, 2, 1, 0) /* size changing ops */ diff --git a/tcg/optimize.c b/tcg/optimize.c index d324cbf7fe..acc566ed76 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2858,7 +2858,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_ctpop: done = fold_ctpop(&ctx, op); break; - CASE_OP_32_64(deposit): + case INDEX_op_deposit: done = fold_deposit(&ctx, op); break; case INDEX_op_divs: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index b88f411ece..961a39f446 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -915,7 +915,7 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, return; } if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) { - tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len); + tcg_gen_op5ii_i32(INDEX_op_deposit, ret, arg1, arg2, ofs, len); return; } @@ -961,7 +961,7 @@ void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg, tcg_gen_andi_i32(ret, arg, (1u << len) - 1); } else if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) { TCGv_i32 zero = tcg_constant_i32(0); - tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len); + tcg_gen_op5ii_i32(INDEX_op_deposit, ret, zero, arg, ofs, len); } else { /* * To help two-operand hosts we prefer to zero-extend first, @@ -2533,7 +2533,7 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, if (TCG_TARGET_REG_BITS == 64) { if (TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) { - tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len); + tcg_gen_op5ii_i64(INDEX_op_deposit, ret, arg1, arg2, ofs, len); return; } } else { @@ -2594,7 +2594,7 @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg, } else if (TCG_TARGET_REG_BITS == 64 && TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) { TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len); + tcg_gen_op5ii_i64(INDEX_op_deposit, ret, zero, arg, ofs, len); } else { if (TCG_TARGET_REG_BITS == 32) { if (ofs >= 32) { diff --git a/tcg/tcg.c b/tcg/tcg.c index b9e9454654..6b49fd4acf 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1133,8 +1133,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), - OUTOP(INDEX_op_deposit_i32, TCGOutOpDeposit, outop_deposit), - OUTOP(INDEX_op_deposit_i64, TCGOutOpDeposit, outop_deposit), + OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit), OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), @@ -2379,6 +2378,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_add: case INDEX_op_and: case INDEX_op_brcond: + case INDEX_op_deposit: case INDEX_op_extract: case INDEX_op_mov: case INDEX_op_movcond: @@ -2397,7 +2397,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: - case INDEX_op_deposit_i32: return true; case INDEX_op_extract2_i32: @@ -2426,7 +2425,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: - case INDEX_op_deposit_i64: return TCG_TARGET_REG_BITS == 64; case INDEX_op_extract2_i64: @@ -5549,8 +5547,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: + case INDEX_op_deposit: { const TCGOutOpDeposit *out = &outop_deposit; diff --git a/tcg/tci.c b/tcg/tci.c index 595416a192..dc916eb112 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -655,8 +655,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror32(regs[r1], regs[r2] & 31); break; - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: + case INDEX_op_deposit: tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); regs[r0] = deposit_tr(regs[r1], pos, len, regs[r2]); break; @@ -1042,8 +1041,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), str_r(r1), str_r(r2)); break; - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: + case INDEX_op_deposit: tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %d, %d", op_name, str_r(r0), str_r(r1), str_r(r2), pos, len); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 566c2fb0d0..ef14e81609 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -622,7 +622,7 @@ static const TCGOutOpBinary outop_ctz = { static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2, unsigned ofs, unsigned len) { - tcg_out_op_rrrbb(s, INDEX_op_deposit_i64, a0, a1, a2, ofs, len); + tcg_out_op_rrrbb(s, INDEX_op_deposit, a0, a1, a2, ofs, len); } static const TCGOutOpDeposit outop_deposit = { From c8f9f70047e17ce70e016bc59fe7857123f3cbd5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 21:30:10 -0800 Subject: [PATCH 108/161] tcg: Convert extract2 to TCGOutOpExtract2 Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-has.h | 2 -- tcg/aarch64/tcg-target.c.inc | 20 +++++++++-------- tcg/arm/tcg-target-has.h | 1 - tcg/arm/tcg-target.c.inc | 38 ++++++++++++-------------------- tcg/i386/tcg-target-has.h | 2 -- tcg/i386/tcg-target.c.inc | 25 ++++++++++++--------- tcg/loongarch64/tcg-target-has.h | 2 -- tcg/loongarch64/tcg-target.c.inc | 5 +++++ tcg/mips/tcg-target-has.h | 6 ----- tcg/mips/tcg-target.c.inc | 5 +++++ tcg/ppc/tcg-target-has.h | 2 -- tcg/ppc/tcg-target.c.inc | 4 ++++ tcg/riscv/tcg-target-has.h | 2 -- tcg/riscv/tcg-target.c.inc | 5 +++++ tcg/s390x/tcg-target-has.h | 2 -- tcg/s390x/tcg-target.c.inc | 4 ++++ tcg/sparc64/tcg-target-has.h | 2 -- tcg/sparc64/tcg-target.c.inc | 4 ++++ tcg/tcg-has.h | 1 - tcg/tcg-op.c | 12 +++++----- tcg/tcg.c | 24 ++++++++++++++++---- tcg/tci/tcg-target-has.h | 2 -- tcg/tci/tcg-target.c.inc | 4 ++++ 23 files changed, 97 insertions(+), 77 deletions(-) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 82d8cd5965..011a91c263 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -13,13 +13,11 @@ #define have_lse2 (cpuinfo & CPUINFO_LSE2) /* optional instructions */ -#define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index dee4afcce1..bece494c55 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2634,6 +2634,17 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tgen_sextract, }; +static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0, + TCGReg a1, TCGReg a2, unsigned shr) +{ + tcg_out_extr(s, type, a0, a2, a1, shr); +} + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_O1_I2(r, rz, rz), + .out_rrr = tgen_extract2, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2714,11 +2725,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); break; - case INDEX_op_extract2_i64: - case INDEX_op_extract2_i32: - tcg_out_extr(s, ext, a0, a2, a1, args[3]); - break; - case INDEX_op_add2_i32: tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3], (int32_t)args[4], args[5], const_args[4], @@ -3231,10 +3237,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(rz, rz, r); - case INDEX_op_extract2_i32: - case INDEX_op_extract2_i64: - return C_O1_I2(r, rz, rz); - case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index c85b5da1e5..0d6a785542 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -24,7 +24,6 @@ extern bool use_neon_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 2bf6bfe274..f366424af5 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2317,6 +2317,20 @@ static const TCGOutOpSetcond2 outop_setcond2 = { .out = tgen_setcond2, }; +static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0, + TCGReg a1, TCGReg a2, unsigned shr) +{ + /* We can do extract2 in 2 insns, vs the 3 required otherwise. */ + tgen_shli(s, TCG_TYPE_I32, TCG_REG_TMP, a2, 32 - shr); + tcg_out_dat_reg(s, COND_AL, ARITH_ORR, a0, TCG_REG_TMP, + a1, SHIFT_IMM_LSR(shr)); +} + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_extract2, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2411,28 +2425,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); break; - case INDEX_op_extract2_i32: - /* ??? These optimization vs zero should be generic. */ - /* ??? But we can't substitute 2 for 1 in the opcode stream yet. */ - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_REG, args[0], 0); - } else { - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, - args[2], SHIFT_IMM_LSL(32 - args[3])); - } - } else if (const_args[2]) { - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, - args[1], SHIFT_IMM_LSR(args[3])); - } else { - /* We can do extract2 in 2 insns, vs the 3 required otherwise. */ - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, - args[2], SHIFT_IMM_LSL(32 - args[3])); - tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP, - args[1], SHIFT_IMM_LSR(args[3])); - } - break; - case INDEX_op_mb: tcg_out_mb(s, args[0]); break; @@ -2464,8 +2456,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i32: return C_O0_I2(r, r); - case INDEX_op_extract2_i32: - return C_O1_I2(r, rZ, rZ); case INDEX_op_add2_i32: return C_O2_I4(r, r, r, r, rIN, rIK); case INDEX_op_sub2_i32: diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 6b91b23fe8..0328102c2a 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -26,14 +26,12 @@ #define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) /* optional instructions */ -#define TCG_TARGET_HAS_extract2_i32 1 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_extract2_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 1dd9741f45..2b2ad9ca95 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3261,6 +3261,21 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tgen_sextract, }; +static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0, + TCGReg a1, TCGReg a2, unsigned shr) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + /* Note that SHRD outputs to the r/m operand. */ + tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); + tcg_out8(s, shr); +} + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_extract2, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -3414,12 +3429,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; #endif - OP_32_64(extract2): - /* Note that SHRD outputs to the r/m operand. */ - tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); - tcg_out8(s, args[3]); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -4008,10 +4017,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_extract2_i32: - case INDEX_op_extract2_i64: - return C_O1_I2(r, 0, r); - case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 10090102f7..a1bd71db6a 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -10,13 +10,11 @@ #include "host/cpuinfo.h" /* optional instructions */ -#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ -#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 0b64efc078..eb2143703d 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1868,6 +1868,11 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tgen_sextract, }; +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 24b00f1eec..48a1e68fbe 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -51,13 +51,7 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions detected at runtime */ -#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 - -#if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_extract2_i64 0 -#endif - #define TCG_TARGET_HAS_qemu_ldst_i128 0 #define TCG_TARGET_HAS_tst 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index cd648ab1df..7fae1c51e9 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2273,6 +2273,11 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tgen_sextract, }; +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index bd9c3d92ed..033d58e095 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -17,14 +17,12 @@ #define have_vsx (cpuinfo & CPUINFO_VSX) /* optional instructions */ -#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #endif diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index fc92a4896d..a964239aab 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3495,6 +3495,10 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tgen_sextract, }; +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 88fadc2428..b2814f8ef9 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -10,12 +10,10 @@ #include "host/cpuinfo.h" /* optional instructions */ -#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 371e0c24c8..d74ac7587a 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2542,6 +2542,11 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tgen_sextract, }; +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 95407f61cf..4a2b71995d 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -29,13 +29,11 @@ extern uint64_t s390_facilities[3]; ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1) /* optional instructions */ -#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index d72393315d..ff06834e6e 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1637,6 +1637,10 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tgen_sextract, }; +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest) { ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 2ced6f7c1c..b8760dd154 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -14,13 +14,11 @@ extern bool use_vis3_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 741de260e9..4c7d916302 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1795,6 +1795,10 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tgen_sextract, }; +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 21bef070fe..6125ac677c 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -12,7 +12,6 @@ #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. */ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 /* Turn some undef macros into true macros. */ diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 961a39f446..5f95350d5d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -921,7 +921,7 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, t1 = tcg_temp_ebb_new_i32(); - if (TCG_TARGET_HAS_extract2_i32) { + if (tcg_op_supported(INDEX_op_extract2_i32, TCG_TYPE_I32, 0)) { if (ofs + len == 32) { tcg_gen_shli_i32(t1, arg1, len); tcg_gen_extract2_i32(ret, t1, arg2, len); @@ -1077,7 +1077,7 @@ void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, tcg_gen_mov_i32(ret, ah); } else if (al == ah) { tcg_gen_rotri_i32(ret, al, ofs); - } else if (TCG_TARGET_HAS_extract2_i32) { + } else if (tcg_op_supported(INDEX_op_extract2_i32, TCG_TYPE_I32, 0)) { tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -1799,7 +1799,7 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, tcg_gen_movi_i32(TCGV_LOW(ret), 0); } } else if (right) { - if (TCG_TARGET_HAS_extract2_i32) { + if (tcg_op_supported(INDEX_op_extract2_i32, TCG_TYPE_I32, 0)) { tcg_gen_extract2_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), c); } else { @@ -1813,7 +1813,7 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); } } else { - if (TCG_TARGET_HAS_extract2_i32) { + if (tcg_op_supported(INDEX_op_extract2_i32, TCG_TYPE_I32, 0)) { tcg_gen_extract2_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c); } else { @@ -2553,7 +2553,7 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, t1 = tcg_temp_ebb_new_i64(); - if (TCG_TARGET_HAS_extract2_i64) { + if (tcg_op_supported(INDEX_op_extract2_i64, TCG_TYPE_I64, 0)) { if (ofs + len == 64) { tcg_gen_shli_i64(t1, arg1, len); tcg_gen_extract2_i64(ret, t1, arg2, len); @@ -2781,7 +2781,7 @@ void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, tcg_gen_mov_i64(ret, ah); } else if (al == ah) { tcg_gen_rotri_i64(ret, al, ofs); - } else if (TCG_TARGET_HAS_extract2_i64) { + } else if (tcg_op_supported(INDEX_op_extract2_i64, TCG_TYPE_I64, 0)) { tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 6b49fd4acf..3fbbe3bd83 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1029,6 +1029,12 @@ typedef struct TCGOutOpExtract { unsigned ofs, unsigned len); } TCGOutOpExtract; +typedef struct TCGOutOpExtract2 { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned shr); +} TCGOutOpExtract2; + typedef struct TCGOutOpMovcond { TCGOutOp base; void (*out)(TCGContext *s, TCGType type, TCGCond cond, @@ -1140,6 +1146,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract), + OUTOP(INDEX_op_extract2_i32, TCGOutOpExtract2, outop_extract2), + OUTOP(INDEX_op_extract2_i64, TCGOutOpExtract2, outop_extract2), OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), @@ -2399,8 +2407,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i32: return true; - case INDEX_op_extract2_i32: - return TCG_TARGET_HAS_extract2_i32; case INDEX_op_add2_i32: return TCG_TARGET_HAS_add2_i32; case INDEX_op_sub2_i32: @@ -2427,8 +2433,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrh_i64_i32: return TCG_TARGET_REG_BITS == 64; - case INDEX_op_extract2_i64: - return TCG_TARGET_HAS_extract2_i64; case INDEX_op_add2_i64: return TCG_TARGET_HAS_add2_i64; case INDEX_op_sub2_i64: @@ -5593,6 +5597,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_extract2_i32: + case INDEX_op_extract2_i64: + { + const TCGOutOpExtract2 *out = &outop_extract2; + + tcg_debug_assert(!const_args[1]); + tcg_debug_assert(!const_args[2]); + out->out_rrr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } + break; + case INDEX_op_muls2: case INDEX_op_mulu2: { diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 90aa5c8bbb..4cb2b529ae 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -7,12 +7,10 @@ #ifndef TCG_TARGET_HAS_H #define TCG_TARGET_HAS_H -#define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_add2_i64 1 diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index ef14e81609..9a5ca9c778 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -447,6 +447,10 @@ static const TCGOutOpExtract outop_sextract = { .out_rr = tcg_out_sextract, }; +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { tcg_out_sextract(s, type, rd, rs, 0, 8); From 61d6a8767a5d4cd4fe5086ef98b53614ae099104 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 12 Jan 2025 21:40:43 -0800 Subject: [PATCH 109/161] tcg: Merge INDEX_op_extract2_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 4 ++-- include/tcg/tcg-opc.h | 5 +---- target/i386/tcg/emit.c.inc | 12 +----------- tcg/optimize.c | 10 +++++----- tcg/tcg-op.c | 16 ++++++++-------- tcg/tcg.c | 6 ++---- 6 files changed, 19 insertions(+), 34 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index aea8a897bd..9392d88069 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -476,9 +476,9 @@ Misc | | (using an arithmetic right shift) on TCG_TYPE_I32. - * - extract2_i32/i64 *dest*, *t1*, *t2*, *pos* + * - extract2 *dest*, *t1*, *t2*, *pos* - - | For N = {32,64}, extract an N-bit quantity from the concatenation + - | For TCG_TYPE_I{N}, extract an N-bit quantity from the concatenation of *t2*:*t1*, beginning at *pos*. The tcg_gen_extract2_{i32,i64} expander accepts 0 <= *pos* <= N as inputs. The backend code generator will not see either 0 or N as inputs for these opcodes. diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index c6848b3c63..1f995c54be 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -56,6 +56,7 @@ DEF(divu, 1, 2, 0, TCG_OPF_INT) DEF(divu2, 2, 3, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(extract, 1, 1, 2, TCG_OPF_INT) +DEF(extract2, 1, 2, 1, TCG_OPF_INT) DEF(movcond, 1, 4, 1, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) DEF(muls2, 2, 2, 0, TCG_OPF_INT) @@ -90,8 +91,6 @@ DEF(ld_i32, 1, 1, 1, 0) DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) -/* shifts/rotates */ -DEF(extract2_i32, 1, 2, 1, 0) DEF(add2_i32, 2, 4, 0, 0) DEF(sub2_i32, 2, 4, 0, 0) @@ -110,8 +109,6 @@ DEF(st8_i64, 0, 2, 1, 0) DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) DEF(st_i64, 0, 2, 1, 0) -/* shifts/rotates */ -DEF(extract2_i64, 1, 2, 1, 0) /* size changing ops */ DEF(ext_i32_i64, 1, 1, 0, 0) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index ca6bc2ea82..e3166e70a5 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,16 +19,6 @@ * License along with this library; if not, see . */ -/* - * Sometimes, knowing what the backend has can produce better code. - * The exact opcode to check depends on 32- vs. 64-bit. - */ -#ifdef TARGET_X86_64 -#define INDEX_op_extract2_tl INDEX_op_extract2_i64 -#else -#define INDEX_op_extract2_tl INDEX_op_extract2_i32 -#endif - #define MMX_OFFSET(reg) \ ({ assert((reg) >= 0 && (reg) <= 7); \ offsetof(CPUX86State, fpregs[reg].mmx); }) @@ -3023,7 +3013,7 @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode) tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); while (vec_len > 8) { vec_len -= 8; - if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_TL, 0)) { /* * Load the next byte of the result into the high byte of T. * TCG does a similar expansion of deposit to shl+extract2; by diff --git a/tcg/optimize.c b/tcg/optimize.c index acc566ed76..a728a4b2fa 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1873,12 +1873,12 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op) uint64_t v2 = arg_info(op->args[2])->val; int shr = op->args[3]; - if (op->opc == INDEX_op_extract2_i64) { - v1 >>= shr; - v2 <<= 64 - shr; - } else { + if (ctx->type == TCG_TYPE_I32) { v1 = (uint32_t)v1 >> shr; v2 = (uint64_t)((int32_t)v2 << (32 - shr)); + } else { + v1 >>= shr; + v2 <<= 64 - shr; } return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); } @@ -2878,7 +2878,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_extract: done = fold_extract(&ctx, op); break; - CASE_OP_32_64(extract2): + case INDEX_op_extract2: done = fold_extract2(&ctx, op); break; case INDEX_op_ext_i32_i64: diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 5f95350d5d..edbb214f7c 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -921,7 +921,7 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, t1 = tcg_temp_ebb_new_i32(); - if (tcg_op_supported(INDEX_op_extract2_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { if (ofs + len == 32) { tcg_gen_shli_i32(t1, arg1, len); tcg_gen_extract2_i32(ret, t1, arg2, len); @@ -1077,8 +1077,8 @@ void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, tcg_gen_mov_i32(ret, ah); } else if (al == ah) { tcg_gen_rotri_i32(ret, al, ofs); - } else if (tcg_op_supported(INDEX_op_extract2_i32, TCG_TYPE_I32, 0)) { - tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs); + } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { + tcg_gen_op4i_i32(INDEX_op_extract2, ret, al, ah, ofs); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_shri_i32(t0, al, ofs); @@ -1799,7 +1799,7 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, tcg_gen_movi_i32(TCGV_LOW(ret), 0); } } else if (right) { - if (tcg_op_supported(INDEX_op_extract2_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { tcg_gen_extract2_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), c); } else { @@ -1813,7 +1813,7 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); } } else { - if (tcg_op_supported(INDEX_op_extract2_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { tcg_gen_extract2_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c); } else { @@ -2553,7 +2553,7 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, t1 = tcg_temp_ebb_new_i64(); - if (tcg_op_supported(INDEX_op_extract2_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I64, 0)) { if (ofs + len == 64) { tcg_gen_shli_i64(t1, arg1, len); tcg_gen_extract2_i64(ret, t1, arg2, len); @@ -2781,8 +2781,8 @@ void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, tcg_gen_mov_i64(ret, ah); } else if (al == ah) { tcg_gen_rotri_i64(ret, al, ofs); - } else if (tcg_op_supported(INDEX_op_extract2_i64, TCG_TYPE_I64, 0)) { - tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs); + } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I64, 0)) { + tcg_gen_op4i_i64(INDEX_op_extract2, ret, al, ah, ofs); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_shri_i64(t0, al, ofs); diff --git a/tcg/tcg.c b/tcg/tcg.c index 3fbbe3bd83..037b5a4664 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1146,8 +1146,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract), - OUTOP(INDEX_op_extract2_i32, TCGOutOpExtract2, outop_extract2), - OUTOP(INDEX_op_extract2_i64, TCGOutOpExtract2, outop_extract2), + OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2), OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), @@ -5597,8 +5596,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_extract2_i32: - case INDEX_op_extract2_i64: + case INDEX_op_extract2: { const TCGOutOpExtract2 *out = &outop_extract2; From 3ff7e44ef35924f76dbe36510c54c27e72af9121 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 13 Jan 2025 20:14:09 -0800 Subject: [PATCH 110/161] tcg: Expand fallback add2 with 32-bit operations No need to expand to i64 to perform the add. This is smaller on a loongarch64 host, e.g. bstrpick_d r28, r27, 31, 0 bstrpick_d r29, r24, 31, 0 add_d r28, r28, r29 addi_w r29, r28, 0 srai_d r28, r28, 32 --- add_w r28, r27, r24 sltu r29, r28, r24 Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index edbb214f7c..8b1356c526 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1105,14 +1105,15 @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, if (TCG_TARGET_HAS_add2_i32) { tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); } else { - TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_concat_i32_i64(t0, al, ah); - tcg_gen_concat_i32_i64(t1, bl, bh); - tcg_gen_add_i64(t0, t0, t1); - tcg_gen_extr_i64_i32(rl, rh, t0); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + tcg_gen_add_i32(t0, al, bl); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, t0, al); + tcg_gen_add_i32(rh, ah, bh); + tcg_gen_add_i32(rh, rh, t1); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); } } From 3db22914ced9bb3683bffa03729608be0c42f666 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 13 Jan 2025 20:21:09 -0800 Subject: [PATCH 111/161] tcg: Expand fallback sub2 with 32-bit operations No need to expand to i64 to perform the subtract. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 8b1356c526..127338b994 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1123,14 +1123,15 @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, if (TCG_TARGET_HAS_sub2_i32) { tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); } else { - TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_concat_i32_i64(t0, al, ah); - tcg_gen_concat_i32_i64(t1, bl, bh); - tcg_gen_sub_i64(t0, t0, t1); - tcg_gen_extr_i64_i32(rl, rh, t0); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + tcg_gen_sub_i32(t0, al, bl); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, al, bl); + tcg_gen_sub_i32(rh, ah, bh); + tcg_gen_sub_i32(rh, rh, t1); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); } } From 8dc1bdf79553594287b6c3f9185fe028307aedcb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 13 Jan 2025 21:13:54 -0800 Subject: [PATCH 112/161] tcg: Do not default add2/sub2_i32 for 32-bit hosts Require TCG_TARGET_HAS_{add2,sub2}_i32 be defined, one way or another. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/arm/tcg-target-has.h | 2 ++ tcg/mips/tcg-target-has.h | 3 +++ tcg/ppc/tcg-target-has.h | 3 +++ tcg/tcg-has.h | 3 --- tcg/tci/tcg-target-has.h | 4 ++-- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 0d6a785542..3973df1f12 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -24,6 +24,8 @@ extern bool use_neon_instructions; #endif /* optional instructions */ +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 48a1e68fbe..9f6fa194b9 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -48,6 +48,9 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 +#else +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 #endif /* optional instructions detected at runtime */ diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 033d58e095..8d832ce99c 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -25,6 +25,9 @@ #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 +#else +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 \ diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 6125ac677c..50e8d0cda4 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -14,9 +14,6 @@ #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 -/* Turn some undef macros into true macros. */ -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 #endif #if !defined(TCG_TARGET_HAS_v64) \ diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 4cb2b529ae..6063f32f7b 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -8,11 +8,11 @@ #define TCG_TARGET_HAS_H #define TCG_TARGET_HAS_qemu_st8_i32 0 +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #endif /* TCG_TARGET_REG_BITS == 64 */ From 52c49c79b820ceeff10c7c414f747f9bbb39f6c9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 13 Jan 2025 21:16:40 -0800 Subject: [PATCH 113/161] tcg/mips: Drop support for add2/sub2 We now produce exactly the same code via generic expansion. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/mips/tcg-target-con-set.h | 1 - tcg/mips/tcg-target-con-str.h | 1 - tcg/mips/tcg-target-has.h | 7 ++-- tcg/mips/tcg-target.c.inc | 67 +---------------------------------- 4 files changed, 3 insertions(+), 73 deletions(-) diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index 4e09c9a400..5304691dc1 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -28,4 +28,3 @@ C_O1_I4(r, r, rz, rz, rz) C_O1_I4(r, r, r, rz, rz) C_O2_I1(r, r, r) C_O2_I2(r, r, r, r) -C_O2_I4(r, r, rz, rz, rN, rN) diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h index dfe2b156df..db2b225e4a 100644 --- a/tcg/mips/tcg-target-con-str.h +++ b/tcg/mips/tcg-target-con-str.h @@ -17,5 +17,4 @@ REGS('r', ALL_GENERAL_REGS) CONST('I', TCG_CT_CONST_U16) CONST('J', TCG_CT_CONST_S16) CONST('K', TCG_CT_CONST_P2M1) -CONST('N', TCG_CT_CONST_N16) CONST('W', TCG_CT_CONST_WSZ) diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 9f6fa194b9..9d86906bf3 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -39,18 +39,15 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions */ - -#if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 + +#if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 -#else -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 #endif /* optional instructions detected at runtime */ diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 7fae1c51e9..e69781b871 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -187,8 +187,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, #define TCG_CT_CONST_U16 0x100 /* Unsigned 16-bit: 0 - 0xffff. */ #define TCG_CT_CONST_S16 0x200 /* Signed 16-bit: -32768 - 32767 */ #define TCG_CT_CONST_P2M1 0x400 /* Power of 2 minus 1. */ -#define TCG_CT_CONST_N16 0x800 /* "Negatable" 16-bit: -32767 - 32767 */ -#define TCG_CT_CONST_WSZ 0x1000 /* word size */ +#define TCG_CT_CONST_WSZ 0x800 /* word size */ #define ALL_GENERAL_REGS 0xffffffffu @@ -207,8 +206,6 @@ static bool tcg_target_const_match(int64_t val, int ct, return 1; } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; - } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { - return 1; } else if ((ct & TCG_CT_CONST_P2M1) && use_mips32r2_instructions && is_p2m1(val)) { return 1; @@ -765,55 +762,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, - TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, - bool cbh, bool is_sub) -{ - TCGReg th = TCG_TMP1; - - /* If we have a negative constant such that negating it would - make the high part zero, we can (usually) eliminate one insn. */ - if (cbl && cbh && bh == -1 && bl != 0) { - bl = -bl; - bh = 0; - is_sub = !is_sub; - } - - /* By operating on the high part first, we get to use the final - carry operation to move back from the temporary. */ - if (!cbh) { - tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); - } else if (bh != 0 || ah == rl) { - tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); - } else { - th = ah; - } - - /* Note that tcg optimization should eliminate the bl == 0 case. */ - if (is_sub) { - if (cbl) { - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); - } else { - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); - tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); - } - tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); - } else { - if (cbl) { - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); - } else if (rl == al && rl == bl) { - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, TCG_TARGET_REG_BITS - 1); - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - } else { - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl)); - } - tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); - } -} - #define SETCOND_INV TCG_TARGET_NB_REGS #define SETCOND_NEZ (SETCOND_INV << 1) #define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) @@ -2370,15 +2318,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_add2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], true); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -2420,10 +2359,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_add2_i32: - case INDEX_op_sub2_i32: - return C_O2_I4(r, r, rz, rz, rN, rN); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, r); case INDEX_op_qemu_st_i32: From 0e08be0f5482af78f7ed703f665287964d1650f5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 13 Jan 2025 21:24:25 -0800 Subject: [PATCH 114/161] tcg/riscv: Drop support for add2/sub2 We now produce exactly the same code via generic expansion. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target-con-set.h | 1 - tcg/riscv/tcg-target-has.h | 6 +-- tcg/riscv/tcg-target.c.inc | 86 +--------------------------------- 3 files changed, 3 insertions(+), 90 deletions(-) diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index 5ff2c2db60..0fc26d3f98 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -18,7 +18,6 @@ C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) C_N1_I2(r, r, rM) C_O1_I4(r, r, rI, rM, rM) -C_O2_I4(r, r, rz, rz, rM, rM) C_O0_I2(v, r) C_O1_I1(v, r) C_O1_I1(v, v) diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index b2814f8ef9..c95dc1921e 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -10,13 +10,11 @@ #include "host/cpuinfo.h" /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index d74ac7587a..dce46dcba6 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -401,7 +401,7 @@ static bool tcg_target_const_match(int64_t val, int ct, } /* * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff]. - * Used by addsub2 and movcond, which may need the negative value, + * Used by movcond, which may need the negative value, * and requires the modified constant to be representable. */ if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) { @@ -1073,67 +1073,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -static void tcg_out_addsub2(TCGContext *s, - TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, - TCGArg bl, TCGArg bh, - bool cbl, bool cbh, bool is_sub, bool is32bit) -{ - const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD; - const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI; - const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB; - TCGReg th = TCG_REG_TMP1; - - /* If we have a negative constant such that negating it would - make the high part zero, we can (usually) eliminate one insn. */ - if (cbl && cbh && bh == -1 && bl != 0) { - bl = -bl; - bh = 0; - is_sub = !is_sub; - } - - /* By operating on the high part first, we get to use the final - carry operation to move back from the temporary. */ - if (!cbh) { - tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh); - } else if (bh != 0 || ah == rl) { - tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh)); - } else { - th = ah; - } - - /* Note that tcg optimization should eliminate the bl == 0 case. */ - if (is_sub) { - if (cbl) { - tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl); - tcg_out_opc_imm(s, opc_addi, rl, al, -bl); - } else { - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl); - tcg_out_opc_reg(s, opc_sub, rl, al, bl); - } - tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0); - } else { - if (cbl) { - tcg_out_opc_imm(s, opc_addi, rl, al, bl); - tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl); - } else if (al == bl) { - /* - * If the input regs overlap, this is a simple doubling - * and carry-out is the input msb. This special case is - * required when the output reg overlaps the input, - * but we might as well use it always. - */ - tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0); - tcg_out_opc_reg(s, opc_add, rl, al, al); - } else { - tcg_out_opc_reg(s, opc_add, rl, al, bl); - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, - rl, (rl == bl ? al : bl)); - } - tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0); - } -} - static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg src) { @@ -2608,23 +2547,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, OPC_SD, a0, a1, a2); break; - case INDEX_op_add2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], false, true); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], false, false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], true, true); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], true, false); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); break; @@ -2897,12 +2819,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(rz, r); - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_O2_I4(r, r, rz, rz, rM, rM); - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); From 7d10d8e076201e968d57f1c2f5ffd8c88ae1eec9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 14 Jan 2025 13:04:15 -0800 Subject: [PATCH 115/161] tcg: Move i into each for loop in liveness_pass_1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use per-loop variables instead of one 'i' for the function. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tcg.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 037b5a4664..e2ca02eddf 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3924,10 +3924,9 @@ liveness_pass_1(TCGContext *s) int nb_temps = s->nb_temps; TCGOp *op, *op_prev; TCGRegSet *prefs; - int i; prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); - for (i = 0; i < nb_temps; ++i) { + for (int i = 0; i < nb_temps; ++i) { s->temps[i].state_ptr = prefs + i; } @@ -3954,7 +3953,7 @@ liveness_pass_1(TCGContext *s) /* pure functions can be removed if their result is unused */ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { - for (i = 0; i < nb_oargs; i++) { + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); if (ts->state != TS_DEAD) { goto do_not_remove_call; @@ -3965,7 +3964,7 @@ liveness_pass_1(TCGContext *s) do_not_remove_call: /* Output args are dead. */ - for (i = 0; i < nb_oargs; i++) { + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; @@ -3988,7 +3987,7 @@ liveness_pass_1(TCGContext *s) } /* Record arguments that die in this helper. */ - for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; @@ -4008,7 +4007,7 @@ liveness_pass_1(TCGContext *s) * order so that if a temp is used more than once, the stack * reset to max happens before the register reset to 0. */ - for (i = nb_iargs - 1; i >= 0; i--) { + for (int i = nb_iargs - 1; i >= 0; i--) { const TCGCallArgumentLoc *loc = &info->in[i]; ts = arg_temp(op->args[nb_oargs + i]); @@ -4036,7 +4035,7 @@ liveness_pass_1(TCGContext *s) * If a temp is used once, this produces a single set bit; * if a temp is used multiple times, this produces a set. */ - for (i = 0; i < nb_iargs; i++) { + for (int i = 0; i < nb_iargs; i++) { const TCGCallArgumentLoc *loc = &info->in[i]; ts = arg_temp(op->args[nb_oargs + i]); @@ -4135,7 +4134,7 @@ liveness_pass_1(TCGContext *s) its outputs are dead. We assume that nb_oargs == 0 implies side effects */ if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { - for (i = 0; i < nb_oargs; i++) { + for (int i = 0; i < nb_oargs; i++) { if (arg_temp(op->args[i])->state != TS_DEAD) { goto do_not_remove; } @@ -4149,7 +4148,7 @@ liveness_pass_1(TCGContext *s) break; do_not_remove: - for (i = 0; i < nb_oargs; i++) { + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); /* Remember the preference of the uses that followed. */ @@ -4183,7 +4182,7 @@ liveness_pass_1(TCGContext *s) } /* Record arguments that die in this opcode. */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; @@ -4191,7 +4190,7 @@ liveness_pass_1(TCGContext *s) } /* Input arguments are live for preceding opcodes. */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { /* For operands that were dead, initially allow @@ -4215,7 +4214,7 @@ liveness_pass_1(TCGContext *s) default: args_ct = opcode_args_ct(op); - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { const TCGArgConstraint *ct = &args_ct[i]; TCGRegSet set, *pset; From 3e3689df4e05eb76b64a9e45247d87f9dad03177 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 14 Jan 2025 13:12:35 -0800 Subject: [PATCH 116/161] tcg: Sink def, nb_iargs, nb_oargs loads in liveness_pass_1 Sink the sets of the def, nb_iargs, nb_oargs variables to the default and do_not_remove labels. They're not really needed beforehand, and it avoids preceding code from having to keep them up-to-date. Note that def had *not* been kept up-to-date; thankfully only def->flags had been used and those bits were constant between opcode changes. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tcg.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index e2ca02eddf..2849bba480 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -4071,8 +4071,6 @@ liveness_pass_1(TCGContext *s) case INDEX_op_sub2_i64: opc_new = INDEX_op_sub; do_addsub2: - nb_iargs = 4; - nb_oargs = 2; /* Test if the high part of the operation is dead, but not the low part. The result can be optimized to a simple add or sub. This happens often for x86_64 guest when the @@ -4087,8 +4085,6 @@ liveness_pass_1(TCGContext *s) op->args[1] = op->args[2]; op->args[2] = op->args[4]; /* Fall through and mark the single-word operation live. */ - nb_iargs = 2; - nb_oargs = 1; } goto do_not_remove; @@ -4100,8 +4096,6 @@ liveness_pass_1(TCGContext *s) opc_new = INDEX_op_mul; opc_new2 = INDEX_op_muluh; do_mul2: - nb_iargs = 2; - nb_oargs = 2; if (arg_temp(op->args[1])->state == TS_DEAD) { if (arg_temp(op->args[0])->state == TS_DEAD) { /* Both parts of the operation are dead. */ @@ -4122,19 +4116,15 @@ liveness_pass_1(TCGContext *s) goto do_not_remove; } /* Mark the single-word operation live. */ - nb_oargs = 1; goto do_not_remove; default: - /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ - nb_iargs = def->nb_iargs; - nb_oargs = def->nb_oargs; - /* Test if the operation can be removed because all its outputs are dead. We assume that nb_oargs == 0 implies side effects */ - if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { - for (int i = 0; i < nb_oargs; i++) { + def = &tcg_op_defs[opc]; + if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) { + for (int i = def->nb_oargs - 1; i >= 0; i--) { if (arg_temp(op->args[i])->state != TS_DEAD) { goto do_not_remove; } @@ -4148,6 +4138,10 @@ liveness_pass_1(TCGContext *s) break; do_not_remove: + def = &tcg_op_defs[opc]; + nb_iargs = def->nb_iargs; + nb_oargs = def->nb_oargs; + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); From 76f42780292c16a0d2f36cbbfbaf57495cd4d5e8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 14 Jan 2025 13:58:39 -0800 Subject: [PATCH 117/161] tcg: Add add/sub with carry opcodes and infrastructure Liveness needs to track carry-live state in order to determine if the (hidden) output of the opcode is used. Code generation needs to track carry-live state in order to avoid clobbering cpu flags when loading constants. So far, output routines and backends are unchanged. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 61 +++++++++++++++++ include/tcg/tcg-opc.h | 10 +++ include/tcg/tcg.h | 13 +++- tcg/optimize.c | 11 +++ tcg/tcg.c | 150 ++++++++++++++++++++++++++++++++++++++--- 5 files changed, 235 insertions(+), 10 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 9392d88069..93bcc70639 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -593,6 +593,67 @@ Multiword arithmetic support .. list-table:: + * - addco *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* and in addition output to the + carry bit provided by the host architecture. + + * - addci *t0, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* + *C*, where *C* is the + input carry bit provided by the host architecture. + The output carry bit need not be computed. + + * - addcio *t0, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* + *C*, where *C* is the + input carry bit provided by the host architecture, + and also compute the output carry bit. + + * - addc1o *t0, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* + 1, and in addition output to the + carry bit provided by the host architecture. This is akin to + *addcio* with a fixed carry-in value of 1. + | This is intended to be used by the optimization pass, + intermediate to complete folding of the addition chain. + In some cases complete folding is not possible and this + opcode will remain until output. If this happens, the + code generator will use ``tcg_out_set_carry`` and then + the output routine for *addcio*. + + * - subbo *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* and in addition output to the + borrow bit provided by the host architecture. + | Depending on the host architecture, the carry bit may or may not be + identical to the borrow bit. Thus the addc\* and subb\* + opcodes must not be mixed. + + * - subbi *t0, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - *B*, where *B* is the + input borrow bit provided by the host architecture. + The output borrow bit need not be computed. + + * - subbio *t0, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - *B*, where *B* is the + input borrow bit provided by the host architecture, + and also compute the output borrow bit. + + * - subb1o *t0, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - 1, and in addition output to the + borrow bit provided by the host architecture. This is akin to + *subbio* with a fixed borrow-in value of 1. + | This is intended to be used by the optimization pass, + intermediate to complete folding of the subtraction chain. + In some cases complete folding is not possible and this + opcode will remain until output. If this happens, the + code generator will use ``tcg_out_set_borrow`` and then + the output routine for *subbio*. + * - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 1f995c54be..9cc20cd62c 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -82,6 +82,16 @@ DEF(shr, 1, 2, 0, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) +DEF(addco, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(addc1o, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(addci, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN) +DEF(addcio, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN | TCG_OPF_CARRY_OUT) + +DEF(subbo, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(subb1o, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(subbi, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN) +DEF(subbio, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN | TCG_OPF_CARRY_OUT) + /* load/store */ DEF(ld8u_i32, 1, 1, 1, 0) DEF(ld8s_i32, 1, 1, 1, 0) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index c6b50b5226..aa300a2f8b 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -418,6 +418,11 @@ struct TCGContext { MemOp riscv_cur_vsew; TCGType riscv_cur_type; #endif + /* + * During the tcg_reg_alloc_op loop, we are within a sequence of + * carry-using opcodes like addco+addci. + */ + bool carry_live; GHashTable *const_table[TCG_TYPE_COUNT]; TCGTempSet free_temps[TCG_TYPE_COUNT]; @@ -749,13 +754,17 @@ enum { /* Instruction operands are vectors. */ TCG_OPF_VECTOR = 0x40, /* Instruction is a conditional branch. */ - TCG_OPF_COND_BRANCH = 0x80 + TCG_OPF_COND_BRANCH = 0x80, + /* Instruction produces carry out. */ + TCG_OPF_CARRY_OUT = 0x100, + /* Instruction consumes carry in. */ + TCG_OPF_CARRY_IN = 0x200, }; typedef struct TCGOpDef { const char *name; uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; - uint8_t flags; + uint16_t flags; } TCGOpDef; extern const TCGOpDef tcg_op_defs[]; diff --git a/tcg/optimize.c b/tcg/optimize.c index a728a4b2fa..8b00833f97 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1226,6 +1226,12 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op) return finish_folding(ctx, op); } +static bool fold_add_carry(OptContext *ctx, TCGOp *op) +{ + fold_commutative(ctx, op); + return finish_folding(ctx, op); +} + static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) { bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]); @@ -2829,6 +2835,11 @@ void tcg_optimize(TCGContext *s) case INDEX_op_add_vec: done = fold_add_vec(&ctx, op); break; + case INDEX_op_addci: + case INDEX_op_addco: + case INDEX_op_addcio: + done = fold_add_carry(&ctx, op); + break; CASE_OP_32_64(add2): done = fold_add2(&ctx, op); break; diff --git a/tcg/tcg.c b/tcg/tcg.c index 2849bba480..f04ad0afcf 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3914,6 +3914,17 @@ liveness_pass_0(TCGContext *s) } } +static void assert_carry_dead(TCGContext *s) +{ + /* + * Carry operations can be separated by a few insns like mov, + * load or store, but they should always be "close", and + * carry-out operations should always be paired with carry-in. + * At various boundaries, carry must have been consumed. + */ + tcg_debug_assert(!s->carry_live); +} + /* Liveness analysis : update the opc_arg_life array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. */ @@ -3933,17 +3944,19 @@ liveness_pass_1(TCGContext *s) /* ??? Should be redundant with the exit_tb that ends the TB. */ la_func_end(s, nb_globals, nb_temps); + s->carry_live = false; QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { int nb_iargs, nb_oargs; TCGOpcode opc_new, opc_new2; TCGLifeData arg_life = 0; TCGTemp *ts; TCGOpcode opc = op->opc; - const TCGOpDef *def = &tcg_op_defs[opc]; + const TCGOpDef *def; const TCGArgConstraint *args_ct; switch (opc) { case INDEX_op_call: + assert_carry_dead(s); { const TCGHelperInfo *info = tcg_call_info(op); int call_flags = tcg_call_flags(op); @@ -4055,6 +4068,7 @@ liveness_pass_1(TCGContext *s) } break; case INDEX_op_insn_start: + assert_carry_dead(s); break; case INDEX_op_discard: /* mark the temporary as dead */ @@ -4071,6 +4085,7 @@ liveness_pass_1(TCGContext *s) case INDEX_op_sub2_i64: opc_new = INDEX_op_sub; do_addsub2: + assert_carry_dead(s); /* Test if the high part of the operation is dead, but not the low part. The result can be optimized to a simple add or sub. This happens often for x86_64 guest when the @@ -4096,6 +4111,7 @@ liveness_pass_1(TCGContext *s) opc_new = INDEX_op_mul; opc_new2 = INDEX_op_muluh; do_mul2: + assert_carry_dead(s); if (arg_temp(op->args[1])->state == TS_DEAD) { if (arg_temp(op->args[0])->state == TS_DEAD) { /* Both parts of the operation are dead. */ @@ -4118,10 +4134,89 @@ liveness_pass_1(TCGContext *s) /* Mark the single-word operation live. */ goto do_not_remove; + case INDEX_op_addco: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_add; + goto do_default; + + case INDEX_op_addcio: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_addci; + goto do_default; + + case INDEX_op_subbo: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to sub, but this may also require canonicalization. */ + op->opc = opc = INDEX_op_sub; + ts = arg_temp(op->args[2]); + if (ts->kind == TEMP_CONST) { + ts = tcg_constant_internal(ts->type, -ts->val); + if (ts->state_ptr == NULL) { + tcg_debug_assert(temp_idx(ts) == nb_temps); + nb_temps++; + ts->state_ptr = tcg_malloc(sizeof(TCGRegSet)); + ts->state = TS_DEAD; + la_reset_pref(ts); + } + op->args[2] = temp_arg(ts); + op->opc = opc = INDEX_op_add; + } + goto do_default; + + case INDEX_op_subbio: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_subbi; + goto do_default; + + case INDEX_op_addc1o: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to add, add +1. */ + op_prev = tcg_op_insert_before(s, op, INDEX_op_add, + TCGOP_TYPE(op), 3); + op_prev->args[0] = op->args[0]; + op_prev->args[1] = op->args[1]; + op_prev->args[2] = op->args[2]; + op->opc = opc = INDEX_op_add; + op->args[1] = op->args[0]; + ts = arg_temp(op->args[0]); + ts = tcg_constant_internal(ts->type, 1); + op->args[2] = temp_arg(ts); + goto do_default; + + case INDEX_op_subb1o: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to sub, add -1. */ + op_prev = tcg_op_insert_before(s, op, INDEX_op_sub, + TCGOP_TYPE(op), 3); + op_prev->args[0] = op->args[0]; + op_prev->args[1] = op->args[1]; + op_prev->args[2] = op->args[2]; + op->opc = opc = INDEX_op_add; + op->args[1] = op->args[0]; + ts = arg_temp(op->args[0]); + ts = tcg_constant_internal(ts->type, -1); + op->args[2] = temp_arg(ts); + goto do_default; + default: - /* Test if the operation can be removed because all - its outputs are dead. We assume that nb_oargs == 0 - implies side effects */ + do_default: + /* + * Test if the operation can be removed because all + * its outputs are dead. We assume that nb_oargs == 0 + * implies side effects. + */ def = &tcg_op_defs[opc]; if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) { for (int i = def->nb_oargs - 1; i >= 0; i--) { @@ -4163,12 +4258,16 @@ liveness_pass_1(TCGContext *s) /* If end of basic block, update. */ if (def->flags & TCG_OPF_BB_EXIT) { + assert_carry_dead(s); la_func_end(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_COND_BRANCH) { + assert_carry_dead(s); la_bb_sync(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_BB_END) { + assert_carry_dead(s); la_bb_end(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { + assert_carry_dead(s); la_global_sync(s, nb_globals); if (def->flags & TCG_OPF_CALL_CLOBBER) { la_cross_call(s, nb_temps); @@ -4182,6 +4281,9 @@ liveness_pass_1(TCGContext *s) arg_life |= DEAD_ARG << i; } } + if (def->flags & TCG_OPF_CARRY_OUT) { + s->carry_live = false; + } /* Input arguments are live for preceding opcodes. */ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { @@ -4193,6 +4295,9 @@ liveness_pass_1(TCGContext *s) ts->state &= ~TS_DEAD; } } + if (def->flags & TCG_OPF_CARRY_IN) { + s->carry_live = true; + } /* Incorporate constraints for this operand. */ switch (opc) { @@ -4232,6 +4337,7 @@ liveness_pass_1(TCGContext *s) } op->life = arg_life; } + assert_carry_dead(s); } /* Liveness analysis: Convert indirect regs to direct temporaries. */ @@ -4820,9 +4926,8 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) all globals are stored at their canonical location. */ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) { - int i; - - for (i = s->nb_globals; i < s->nb_temps; i++) { + assert_carry_dead(s); + for (int i = s->nb_globals; i < s->nb_temps; i++) { TCGTemp *ts = &s->temps[i]; switch (ts->kind) { @@ -4853,6 +4958,7 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) */ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) { + assert_carry_dead(s); sync_globals(s, allocated_regs); for (int i = s->nb_globals; i < s->nb_temps; i++) { @@ -5124,6 +5230,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) int const_args[TCG_MAX_OP_ARGS]; TCGCond op_cond; + if (def->flags & TCG_OPF_CARRY_IN) { + tcg_debug_assert(s->carry_live); + } + nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; @@ -5380,6 +5490,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) tcg_reg_alloc_bb_end(s, i_allocated_regs); } else { if (def->flags & TCG_OPF_CALL_CLOBBER) { + assert_carry_dead(s); /* XXX: permit generic clobber register list ? */ for (i = 0; i < TCG_TARGET_NB_REGS; i++) { if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { @@ -5497,7 +5608,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_sub: { - const TCGOutOpSubtract *out = &outop_sub; + const TCGOutOpSubtract *out = + container_of(all_outop[op->opc], TCGOutOpSubtract, base); /* * Constants should never appear in the second source operand. @@ -5512,6 +5624,16 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_addco: + case INDEX_op_subbo: + case INDEX_op_addci: + case INDEX_op_subbi: + case INDEX_op_addcio: + case INDEX_op_subbio: + case INDEX_op_addc1o: + case INDEX_op_subb1o: + g_assert_not_reached(); + case INDEX_op_bswap64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: @@ -5700,6 +5822,13 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; } + if (def->flags & TCG_OPF_CARRY_IN) { + s->carry_live = false; + } + if (def->flags & TCG_OPF_CARRY_OUT) { + s->carry_live = true; + } + /* move the outputs in the correct register if needed */ for(i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); @@ -6702,6 +6831,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) tcg_out_tb_start(s); num_insns = -1; + s->carry_live = false; QTAILQ_FOREACH(op, &s->ops, link) { TCGOpcode opc = op->opc; @@ -6730,6 +6860,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) tcg_reg_alloc_dup(s, op); break; case INDEX_op_insn_start: + assert_carry_dead(s); if (num_insns >= 0) { size_t off = tcg_current_code_size(s); s->gen_insn_end_off[num_insns] = off; @@ -6750,6 +6881,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) tcg_out_label(s, arg_label(op->args[0])); break; case INDEX_op_call: + assert_carry_dead(s); tcg_reg_alloc_call(s, op); break; case INDEX_op_exit_tb: @@ -6786,6 +6918,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) return -2; } } + assert_carry_dead(s); + tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); From ee60315210058c9b3d0869b2046eeb254ba33f3a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 14 Jan 2025 23:27:53 -0800 Subject: [PATCH 118/161] tcg: Add TCGOutOp structures for add/sub carry opcodes Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 34 ++++++++++++++++++ tcg/arm/tcg-target.c.inc | 34 ++++++++++++++++++ tcg/i386/tcg-target.c.inc | 34 ++++++++++++++++++ tcg/loongarch64/tcg-target.c.inc | 34 ++++++++++++++++++ tcg/mips/tcg-target.c.inc | 34 ++++++++++++++++++ tcg/ppc/tcg-target.c.inc | 34 ++++++++++++++++++ tcg/riscv/tcg-target.c.inc | 34 ++++++++++++++++++ tcg/s390x/tcg-target.c.inc | 34 ++++++++++++++++++ tcg/sparc64/tcg-target.c.inc | 34 ++++++++++++++++++ tcg/tcg.c | 61 +++++++++++++++++++++++++++----- tcg/tci/tcg-target.c.inc | 34 ++++++++++++++++++ 11 files changed, 393 insertions(+), 8 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index bece494c55..87f8c98ed7 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2078,6 +2078,23 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2421,6 +2438,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index f366424af5..aa0397520d 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1826,6 +1826,23 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2135,6 +2152,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rir = tgen_subfi, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 2b2ad9ca95..04e31cae12 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2629,6 +2629,23 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3054,6 +3071,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index eb2143703d..4f640764ef 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1338,6 +1338,23 @@ static const TCGOutOpBinary outop_add = { .out_rri = tcg_out_addi, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1727,6 +1744,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index e69781b871..0c268cef42 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1593,6 +1593,23 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2044,6 +2061,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index a964239aab..5b04655f3b 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2863,6 +2863,23 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -3267,6 +3284,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rir = tgen_subfi, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index dce46dcba6..707ebb8f6d 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1947,6 +1947,23 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2333,6 +2350,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index ff06834e6e..a30afb455e 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2248,6 +2248,23 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -2766,6 +2783,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 4c7d916302..12f0dbd23d 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1381,6 +1381,23 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -1717,6 +1734,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { diff --git a/tcg/tcg.c b/tcg/tcg.c index f04ad0afcf..3b9f519ef6 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -133,6 +133,8 @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_goto_tb(TCGContext *s, int which); +static void tcg_out_set_carry(TCGContext *s); +static void tcg_out_set_borrow(TCGContext *s); static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]); @@ -978,6 +980,18 @@ typedef struct TCGOutOp { TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags); } TCGOutOp; +typedef struct TCGOutOpAddSubCarry { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2); + void (*out_rir)(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2); + void (*out_rii)(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2); +} TCGOutOpAddSubCarry; + typedef struct TCGOutOpBinary { TCGOutOp base; void (*out_rrr)(TCGContext *s, TCGType type, @@ -1131,6 +1145,11 @@ static const TCGOutOpUnary outop_extrl_i64_i32 = { /* Register allocation descriptions for every TCGOpcode. */ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), + OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci), + OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio), + OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco), + /* addc1o is implemented with set_carry + addcio */ + OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio), OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond), @@ -1170,6 +1189,11 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), + OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi), + OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio), + OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo), + /* subb1o is implemented with set_borrow + subbio */ + OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), #if TCG_TARGET_REG_BITS == 32 @@ -5569,7 +5593,12 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* emit instruction */ TCGType type = TCGOP_TYPE(op); switch (op->opc) { + case INDEX_op_addc1o: + tcg_out_set_carry(s); + /* fall through */ case INDEX_op_add: + case INDEX_op_addcio: + case INDEX_op_addco: case INDEX_op_and: case INDEX_op_andc: case INDEX_op_clz: @@ -5608,8 +5637,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) case INDEX_op_sub: { - const TCGOutOpSubtract *out = - container_of(all_outop[op->opc], TCGOutOpSubtract, base); + const TCGOutOpSubtract *out = &outop_sub; /* * Constants should never appear in the second source operand. @@ -5624,15 +5652,32 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_addco: - case INDEX_op_subbo: + case INDEX_op_subb1o: + tcg_out_set_borrow(s); + /* fall through */ case INDEX_op_addci: case INDEX_op_subbi: - case INDEX_op_addcio: case INDEX_op_subbio: - case INDEX_op_addc1o: - case INDEX_op_subb1o: - g_assert_not_reached(); + case INDEX_op_subbo: + { + const TCGOutOpAddSubCarry *out = + container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base); + + if (const_args[2]) { + if (const_args[1]) { + out->out_rii(s, type, new_args[0], + new_args[1], new_args[2]); + } else { + out->out_rri(s, type, new_args[0], + new_args[1], new_args[2]); + } + } else if (const_args[1]) { + out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } + break; case INDEX_op_bswap64: case INDEX_op_ext_i32_i64: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 9a5ca9c778..bba96d7a19 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -573,6 +573,23 @@ static const TCGOutOpBinary outop_add = { .out_rrr = tgen_add, }; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -893,6 +910,23 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tgen_xor(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { From aeb3514bd06b278dd026c90e8f71ca5b32762ab9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 14 Jan 2025 18:28:15 -0800 Subject: [PATCH 119/161] tcg/optimize: Handle add/sub with carry opcodes Propagate known carry when possible, and simplify the opcodes to not require carry-in when known. The result will be cleaned up further by the subsequent liveness analysis pass. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/optimize.c | 319 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 316 insertions(+), 3 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 8b00833f97..cfcd0ab7f9 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -66,6 +66,7 @@ typedef struct OptContext { /* In flight values from optimization. */ TCGType type; + int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */ } OptContext; static inline TempOptInfo *ts_info(TCGTemp *ts) @@ -1203,8 +1204,10 @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) * 3) those that produce information about the result value. */ +static bool fold_addco(OptContext *ctx, TCGOp *op); static bool fold_or(OptContext *ctx, TCGOp *op); static bool fold_orc(OptContext *ctx, TCGOp *op); +static bool fold_subbo(OptContext *ctx, TCGOp *op); static bool fold_xor(OptContext *ctx, TCGOp *op); static bool fold_add(OptContext *ctx, TCGOp *op) @@ -1226,9 +1229,167 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op) return finish_folding(ctx, op); } -static bool fold_add_carry(OptContext *ctx, TCGOp *op) +static void squash_prev_carryout(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t2; + + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_addco: + op->opc = INDEX_op_add; + fold_add(ctx, op); + break; + case INDEX_op_addcio: + op->opc = INDEX_op_addci; + break; + case INDEX_op_addc1o: + op->opc = INDEX_op_add; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); + } else { + TCGArg ret = op->args[0]; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, 1); + } + break; + default: + g_assert_not_reached(); + } +} + +static bool fold_addci(OptContext *ctx, TCGOp *op) { fold_commutative(ctx, op); + + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_carryout(ctx, op); + op->opc = INDEX_op_add; + + if (ctx->carry_state > 0) { + TempOptInfo *t2 = arg_info(op->args[2]); + + /* + * Propagate the known carry-in into a constant, if possible. + * Otherwise emit a second add +1. + */ + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); + } else { + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3); + + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_add(ctx, op2); + + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, 1); + } + } + + ctx->carry_state = -1; + return fold_add(ctx, op); +} + +static bool fold_addcio(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t sum, max; + + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. + */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) && + !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) { + carry_out = 0; + } + + if (ctx->carry_state < 0) { + ctx->carry_state = carry_out; + return finish_folding(ctx, op); + } + + squash_prev_carryout(ctx, op); + if (ctx->carry_state == 0) { + goto do_addco; + } + + /* Propagate the known carry-in into a constant, if possible. */ + max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + if (ti_is_const(t2)) { + uint64_t v = ti_const_val(t2) & max; + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + /* max + known carry in produces known carry out. */ + carry_out = 1; + } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1) & max; + if (v < max) { + op->args[1] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + carry_out = 1; + } + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_addc1o; + ctx->carry_state = carry_out; + return finish_folding(ctx, op); + + do_addco: + op->opc = INDEX_op_addco; + return fold_addco(ctx, op); +} + +static bool fold_addco(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t ign; + + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + + if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + /* Given sign-extension of z_mask for I32, we need not truncate. */ + carry_out = uadd64_overflow(v1, v2, &ign); + } else if (v2 == 0) { + carry_out = 0; + } + } else { + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. + */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) { + carry_out = 0; + } + } + ctx->carry_state = carry_out; return finish_folding(ctx, op); } @@ -2649,6 +2810,145 @@ static bool fold_sub2(OptContext *ctx, TCGOp *op) return fold_addsub2(ctx, op, false); } +static void squash_prev_borrowout(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t2; + + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_subbo: + op->opc = INDEX_op_sub; + fold_sub(ctx, op); + break; + case INDEX_op_subbio: + op->opc = INDEX_op_subbi; + break; + case INDEX_op_subb1o: + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->opc = INDEX_op_add; + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); + } else { + TCGArg ret = op->args[0]; + op->opc = INDEX_op_sub; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, -1); + } + break; + default: + g_assert_not_reached(); + } +} + +static bool fold_subbi(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t2; + int borrow_in = ctx->carry_state; + + if (borrow_in < 0) { + return finish_folding(ctx, op); + } + ctx->carry_state = -1; + + squash_prev_borrowout(ctx, op); + if (borrow_in == 0) { + op->opc = INDEX_op_sub; + return fold_sub(ctx, op); + } + + /* + * Propagate the known carry-in into any constant, then negate to + * transform from sub to add. If there is no constant, emit a + * separate add -1. + */ + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + } else { + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3); + + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_sub(ctx, op2); + + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, -1); + } + op->opc = INDEX_op_add; + return fold_add(ctx, op); +} + +static bool fold_subbio(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1, *t2; + int borrow_out = -1; + + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_borrowout(ctx, op); + if (ctx->carry_state == 0) { + goto do_subbo; + } + + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* Propagate the known borrow-in into a constant, if possible. */ + if (ti_is_const(t2)) { + uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + uint64_t v = ti_const_val(t2) & max; + + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_subbo; + } + /* subtracting max + 1 produces known borrow out. */ + borrow_out = 1; + } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1); + if (v != 0) { + op->args[2] = arg_new_constant(ctx, v - 1); + goto do_subbo; + } + } + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_subb1o; + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); + + do_subbo: + op->opc = INDEX_op_subbo; + return fold_subbo(ctx, op); +} + +static bool fold_subbo(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + int borrow_out = -1; + + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + if (v2 == 0) { + borrow_out = 0; + } else if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + borrow_out = v1 < v2; + } + } + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); +} + static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) { uint64_t z_mask = -1, s_mask = 0; @@ -2836,9 +3136,13 @@ void tcg_optimize(TCGContext *s) done = fold_add_vec(&ctx, op); break; case INDEX_op_addci: - case INDEX_op_addco: + done = fold_addci(&ctx, op); + break; case INDEX_op_addcio: - done = fold_add_carry(&ctx, op); + done = fold_addcio(&ctx, op); + break; + case INDEX_op_addco: + done = fold_addco(&ctx, op); break; CASE_OP_32_64(add2): done = fold_add2(&ctx, op); @@ -3020,6 +3324,15 @@ void tcg_optimize(TCGContext *s) case INDEX_op_sub: done = fold_sub(&ctx, op); break; + case INDEX_op_subbi: + done = fold_subbi(&ctx, op); + break; + case INDEX_op_subbio: + done = fold_subbio(&ctx, op); + break; + case INDEX_op_subbo: + done = fold_subbo(&ctx, op); + break; case INDEX_op_sub_vec: done = fold_sub_vec(&ctx, op); break; From e2f5ee36afb3b5306c99f40044534ae7d2580114 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 14 Jan 2025 23:08:24 -0800 Subject: [PATCH 120/161] tcg/optimize: With two const operands, prefer 0 in arg1 For most binary operands, two const operands fold. However, the add/sub carry opcodes have a third input. Prefer "reg, zero, const" since many risc hosts have a zero register that can fit a "reg, reg, const" insn format. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/optimize.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index cfcd0ab7f9..95ec3b426d 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -737,12 +737,18 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, #define NO_DEST temp_arg(NULL) +static int pref_commutative(TempOptInfo *ti) +{ + /* Slight preference for non-zero constants second. */ + return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2; +} + static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) { TCGArg a1 = *p1, a2 = *p2; int sum = 0; - sum += arg_is_const(a1); - sum -= arg_is_const(a2); + sum += pref_commutative(arg_info(a1)); + sum -= pref_commutative(arg_info(a2)); /* Prefer the constant in second argument, and then the form op a, a, b, which is better handled on non-RISC hosts. */ @@ -757,10 +763,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) static bool swap_commutative2(TCGArg *p1, TCGArg *p2) { int sum = 0; - sum += arg_is_const(p1[0]); - sum += arg_is_const(p1[1]); - sum -= arg_is_const(p2[0]); - sum -= arg_is_const(p2[1]); + sum += pref_commutative(arg_info(p1[0])); + sum += pref_commutative(arg_info(p1[1])); + sum -= pref_commutative(arg_info(p2[0])); + sum -= pref_commutative(arg_info(p2[1])); if (sum > 0) { TCGArg t; t = p1[0], p1[0] = p2[0], p2[0] = t; From c4d2e3d736197bb2d82bd37de0de1819222d9768 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 13 Jan 2025 23:29:42 -0800 Subject: [PATCH 121/161] tcg: Use add carry opcodes to expand add2 Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 127338b994..f17ec658fb 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1102,7 +1102,13 @@ void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) { - if (TCG_TARGET_HAS_add2_i32) { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_op3_i32(INDEX_op_addco, t0, al, bl); + tcg_gen_op3_i32(INDEX_op_addci, rh, ah, bh); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + } else if (TCG_TARGET_HAS_add2_i32) { tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -2822,7 +2828,26 @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) { - if (TCG_TARGET_HAS_add2_i64) { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_REG, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op3_i32(INDEX_op_addco, TCGV_LOW(t0), + TCGV_LOW(al), TCGV_LOW(bl)); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_HIGH(t0), + TCGV_HIGH(al), TCGV_HIGH(bl)); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_LOW(rh), + TCGV_LOW(ah), TCGV_LOW(bh)); + tcg_gen_op3_i32(INDEX_op_addci, TCGV_HIGH(rh), + TCGV_HIGH(ah), TCGV_HIGH(bh)); + } else { + tcg_gen_op3_i64(INDEX_op_addco, t0, al, bl); + tcg_gen_op3_i64(INDEX_op_addci, rh, ah, bh); + } + + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + } else if (TCG_TARGET_HAS_add2_i64) { tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); From 7a89deae635ceaf687973dd95e4714af27a3f9d2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 14 Jan 2025 18:58:05 -0800 Subject: [PATCH 122/161] tcg: Use sub carry opcodes to expand sub2 Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index f17ec658fb..447b0ebacd 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1126,7 +1126,13 @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) { - if (TCG_TARGET_HAS_sub2_i32) { + if (tcg_op_supported(INDEX_op_subbi, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_op3_i32(INDEX_op_subbo, t0, al, bl); + tcg_gen_op3_i32(INDEX_op_subbi, rh, ah, bh); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + } else if (TCG_TARGET_HAS_sub2_i32) { tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); @@ -2865,7 +2871,26 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) { - if (TCG_TARGET_HAS_sub2_i64) { + if (tcg_op_supported(INDEX_op_subbi, TCG_TYPE_REG, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op3_i32(INDEX_op_subbo, TCGV_LOW(t0), + TCGV_LOW(al), TCGV_LOW(bl)); + tcg_gen_op3_i32(INDEX_op_subbio, TCGV_HIGH(t0), + TCGV_HIGH(al), TCGV_HIGH(bl)); + tcg_gen_op3_i32(INDEX_op_subbio, TCGV_LOW(rh), + TCGV_LOW(ah), TCGV_LOW(bh)); + tcg_gen_op3_i32(INDEX_op_subbi, TCGV_HIGH(rh), + TCGV_HIGH(ah), TCGV_HIGH(bh)); + } else { + tcg_gen_op3_i64(INDEX_op_subbo, t0, al, bl); + tcg_gen_op3_i64(INDEX_op_subbi, rh, ah, bh); + } + + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + } else if (TCG_TARGET_HAS_sub2_i64) { tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); From 1f049cc5fda405c46de94f9328f0e3f84c0c993b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 17 Jan 2025 22:05:48 -0800 Subject: [PATCH 123/161] tcg/i386: Honor carry_live in tcg_out_movi Do not clobber flags if they're live. Required in order to perform register allocation on add/sub carry opcodes. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 04e31cae12..8e0ccbc722 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1092,7 +1092,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, { tcg_target_long diff; - if (arg == 0) { + if (arg == 0 && !s->carry_live) { tgen_arithr(s, ARITH_XOR, ret, ret); return; } From e37e98b711975752d592e7b2daf11d320d0a8daf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 17 Jan 2025 22:24:56 -0800 Subject: [PATCH 124/161] tcg/i386: Implement add/sub carry opcodes Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/i386/tcg-target-con-set.h | 1 - tcg/i386/tcg-target-has.h | 8 +-- tcg/i386/tcg-target.c.inc | 117 +++++++++++++++++++++------------- 3 files changed, 76 insertions(+), 50 deletions(-) diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 0ae9775944..85c93836bb 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -57,4 +57,3 @@ C_O2_I1(r, r, L) C_O2_I2(a, d, a, r) C_O2_I2(r, r, L, L) C_O2_I3(a, d, 0, 1, r) -C_N1_O1_I4(r, r, 0, 1, re, re) diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 0328102c2a..a984a6af2e 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -26,14 +26,14 @@ #define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #else #define TCG_TARGET_HAS_qemu_st8_i32 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 8e0ccbc722..44f9afc0d6 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -424,6 +424,7 @@ static bool tcg_target_const_match(int64_t val, int ct, #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) #define OPC_SHRD_Ib (0xac | P_EXT) +#define OPC_STC (0xf9) #define OPC_TESTB (0x84) #define OPC_TESTL (0x85) #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) @@ -2629,21 +2630,55 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_addco(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_ADD + rexw, a0, a2); +} + +static void tgen_addco_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_ADD + rexw, a0, a2, true); +} + static const TCGOutOpBinary outop_addco = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_addco, + .out_rri = tgen_addco_imm, +}; + +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_ADC + rexw, a0, a2); +} + +static void tgen_addcio_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_ADC + rexw, a0, a2, true); +} + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_addcio, + .out_rri = tgen_addcio_imm, }; static const TCGOutOpAddSubCarry outop_addci = { - .base.static_constraint = C_NotImplemented, -}; - -static const TCGOutOpBinary outop_addcio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_addcio, + .out_rri = tgen_addcio_imm, }; static void tcg_out_set_carry(TCGContext *s) { - g_assert_not_reached(); + tcg_out8(s, OPC_STC); } static void tgen_and(TCGContext *s, TCGType type, @@ -3060,7 +3095,7 @@ static const TCGOutOpBinary outop_shr = { }; static void tgen_sub(TCGContext *s, TCGType type, - TCGReg a0, TCGReg a1, TCGReg a2) + TCGReg a0, TCGReg a1, TCGReg a2) { int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; tgen_arithr(s, ARITH_SUB + rexw, a0, a2); @@ -3071,21 +3106,44 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_SUB + rexw, a0, a2, 1); +} + static const TCGOutOpAddSubCarry outop_subbo = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_sub, + .out_rri = tgen_subbo_rri, }; -static const TCGOutOpAddSubCarry outop_subbi = { - .base.static_constraint = C_NotImplemented, -}; +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_SBB + rexw, a0, a2); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_SBB + rexw, a0, a2, 1); +} static const TCGOutOpAddSubCarry outop_subbio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, }; +#define outop_subbi outop_subbio + static void tcg_out_set_borrow(TCGContext *s) { - g_assert_not_reached(); + tcg_out8(s, OPC_STC); } static void tgen_xor(TCGContext *s, TCGType type, @@ -3421,31 +3479,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I128); break; - OP_32_64(add2): - if (const_args[4]) { - tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1); - } else { - tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1); - } else { - tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]); - } - break; - OP_32_64(sub2): - if (const_args[4]) { - tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1); - } else { - tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1); - } else { - tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]); - } - break; - #if TCG_TARGET_REG_BITS == 64 case INDEX_op_ld32s_i64: tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2); @@ -4051,12 +4084,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(re, r); - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_N1_O1_I4(r, r, 0, 1, re, re); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, L); From 0ad6d64b7be6287f8c07c72f8462ef5635b1e2da Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 17 Jan 2025 22:39:14 -0800 Subject: [PATCH 125/161] tcg/i386: Special case addci r, 0, 0 Using addci with two zeros as input in order to capture the value of the carry-in bit is common. Special case this with sbb+neg so that we do not have to load 0 into a register first. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/i386/tcg-target-con-set.h | 1 + tcg/i386/tcg-target.c.inc | 46 ++++++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 85c93836bb..458d69c3c0 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -45,6 +45,7 @@ C_O1_I2(r, L, L) C_O1_I2(r, r, r) C_O1_I2(r, r, re) C_O1_I2(r, r, ri) +C_O1_I2(r, rO, re) C_O1_I2(x, x, x) C_N1_I2(r, r, r) C_N1_I2(r, r, rW) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 44f9afc0d6..da05f13b21 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2670,10 +2670,50 @@ static const TCGOutOpBinary outop_addcio = { .out_rri = tgen_addcio_imm, }; +static void tgen_addci_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* Because "0O" is not a valid constraint, we must match ourselves. */ + if (a0 == a2) { + tgen_addcio(s, type, a0, a0, a1); + } else { + tcg_out_mov(s, type, a0, a1); + tgen_addcio(s, type, a0, a0, a2); + } +} + +static void tgen_addci_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + tgen_addcio_imm(s, type, a0, a0, a2); +} + +static void tgen_addci_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tgen_addci_rri(s, type, a0, a2, a1); +} + +static void tgen_addci_rii(TCGContext *s, TCGType type, TCGReg a0, + tcg_target_long a1, tcg_target_long a2) +{ + if (a2 == 0) { + /* Implement 0 + 0 + C with -(x - x - c). */ + tgen_arithr(s, ARITH_SBB, a0, a0); + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, a0); + } else { + tcg_out_movi(s, type, a0, a2); + tgen_addcio_imm(s, type, a0, a0, a1); + } +} + static const TCGOutOpAddSubCarry outop_addci = { - .base.static_constraint = C_O1_I2(r, 0, re), - .out_rrr = tgen_addcio, - .out_rri = tgen_addcio_imm, + .base.static_constraint = C_O1_I2(r, rO, re), + .out_rrr = tgen_addci_rrr, + .out_rri = tgen_addci_rri, + .out_rir = tgen_addci_rir, + .out_rii = tgen_addci_rii, }; static void tcg_out_set_carry(TCGContext *s) From db3feb02b82d388d85a0c0a14d62f1a625f626a4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 01:19:51 -0800 Subject: [PATCH 126/161] tcg: Add tcg_gen_addcio_{i32,i64,tl} Create a function for performing an add with carry-in and producing carry out. The carry-out result is boolean. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- include/tcg/tcg-op-common.h | 4 ++ include/tcg/tcg-op.h | 2 + tcg/tcg-op.c | 95 +++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index 009e2778c5..b439bdb385 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -135,6 +135,8 @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); +void tcg_gen_addcio_i32(TCGv_i32 r, TCGv_i32 co, + TCGv_i32 a, TCGv_i32 b, TCGv_i32 ci); void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); @@ -238,6 +240,8 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); +void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co, + TCGv_i64 a, TCGv_i64 b, TCGv_i64 ci); void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index cded92a447..59d19755e6 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -253,6 +253,7 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_movcond_tl tcg_gen_movcond_i64 #define tcg_gen_add2_tl tcg_gen_add2_i64 #define tcg_gen_sub2_tl tcg_gen_sub2_i64 +#define tcg_gen_addcio_tl tcg_gen_addcio_i64 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 #define tcg_gen_muls2_tl tcg_gen_muls2_i64 #define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64 @@ -371,6 +372,7 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_movcond_tl tcg_gen_movcond_i32 #define tcg_gen_add2_tl tcg_gen_add2_i32 #define tcg_gen_sub2_tl tcg_gen_sub2_i32 +#define tcg_gen_addcio_tl tcg_gen_addcio_i32 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 #define tcg_gen_muls2_tl tcg_gen_muls2_i32 #define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 447b0ebacd..b0a29278ab 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1123,6 +1123,33 @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, } } +void tcg_gen_addcio_i32(TCGv_i32 r, TCGv_i32 co, + TCGv_i32 a, TCGv_i32 b, TCGv_i32 ci) +{ + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 mone = tcg_constant_i32(-1); + + tcg_gen_op3_i32(INDEX_op_addco, t0, ci, mone); + tcg_gen_op3_i32(INDEX_op_addcio, r, a, b); + tcg_gen_op3_i32(INDEX_op_addci, co, zero, zero); + tcg_temp_free_i32(t0); + } else { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + + tcg_gen_add_i32(t0, a, b); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, t0, a); + tcg_gen_add_i32(r, t0, ci); + tcg_gen_setcond_i32(TCG_COND_LTU, t0, r, t0); + tcg_gen_or_i32(co, t0, t1); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + } +} + void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) { @@ -2868,6 +2895,74 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, } } +void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co, + TCGv_i64 a, TCGv_i64 b, TCGv_i64 ci) +{ + if (TCG_TARGET_REG_BITS == 64) { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I64, 0)) { + TCGv_i64 discard = tcg_temp_ebb_new_i64(); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 mone = tcg_constant_i64(-1); + + tcg_gen_op3_i64(INDEX_op_addco, discard, ci, mone); + tcg_gen_op3_i64(INDEX_op_addcio, r, a, b); + tcg_gen_op3_i64(INDEX_op_addci, co, zero, zero); + tcg_temp_free_i64(discard); + } else { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); + + tcg_gen_add_i64(t0, a, b); + tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, a); + tcg_gen_add_i64(r, t0, ci); + tcg_gen_setcond_i64(TCG_COND_LTU, t0, r, t0); + tcg_gen_or_i64(co, t0, t1); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } + } else { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I32, 0)) { + TCGv_i32 discard = tcg_temp_ebb_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 mone = tcg_constant_i32(-1); + + tcg_gen_op3_i32(INDEX_op_addco, discard, TCGV_LOW(ci), mone); + tcg_gen_op3_i32(INDEX_op_addcio, discard, TCGV_HIGH(ci), mone); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_LOW(r), + TCGV_LOW(a), TCGV_LOW(b)); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_HIGH(r), + TCGV_HIGH(a), TCGV_HIGH(b)); + tcg_gen_op3_i32(INDEX_op_addci, TCGV_LOW(co), zero, zero); + tcg_temp_free_i32(discard); + } else { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 c0 = tcg_temp_ebb_new_i32(); + TCGv_i32 c1 = tcg_temp_ebb_new_i32(); + + tcg_gen_or_i32(c1, TCGV_LOW(ci), TCGV_HIGH(ci)); + tcg_gen_setcondi_i32(TCG_COND_NE, c1, c1, 0); + + tcg_gen_add_i32(t0, TCGV_LOW(a), TCGV_LOW(b)); + tcg_gen_setcond_i32(TCG_COND_LTU, c0, t0, TCGV_LOW(a)); + tcg_gen_add_i32(TCGV_LOW(r), t0, c1); + tcg_gen_setcond_i32(TCG_COND_LTU, c1, TCGV_LOW(r), c1); + tcg_gen_or_i32(c1, c1, c0); + + tcg_gen_add_i32(t0, TCGV_HIGH(a), TCGV_HIGH(b)); + tcg_gen_setcond_i32(TCG_COND_LTU, c0, t0, TCGV_HIGH(a)); + tcg_gen_add_i32(TCGV_HIGH(r), t0, c1); + tcg_gen_setcond_i32(TCG_COND_LTU, c1, TCGV_HIGH(r), c1); + tcg_gen_or_i32(TCGV_LOW(co), c0, c1); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(c0); + tcg_temp_free_i32(c1); + } + tcg_gen_movi_i32(TCGV_HIGH(co), 0); + } +} + void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) { From eea0f7ea5170b47aa2cd6f6ad077e48702aad6f3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 01:27:41 -0800 Subject: [PATCH 127/161] target/arm: Use tcg_gen_addcio_* for ADCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- target/arm/tcg/translate-a64.c | 8 ++------ target/arm/tcg/translate.c | 17 +++-------------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index e076d4aa05..d9305f9d26 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -1076,11 +1076,9 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i64 cf_64 = tcg_temp_new_i64(); TCGv_i64 vf_64 = tcg_temp_new_i64(); TCGv_i64 tmp = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_constant_i64(0); tcg_gen_extu_i32_i64(cf_64, cpu_CF); - tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); - tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); + tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64); tcg_gen_extrl_i64_i32(cpu_CF, cf_64); gen_set_NZ64(result); @@ -1094,12 +1092,10 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i32 t0_32 = tcg_temp_new_i32(); TCGv_i32 t1_32 = tcg_temp_new_i32(); TCGv_i32 tmp = tcg_temp_new_i32(); - TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_extrl_i64_i32(t0_32, t0); tcg_gen_extrl_i64_i32(t1_32, t1); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); - tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); + tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index 273b860d57..88df9c482a 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -494,20 +494,9 @@ static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) { TCGv_i32 tmp = tcg_temp_new_i32(); - if (tcg_op_supported(INDEX_op_add2_i32, TCG_TYPE_I32, 0)) { - tcg_gen_movi_i32(tmp, 0); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp); - tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp); - } else { - TCGv_i64 q0 = tcg_temp_new_i64(); - TCGv_i64 q1 = tcg_temp_new_i64(); - tcg_gen_extu_i32_i64(q0, t0); - tcg_gen_extu_i32_i64(q1, t1); - tcg_gen_add_i64(q0, q0, q1); - tcg_gen_extu_i32_i64(q1, cpu_CF); - tcg_gen_add_i64(q0, q0, q1); - tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0); - } + + tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0, t1, cpu_CF); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); tcg_gen_xor_i32(tmp, t0, t1); From 14e9ff8514bcae95d45dc8603f1ef33ad2f1dce1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 01:35:49 -0800 Subject: [PATCH 128/161] target/hppa: Use tcg_gen_addcio_i64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use this in do_add, do_sub, and do_ds, all of which need add with carry-in and carry-out. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- target/hppa/translate.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 14f3833322..88a7d339eb 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -1209,10 +1209,10 @@ static void do_add(DisasContext *ctx, unsigned rt, TCGv_i64 orig_in1, cb_msb = tcg_temp_new_i64(); cb = tcg_temp_new_i64(); - tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero); if (is_c) { - tcg_gen_add2_i64(dest, cb_msb, dest, cb_msb, - get_psw_carry(ctx, d), ctx->zero); + tcg_gen_addcio_i64(dest, cb_msb, in1, in2, get_psw_carry(ctx, d)); + } else { + tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero); } tcg_gen_xor_i64(cb, in1, in2); tcg_gen_xor_i64(cb, cb, dest); @@ -1308,9 +1308,7 @@ static void do_sub(DisasContext *ctx, unsigned rt, TCGv_i64 in1, if (is_b) { /* DEST,C = IN1 + ~IN2 + C. */ tcg_gen_not_i64(cb, in2); - tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, - get_psw_carry(ctx, d), ctx->zero); - tcg_gen_add2_i64(dest, cb_msb, dest, cb_msb, cb, ctx->zero); + tcg_gen_addcio_i64(dest, cb_msb, in1, cb, get_psw_carry(ctx, d)); tcg_gen_xor_i64(cb, cb, in1); tcg_gen_xor_i64(cb, cb, dest); } else { @@ -3008,9 +3006,7 @@ static bool trans_ds(DisasContext *ctx, arg_rrr_cf *a) tcg_gen_xor_i64(add2, in2, addc); tcg_gen_andi_i64(addc, addc, 1); - tcg_gen_add2_i64(dest, cpu_psw_cb_msb, add1, ctx->zero, add2, ctx->zero); - tcg_gen_add2_i64(dest, cpu_psw_cb_msb, dest, cpu_psw_cb_msb, - addc, ctx->zero); + tcg_gen_addcio_i64(dest, cpu_psw_cb_msb, add1, add2, addc); /* Write back the result register. */ save_gpr(ctx, a->t, dest); @@ -3553,8 +3549,7 @@ static bool do_addb(DisasContext *ctx, unsigned r, TCGv_i64 in1, TCGv_i64 cb = tcg_temp_new_i64(); TCGv_i64 cb_msb = tcg_temp_new_i64(); - tcg_gen_movi_i64(cb_msb, 0); - tcg_gen_add2_i64(dest, cb_msb, in1, cb_msb, in2, cb_msb); + tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero); tcg_gen_xor_i64(cb, in1, in2); tcg_gen_xor_i64(cb, cb, dest); cb_cond = get_carry(ctx, d, cb, cb_msb); From fcfbd8f4a9c3cbc09376230093d28e14acf7854b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 01:47:53 -0800 Subject: [PATCH 129/161] target/microblaze: Use tcg_gen_addcio_i32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use this in gen_addc and gen_rsubc, both of which need add with carry-in and carry-out. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- target/microblaze/translate.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index 7dcad6cf0d..23f1037236 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -311,11 +311,7 @@ static void gen_add(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) /* Input and output carry. */ static void gen_addc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - TCGv_i32 zero = tcg_constant_i32(0); - TCGv_i32 tmp = tcg_temp_new_i32(); - - tcg_gen_add2_i32(tmp, cpu_msr_c, ina, zero, cpu_msr_c, zero); - tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero); + tcg_gen_addcio_i32(out, cpu_msr_c, ina, inb, cpu_msr_c); } /* Input carry, but no output carry. */ @@ -544,12 +540,10 @@ static void gen_rsub(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) /* Input and output carry. */ static void gen_rsubc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - TCGv_i32 zero = tcg_constant_i32(0); TCGv_i32 tmp = tcg_temp_new_i32(); tcg_gen_not_i32(tmp, ina); - tcg_gen_add2_i32(tmp, cpu_msr_c, tmp, zero, cpu_msr_c, zero); - tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero); + tcg_gen_addcio_i32(out, cpu_msr_c, tmp, inb, cpu_msr_c); } /* No input or output carry. */ From 02b9d791be10ea9e4c8fdfe4cfdc2a6a0de7e810 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 01:50:50 -0800 Subject: [PATCH 130/161] target/openrisc: Use tcg_gen_addcio_* for ADDC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- target/openrisc/translate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c index d4ce60188b..baadea4448 100644 --- a/target/openrisc/translate.c +++ b/target/openrisc/translate.c @@ -221,8 +221,7 @@ static void gen_addc(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) TCGv t0 = tcg_temp_new(); TCGv res = tcg_temp_new(); - tcg_gen_add2_tl(res, cpu_sr_cy, srca, dc->zero, cpu_sr_cy, dc->zero); - tcg_gen_add2_tl(res, cpu_sr_cy, res, cpu_sr_cy, srcb, dc->zero); + tcg_gen_addcio_tl(res, cpu_sr_cy, srca, srcb, cpu_sr_cy); tcg_gen_xor_tl(cpu_sr_ov, srca, srcb); tcg_gen_xor_tl(t0, res, srcb); tcg_gen_andc_tl(cpu_sr_ov, t0, cpu_sr_ov); From 3a6ec7d71a4e268a6be319cf2efa780091d42d30 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 01:55:11 -0800 Subject: [PATCH 131/161] target/ppc: Use tcg_gen_addcio_tl for ADD and SUBF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- target/ppc/translate.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index fea2f2ce23..62dd008e36 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1746,11 +1746,10 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(ca32, ca); } } else { - TCGv zero = tcg_constant_tl(0); if (add_ca) { - tcg_gen_add2_tl(t0, ca, arg1, zero, ca, zero); - tcg_gen_add2_tl(t0, ca, t0, ca, arg2, zero); + tcg_gen_addcio_tl(t0, ca, arg1, arg2, ca); } else { + TCGv zero = tcg_constant_tl(0); tcg_gen_add2_tl(t0, ca, arg1, zero, arg2, zero); } gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, ca32, 0); @@ -1949,11 +1948,9 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(cpu_ca32, cpu_ca); } } else if (add_ca) { - TCGv zero, inv1 = tcg_temp_new(); + TCGv inv1 = tcg_temp_new(); tcg_gen_not_tl(inv1, arg1); - zero = tcg_constant_tl(0); - tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero); - tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero); + tcg_gen_addcio_tl(t0, cpu_ca, arg2, inv1, cpu_ca); gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, cpu_ca32, 0); } else { tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1); From 206c23e4720cd2b7668197887d1694bd346e979e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 01:59:12 -0800 Subject: [PATCH 132/161] target/s390x: Use tcg_gen_addcio_i64 for op_addc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- target/s390x/tcg/translate.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 00073c5560..a714f9c0c2 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -1250,11 +1250,7 @@ static DisasJumpType op_addc32(DisasContext *s, DisasOps *o) static DisasJumpType op_addc64(DisasContext *s, DisasOps *o) { compute_carry(s); - - TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, zero); - tcg_gen_add2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero); - + tcg_gen_addcio_i64(o->out, cc_src, o->in1, o->in2, cc_src); return DISAS_NEXT; } From 68188214d5b5dc80acc9143f6bdca25fc06f6e2b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 02:03:03 -0800 Subject: [PATCH 133/161] target/sh4: Use tcg_gen_addcio_i32 for addc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- target/sh4/translate.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 712a57fb54..712117be22 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -695,14 +695,8 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4)); return; case 0x300e: /* addc Rm,Rn */ - { - TCGv t0, t1; - t0 = tcg_constant_tl(0); - t1 = tcg_temp_new(); - tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0); - tcg_gen_add2_i32(REG(B11_8), cpu_sr_t, - REG(B11_8), t0, t1, cpu_sr_t); - } + tcg_gen_addcio_i32(REG(B11_8), cpu_sr_t, + REG(B11_8), REG(B7_4), cpu_sr_t); return; case 0x300f: /* addv Rm,Rn */ { From 6ed4d97ff7fbd2ef3d0d3f1f06d89560955873b8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 02:05:19 -0800 Subject: [PATCH 134/161] target/sparc: Use tcg_gen_addcio_tl for gen_op_addcc_int Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- target/sparc/translate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/target/sparc/translate.c b/target/sparc/translate.c index adebddf27b..63dd90447b 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -396,8 +396,7 @@ static void gen_op_addcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin) TCGv z = tcg_constant_tl(0); if (cin) { - tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, cin, z); - tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, cpu_cc_N, cpu_cc_C, src2, z); + tcg_gen_addcio_tl(cpu_cc_N, cpu_cc_C, src1, src2, cin); } else { tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, src2, z); } From 867878c112a0c8bbf7590a948ea291f1f1d61209 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 02:08:55 -0800 Subject: [PATCH 135/161] target/tricore: Use tcg_gen_addcio_i32 for gen_addc_CC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- target/tricore/translate.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/target/tricore/translate.c b/target/tricore/translate.c index ede0c92c1e..ba36c9fcc8 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -1346,15 +1346,11 @@ static inline void gen_addi_CC(TCGv ret, TCGv r1, int32_t con) static inline void gen_addc_CC(TCGv ret, TCGv r1, TCGv r2) { - TCGv carry = tcg_temp_new_i32(); - TCGv t0 = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); TCGv result = tcg_temp_new_i32(); - tcg_gen_movi_tl(t0, 0); - tcg_gen_setcondi_tl(TCG_COND_NE, carry, cpu_PSW_C, 0); /* Addition, carry and set C/V/SV bits */ - tcg_gen_add2_i32(result, cpu_PSW_C, r1, t0, carry, t0); - tcg_gen_add2_i32(result, cpu_PSW_C, result, cpu_PSW_C, r2, t0); + tcg_gen_addcio_i32(result, cpu_PSW_C, r1, r2, cpu_PSW_C); /* calc V bit */ tcg_gen_xor_tl(cpu_PSW_V, result, r1); tcg_gen_xor_tl(t0, r1, r2); From 75351891b85f5fe4bd3c1e281d0d9d2dae097e6a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 15 Jan 2025 02:41:58 -0800 Subject: [PATCH 136/161] tcg/aarch64: Implement add/sub carry opcodes Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target-con-set.h | 3 +- tcg/aarch64/tcg-target-has.h | 8 +- tcg/aarch64/tcg-target.c.inc | 227 ++++++++++++++++++++----------- 3 files changed, 150 insertions(+), 88 deletions(-) diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h index 2eda499cd3..d0622e65fb 100644 --- a/tcg/aarch64/tcg-target-con-set.h +++ b/tcg/aarch64/tcg-target-con-set.h @@ -24,6 +24,8 @@ C_O1_I2(r, r, rAL) C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rL) +C_O1_I2(r, rZ, rA) +C_O1_I2(r, rz, rMZ) C_O1_I2(r, rz, rz) C_O1_I2(r, rZ, rZ) C_O1_I2(w, 0, w) @@ -34,4 +36,3 @@ C_O1_I2(w, w, wZ) C_O1_I3(w, w, w, w) C_O1_I4(r, r, rC, rz, rz) C_O2_I1(r, r, r) -C_O2_I4(r, r, rz, rz, rA, rMZ) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 011a91c263..695effd77c 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -13,13 +13,13 @@ #define have_lse2 (cpuinfo & CPUINFO_LSE2) /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 /* * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load, diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 87f8c98ed7..75cf490fd2 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -508,7 +508,9 @@ typedef enum { /* Add/subtract with carry instructions. */ I3503_ADC = 0x1a000000, + I3503_ADCS = 0x3a000000, I3503_SBC = 0x5a000000, + I3503_SBCS = 0x7a000000, /* Conditional select instructions. */ I3506_CSEL = 0x1a800000, @@ -1573,56 +1575,6 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) tcg_out_mov(s, TCG_TYPE_I32, rd, rn); } -static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, - TCGReg rh, TCGReg al, TCGReg ah, - tcg_target_long bl, tcg_target_long bh, - bool const_bl, bool const_bh, bool sub) -{ - TCGReg orig_rl = rl; - AArch64Insn insn; - - if (rl == ah || (!const_bh && rl == bh)) { - rl = TCG_REG_TMP0; - } - - if (const_bl) { - if (bl < 0) { - bl = -bl; - insn = sub ? I3401_ADDSI : I3401_SUBSI; - } else { - insn = sub ? I3401_SUBSI : I3401_ADDSI; - } - - if (unlikely(al == TCG_REG_XZR)) { - /* ??? We want to allow al to be zero for the benefit of - negation via subtraction. However, that leaves open the - possibility of adding 0+const in the low part, and the - immediate add instructions encode XSP not XZR. Don't try - anything more elaborate here than loading another zero. */ - al = TCG_REG_TMP0; - tcg_out_movi(s, ext, al, 0); - } - tcg_out_insn_3401(s, insn, ext, rl, al, bl); - } else { - tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); - } - - insn = I3503_ADC; - if (const_bh) { - /* Note that the only two constants we support are 0 and -1, and - that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ - if ((bh != 0) ^ sub) { - insn = I3503_SBC; - } - bh = TCG_REG_XZR; - } else if (sub) { - insn = I3503_SBC; - } - tcg_out_insn_3503(s, insn, ext, rh, ah, bh); - - tcg_out_mov(s, ext, orig_rl, rl); -} - static inline void tcg_out_mb(TCGContext *s, TCGArg a0) { static const uint32_t sync[] = { @@ -2078,21 +2030,81 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_addco(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3502, ADDS, type, a0, a1, a2); +} + +static void tgen_addco_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 >= 0) { + tcg_out_insn(s, 3401, ADDSI, type, a0, a1, a2); + } else { + tcg_out_insn(s, 3401, SUBSI, type, a0, a1, -a2); + } +} + static const TCGOutOpBinary outop_addco = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, r, rA), + .out_rrr = tgen_addco, + .out_rri = tgen_addco_imm, }; +static void tgen_addci_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3503, ADC, type, a0, a1, a2); +} + +static void tgen_addci_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* + * Note that the only two constants we support are 0 and -1, and + * that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. + */ + if (a2) { + tcg_out_insn(s, 3503, SBC, type, a0, a1, TCG_REG_XZR); + } else { + tcg_out_insn(s, 3503, ADC, type, a0, a1, TCG_REG_XZR); + } +} + static const TCGOutOpAddSubCarry outop_addci = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rz, rMZ), + .out_rrr = tgen_addci_rrr, + .out_rri = tgen_addci_rri, }; +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3503, ADCS, type, a0, a1, a2); +} + +static void tgen_addcio_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* Use SBCS w/0 for ADCS w/-1 -- see above. */ + if (a2) { + tcg_out_insn(s, 3503, SBCS, type, a0, a1, TCG_REG_XZR); + } else { + tcg_out_insn(s, 3503, ADCS, type, a0, a1, TCG_REG_XZR); + } +} + static const TCGOutOpBinary outop_addcio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rz, rMZ), + .out_rrr = tgen_addcio, + .out_rri = tgen_addcio_imm, }; static void tcg_out_set_carry(TCGContext *s) { - g_assert_not_reached(); + tcg_out_insn(s, 3502, SUBS, TCG_TYPE_I32, + TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR); } static void tgen_and(TCGContext *s, TCGType type, @@ -2438,21 +2450,95 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3502, SUBS, type, a0, a1, a2); +} + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 >= 0) { + tcg_out_insn(s, 3401, SUBSI, type, a0, a1, a2); + } else { + tcg_out_insn(s, 3401, ADDSI, type, a0, a1, -a2); + } +} + +static void tgen_subbo_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, a2); +} + +static void tgen_subbo_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + if (a2 == 0) { + tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, TCG_REG_XZR); + return; + } + + /* + * We want to allow a1 to be zero for the benefit of negation via + * subtraction. However, that leaves open the possibility of + * adding 0 +/- const, and the immediate add/sub instructions + * encode XSP not XZR. Since we have 0 - non-zero, borrow is + * always set. + */ + tcg_out_movi(s, type, a0, -a2); + tcg_out_set_borrow(s); +} + static const TCGOutOpAddSubCarry outop_subbo = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rZ, rA), + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, + .out_rir = tgen_subbo_rir, + .out_rii = tgen_subbo_rii, }; +static void tgen_subbi_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3503, SBC, type, a0, a1, a2); +} + +static void tgen_subbi_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_addci_rri(s, type, a0, a1, ~a2); +} + static const TCGOutOpAddSubCarry outop_subbi = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rz, rMZ), + .out_rrr = tgen_subbi_rrr, + .out_rri = tgen_subbi_rri, }; +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3503, SBCS, type, a0, a1, a2); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_addcio_imm(s, type, a0, a1, ~a2); +} + static const TCGOutOpAddSubCarry outop_subbio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rz, rMZ), + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, }; static void tcg_out_set_borrow(TCGContext *s) { - g_assert_not_reached(); + tcg_out_insn(s, 3502, ADDS, TCG_TYPE_I32, + TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR); } static void tgen_xor(TCGContext *s, TCGType type, @@ -2759,25 +2845,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); break; - case INDEX_op_add2_i32: - tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3], - (int32_t)args[4], args[5], const_args[4], - const_args[5], false); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4], - args[5], const_args[4], const_args[5], false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3], - (int32_t)args[4], args[5], const_args[4], - const_args[5], true); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4], - args[5], const_args[4], const_args[5], true); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -3271,12 +3338,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(rz, rz, r); - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_O2_I4(r, r, rz, rz, rA, rMZ); - case INDEX_op_add_vec: case INDEX_op_sub_vec: case INDEX_op_mul_vec: From b15c0d11a28e6be7156b6920a7a5b2179112b1fa Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 15 Jan 2025 23:35:53 +0000 Subject: [PATCH 137/161] tcg/arm: Implement add/sub carry opcodes Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/arm/tcg-target-con-set.h | 4 +- tcg/arm/tcg-target-has.h | 4 +- tcg/arm/tcg-target.c.inc | 212 ++++++++++++++++++++++++++--------- 3 files changed, 161 insertions(+), 59 deletions(-) diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h index f46a8444fb..16b1193228 100644 --- a/tcg/arm/tcg-target-con-set.h +++ b/tcg/arm/tcg-target-con-set.h @@ -31,6 +31,8 @@ C_O1_I2(r, r, rIK) C_O1_I2(r, r, rIN) C_O1_I2(r, r, ri) C_O1_I2(r, rI, r) +C_O1_I2(r, rI, rIK) +C_O1_I2(r, rI, rIN) C_O1_I2(r, rZ, rZ) C_O1_I2(w, 0, w) C_O1_I2(w, w, w) @@ -43,5 +45,3 @@ C_O1_I4(r, r, rIN, rIK, 0) C_O2_I1(e, p, q) C_O2_I2(e, p, q, q) C_O2_I2(r, r, r, r) -C_O2_I4(r, r, r, r, rIN, rIK) -C_O2_I4(r, r, rI, rI, rIN, rIK) diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 3973df1f12..f4bd15c68a 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -24,8 +24,8 @@ extern bool use_neon_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index aa0397520d..3c9042ebfa 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -178,6 +178,8 @@ typedef enum { INSN_DMB_ISH = 0xf57ff05b, INSN_DMB_MCR = 0xee070fba, + INSN_MSRI_CPSR = 0x0360f000, + /* Architected nop introduced in v6k. */ /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this also Just So Happened to do nothing on pre-v6k so that we @@ -1826,21 +1828,74 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_addco(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_ADD | TO_CPSR, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_addco_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, + a0, a1, a2); +} + static const TCGOutOpBinary outop_addco = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, r, rIN), + .out_rrr = tgen_addco, + .out_rri = tgen_addco_imm, }; +static void tgen_addci(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_ADC, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_addci_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_ADC, ARITH_SBC, a0, a1, a2); +} + static const TCGOutOpAddSubCarry outop_addci = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, r, rIK), + .out_rrr = tgen_addci, + .out_rri = tgen_addci_imm, }; +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_ADC | TO_CPSR, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_addcio_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_ADC | TO_CPSR, ARITH_SBC | TO_CPSR, + a0, a1, a2); +} + static const TCGOutOpBinary outop_addcio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, r, rIK), + .out_rrr = tgen_addcio, + .out_rri = tgen_addcio_imm, }; +/* Set C to @c; NZVQ all set to 0. */ +static void tcg_out_movi_apsr_c(TCGContext *s, bool c) +{ + int imm12 = encode_imm_nofail(c << 29); + tcg_out32(s, (COND_AL << 28) | INSN_MSRI_CPSR | 0x80000 | imm12); +} + static void tcg_out_set_carry(TCGContext *s) { - g_assert_not_reached(); + tcg_out_movi_apsr_c(s, 1); } static void tgen_and(TCGContext *s, TCGType type, @@ -2152,21 +2207,115 @@ static const TCGOutOpSubtract outop_sub = { .out_rir = tgen_subfi, }; +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_SUB | TO_CPSR, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IN(s, COND_AL, ARITH_SUB | TO_CPSR, ARITH_ADD | TO_CPSR, + a0, a1, a2); +} + +static void tgen_subbo_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_RSB | TO_CPSR, + a0, a2, encode_imm_nofail(a1)); +} + +static void tgen_subbo_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, a2); + tgen_subbo_rir(s, TCG_TYPE_I32, a0, a1, TCG_REG_TMP); +} + static const TCGOutOpAddSubCarry outop_subbo = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rI, rIN), + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, + .out_rir = tgen_subbo_rir, + .out_rii = tgen_subbo_rii, }; +static void tgen_subbi_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_SBC, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_subbi_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_SBC, ARITH_ADC, a0, a1, a2); +} + +static void tgen_subbi_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_RSC, a0, a2, encode_imm_nofail(a1)); +} + +static void tgen_subbi_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, a2); + tgen_subbi_rir(s, TCG_TYPE_I32, a0, a1, TCG_REG_TMP); +} + static const TCGOutOpAddSubCarry outop_subbi = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rI, rIK), + .out_rrr = tgen_subbi_rrr, + .out_rri = tgen_subbi_rri, + .out_rir = tgen_subbi_rir, + .out_rii = tgen_subbi_rii, }; +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_SBC | TO_CPSR, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_SBC | TO_CPSR, ARITH_ADC | TO_CPSR, + a0, a1, a2); +} + +static void tgen_subbio_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_RSC | TO_CPSR, + a0, a2, encode_imm_nofail(a1)); +} + +static void tgen_subbio_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, a2); + tgen_subbio_rir(s, TCG_TYPE_I32, a0, a1, TCG_REG_TMP); +} + static const TCGOutOpAddSubCarry outop_subbio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rI, rIK), + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, + .out_rir = tgen_subbio_rir, + .out_rii = tgen_subbio_rii, }; static void tcg_out_set_borrow(TCGContext *s) { - g_assert_not_reached(); + tcg_out_movi_apsr_c(s, 0); /* borrow = !carry */ } static void tgen_xor(TCGContext *s, TCGType type, @@ -2369,8 +2518,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0, a1, a2, a3, a4, a5; - switch (opc) { case INDEX_op_goto_ptr: tcg_out_b_reg(s, COND_AL, args[0]); @@ -2404,47 +2551,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); break; - case INDEX_op_add2_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - a3 = args[3], a4 = args[4], a5 = args[5]; - if (a0 == a3 || (a0 == a5 && !const_args[5])) { - a0 = TCG_REG_TMP; - } - tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, - a0, a2, a4, const_args[4]); - tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC, - a1, a3, a5, const_args[5]); - tcg_out_mov_reg(s, COND_AL, args[0], a0); - break; - case INDEX_op_sub2_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - a3 = args[3], a4 = args[4], a5 = args[5]; - if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) { - a0 = TCG_REG_TMP; - } - if (const_args[2]) { - if (const_args[4]) { - tcg_out_movi32(s, COND_AL, a0, a4); - a4 = a0; - } - tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1); - } else { - tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR, - ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]); - } - if (const_args[3]) { - if (const_args[5]) { - tcg_out_movi32(s, COND_AL, a1, a5); - a5 = a1; - } - tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1); - } else { - tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC, - a1, a3, a5, const_args[5]); - } - tcg_out_mov_reg(s, COND_AL, args[0], a0); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; @@ -2490,10 +2596,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i32: return C_O0_I2(r, r); - case INDEX_op_add2_i32: - return C_O2_I4(r, r, r, r, rIN, rIK); - case INDEX_op_sub2_i32: - return C_O2_I4(r, r, rI, rI, rIN, rIK); case INDEX_op_qemu_ld_i32: return C_O1_I1(r, q); case INDEX_op_qemu_ld_i64: From 2329da9605b0f1b7bd59f58afb5ab2154d0af137 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 00:38:13 +0000 Subject: [PATCH 138/161] tcg/ppc: Implement add/sub carry opcodes Tested-by: Nicholas Piggin Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target-con-set.h | 5 +- tcg/ppc/tcg-target-con-str.h | 1 + tcg/ppc/tcg-target-has.h | 11 +- tcg/ppc/tcg-target.c.inc | 227 ++++++++++++++++++++++------------- 4 files changed, 154 insertions(+), 90 deletions(-) diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h index 14cd217287..da7a383bff 100644 --- a/tcg/ppc/tcg-target-con-set.h +++ b/tcg/ppc/tcg-target-con-set.h @@ -29,7 +29,10 @@ C_O1_I2(r, r, rC) C_O1_I2(r, r, rI) C_O1_I2(r, r, rT) C_O1_I2(r, r, rU) +C_O1_I2(r, r, rZM) C_O1_I2(r, r, rZW) +C_O1_I2(r, rI, rN) +C_O1_I2(r, rZM, rZM) C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) C_O1_I4(v, v, v, vZM, v) @@ -38,5 +41,3 @@ C_O1_I4(r, r, r, rU, rC) C_O2_I1(r, r, r) C_N1O1_I1(o, m, r) C_O2_I2(r, r, r, r) -C_O2_I4(r, r, rI, rZM, r, r) -C_O2_I4(r, r, r, r, rI, rZM) diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h index 16b687216e..faf92da47f 100644 --- a/tcg/ppc/tcg-target-con-str.h +++ b/tcg/ppc/tcg-target-con-str.h @@ -19,6 +19,7 @@ REGS('v', ALL_VECTOR_REGS) CONST('C', TCG_CT_CONST_CMP) CONST('I', TCG_CT_CONST_S16) CONST('M', TCG_CT_CONST_MONE) +CONST('N', TCG_CT_CONST_N16) CONST('T', TCG_CT_CONST_S32) CONST('U', TCG_CT_CONST_U32) CONST('W', TCG_CT_CONST_WSZ) diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 8d832ce99c..4dda668706 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -18,16 +18,13 @@ /* optional instructions */ #define TCG_TARGET_HAS_qemu_st8_i32 0 - -#if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 + +#if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#else -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 \ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 5b04655f3b..91df9610ec 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -89,14 +89,15 @@ /* Shorthand for size of a register. */ #define SZR (TCG_TARGET_REG_BITS / 8) -#define TCG_CT_CONST_S16 0x100 -#define TCG_CT_CONST_U16 0x200 -#define TCG_CT_CONST_S32 0x400 -#define TCG_CT_CONST_U32 0x800 -#define TCG_CT_CONST_ZERO 0x1000 -#define TCG_CT_CONST_MONE 0x2000 -#define TCG_CT_CONST_WSZ 0x4000 -#define TCG_CT_CONST_CMP 0x8000 +#define TCG_CT_CONST_S16 0x00100 +#define TCG_CT_CONST_U16 0x00200 +#define TCG_CT_CONST_N16 0x00400 +#define TCG_CT_CONST_S32 0x00800 +#define TCG_CT_CONST_U32 0x01000 +#define TCG_CT_CONST_ZERO 0x02000 +#define TCG_CT_CONST_MONE 0x04000 +#define TCG_CT_CONST_WSZ 0x08000 +#define TCG_CT_CONST_CMP 0x10000 #define ALL_GENERAL_REGS 0xffffffffu #define ALL_VECTOR_REGS 0xffffffff00000000ull @@ -342,6 +343,9 @@ static bool tcg_target_const_match(int64_t sval, int ct, if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) { return 1; } + if ((ct & TCG_CT_CONST_N16) && -sval == (int16_t)-sval) { + return 1; + } if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) { return 1; } @@ -2863,21 +2867,69 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_addco_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, ADDC | TAB(a0, a1, a2)); +} + +static void tgen_addco_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out32(s, ADDIC | TAI(a0, a1, a2)); +} + +static TCGConstraintSetIndex cset_addco(TCGType type, unsigned flags) +{ + /* + * Note that the CA bit is defined based on the word size of the + * environment. So in 64-bit mode it's always carry-out of bit 63. + * The fallback code using deposit works just as well for TCG_TYPE_I32. + */ + return type == TCG_TYPE_REG ? C_O1_I2(r, r, rI) : C_NotImplemented; +} + static const TCGOutOpBinary outop_addco = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addco, + .out_rrr = tgen_addco_rrr, + .out_rri = tgen_addco_rri, +}; + +static void tgen_addcio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, ADDE | TAB(a0, a1, a2)); +} + +static void tgen_addcio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out32(s, (a2 ? ADDME : ADDZE) | RT(a0) | RA(a1)); +} + +static TCGConstraintSetIndex cset_addcio(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O1_I2(r, r, rZM) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addcio, + .out_rrr = tgen_addcio_rrr, + .out_rri = tgen_addcio_rri, }; static const TCGOutOpAddSubCarry outop_addci = { - .base.static_constraint = C_NotImplemented, -}; - -static const TCGOutOpBinary outop_addcio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addcio, + .out_rrr = tgen_addcio_rrr, + .out_rri = tgen_addcio_rri, }; static void tcg_out_set_carry(TCGContext *s) { - g_assert_not_reached(); + tcg_out32(s, SUBFC | TAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_R0)); } static void tgen_and(TCGContext *s, TCGType type, @@ -3284,21 +3336,94 @@ static const TCGOutOpSubtract outop_sub = { .out_rir = tgen_subfi, }; +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, SUBFC | TAB(a0, a2, a1)); +} + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 == 0) { + tcg_out_movi(s, type, TCG_REG_R0, 0); + tgen_subbo_rrr(s, type, a0, a1, TCG_REG_R0); + } else { + tgen_addco_rri(s, type, a0, a1, -a2); + } +} + +/* The underlying insn for subfi is subfic. */ +#define tgen_subbo_rir tgen_subfi + +static void tgen_subbo_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, type, TCG_REG_R0, a2); + tgen_subbo_rir(s, type, a0, a1, TCG_REG_R0); +} + +static TCGConstraintSetIndex cset_subbo(TCGType type, unsigned flags) +{ + /* Recall that the CA bit is defined based on the host word size. */ + return type == TCG_TYPE_REG ? C_O1_I2(r, rI, rN) : C_NotImplemented; +} + static const TCGOutOpAddSubCarry outop_subbo = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_subbo, + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, + .out_rir = tgen_subbo_rir, + .out_rii = tgen_subbo_rii, }; -static const TCGOutOpAddSubCarry outop_subbi = { - .base.static_constraint = C_NotImplemented, -}; +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, SUBFE | TAB(a0, a2, a1)); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_addcio_rri(s, type, a0, a1, ~a2); +} + +static void tgen_subbio_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_debug_assert(a1 == 0 || a1 == -1); + tcg_out32(s, (a1 ? SUBFME : SUBFZE) | RT(a0) | RA(a2)); +} + +static void tgen_subbio_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, type, TCG_REG_R0, a2); + tgen_subbio_rir(s, type, a0, a1, TCG_REG_R0); +} + +static TCGConstraintSetIndex cset_subbio(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O1_I2(r, rZM, rZM) : C_NotImplemented; +} static const TCGOutOpAddSubCarry outop_subbio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_subbio, + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, + .out_rir = tgen_subbio_rir, + .out_rii = tgen_subbio_rii, }; +#define outop_subbi outop_subbio + static void tcg_out_set_borrow(TCGContext *s) { - g_assert_not_reached(); + /* borrow = !carry */ + tcg_out32(s, ADDIC | TAI(TCG_REG_R0, TCG_REG_R0, 0)); } static void tgen_xor(TCGContext *s, TCGType type, @@ -3538,8 +3663,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0, a1; - switch (opc) { case INDEX_op_goto_ptr: tcg_out32(s, MTSPR | RS(args[0]) | CTR); @@ -3635,57 +3758,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_add2_i64: -#else - case INDEX_op_add2_i32: -#endif - /* Note that the CA bit is defined based on the word size of the - environment. So in 64-bit mode it's always carry-out of bit 63. - The fallback code using deposit works just as well for 32-bit. */ - a0 = args[0], a1 = args[1]; - if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { - a0 = TCG_REG_R0; - } - if (const_args[4]) { - tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); - } else { - tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); - } - if (const_args[5]) { - tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); - } else { - tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); - } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_sub2_i64: -#else - case INDEX_op_sub2_i32: -#endif - a0 = args[0], a1 = args[1]; - if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { - a0 = TCG_REG_R0; - } - if (const_args[2]) { - tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); - } else { - tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); - } - if (const_args[3]) { - tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); - } else { - tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); - } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - case INDEX_op_mb: tcg_out_mb(s, args[0]); break; @@ -4331,13 +4403,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_add2_i64: - case INDEX_op_add2_i32: - return C_O2_I4(r, r, r, r, rI, rZM); - case INDEX_op_sub2_i64: - case INDEX_op_sub2_i32: - return C_O2_I4(r, r, rI, rZM, r, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, r); case INDEX_op_qemu_ld_i64: From 1b6e25e300e78cf8d8581245c2f8339c5b423d30 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 13:26:43 -0800 Subject: [PATCH 139/161] tcg/s390x: Honor carry_live in tcg_out_movi Do not clobber flags if they're live. Required in order to perform register allocation on add/sub carry opcodes. LA and AGHI are the same size, so use LA unconditionally. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index a30afb455e..e262876614 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -951,25 +951,32 @@ static void tcg_out_movi(TCGContext *s, TCGType type, if (pc_off == (int32_t)pc_off) { tcg_out_insn(s, RIL, LARL, ret, pc_off); if (sval & 1) { - tcg_out_insn(s, RI, AGHI, ret, 1); + tcg_out_insn(s, RX, LA, ret, ret, TCG_REG_NONE, 1); } return; } - /* Otherwise, load it by parts. */ - i = is_const_p16((uint32_t)uval); - if (i >= 0) { - tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16)); - } else { - tcg_out_insn(s, RIL, LLILF, ret, uval); - } - uval >>= 32; - i = is_const_p16(uval); - if (i >= 0) { - tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16)); - } else { - tcg_out_insn(s, RIL, OIHF, ret, uval); + if (!s->carry_live) { + /* Load by parts, at most 2 instructions. */ + i = is_const_p16((uint32_t)uval); + if (i >= 0) { + tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16)); + } else { + tcg_out_insn(s, RIL, LLILF, ret, uval); + } + uval >>= 32; + i = is_const_p16(uval); + if (i >= 0) { + tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16)); + } else { + tcg_out_insn(s, RIL, OIHF, ret, uval); + } + return; } + + /* Otherwise, stuff it in the constant pool. */ + tcg_out_insn(s, RIL, LGRL, ret, 0); + new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2); } /* Emit a load/store type instruction. Inputs are: From 19b9fc2a39c733f585388918e4e093f08e2c33eb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 18 Jan 2025 14:01:12 -0800 Subject: [PATCH 140/161] tcg/s390x: Add TCG_CT_CONST_N32 We were using S32 | U32 for add2/sub2. But the ALGFI and SLGFI insns that implement this both have uint32_t immediates. This makes the composite range balanced and enables use of -0xffffffff ... -0x80000001. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target-con-set.h | 2 +- tcg/s390x/tcg-target-con-str.h | 1 + tcg/s390x/tcg-target.c.inc | 8 ++++++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 78f06e3e52..f5d3878070 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -44,4 +44,4 @@ C_O2_I2(o, m, 0, r) C_O2_I2(o, m, r, r) C_O2_I3(o, m, 0, 1, r) C_N1_O1_I4(r, r, 0, 1, ri, r) -C_N1_O1_I4(r, r, 0, 1, rJU, r) +C_N1_O1_I4(r, r, 0, 1, rUV, r) diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h index 3e574e0662..636a38a168 100644 --- a/tcg/s390x/tcg-target-con-str.h +++ b/tcg/s390x/tcg-target-con-str.h @@ -24,4 +24,5 @@ CONST('M', TCG_CT_CONST_M1) CONST('N', TCG_CT_CONST_INV) CONST('R', TCG_CT_CONST_INVRISBG) CONST('U', TCG_CT_CONST_U32) +CONST('V', TCG_CT_CONST_N32) CONST('Z', TCG_CT_CONST_ZERO) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index e262876614..9b28083945 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -43,6 +43,7 @@ #define TCG_CT_CONST_INVRISBG (1 << 14) #define TCG_CT_CONST_CMP (1 << 15) #define TCG_CT_CONST_M1 (1 << 16) +#define TCG_CT_CONST_N32 (1 << 17) #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) @@ -613,7 +614,10 @@ static bool tcg_target_const_match(int64_t val, int ct, if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { return true; } - if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + if ((ct & TCG_CT_CONST_U32) && uval <= UINT32_MAX) { + return true; + } + if ((ct & TCG_CT_CONST_N32) && -uval <= UINT32_MAX) { return true; } if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { @@ -3548,7 +3552,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_add2_i64: case INDEX_op_sub2_i64: - return C_N1_O1_I4(r, r, 0, 1, rJU, r); + return C_N1_O1_I4(r, r, 0, 1, rUV, r); case INDEX_op_st_vec: return C_O0_I2(v, r); From cda7f93fa2fbf52e0fc70e4078caf8e7f7d3bd6f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 19 Jan 2025 09:31:26 -0800 Subject: [PATCH 141/161] tcg/s390x: Implement add/sub carry opcodes Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target-con-set.h | 4 +- tcg/s390x/tcg-target-has.h | 8 +- tcg/s390x/tcg-target.c.inc | 153 +++++++++++++++++++-------------- 3 files changed, 96 insertions(+), 69 deletions(-) diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index f5d3878070..f67fd7898e 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -22,6 +22,7 @@ C_O1_I1(r, r) C_O1_I1(v, r) C_O1_I1(v, v) C_O1_I1(v, vr) +C_O1_I2(r, 0, r) C_O1_I2(r, 0, ri) C_O1_I2(r, 0, rI) C_O1_I2(r, 0, rJ) @@ -32,6 +33,7 @@ C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) C_O1_I2(r, r, rK) C_O1_I2(r, r, rNKR) +C_O1_I2(r, r, rUV) C_O1_I2(r, rZ, r) C_O1_I2(v, v, r) C_O1_I2(v, v, v) @@ -43,5 +45,3 @@ C_O2_I1(o, m, r) C_O2_I2(o, m, 0, r) C_O2_I2(o, m, r, r) C_O2_I3(o, m, 0, 1, r) -C_N1_O1_I4(r, r, 0, 1, ri, r) -C_N1_O1_I4(r, r, 0, 1, rUV, r) diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 4a2b71995d..17e61130cd 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -29,13 +29,13 @@ extern uint64_t s390_facilities[3]; ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1) /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 1 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 9b28083945..67179de848 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -173,6 +173,8 @@ typedef enum S390Opcode { RRE_SLBGR = 0xb989, RRE_XGR = 0xb982, + RRFa_ALRK = 0xb9fa, + RRFa_ALGRK = 0xb9ea, RRFa_MGRK = 0xb9ec, RRFa_MSRKC = 0xb9fd, RRFa_MSGRKC = 0xb9ed, @@ -2259,21 +2261,60 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_addco_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, ALGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, ALR, a0, a2); + } else { + tcg_out_insn(s, RRFa, ALRK, a0, a1, a2); + } +} + +static void tgen_addco_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, ALFI, a0, a2); + } else if (a2 >= 0) { + tcg_out_insn(s, RIL, ALGFI, a0, a2); + } else { + tcg_out_insn(s, RIL, SLGFI, a0, -a2); + } +} + static const TCGOutOpBinary outop_addco = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, r, rUV), + .out_rrr = tgen_addco_rrr, + .out_rri = tgen_addco_rri, +}; + +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRE, ALCR, a0, a2); + } else { + tcg_out_insn(s, RRE, ALCGR, a0, a2); + } +} + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_addcio, }; static const TCGOutOpAddSubCarry outop_addci = { - .base.static_constraint = C_NotImplemented, -}; - -static const TCGOutOpBinary outop_addcio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_addcio, }; static void tcg_out_set_carry(TCGContext *s) { - g_assert_not_reached(); + tcg_out_insn(s, RR, SLR, TCG_REG_R0, TCG_REG_R0); /* cc = 2 */ } static void tgen_and(TCGContext *s, TCGType type, @@ -2794,21 +2835,57 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, SLGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, SLR, a0, a2); + } else { + tcg_out_insn(s, RRFa, SLRK, a0, a1, a2); + } +} + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, SLFI, a0, a2); + } else if (a2 >= 0) { + tcg_out_insn(s, RIL, SLGFI, a0, a2); + } else { + tcg_out_insn(s, RIL, ALGFI, a0, -a2); + } +} + static const TCGOutOpAddSubCarry outop_subbo = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, r, rUV), + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, }; -static const TCGOutOpAddSubCarry outop_subbi = { - .base.static_constraint = C_NotImplemented, -}; +static void tgen_subbio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRE, SLBR, a0, a2); + } else { + tcg_out_insn(s, RRE, SLBGR, a0, a2); + } +} static const TCGOutOpAddSubCarry outop_subbio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_subbio, }; +#define outop_subbi outop_subbio + static void tcg_out_set_borrow(TCGContext *s) { - g_assert_not_reached(); + tcg_out_insn(s, RR, CLR, TCG_REG_R0, TCG_REG_R0); /* cc = 0 */ } static void tgen_xor(TCGContext *s, TCGType type, @@ -2967,23 +3044,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_add2_i32: - if (const_args[4]) { - tcg_out_insn(s, RIL, ALFI, args[0], args[4]); - } else { - tcg_out_insn(s, RR, ALR, args[0], args[4]); - } - tcg_out_insn(s, RRE, ALCR, args[1], args[5]); - break; - case INDEX_op_sub2_i32: - if (const_args[4]) { - tcg_out_insn(s, RIL, SLFI, args[0], args[4]); - } else { - tcg_out_insn(s, RR, SLR, args[0], args[4]); - } - tcg_out_insn(s, RRE, SLBR, args[1], args[5]); - break; - case INDEX_op_br: tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); break; @@ -3027,31 +3087,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); break; - case INDEX_op_add2_i64: - if (const_args[4]) { - if ((int64_t)args[4] >= 0) { - tcg_out_insn(s, RIL, ALGFI, args[0], args[4]); - } else { - tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]); - } - } else { - tcg_out_insn(s, RRE, ALGR, args[0], args[4]); - } - tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); - break; - case INDEX_op_sub2_i64: - if (const_args[4]) { - if ((int64_t)args[4] >= 0) { - tcg_out_insn(s, RIL, SLGFI, args[0], args[4]); - } else { - tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]); - } - } else { - tcg_out_insn(s, RRE, SLGR, args[0], args[4]); - } - tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); - break; - case INDEX_op_mb: /* The host memory model is quite strong, we simply need to serialize the instruction stream. */ @@ -3546,14 +3581,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(o, m, r); - case INDEX_op_add2_i32: - case INDEX_op_sub2_i32: - return C_N1_O1_I4(r, r, 0, 1, ri, r); - - case INDEX_op_add2_i64: - case INDEX_op_sub2_i64: - return C_N1_O1_I4(r, r, 0, 1, rUV, r); - case INDEX_op_st_vec: return C_O0_I2(v, r); case INDEX_op_ld_vec: From 41dd55c79c0a1b1a4bf573821b81d97e6d4c159a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 19 Jan 2025 10:01:18 -0800 Subject: [PATCH 142/161] tcg/s390x: Use ADD LOGICAL WITH SIGNED IMMEDIATE Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 67179de848..09c7ca5b44 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -135,6 +135,9 @@ typedef enum S390Opcode { RIEc_CLGIJ = 0xec7d, RIEc_CLIJ = 0xec7f, + RIEd_ALHSIK = 0xecda, + RIEd_ALGHSIK = 0xecdb, + RIEf_RISBG = 0xec55, RIEg_LOCGHI = 0xec46, @@ -682,8 +685,16 @@ static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); } +static void tcg_out_insn_RIEd(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r3, int i2) +{ + tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3); + tcg_out16(s, i2); + tcg_out16(s, op & 0xff); +} + static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1, - int i2, int m3) + int i2, int m3) { tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3); tcg_out32(s, (i2 << 16) | (op & 0xff)); @@ -2276,6 +2287,15 @@ static void tgen_addco_rrr(TCGContext *s, TCGType type, static void tgen_addco_rri(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, tcg_target_long a2) { + if (a2 == (int16_t)a2) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIEd, ALHSIK, a0, a1, a2); + } else { + tcg_out_insn(s, RIEd, ALGHSIK, a0, a1, a2); + } + return; + } + tcg_out_mov(s, type, a0, a1); if (type == TCG_TYPE_I32) { tcg_out_insn(s, RIL, ALFI, a0, a2); From 809069eaa3d803092adaa06cec770d78db44211d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 20 Jan 2025 16:34:47 -0800 Subject: [PATCH 143/161] tcg/sparc64: Hoist tcg_cond_to_bcond lookup out of tcg_out_movcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the sparc COND_* value not the tcg TCG_COND_* value. This makes the usage within add2/sub2 clearer. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target.c.inc | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 12f0dbd23d..3f97261626 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -652,11 +652,10 @@ static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_nop(s); } -static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, +static void tcg_out_movcc(TCGContext *s, int scond, int cc, TCGReg ret, int32_t v1, int v1const) { - tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) - | INSN_RS1(tcg_cond_to_bcond[cond]) + tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) | INSN_RS1(scond) | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); } @@ -665,7 +664,7 @@ static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, int32_t v1, int v1const) { tcg_out_cmp(s, cond, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); + tcg_out_movcc(s, tcg_cond_to_bcond[cond], MOVCC_ICC, ret, v1, v1const); } static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, @@ -709,7 +708,7 @@ static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, tcg_out_movr(s, rcond, ret, c1, v1, v1const); } else { tcg_out_cmp(s, cond, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); + tcg_out_movcc(s, tcg_cond_to_bcond[cond], MOVCC_XCC, ret, v1, v1const); } } @@ -763,7 +762,8 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, default: tcg_out_cmp(s, cond, c1, c2, c2const); tcg_out_movi_s13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, neg ? -1 : 1, 1); + tcg_out_movcc(s, tcg_cond_to_bcond[cond], + MOVCC_ICC, ret, neg ? -1 : 1, 1); return; } @@ -818,7 +818,8 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, } else { tcg_out_cmp(s, cond, c1, c2, c2const); tcg_out_movi_s13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, neg ? -1 : 1, 1); + tcg_out_movcc(s, tcg_cond_to_bcond[cond], + MOVCC_XCC, ret, neg ? -1 : 1, 1); } } @@ -956,10 +957,10 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, if (rh == ah) { tcg_out_arithi(s, TCG_REG_T2, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0); + tcg_out_movcc(s, COND_CS, MOVCC_XCC, rh, TCG_REG_T2, 0); } else { tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); + tcg_out_movcc(s, COND_CC, MOVCC_XCC, rh, ah, 0); } } else { /* @@ -974,7 +975,7 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, is_sub ? ARITH_SUB : ARITH_ADD); } /* ... smoosh T2 back to original BH if carry is clear ... */ - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst); + tcg_out_movcc(s, COND_CC, MOVCC_XCC, TCG_REG_T2, bh, bhconst); /* ... and finally perform the arithmetic with the new operand. */ tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD); } From 9dd1ea33b238d8b661b7541ebdb122b7cae6fd19 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 20 Jan 2025 18:48:06 -0800 Subject: [PATCH 144/161] tcg/sparc64: Implement add/sub carry opcodes Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target-con-set.h | 3 +- tcg/sparc64/tcg-target-has.h | 8 +- tcg/sparc64/tcg-target.c.inc | 300 ++++++++++++++++++++----------- 3 files changed, 201 insertions(+), 110 deletions(-) diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index 8cec396173..1a57adc0e8 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -15,6 +15,7 @@ C_O0_I2(r, rJ) C_O1_I1(r, r) C_O1_I2(r, r, r) C_O1_I2(r, r, rJ) +C_O1_I2(r, rz, rJ) +C_O1_I2(r, rz, rz) C_O1_I4(r, r, rJ, rI, 0) C_O2_I2(r, r, r, r) -C_O2_I4(r, r, rz, rz, rJ, rJ) diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index b8760dd154..caf7679595 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -14,13 +14,13 @@ extern bool use_vis3_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 3f97261626..c2251a6927 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -199,7 +199,9 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04)) #define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14)) #define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08)) +#define ARITH_ADDCCC (INSN_OP(2) | INSN_OP3(0x18)) #define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c)) +#define ARITH_SUBCCC (INSN_OP(2) | INSN_OP3(0x1c)) #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) #define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) @@ -211,6 +213,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) #define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) +#define ARITH_ADDXCCC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x13)) #define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16)) #define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25)) @@ -223,6 +226,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) #define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) +#define WRCCR (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(2)) #define JMPL (INSN_OP(2) | INSN_OP3(0x38)) #define RETURN (INSN_OP(2) | INSN_OP3(0x39)) #define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) @@ -366,7 +370,7 @@ static void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, } static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int op) + int32_t val2, int val2const, int op) { tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); @@ -733,7 +737,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, } c1 = TCG_REG_G0, c2const = 0; cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); - break; + break; case TCG_COND_TSTEQ: case TCG_COND_TSTNE: @@ -742,7 +746,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, c1 = TCG_REG_G0; c2 = TCG_REG_T1, c2const = 0; cond = (cond == TCG_COND_TSTEQ ? TCG_COND_GEU : TCG_COND_LTU); - break; + break; case TCG_COND_GTU: case TCG_COND_LEU: @@ -915,74 +919,6 @@ static const TCGOutOpMovcond outop_movcond = { .out = tgen_movcond, }; -static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, int opl, int oph) -{ - TCGReg tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; - } - - tcg_out_arithc(s, tmp, al, bl, blconst, opl); - tcg_out_arithc(s, rh, ah, bh, bhconst, oph); - tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); -} - -static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, bool is_sub) -{ - TCGReg tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; - } - - tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC); - - if (use_vis3_instructions && !is_sub) { - /* Note that ADDXC doesn't accept immediates. */ - if (bhconst && bh != 0) { - tcg_out_movi_s13(s, TCG_REG_T2, bh); - bh = TCG_REG_T2; - } - tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); - } else if (bh == TCG_REG_G0) { - /* If we have a zero, we can perform the operation in two insns, - with the arithmetic first, and a conditional move into place. */ - if (rh == ah) { - tcg_out_arithi(s, TCG_REG_T2, ah, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, COND_CS, MOVCC_XCC, rh, TCG_REG_T2, 0); - } else { - tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, COND_CC, MOVCC_XCC, rh, ah, 0); - } - } else { - /* - * Otherwise adjust BH as if there is carry into T2. - * Note that constant BH is constrained to 11 bits for the MOVCC, - * so the adjustment fits 12 bits. - */ - if (bhconst) { - tcg_out_movi_s13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1)); - } else { - tcg_out_arithi(s, TCG_REG_T2, bh, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - } - /* ... smoosh T2 back to original BH if carry is clear ... */ - tcg_out_movcc(s, COND_CC, MOVCC_XCC, TCG_REG_T2, bh, bhconst); - /* ... and finally perform the arithmetic with the new operand. */ - tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD); - } - - tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); -} - static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest, bool in_prologue, bool tail_call) { @@ -1382,21 +1318,132 @@ static const TCGOutOpBinary outop_add = { .out_rri = tgen_addi, }; +static void tgen_addco_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_ADDCC); +} + +static void tgen_addco_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_ADDCC); +} + static const TCGOutOpBinary outop_addco = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_addco_rrr, + .out_rri = tgen_addco_rri, }; +static void tgen_addci_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_arith(s, a0, a1, a2, ARITH_ADDC); + } else if (use_vis3_instructions) { + tcg_out_arith(s, a0, a1, a2, ARITH_ADDXC); + } else { + tcg_out_arith(s, TCG_REG_T1, a1, a2, ARITH_ADD); /* for CC */ + tcg_out_arithi(s, a0, TCG_REG_T1, 1, ARITH_ADD); /* for CS */ + /* Select the correct result based on actual carry value. */ + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, TCG_REG_T1, false); + } +} + +static void tgen_addci_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_arithi(s, a0, a1, a2, ARITH_ADDC); + return; + } + /* !use_vis3_instructions */ + if (a2 != 0) { + tcg_out_arithi(s, TCG_REG_T1, a1, a2, ARITH_ADD); /* for CC */ + tcg_out_arithi(s, a0, TCG_REG_T1, 1, ARITH_ADD); /* for CS */ + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, TCG_REG_T1, false); + } else if (a0 == a1) { + tcg_out_arithi(s, TCG_REG_T1, a1, 1, ARITH_ADD); + tcg_out_movcc(s, COND_CS, MOVCC_XCC, a0, TCG_REG_T1, false); + } else { + tcg_out_arithi(s, a0, a1, 1, ARITH_ADD); + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, a1, false); + } +} + +static TCGConstraintSetIndex cset_addci(TCGType type, unsigned flags) +{ + if (use_vis3_instructions && type == TCG_TYPE_I64) { + /* Note that ADDXC doesn't accept immediates. */ + return C_O1_I2(r, rz, rz); + } + return C_O1_I2(r, rz, rJ); +} + static const TCGOutOpAddSubCarry outop_addci = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addci, + .out_rrr = tgen_addci_rrr, + .out_rri = tgen_addci_rri, }; +/* Copy %xcc.c to %icc.c */ +static void tcg_out_dup_xcc_c(TCGContext *s) +{ + if (use_vis3_instructions) { + tcg_out_arith(s, TCG_REG_T1, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); + } else { + tcg_out_movi_s13(s, TCG_REG_T1, 0); + tcg_out_movcc(s, COND_CS, MOVCC_XCC, TCG_REG_T1, 1, true); + } + /* Write carry-in into %icc via {0,1} + -1. */ + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, -1, ARITH_ADDCC); +} + +static void tgen_addcio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + if (use_vis3_instructions) { + tcg_out_arith(s, a0, a1, a2, ARITH_ADDXCCC); + return; + } + tcg_out_dup_xcc_c(s); + } + tcg_out_arith(s, a0, a1, a2, ARITH_ADDCCC); +} + +static void tgen_addcio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type != TCG_TYPE_I32) { + /* !use_vis3_instructions */ + tcg_out_dup_xcc_c(s); + } + tcg_out_arithi(s, a0, a1, a2, ARITH_ADDCCC); +} + +static TCGConstraintSetIndex cset_addcio(TCGType type, unsigned flags) +{ + if (use_vis3_instructions && type == TCG_TYPE_I64) { + /* Note that ADDXCCC doesn't accept immediates. */ + return C_O1_I2(r, rz, rz); + } + return C_O1_I2(r, rz, rJ); +} + static const TCGOutOpBinary outop_addcio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addcio, + .out_rrr = tgen_addcio_rrr, + .out_rri = tgen_addcio_rri, }; static void tcg_out_set_carry(TCGContext *s) { - g_assert_not_reached(); + /* 0x11 -> xcc = nzvC, icc = nzvC */ + tcg_out_arithi(s, 0, TCG_REG_G0, 0x11, WRCCR); } static void tgen_and(TCGContext *s, TCGType type, @@ -1735,21 +1782,90 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_SUBCC); +} + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_SUBCC); +} + static const TCGOutOpAddSubCarry outop_subbo = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rz, rJ), + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, }; +static void tgen_subbi_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* TODO: OSA 2015 added SUBXC */ + if (type == TCG_TYPE_I32) { + tcg_out_arith(s, a0, a1, a2, ARITH_SUBC); + } else { + tcg_out_arith(s, TCG_REG_T1, a1, a2, ARITH_SUB); /* for CC */ + tcg_out_arithi(s, a0, TCG_REG_T1, 1, ARITH_SUB); /* for CS */ + /* Select the correct result based on actual borrow value. */ + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, TCG_REG_T1, false); + } +} + +static void tgen_subbi_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_arithi(s, a0, a1, a2, ARITH_SUBC); + } else if (a2 != 0) { + tcg_out_arithi(s, TCG_REG_T1, a1, a2, ARITH_SUB); /* for CC */ + tcg_out_arithi(s, a0, TCG_REG_T1, 1, ARITH_SUB); /* for CS */ + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, TCG_REG_T1, false); + } else if (a0 == a1) { + tcg_out_arithi(s, TCG_REG_T1, a1, 1, ARITH_SUB); + tcg_out_movcc(s, COND_CS, MOVCC_XCC, a0, TCG_REG_T1, false); + } else { + tcg_out_arithi(s, a0, a1, 1, ARITH_SUB); + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, a1, false); + } +} + static const TCGOutOpAddSubCarry outop_subbi = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rz, rJ), + .out_rrr = tgen_subbi_rrr, + .out_rri = tgen_subbi_rri, }; +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + /* TODO: OSA 2015 added SUBXCCC */ + tcg_out_dup_xcc_c(s); + } + tcg_out_arith(s, a0, a1, a2, ARITH_SUBCCC); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_dup_xcc_c(s); + } + tcg_out_arithi(s, a0, a1, a2, ARITH_SUBCCC); +} + static const TCGOutOpAddSubCarry outop_subbio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_O1_I2(r, rz, rJ), + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, }; static void tcg_out_set_borrow(TCGContext *s) { - g_assert_not_reached(); + tcg_out_set_carry(s); /* borrow == carry */ } static void tgen_xor(TCGContext *s, TCGType type, @@ -1886,17 +2002,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, a0, a1, a2, STW); break; - case INDEX_op_add2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_ADDCC, ARITH_ADDC); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_SUBCC, ARITH_SUBC); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); break; @@ -1920,15 +2025,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, a0, a1, a2, STX); break; - case INDEX_op_add2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], false); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], true); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -1975,12 +2071,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_O2_I4(r, r, rz, rz, rJ, rJ); - default: return C_NotImplemented; } From 4b0ee858be252ed272a758e5b6f894717e911d51 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 20 Jan 2025 19:46:04 -0800 Subject: [PATCH 145/161] tcg/tci: Implement add/sub carry opcodes Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tci.c | 120 +++++++++++++++++------------------ tcg/tci/tcg-target-has.h | 8 +-- tcg/tci/tcg-target-opc.h.inc | 1 + tcg/tci/tcg-target.c.inc | 97 +++++++++++++++++----------- 4 files changed, 125 insertions(+), 101 deletions(-) diff --git a/tcg/tci.c b/tcg/tci.c index dc916eb112..a18478a07a 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -179,17 +179,6 @@ static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, *c5 = extract32(insn, 28, 4); } -static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, - TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5) -{ - *r0 = extract32(insn, 8, 4); - *r1 = extract32(insn, 12, 4); - *r2 = extract32(insn, 16, 4); - *r3 = extract32(insn, 20, 4); - *r4 = extract32(insn, 24, 4); - *r5 = extract32(insn, 28, 4); -} - static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) { bool result = false; @@ -361,6 +350,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tcg_target_ulong regs[TCG_TARGET_NB_REGS]; uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE) / sizeof(uint64_t)]; + bool carry = false; regs[TCG_AREG0] = (tcg_target_ulong)env; regs[TCG_REG_CALL_STACK] = (uintptr_t)stack; @@ -369,13 +359,12 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, for (;;) { uint32_t insn; TCGOpcode opc; - TCGReg r0, r1, r2, r3, r4, r5; + TCGReg r0, r1, r2, r3, r4; tcg_target_ulong t1; TCGCond condition; uint8_t pos, len; uint32_t tmp32; uint64_t tmp64, taddr; - uint64_t T1, T2; MemOpIdx oi; int32_t ofs; void *ptr; @@ -444,9 +433,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, #if TCG_TARGET_REG_BITS == 32 case INDEX_op_setcond2_i32: tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); - T1 = tci_uint64(regs[r2], regs[r1]); - T2 = tci_uint64(regs[r4], regs[r3]); - regs[r0] = tci_compare64(T1, T2, condition); + regs[r0] = tci_compare64(tci_uint64(regs[r2], regs[r1]), + tci_uint64(regs[r4], regs[r3]), + condition); break; #elif TCG_TARGET_REG_BITS == 64 case INDEX_op_setcond: @@ -471,6 +460,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rl(insn, tb_ptr, &r0, &ptr); regs[r0] = *(tcg_target_ulong *)ptr; break; + case INDEX_op_tci_setcarry: + carry = true; + break; /* Load/store operations (32 bit). */ @@ -575,6 +567,46 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rr(insn, &r0, &r1); regs[r0] = ctpop_tr(regs[r1]); break; + case INDEX_op_addco: + tci_args_rrr(insn, &r0, &r1, &r2); + t1 = regs[r1] + regs[r2]; + carry = t1 < regs[r1]; + regs[r0] = t1; + break; + case INDEX_op_addci: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] + regs[r2] + carry; + break; + case INDEX_op_addcio: + tci_args_rrr(insn, &r0, &r1, &r2); + if (carry) { + t1 = regs[r1] + regs[r2] + 1; + carry = t1 <= regs[r1]; + } else { + t1 = regs[r1] + regs[r2]; + carry = t1 < regs[r1]; + } + regs[r0] = t1; + break; + case INDEX_op_subbo: + tci_args_rrr(insn, &r0, &r1, &r2); + carry = regs[r1] < regs[r2]; + regs[r0] = regs[r1] - regs[r2]; + break; + case INDEX_op_subbi: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] - regs[r2] - carry; + break; + case INDEX_op_subbio: + tci_args_rrr(insn, &r0, &r1, &r2); + if (carry) { + carry = regs[r1] <= regs[r2]; + regs[r0] = regs[r1] - regs[r2] - 1; + } else { + carry = regs[r1] < regs[r2]; + regs[r0] = regs[r1] - regs[r2]; + } + break; case INDEX_op_muls2: tci_args_rrrr(insn, &r0, &r1, &r2, &r3); #if TCG_TARGET_REG_BITS == 32 @@ -673,22 +705,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tb_ptr = ptr; } break; -#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32 - case INDEX_op_add2_i32: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - T1 = tci_uint64(regs[r3], regs[r2]); - T2 = tci_uint64(regs[r5], regs[r4]); - tci_write_reg64(regs, r1, r0, T1 + T2); - break; -#endif -#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32 - case INDEX_op_sub2_i32: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - T1 = tci_uint64(regs[r3], regs[r2]); - T2 = tci_uint64(regs[r5], regs[r4]); - tci_write_reg64(regs, r1, r0, T1 - T2); - break; -#endif case INDEX_op_bswap16: tci_args_rr(insn, &r0, &r1); regs[r0] = bswap16(regs[r1]); @@ -742,24 +758,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; break; -#if TCG_TARGET_HAS_add2_i64 - case INDEX_op_add2_i64: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - T1 = regs[r2] + regs[r4]; - T2 = regs[r3] + regs[r5] + (T1 < regs[r2]); - regs[r0] = T1; - regs[r1] = T2; - break; -#endif -#if TCG_TARGET_HAS_add2_i64 - case INDEX_op_sub2_i64: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - T1 = regs[r2] - regs[r4]; - T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]); - regs[r0] = T1; - regs[r1] = T2; - break; -#endif /* Shift/rotate operations (64 bit). */ @@ -908,7 +906,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) const char *op_name; uint32_t insn; TCGOpcode op; - TCGReg r0, r1, r2, r3, r4, r5; + TCGReg r0, r1, r2, r3, r4; tcg_target_ulong i1; int32_t s2; TCGCond c; @@ -968,6 +966,10 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), ptr); break; + case INDEX_op_tci_setcarry: + info->fprintf_func(info->stream, "%-12s", op_name); + break; + case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i32: @@ -1007,6 +1009,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) break; case INDEX_op_add: + case INDEX_op_addci: + case INDEX_op_addcio: + case INDEX_op_addco: case INDEX_op_and: case INDEX_op_andc: case INDEX_op_clz: @@ -1027,6 +1032,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_shl: case INDEX_op_shr: case INDEX_op_sub: + case INDEX_op_subbi: + case INDEX_op_subbio: + case INDEX_op_subbo: case INDEX_op_xor: case INDEX_op_tci_ctz32: case INDEX_op_tci_clz32: @@ -1071,16 +1079,6 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) str_r(r2), str_r(r3)); break; - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", - op_name, str_r(r0), str_r(r1), str_r(r2), - str_r(r3), str_r(r4), str_r(r5)); - break; - case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i64: if (TCG_TARGET_REG_BITS == 32) { diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 6063f32f7b..310d45ba62 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -8,13 +8,13 @@ #define TCG_TARGET_HAS_H #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_add2_i32 0 +#define TCG_TARGET_HAS_sub2_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_add2_i64 0 +#define TCG_TARGET_HAS_sub2_i64 0 #endif /* TCG_TARGET_REG_BITS == 64 */ #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index 672d9b7323..4eb32ed736 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -2,6 +2,7 @@ /* These opcodes for use between the tci generator and interpreter. */ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_setcarry, 0, 0, 0, TCG_OPF_NOT_PRESENT) DEF(tci_clz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_ctz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index bba96d7a19..35c0c91f3e 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -66,12 +66,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i64: return C_O0_I2(r, r); - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_O2_I4(r, r, r, r, r, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, r); case INDEX_op_qemu_ld_i64: @@ -346,22 +340,6 @@ static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, tcg_out32(s, insn); } -static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op, - TCGReg r0, TCGReg r1, TCGReg r2, - TCGReg r3, TCGReg r4, TCGReg r5) -{ - tcg_insn_unit insn = 0; - - insn = deposit32(insn, 0, 8, op); - insn = deposit32(insn, 8, 4, r0); - insn = deposit32(insn, 12, 4, r1); - insn = deposit32(insn, 16, 4, r2); - insn = deposit32(insn, 20, 4, r3); - insn = deposit32(insn, 24, 4, r4); - insn = deposit32(insn, 28, 4, r5); - tcg_out32(s, insn); -} - static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, TCGReg base, intptr_t offset) { @@ -573,21 +551,50 @@ static const TCGOutOpBinary outop_add = { .out_rrr = tgen_add, }; +static TCGConstraintSetIndex cset_addsubcarry(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static void tgen_addco(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_addco, a0, a1, a2); +} + static const TCGOutOpBinary outop_addco = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_addco, }; +static void tgen_addci(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_addci, a0, a1, a2); +} + static const TCGOutOpAddSubCarry outop_addci = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_addci, }; +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_addcio, a0, a1, a2); +} + static const TCGOutOpBinary outop_addcio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_addcio, }; static void tcg_out_set_carry(TCGContext *s) { - g_assert_not_reached(); + tcg_out_op_v(s, INDEX_op_tci_setcarry); } static void tgen_and(TCGContext *s, TCGType type, @@ -910,21 +917,45 @@ static const TCGOutOpSubtract outop_sub = { .out_rrr = tgen_sub, }; +static void tgen_subbo(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_subbo, a0, a1, a2); +} + static const TCGOutOpAddSubCarry outop_subbo = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_subbo, }; +static void tgen_subbi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_subbi, a0, a1, a2); +} + static const TCGOutOpAddSubCarry outop_subbi = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_subbi, }; +static void tgen_subbio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_subbio, a0, a1, a2); +} + static const TCGOutOpAddSubCarry outop_subbio = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_subbio, }; static void tcg_out_set_borrow(TCGContext *s) { - g_assert_not_reached(); + tcg_out_op_v(s, INDEX_op_tci_setcarry); /* borrow == carry */ } static void tgen_xor(TCGContext *s, TCGType type, @@ -1129,12 +1160,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; - CASE_32_64(add2) - CASE_32_64(sub2) - tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2], - args[3], args[4], args[5]); - break; - case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i64: if (TCG_TARGET_REG_BITS == 32) { From f2b1708e8080ab1beb0a2bf52a79a51e8de335cb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 20 Jan 2025 20:15:31 -0800 Subject: [PATCH 146/161] tcg: Remove add2/sub2 opcodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All uses have been replaced by add/sub carry opcodes. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 14 ++--- include/tcg/tcg-opc.h | 5 -- tcg/aarch64/tcg-target-has.h | 5 -- tcg/arm/tcg-target-has.h | 4 -- tcg/i386/tcg-target-has.h | 5 -- tcg/loongarch64/tcg-target-has.h | 4 -- tcg/mips/tcg-target-has.h | 5 -- tcg/optimize.c | 87 -------------------------------- tcg/ppc/tcg-target-has.h | 4 -- tcg/riscv/tcg-target-has.h | 5 -- tcg/s390x/tcg-target-has.h | 7 --- tcg/sparc64/tcg-target-has.h | 7 --- tcg/tcg-has.h | 2 - tcg/tcg-op.c | 26 ---------- tcg/tcg.c | 36 ------------- tcg/tci/tcg-target-has.h | 4 -- 16 files changed, 3 insertions(+), 217 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 93bcc70639..a7147407de 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -654,14 +654,6 @@ Multiword arithmetic support code generator will use ``tcg_out_set_borrow`` and then the output routine for *subbio*. - * - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* - - sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* - - - | Similar to add/sub, except that the double-word inputs *t1* and *t2* are - formed from two single-word arguments, and the double-word output *t0* - is returned in two single-word outputs. - * - mulu2 *t0_low*, *t0_high*, *t1*, *t2* - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full @@ -952,9 +944,9 @@ Assumptions The target word size (``TCG_TARGET_REG_BITS``) is expected to be 32 bit or 64 bit. It is expected that the pointer has the same size as the word. -On a 32 bit target, all 64 bit operations are converted to 32 bits. A -few specific operations must be implemented to allow it (see add2_i32, -sub2_i32, brcond2_i32). +On a 32 bit target, all 64 bit operations are converted to 32 bits. +A few specific operations must be implemented to allow it +(see brcond2_i32, setcond2_i32). On a 64 bit target, the values are transferred between 32 and 64-bit registers using the following ops: diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 9cc20cd62c..30ba15723a 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -102,8 +102,6 @@ DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) -DEF(add2_i32, 2, 4, 0, 0) -DEF(sub2_i32, 2, 4, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) @@ -126,9 +124,6 @@ DEF(extu_i32_i64, 1, 1, 0, 0) DEF(extrl_i64_i32, 1, 1, 0, 0) DEF(extrh_i64_i32, 1, 1, 0, 0) -DEF(add2_i64, 2, 4, 0, 0) -DEF(sub2_i64, 2, 4, 0, 0) - #define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) /* There are tcg_ctx->insn_start_words here, not just one. */ diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 695effd77c..b155e37639 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -13,14 +13,9 @@ #define have_lse2 (cpuinfo & CPUINFO_LSE2) /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 - /* * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load, * which requires writable pages. We must defer to the helper for user-only, diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index f4bd15c68a..187269e5bd 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -24,12 +24,8 @@ extern bool use_neon_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 - #define TCG_TARGET_HAS_qemu_ldst_i128 0 - #define TCG_TARGET_HAS_tst 1 #define TCG_TARGET_HAS_v64 use_neon_instructions diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index a984a6af2e..628e736de7 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -26,14 +26,9 @@ #define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 - #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #else #define TCG_TARGET_HAS_qemu_st8_i32 1 diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index a1bd71db6a..9c118bd1f6 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -10,14 +10,10 @@ #include "host/cpuinfo.h" /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 /* 64-bit operations */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX) diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index 9d86906bf3..d8f9f7beef 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -39,13 +39,8 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 - #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 #endif diff --git a/tcg/optimize.c b/tcg/optimize.c index 95ec3b426d..52e194aaa9 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1399,82 +1399,6 @@ static bool fold_addco(OptContext *ctx, TCGOp *op) return finish_folding(ctx, op); } -static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) -{ - bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]); - bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]); - - if (a_const && b_const) { - uint64_t al = arg_info(op->args[2])->val; - uint64_t ah = arg_info(op->args[3])->val; - uint64_t bl = arg_info(op->args[4])->val; - uint64_t bh = arg_info(op->args[5])->val; - TCGArg rl, rh; - TCGOp *op2; - - if (ctx->type == TCG_TYPE_I32) { - uint64_t a = deposit64(al, 32, 32, ah); - uint64_t b = deposit64(bl, 32, 32, bh); - - if (add) { - a += b; - } else { - a -= b; - } - - al = sextract64(a, 0, 32); - ah = sextract64(a, 32, 32); - } else { - Int128 a = int128_make128(al, ah); - Int128 b = int128_make128(bl, bh); - - if (add) { - a = int128_add(a, b); - } else { - a = int128_sub(a, b); - } - - al = int128_getlo(a); - ah = int128_gethi(a); - } - - rl = op->args[0]; - rh = op->args[1]; - - /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = opt_insert_before(ctx, op, 0, 2); - - tcg_opt_gen_movi(ctx, op, rl, al); - tcg_opt_gen_movi(ctx, op2, rh, ah); - return true; - } - - /* Fold sub2 r,x,i to add2 r,x,-i */ - if (!add && b_const) { - uint64_t bl = arg_info(op->args[4])->val; - uint64_t bh = arg_info(op->args[5])->val; - - /* Negate the two parts without assembling and disassembling. */ - bl = -bl; - bh = ~bh + !bl; - - op->opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_add2_i32 : INDEX_op_add2_i64); - op->args[4] = arg_new_constant(ctx, bl); - op->args[5] = arg_new_constant(ctx, bh); - } - return finish_folding(ctx, op); -} - -static bool fold_add2(OptContext *ctx, TCGOp *op) -{ - /* Note that the high and low parts may be independently swapped. */ - swap_commutative(op->args[0], &op->args[2], &op->args[4]); - swap_commutative(op->args[1], &op->args[3], &op->args[5]); - - return fold_addsub2(ctx, op, true); -} - static bool fold_and(OptContext *ctx, TCGOp *op) { uint64_t z1, z2, z_mask, s_mask; @@ -2811,11 +2735,6 @@ static bool fold_sub(OptContext *ctx, TCGOp *op) return finish_folding(ctx, op); } -static bool fold_sub2(OptContext *ctx, TCGOp *op) -{ - return fold_addsub2(ctx, op, false); -} - static void squash_prev_borrowout(OptContext *ctx, TCGOp *op) { TempOptInfo *t2; @@ -3150,9 +3069,6 @@ void tcg_optimize(TCGContext *s) case INDEX_op_addco: done = fold_addco(&ctx, op); break; - CASE_OP_32_64(add2): - done = fold_add2(&ctx, op); - break; case INDEX_op_and: case INDEX_op_and_vec: done = fold_and(&ctx, op); @@ -3342,9 +3258,6 @@ void tcg_optimize(TCGContext *s) case INDEX_op_sub_vec: done = fold_sub_vec(&ctx, op); break; - CASE_OP_32_64(sub2): - done = fold_sub2(&ctx, op); - break; case INDEX_op_xor: case INDEX_op_xor_vec: done = fold_xor(&ctx, op); diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 4dda668706..b978c91a62 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -18,13 +18,9 @@ /* optional instructions */ #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 \ diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index c95dc1921e..8cd099546f 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -11,13 +11,8 @@ /* optional instructions */ #define TCG_TARGET_HAS_qemu_st8_i32 0 - #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 - #define TCG_TARGET_HAS_qemu_ldst_i128 0 - #define TCG_TARGET_HAS_tst 0 /* vector instructions */ diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index 17e61130cd..c04cc4e377 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -29,16 +29,9 @@ extern uint64_t s390_facilities[3]; ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1) /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 - -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 - #define TCG_TARGET_HAS_qemu_ldst_i128 1 - #define TCG_TARGET_HAS_tst 1 #define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR) diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index caf7679595..d9f5ef3fc9 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -14,16 +14,9 @@ extern bool use_vis3_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 - #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 - #define TCG_TARGET_HAS_qemu_ldst_i128 0 - #define TCG_TARGET_HAS_tst 1 #define TCG_TARGET_extract_valid(type, ofs, len) \ diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 50e8d0cda4..2fc0e50d20 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -12,8 +12,6 @@ #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. */ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 #endif #if !defined(TCG_TARGET_HAS_v64) \ diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index b0a29278ab..b0139ce05d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -249,24 +249,6 @@ static void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, tcgv_i64_arg(a3), a4, a5); } -static void DNI tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, - TCGv_i32 a5, TCGv_i32 a6) -{ - tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), - tcgv_i32_arg(a6)); -} - -static void DNI tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, - TCGv_i64 a5, TCGv_i64 a6) -{ - tcg_gen_op6(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), - tcgv_i64_arg(a6)); -} - static void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5, TCGArg a6) @@ -1108,8 +1090,6 @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, tcg_gen_op3_i32(INDEX_op_addci, rh, ah, bh); tcg_gen_mov_i32(rl, t0); tcg_temp_free_i32(t0); - } else if (TCG_TARGET_HAS_add2_i32) { - tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); @@ -1159,8 +1139,6 @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, tcg_gen_op3_i32(INDEX_op_subbi, rh, ah, bh); tcg_gen_mov_i32(rl, t0); tcg_temp_free_i32(t0); - } else if (TCG_TARGET_HAS_sub2_i32) { - tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); @@ -2880,8 +2858,6 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, tcg_gen_mov_i64(rl, t0); tcg_temp_free_i64(t0); - } else if (TCG_TARGET_HAS_add2_i64) { - tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -2985,8 +2961,6 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, tcg_gen_mov_i64(rl, t0); tcg_temp_free_i64(t0); - } else if (TCG_TARGET_HAS_sub2_i64) { - tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); diff --git a/tcg/tcg.c b/tcg/tcg.c index 3b9f519ef6..5a498b48b6 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2430,11 +2430,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_st_i32: return true; - case INDEX_op_add2_i32: - return TCG_TARGET_HAS_add2_i32; - case INDEX_op_sub2_i32: - return TCG_TARGET_HAS_sub2_i32; - case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; @@ -2456,11 +2451,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_extrh_i64_i32: return TCG_TARGET_REG_BITS == 64; - case INDEX_op_add2_i64: - return TCG_TARGET_HAS_add2_i64; - case INDEX_op_sub2_i64: - return TCG_TARGET_HAS_sub2_i64; - case INDEX_op_mov_vec: case INDEX_op_dup_vec: case INDEX_op_dupm_vec: @@ -4101,32 +4091,6 @@ liveness_pass_1(TCGContext *s) la_reset_pref(ts); break; - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - opc_new = INDEX_op_add; - goto do_addsub2; - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - opc_new = INDEX_op_sub; - do_addsub2: - assert_carry_dead(s); - /* Test if the high part of the operation is dead, but not - the low part. The result can be optimized to a simple - add or sub. This happens often for x86_64 guest when the - cpu mode is set to 32 bit. */ - if (arg_temp(op->args[1])->state == TS_DEAD) { - if (arg_temp(op->args[0])->state == TS_DEAD) { - goto do_remove; - } - /* Replace the opcode and adjust the args in place, - leaving 3 unused args at the end. */ - op->opc = opc = opc_new; - op->args[1] = op->args[2]; - op->args[2] = op->args[4]; - /* Fall through and mark the single-word operation live. */ - } - goto do_not_remove; - case INDEX_op_muls2: opc_new = INDEX_op_mul; opc_new2 = INDEX_op_mulsh; diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 310d45ba62..497e8152b7 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -8,13 +8,9 @@ #define TCG_TARGET_HAS_H #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 #endif /* TCG_TARGET_REG_BITS == 64 */ #define TCG_TARGET_HAS_qemu_ldst_i128 0 From e038696c9293f34dc7ccd4adac2c2f221a65a8e3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 20 Jan 2025 20:47:42 -0800 Subject: [PATCH 147/161] tcg: Formalize tcg_out_mb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most tcg backends already have a function for this; the rest can split one out from tcg_out_op. Call it directly from tcg_gen_code. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 6 +----- tcg/arm/tcg-target.c.inc | 6 +----- tcg/i386/tcg-target.c.inc | 5 +---- tcg/loongarch64/tcg-target.c.inc | 6 +----- tcg/mips/tcg-target.c.inc | 5 +---- tcg/ppc/tcg-target.c.inc | 6 +----- tcg/riscv/tcg-target.c.inc | 6 +----- tcg/s390x/tcg-target.c.inc | 20 +++++++++++--------- tcg/sparc64/tcg-target.c.inc | 6 +----- tcg/tcg.c | 4 ++++ tcg/tci/tcg-target.c.inc | 9 +++++---- 11 files changed, 28 insertions(+), 51 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 75cf490fd2..d2babd9bab 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1575,7 +1575,7 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) tcg_out_mov(s, TCG_TYPE_I32, rd, rn); } -static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { static const uint32_t sync[] = { [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, @@ -2845,10 +2845,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); break; - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 3c9042ebfa..131901dabc 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1203,7 +1203,7 @@ static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l) } } -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { if (use_armv7_instructions) { tcg_out32(s, INSN_DMB_ISH); @@ -2565,10 +2565,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); break; - case INDEX_op_mb: - tcg_out_mb(s, args[0]); - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index da05f13b21..bf84f9f455 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1168,7 +1168,7 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) } } -static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { /* Given the strength of x86 memory ordering, we only need care for store-load ordering. Experimentally, "lock orl $0,0(%esp)" is @@ -3536,9 +3536,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; #endif - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 4f640764ef..1ad577ad8d 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -301,7 +301,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, * TCG intrinsics */ -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { /* Baseline LoongArch only has the full barrier, unfortunately. */ tcg_out_opc_dbar(s, 0); @@ -1917,10 +1917,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a3 = args[3]; switch (opc) { - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; - case INDEX_op_goto_ptr: tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); break; diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 0c268cef42..b0da661561 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1491,7 +1491,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { static const MIPSInsn sync[] = { /* Note that SYNC_MB is a slightly weaker than SYNC 0, @@ -2352,9 +2352,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 91df9610ec..ae18c84ae6 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2208,7 +2208,7 @@ static const TCGOutOpBrcond2 outop_brcond2 = { .out = tgen_brcond2, }; -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { uint32_t insn; @@ -3758,10 +3758,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; - case INDEX_op_mb: - tcg_out_mb(s, args[0]); - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 707ebb8f6d..df271752b7 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1582,7 +1582,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, tcg_out_call_int(s, arg, false); } -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { tcg_insn_unit insn = OPC_FENCE; @@ -2594,10 +2594,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); break; - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 09c7ca5b44..020d8ba73f 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -3008,6 +3008,17 @@ static const TCGOutOpUnary outop_not = { .out_rr = tgen_not, }; +static void tcg_out_mb(TCGContext *s, unsigned a0) +{ + /* + * The host memory model is quite strong, we simply need to + * serialize the instruction stream. + */ + if (a0 & TCG_MO_ST_LD) { + /* fast-bcr-serialization facility (45) is present */ + tcg_out_insn(s, RR, BCR, 14, 0); + } +} # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ @@ -3107,15 +3118,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); break; - case INDEX_op_mb: - /* The host memory model is quite strong, we simply need to - serialize the instruction stream. */ - if (args[0] & TCG_MO_ST_LD) { - /* fast-bcr-serialization facility (45) is present */ - tcg_out_insn(s, RR, BCR, 14, 0); - } - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index c2251a6927..7754627a5d 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -949,7 +949,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, tcg_out_nop(s); } -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { /* Note that the TCG memory order constants mirror the Sparc MEMBAR. */ tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL)); @@ -2025,10 +2025,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_ldst(s, a0, a1, a2, STX); break; - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ diff --git a/tcg/tcg.c b/tcg/tcg.c index 5a498b48b6..e7478bef77 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -133,6 +133,7 @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_goto_tb(TCGContext *s, int which); +static void tcg_out_mb(TCGContext *s, unsigned bar); static void tcg_out_set_carry(TCGContext *s); static void tcg_out_set_borrow(TCGContext *s); static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, @@ -6899,6 +6900,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) case INDEX_op_goto_tb: tcg_out_goto_tb(s, op->args[0]); break; + case INDEX_op_mb: + tcg_out_mb(s, op->args[0]); + break; case INDEX_op_dup2_vec: if (tcg_reg_alloc_dup2(s, op)) { break; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 35c0c91f3e..64d4ac07cd 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -1131,6 +1131,11 @@ static const TCGOutOpSetcond2 outop_setcond2 = { .out = tgen_setcond2, }; +static void tcg_out_mb(TCGContext *s, unsigned a0) +{ + tcg_out_op_v(s, INDEX_op_mb); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1178,10 +1183,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, } break; - case INDEX_op_mb: - tcg_out_op_v(s, opc); - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ From 3752a5a5ba33448fe40556d781e8096b5b9dd916 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 20 Jan 2025 21:17:07 -0800 Subject: [PATCH 148/161] tcg: Formalize tcg_out_br Split these functions out from tcg_out_op. Call it directly from tcg_gen_code. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 6 +----- tcg/arm/tcg-target.c.inc | 8 +++++--- tcg/i386/tcg-target.c.inc | 8 +++++--- tcg/loongarch64/tcg-target.c.inc | 12 ++++++------ tcg/mips/tcg-target.c.inc | 10 +++++----- tcg/ppc/tcg-target.c.inc | 26 ++++++++++++-------------- tcg/riscv/tcg-target.c.inc | 11 ++++++----- tcg/s390x/tcg-target.c.inc | 9 +++++---- tcg/sparc64/tcg-target.c.inc | 10 ++++++---- tcg/tcg.c | 4 ++++ tcg/tci/tcg-target.c.inc | 9 +++++---- 11 files changed, 60 insertions(+), 53 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index d2babd9bab..fceb6e2796 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1407,7 +1407,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, tcg_out_call_int(s, target); } -static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) +static void tcg_out_br(TCGContext *s, TCGLabel *l) { if (!l->has_value) { tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); @@ -2779,10 +2779,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, tcg_out_insn(s, 3207, BR, a0); break; - case INDEX_op_br: - tcg_out_goto_label(s, arg_label(a0)); - break; - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 131901dabc..327b01d377 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1203,6 +1203,11 @@ static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l) } } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_goto_label(s, COND_AL, l); +} + static void tcg_out_mb(TCGContext *s, unsigned a0) { if (use_armv7_instructions) { @@ -2522,9 +2527,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, case INDEX_op_goto_ptr: tcg_out_b_reg(s, COND_AL, args[0]); break; - case INDEX_op_br: - tcg_out_goto_label(s, COND_AL, arg_label(args[0])); - break; case INDEX_op_ld8u_i32: tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index bf84f9f455..f89982378b 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1546,6 +1546,11 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small) } } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_jxx(s, JCC_JMP, l, 0); +} + static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int rexw) { @@ -3436,9 +3441,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, /* jmp to the given host address (could be epilogue) */ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); break; - case INDEX_op_br: - tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0); - break; OP_32_64(ld8u): /* Note that we can ignore REXW for the zero-extend to 64-bit. */ tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 1ad577ad8d..cbdc42c157 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -756,6 +756,12 @@ static const TCGOutOpMovcond outop_movcond = { * Branch helpers */ +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, l, 0); + tcg_out_opc_b(s, 0); +} + static const struct { LoongArchInsn op; bool swap; @@ -1921,12 +1927,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); break; - case INDEX_op_br: - tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0), - 0); - tcg_out_opc_b(s, 0); - break; - case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index b0da661561..f4d6ee10b9 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -921,6 +921,11 @@ static const TCGOutOpBrcond outop_brcond = { .out_rr = tgen_brcond, }; +void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tgen_brcond(s, TCG_TYPE_I32, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, l); +} + static int tcg_out_setcond2_int(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) { @@ -2281,11 +2286,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_nop(s); } break; - case INDEX_op_br: - tgen_brcond(s, TCG_TYPE_I32, TCG_COND_EQ, - TCG_REG_ZERO, TCG_REG_ZERO, arg_label(a0)); - break; - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: i1 = OPC_LBU; diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index ae18c84ae6..d88ec8d690 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1990,6 +1990,18 @@ static const TCGOutOpSetcond outop_negsetcond = { .out_rri = tgen_negsetcondi, }; +void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + uint32_t insn = B; + + if (l->has_value) { + insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); + } + tcg_out32(s, insn); +} + static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd) { tcg_out32(s, tcg_to_bc[cond] | bd); @@ -3669,20 +3681,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); tcg_out32(s, BCCTR | BO_ALWAYS); break; - case INDEX_op_br: - { - TCGLabel *l = arg_label(args[0]); - uint32_t insn = B; - - if (l->has_value) { - insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), - l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); - } - tcg_out32(s, insn); - } - break; case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index df271752b7..5d8d8213cb 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1107,6 +1107,12 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0); } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, l, 0); + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0); +} + static const struct { RISCVInsn op; bool swap; @@ -2533,11 +2539,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0); break; - case INDEX_op_br: - tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0); - tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0); - break; - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: tcg_out_ldst(s, OPC_LBU, a0, a1, a2); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 020d8ba73f..cdc61de4f8 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1689,6 +1689,11 @@ static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) } } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tgen_branch(s, S390_CC_ALWAYS, l); +} + static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, TCGReg r1, TCGReg r2, TCGLabel *l) { @@ -3075,10 +3080,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_br: - tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32); break; diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 7754627a5d..0cc7567786 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -641,6 +641,12 @@ static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) tcg_out_bpcc0(s, scond, flags, off19); } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_bpcc(s, COND_A, BPCC_PT, l); + tcg_out_nop(s); +} + static void tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg c1, int32_t c2, int c2const) { @@ -1966,10 +1972,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); tcg_out_mov_delay(s, TCG_REG_TB, a0); break; - case INDEX_op_br: - tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); - tcg_out_nop(s); - break; #define OP_32_64(x) \ glue(glue(case INDEX_op_, x), _i32): \ diff --git a/tcg/tcg.c b/tcg/tcg.c index e7478bef77..fbb1a43efc 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -134,6 +134,7 @@ static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_goto_tb(TCGContext *s, int which); static void tcg_out_mb(TCGContext *s, unsigned bar); +static void tcg_out_br(TCGContext *s, TCGLabel *l); static void tcg_out_set_carry(TCGContext *s); static void tcg_out_set_borrow(TCGContext *s); static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, @@ -6900,6 +6901,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) case INDEX_op_goto_tb: tcg_out_goto_tb(s, op->args[0]); break; + case INDEX_op_br: + tcg_out_br(s, arg_label(op->args[0])); + break; case INDEX_op_mb: tcg_out_mb(s, op->args[0]); break; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 64d4ac07cd..55a1a74fb6 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -1136,6 +1136,11 @@ static void tcg_out_mb(TCGContext *s, unsigned a0) tcg_out_op_v(s, INDEX_op_mb); } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_op_l(s, INDEX_op_br, l); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1145,10 +1150,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_op_r(s, opc, args[0]); break; - case INDEX_op_br: - tcg_out_op_l(s, opc, arg_label(args[0])); - break; - CASE_32_64(ld8u) CASE_32_64(ld8s) CASE_32_64(ld16u) From fee03fdde32c3d5b26429b7e691f78d1ee03d631 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 20 Jan 2025 21:57:32 -0800 Subject: [PATCH 149/161] tcg: Formalize tcg_out_goto_ptr Split these functions out from tcg_out_op. Define outop_goto_ptr generically. Call tcg_out_goto_ptr from tcg_reg_alloc_op. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 12 +++++------- tcg/arm/tcg-target.c.inc | 12 +++++------- tcg/i386/tcg-target.c.inc | 13 ++++++------- tcg/loongarch64/tcg-target.c.inc | 12 +++++------- tcg/mips/tcg-target.c.inc | 22 ++++++++++------------ tcg/ppc/tcg-target.c.inc | 15 +++++++-------- tcg/riscv/tcg-target.c.inc | 12 +++++------- tcg/s390x/tcg-target.c.inc | 15 +++++---------- tcg/sparc64/tcg-target.c.inc | 14 ++++++-------- tcg/tcg.c | 12 ++++++++++++ tcg/tci/tcg-target.c.inc | 12 +++++------- 11 files changed, 71 insertions(+), 80 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index fceb6e2796..2678e1f176 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1986,6 +1986,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) tcg_out_bti(s, BTI_J); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_insn(s, 3207, BR, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -2775,10 +2780,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_insn(s, 3207, BR, a0); - break; - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); @@ -3293,9 +3294,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 327b01d377..64be0a7e6d 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1795,6 +1795,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_b_reg(s, COND_AL, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -2524,10 +2529,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_b_reg(s, COND_AL, args[0]); - break; - case INDEX_op_ld8u_i32: tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]); break; @@ -2579,9 +2580,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index f89982378b..5ea4a44264 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2593,6 +2593,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + /* Jump to the given host address (could be epilogue) */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -3437,10 +3443,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; switch (opc) { - case INDEX_op_goto_ptr: - /* jmp to the given host address (could be epilogue) */ - tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); - break; OP_32_64(ld8u): /* Note that we can ignore REXW for the zero-extend to 64-bit. */ tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2); @@ -4093,9 +4095,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i32: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index cbdc42c157..d89c27c67f 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1307,6 +1307,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -1923,10 +1928,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a3 = args[3]; switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); - break; - case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); @@ -2491,9 +2492,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_st8_i32: case INDEX_op_st8_i64: case INDEX_op_st16_i32: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index f4d6ee10b9..9455a0a17b 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1571,6 +1571,16 @@ static void tcg_out_goto_tb(TCGContext *s, int which) } } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0); + } else { + tcg_out_nop(s); + } +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -2277,15 +2287,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_goto_ptr: - /* jmp to the given host address (could be epilogue) */ - tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0); - } else { - tcg_out_nop(s); - } - break; case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: i1 = OPC_LBU; @@ -2364,9 +2365,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index d88ec8d690..a2a5b1e570 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2843,6 +2843,13 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out32(s, MTSPR | RS(a0) | CTR); + tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); + tcg_out32(s, BCCTR | BO_ALWAYS); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -3676,11 +3683,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_goto_ptr: - tcg_out32(s, MTSPR | RS(args[0]) | CTR); - tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); - tcg_out32(s, BCCTR | BO_ALWAYS); - break; case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); @@ -4371,9 +4373,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 5d8d8213cb..c1bfd93569 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1915,6 +1915,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -2535,10 +2540,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0); - break; - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: tcg_out_ldst(s, OPC_LBU, a0, a1, a2); @@ -2824,9 +2825,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index cdc61de4f8..2b2e00c609 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2213,6 +2213,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -3033,14 +3038,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0; - switch (opc) { - case INDEX_op_goto_ptr: - a0 = args[0]; - tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0); - break; - OP_32_64(ld8u): /* ??? LLC (RXY format) is only present with the extended-immediate facility, whereas LLGC is always present. */ @@ -3567,9 +3565,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i32: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 0cc7567786..208b96487e 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1300,6 +1300,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) } } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); + tcg_out_mov_delay(s, TCG_REG_TB, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -1968,11 +1974,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); - tcg_out_mov_delay(s, TCG_REG_TB, a0); - break; - #define OP_32_64(x) \ glue(glue(case INDEX_op_, x), _i32): \ glue(glue(case INDEX_op_, x), _i64) @@ -2039,9 +2040,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i32: diff --git a/tcg/tcg.c b/tcg/tcg.c index fbb1a43efc..5ab4f5e752 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -133,6 +133,7 @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_goto_tb(TCGContext *s, int which); +static void tcg_out_goto_ptr(TCGContext *s, TCGReg dest); static void tcg_out_mb(TCGContext *s, unsigned bar); static void tcg_out_br(TCGContext *s, TCGLabel *l); static void tcg_out_set_carry(TCGContext *s); @@ -1137,6 +1138,10 @@ static const TCGOutOpUnary outop_extrl_i64_i32 = { }; #endif +static const TCGOutOp outop_goto_ptr = { + .static_constraint = C_O0_I1(r), +}; + /* * Register V as the TCGOutOp for O. * This verifies that V is of type T, otherwise give a nice compiler error. @@ -1198,6 +1203,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio), OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), + [INDEX_op_goto_ptr] = &outop_goto_ptr, + #if TCG_TARGET_REG_BITS == 32 OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2), OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2), @@ -5823,6 +5830,11 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) g_assert_not_reached(); #endif + case INDEX_op_goto_ptr: + tcg_debug_assert(!const_args[0]); + tcg_out_goto_ptr(s, new_args[0]); + break; + default: if (def->flags & TCG_OPF_VECTOR) { tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 55a1a74fb6..d9cd62ed3d 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -40,9 +40,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: @@ -534,6 +531,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_op_r(s, INDEX_op_goto_ptr, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -1146,10 +1148,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_op_r(s, opc, args[0]); - break; - CASE_32_64(ld8u) CASE_32_64(ld8s) CASE_32_64(ld16u) From 0de5c9d1f56332554c48152f535b47a1a0c2af7b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 21 Jan 2025 20:44:42 -0800 Subject: [PATCH 150/161] tcg: Convert ld to TCGOutOpLoad Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 113 ++++++++++++++++----------- tcg/arm/tcg-target.c.inc | 126 ++++++++++++++++--------------- tcg/i386/tcg-target.c.inc | 112 ++++++++++++++++----------- tcg/loongarch64/tcg-target.c.inc | 104 +++++++++++++++---------- tcg/mips/tcg-target.c.inc | 108 ++++++++++++++++---------- tcg/ppc/tcg-target.c.inc | 110 +++++++++++++++++---------- tcg/riscv/tcg-target.c.inc | 107 ++++++++++++++++---------- tcg/s390x/tcg-target.c.inc | 122 +++++++++++++++++------------- tcg/sparc64/tcg-target.c.inc | 101 ++++++++++++++++--------- tcg/tcg.c | 46 +++++++++++ tcg/tci/tcg-target.c.inc | 91 ++++++++++++++++------ 11 files changed, 721 insertions(+), 419 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 2678e1f176..903a95ad7e 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2770,6 +2770,74 @@ static const TCGOutOpExtract2 outop_extract2 = { .out_rrr = tgen_extract2, }; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_LDRB, dest, base, offset, 0); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + AArch64Insn insn = type == TCG_TYPE_I32 ? I3312_LDRSBW : I3312_LDRSBX; + tcg_out_ldst(s, insn, dest, base, offset, 0); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_LDRH, dest, base, offset, 1); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + AArch64Insn insn = type == TCG_TYPE_I32 ? I3312_LDRSHW : I3312_LDRSHX; + tcg_out_ldst(s, insn, dest, base, offset, 1); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_LDRW, dest, base, offset, 2); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_LDRSWX, dest, base, offset, 2); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2780,37 +2848,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); - break; - case INDEX_op_ld8s_i32: - tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); - break; - case INDEX_op_ld8s_i64: - tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); - break; - case INDEX_op_ld16s_i32: - tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); - break; - case INDEX_op_ld16s_i64: - tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); - break; - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); - break; - case INDEX_op_st8_i32: case INDEX_op_st8_i64: tcg_out_ldst(s, I3312_STRB, a0, a1, a2, 0); @@ -3294,20 +3331,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 64be0a7e6d..2079dd3bdc 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1084,26 +1084,6 @@ static void tcg_out_st32(TCGContext *s, ARMCond cond, tcg_out_st32_12(s, cond, rd, rn, offset); } -static void tcg_out_ld16u(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld16u_8(s, cond, rd, rn, offset); -} - -static void tcg_out_ld16s(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld16s_8(s, cond, rd, rn, offset); -} - static void tcg_out_st16(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, int32_t offset) { @@ -1114,26 +1094,6 @@ static void tcg_out_st16(TCGContext *s, ARMCond cond, tcg_out_st16_8(s, cond, rd, rn, offset); } -static void tcg_out_ld8u(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld8_12(s, cond, rd, rn, offset); -} - -static void tcg_out_ld8s(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld8s_8(s, cond, rd, rn, offset); -} - static void tcg_out_st8(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, int32_t offset) { @@ -2524,26 +2484,75 @@ static const TCGOutOpExtract2 outop_extract2 = { .out_rrr = tgen_extract2, }; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_ld8_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_ld8_12(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_ld8s_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_ld8s_8(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_ld16u_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_ld16u_8(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_ld16s_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_ld16s_8(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_ld8u_i32: - tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld16u_i32: - tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i32: - tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]); - break; case INDEX_op_st8_i32: tcg_out_st8(s, COND_AL, args[0], args[1], args[2]); break; @@ -2580,13 +2589,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - return C_O1_I1(r, r); - case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 5ea4a44264..d16ddcb940 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3419,13 +3419,81 @@ static const TCGOutOpExtract2 outop_extract2 = { .out_rrr = tgen_extract2, }; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVZBL, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVZWL, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVL_GvEv, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVSLQ, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; +#endif static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; - int rexw; #if TCG_TARGET_REG_BITS == 64 # define OP_32_64(x) \ @@ -3440,30 +3508,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a0 = args[0]; a1 = args[1]; a2 = args[2]; - rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; switch (opc) { - OP_32_64(ld8u): - /* Note that we can ignore REXW for the zero-extend to 64-bit. */ - tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2); - break; - OP_32_64(ld8s): - tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2); - break; - OP_32_64(ld16u): - /* Note that we can ignore REXW for the zero-extend to 64-bit. */ - tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2); - break; - OP_32_64(ld16s): - tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2); - break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_ld32u_i64: -#endif - case INDEX_op_ld_i32: - tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2); - break; - OP_32_64(st8): if (const_args[0]) { tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2); @@ -3524,12 +3570,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; #if TCG_TARGET_REG_BITS == 64 - case INDEX_op_ld32s_i64: - tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2); - break; - case INDEX_op_ld_i64: - tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2); - break; case INDEX_op_st_i64: if (const_args[0]) { tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2); @@ -4095,20 +4135,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_st8_i32: case INDEX_op_st8_i64: return C_O0_I2(qi, r); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index d89c27c67f..66555b8982 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1917,6 +1917,71 @@ static const TCGOutOpExtract2 outop_extract2 = { .base.static_constraint = C_NotImplemented, }; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_BU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_B, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_HU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_H, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_WU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_W, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1928,33 +1993,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a3 = args[3]; switch (opc) { - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); - break; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_ldst(s, OPC_LD_H, a0, a1, a2); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, OPC_LD_W, a0, a1, a2); - break; - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, OPC_LD_D, a0, a1, a2); - break; - case INDEX_op_st8_i32: case INDEX_op_st8_i64: tcg_out_ldst(s, OPC_ST_B, a0, a1, a2); @@ -2509,18 +2547,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i128: return C_O0_I3(r, r, r); - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 9455a0a17b..21ed11b78d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2274,6 +2274,74 @@ static const TCGOutOpExtract2 outop_extract2 = { .base.static_constraint = C_NotImplemented, }; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LBU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LB, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LHU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LH, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LWU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LW, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; +#endif + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2287,32 +2355,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - i1 = OPC_LBU; - goto do_ldst; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - i1 = OPC_LB; - goto do_ldst; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - i1 = OPC_LHU; - goto do_ldst; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - i1 = OPC_LH; - goto do_ldst; - case INDEX_op_ld_i32: - case INDEX_op_ld32s_i64: - i1 = OPC_LW; - goto do_ldst; - case INDEX_op_ld32u_i64: - i1 = OPC_LWU; - goto do_ldst; - case INDEX_op_ld_i64: - i1 = OPC_LD; - goto do_ldst; case INDEX_op_st8_i32: case INDEX_op_st8_i64: i1 = OPC_SB; @@ -2365,20 +2407,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index a2a5b1e570..275c5a90a5 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3677,39 +3677,81 @@ static const TCGOutOpExtract2 outop_extract2 = { .base.static_constraint = C_NotImplemented, }; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LBZ, LBZX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tgen_ld8u(s, type, dest, base, offset); + tcg_out_ext8s(s, type, dest, dest); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LHZ, LHZX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LHA, LHAX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LWZ, LWZX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LWA, LWAX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; +#endif + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); - break; case INDEX_op_st8_i32: case INDEX_op_st8_i64: tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); @@ -4373,20 +4415,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index c1bfd93569..5b987c930f 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2530,6 +2530,72 @@ static const TCGOutOpExtract2 outop_extract2 = { .base.static_constraint = C_NotImplemented, }; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LBU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LB, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LHU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LH, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LWU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LW, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2540,33 +2606,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ldst(s, OPC_LBU, a0, a1, a2); - break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_ldst(s, OPC_LB, a0, a1, a2); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ldst(s, OPC_LHU, a0, a1, a2); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_ldst(s, OPC_LH, a0, a1, a2); - break; - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, OPC_LWU, a0, a1, a2); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, OPC_LW, a0, a1, a2); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, OPC_LD, a0, a1, a2); - break; - case INDEX_op_st8_i32: case INDEX_op_st8_i64: tcg_out_ldst(s, OPC_SB, a0, a1, a2); @@ -2825,20 +2864,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 2b2e00c609..fe7665b21d 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -3030,6 +3030,76 @@ static void tcg_out_mb(TCGContext *s, unsigned a0) } } +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LLGC, dest, base, TCG_REG_NONE, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LGB, dest, base, TCG_REG_NONE, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LLGH, dest, base, TCG_REG_NONE, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + if (type == TCG_TYPE_I32) { + tcg_out_mem(s, RX_LH, RXY_LHY, dest, base, TCG_REG_NONE, offset); + } else { + tcg_out_mem(s, 0, RXY_LGH, dest, base, TCG_REG_NONE, offset); + } +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LLGF, dest, base, TCG_REG_NONE, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LGF, dest, base, TCG_REG_NONE, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ case glue(glue(INDEX_op_,x),_i64) @@ -3039,31 +3109,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - OP_32_64(ld8u): - /* ??? LLC (RXY format) is only present with the extended-immediate - facility, whereas LLGC is always present. */ - tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - OP_32_64(ld8s): - /* ??? LB is no smaller than LGB, so no point to using it. */ - tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - OP_32_64(ld16u): - /* ??? LLH (RXY format) is only present with the extended-immediate - facility, whereas LLGH is always present. */ - tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - case INDEX_op_ld16s_i32: - tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - case INDEX_op_ld_i32: - tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - OP_32_64(st8): tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], TCG_REG_NONE, args[2]); @@ -3097,19 +3142,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; - case INDEX_op_ld16s_i64: - tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld32u_i64: - tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - case INDEX_op_st32_i64: tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; @@ -3565,20 +3597,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_st8_i32: case INDEX_op_st8_i64: case INDEX_op_st16_i32: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 208b96487e..59a737dde4 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1962,6 +1962,73 @@ static const TCGOutOpExtract2 outop_extract2 = { .base.static_constraint = C_NotImplemented, }; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDUB); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDSB); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDUH); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDSH); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDUW); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDSW); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1978,22 +2045,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, glue(glue(case INDEX_op_, x), _i32): \ glue(glue(case INDEX_op_, x), _i64) - OP_32_64(ld8u): - tcg_out_ldst(s, a0, a1, a2, LDUB); - break; - OP_32_64(ld8s): - tcg_out_ldst(s, a0, a1, a2, LDSB); - break; - OP_32_64(ld16u): - tcg_out_ldst(s, a0, a1, a2, LDUH); - break; - OP_32_64(ld16s): - tcg_out_ldst(s, a0, a1, a2, LDSH); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, a0, a1, a2, LDUW); - break; OP_32_64(st8): tcg_out_ldst(s, a0, a1, a2, STB); break; @@ -2018,12 +2069,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); break; - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, a0, a1, a2, LDSW); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, a0, a1, a2, LDX); - break; case INDEX_op_st_i64: tcg_out_ldst(s, a0, a1, a2, STX); break; @@ -2040,18 +2085,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); diff --git a/tcg/tcg.c b/tcg/tcg.c index 5ab4f5e752..4cff888b7e 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1052,6 +1052,12 @@ typedef struct TCGOutOpExtract2 { TCGReg a2, unsigned shr); } TCGOutOpExtract2; +typedef struct TCGOutOpLoad { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, intptr_t offset); +} TCGOutOpLoad; + typedef struct TCGOutOpMovcond { TCGOutOp base; void (*out)(TCGContext *s, TCGType type, TCGCond cond, @@ -1142,6 +1148,11 @@ static const TCGOutOp outop_goto_ptr = { .static_constraint = C_O0_I1(r), }; +static const TCGOutOpLoad outop_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tcg_out_ld, +}; + /* * Register V as the TCGOutOp for O. * This verifies that V is of type T, otherwise give a nice compiler error. @@ -1173,6 +1184,16 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract), OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2), + OUTOP(INDEX_op_ld8u_i32, TCGOutOpLoad, outop_ld8u), + OUTOP(INDEX_op_ld8u_i64, TCGOutOpLoad, outop_ld8u), + OUTOP(INDEX_op_ld8s_i32, TCGOutOpLoad, outop_ld8s), + OUTOP(INDEX_op_ld8s_i64, TCGOutOpLoad, outop_ld8s), + OUTOP(INDEX_op_ld16u_i32, TCGOutOpLoad, outop_ld16u), + OUTOP(INDEX_op_ld16u_i64, TCGOutOpLoad, outop_ld16u), + OUTOP(INDEX_op_ld16s_i32, TCGOutOpLoad, outop_ld16s), + OUTOP(INDEX_op_ld16s_i64, TCGOutOpLoad, outop_ld16s), + OUTOP(INDEX_op_ld_i32, TCGOutOpLoad, outop_ld), + OUTOP(INDEX_op_ld_i64, TCGOutOpLoad, outop_ld), OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), @@ -1214,6 +1235,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64), OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32), OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32), + OUTOP(INDEX_op_ld32u_i64, TCGOutOpLoad, outop_ld32u), + OUTOP(INDEX_op_ld32s_i64, TCGOutOpLoad, outop_ld32s), #endif }; @@ -5740,6 +5763,29 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + tcg_debug_assert(type == TCG_TYPE_I64); + /* fall through */ + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: + case INDEX_op_ld_i32: + case INDEX_op_ld_i64: + { + const TCGOutOpLoad *out = + container_of(all_outop[op->opc], TCGOutOpLoad, base); + + tcg_debug_assert(!const_args[1]); + out->out(s, type, new_args[0], new_args[1], new_args[2]); + } + break; + case INDEX_op_muls2: case INDEX_op_mulu2: { diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d9cd62ed3d..2dcd561b77 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -40,20 +40,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: @@ -1143,19 +1129,80 @@ static void tcg_out_br(TCGContext *s, TCGLabel *l) tcg_out_op_l(s, INDEX_op_br, l); } +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld8u_i32, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld8s_i32, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld16u_i32, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld16s_i32, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld32u_i64, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld32s_i64, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; +#endif + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - CASE_32_64(ld8u) - CASE_32_64(ld8s) - CASE_32_64(ld16u) - CASE_32_64(ld16s) - case INDEX_op_ld_i32: - CASE_64(ld32u) - CASE_64(ld32s) - CASE_64(ld) CASE_32_64(st8) CASE_32_64(st16) case INDEX_op_st_i32: From e996804d40c10572550a1d3ca936a5dfb29ca0fc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 21 Jan 2025 21:47:16 -0800 Subject: [PATCH 151/161] tcg: Merge INDEX_op_ld*_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- include/tcg/tcg-opc.h | 19 +++++------- tcg/optimize.c | 27 ++++++++--------- tcg/tcg-op.c | 24 +++++++-------- tcg/tcg.c | 64 ++++++++++++++-------------------------- tcg/tci.c | 43 +++++++++++---------------- tcg/tci/tcg-target.c.inc | 28 +++++++----------- 6 files changed, 83 insertions(+), 122 deletions(-) diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 30ba15723a..6e8fcefaef 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -57,6 +57,13 @@ DEF(divu2, 2, 3, 0, TCG_OPF_INT) DEF(eqv, 1, 2, 0, TCG_OPF_INT) DEF(extract, 1, 1, 2, TCG_OPF_INT) DEF(extract2, 1, 2, 1, TCG_OPF_INT) +DEF(ld8u, 1, 1, 1, TCG_OPF_INT) +DEF(ld8s, 1, 1, 1, TCG_OPF_INT) +DEF(ld16u, 1, 1, 1, TCG_OPF_INT) +DEF(ld16s, 1, 1, 1, TCG_OPF_INT) +DEF(ld32u, 1, 1, 1, TCG_OPF_INT) +DEF(ld32s, 1, 1, 1, TCG_OPF_INT) +DEF(ld, 1, 1, 1, TCG_OPF_INT) DEF(movcond, 1, 4, 1, TCG_OPF_INT) DEF(mul, 1, 2, 0, TCG_OPF_INT) DEF(muls2, 2, 2, 0, TCG_OPF_INT) @@ -93,11 +100,6 @@ DEF(subbi, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN) DEF(subbio, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN | TCG_OPF_CARRY_OUT) /* load/store */ -DEF(ld8u_i32, 1, 1, 1, 0) -DEF(ld8s_i32, 1, 1, 1, 0) -DEF(ld16u_i32, 1, 1, 1, 0) -DEF(ld16s_i32, 1, 1, 1, 0) -DEF(ld_i32, 1, 1, 1, 0) DEF(st8_i32, 0, 2, 1, 0) DEF(st16_i32, 0, 2, 1, 0) DEF(st_i32, 0, 2, 1, 0) @@ -106,13 +108,6 @@ DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) /* load/store */ -DEF(ld8u_i64, 1, 1, 1, 0) -DEF(ld8s_i64, 1, 1, 1, 0) -DEF(ld16u_i64, 1, 1, 1, 0) -DEF(ld16s_i64, 1, 1, 1, 0) -DEF(ld32u_i64, 1, 1, 1, 0) -DEF(ld32s_i64, 1, 1, 1, 0) -DEF(ld_i64, 1, 1, 1, 0) DEF(st8_i64, 0, 2, 1, 0) DEF(st16_i64, 0, 2, 1, 0) DEF(st32_i64, 0, 2, 1, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index 52e194aaa9..d928a38e14 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2880,22 +2880,22 @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) /* We can't do any folding with a load, but we can record bits. */ switch (op->opc) { - CASE_OP_32_64(ld8s): + case INDEX_op_ld8s: s_mask = INT8_MIN; break; - CASE_OP_32_64(ld8u): + case INDEX_op_ld8u: z_mask = MAKE_64BIT_MASK(0, 8); break; - CASE_OP_32_64(ld16s): + case INDEX_op_ld16s: s_mask = INT16_MIN; break; - CASE_OP_32_64(ld16u): + case INDEX_op_ld16u: z_mask = MAKE_64BIT_MASK(0, 16); break; - case INDEX_op_ld32s_i64: + case INDEX_op_ld32s: s_mask = INT32_MIN; break; - case INDEX_op_ld32u_i64: + case INDEX_op_ld32u: z_mask = MAKE_64BIT_MASK(0, 32); break; default: @@ -3126,16 +3126,15 @@ void tcg_optimize(TCGContext *s) case INDEX_op_extrh_i64_i32: done = fold_extu(&ctx, op); break; - CASE_OP_32_64(ld8s): - CASE_OP_32_64(ld8u): - CASE_OP_32_64(ld16s): - CASE_OP_32_64(ld16u): - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: + case INDEX_op_ld8s: + case INDEX_op_ld8u: + case INDEX_op_ld16s: + case INDEX_op_ld16u: + case INDEX_op_ld32s: + case INDEX_op_ld32u: done = fold_tcg_ld(&ctx, op); break; - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: + case INDEX_op_ld: case INDEX_op_ld_vec: done = fold_tcg_ld_memcopy(&ctx, op); break; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index b0139ce05d..680f752cf9 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1379,27 +1379,27 @@ void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a) void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld8u, ret, arg2, offset); } void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld8s, ret, arg2, offset); } void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld16u, ret, arg2, offset); } void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld16s, ret, arg2, offset); } void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld, ret, arg2, offset); } void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) @@ -1463,7 +1463,7 @@ void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld8u, ret, arg2, offset); } else { tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); @@ -1473,7 +1473,7 @@ void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld8s, ret, arg2, offset); } else { tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); @@ -1483,7 +1483,7 @@ void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld16u, ret, arg2, offset); } else { tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); @@ -1493,7 +1493,7 @@ void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld16s, ret, arg2, offset); } else { tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); @@ -1503,7 +1503,7 @@ void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld32u, ret, arg2, offset); } else { tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); @@ -1513,7 +1513,7 @@ void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld32s, ret, arg2, offset); } else { tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); @@ -1527,7 +1527,7 @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) * they cannot be the same temporary -- no chance of overlap. */ if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld, ret, arg2, offset); } else if (HOST_BIG_ENDIAN) { tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); diff --git a/tcg/tcg.c b/tcg/tcg.c index 4cff888b7e..a9d62d9e17 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1184,16 +1184,11 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract), OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2), - OUTOP(INDEX_op_ld8u_i32, TCGOutOpLoad, outop_ld8u), - OUTOP(INDEX_op_ld8u_i64, TCGOutOpLoad, outop_ld8u), - OUTOP(INDEX_op_ld8s_i32, TCGOutOpLoad, outop_ld8s), - OUTOP(INDEX_op_ld8s_i64, TCGOutOpLoad, outop_ld8s), - OUTOP(INDEX_op_ld16u_i32, TCGOutOpLoad, outop_ld16u), - OUTOP(INDEX_op_ld16u_i64, TCGOutOpLoad, outop_ld16u), - OUTOP(INDEX_op_ld16s_i32, TCGOutOpLoad, outop_ld16s), - OUTOP(INDEX_op_ld16s_i64, TCGOutOpLoad, outop_ld16s), - OUTOP(INDEX_op_ld_i32, TCGOutOpLoad, outop_ld), - OUTOP(INDEX_op_ld_i64, TCGOutOpLoad, outop_ld), + OUTOP(INDEX_op_ld8u, TCGOutOpLoad, outop_ld8u), + OUTOP(INDEX_op_ld8s, TCGOutOpLoad, outop_ld8s), + OUTOP(INDEX_op_ld16u, TCGOutOpLoad, outop_ld16u), + OUTOP(INDEX_op_ld16s, TCGOutOpLoad, outop_ld16s), + OUTOP(INDEX_op_ld, TCGOutOpLoad, outop_ld), OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), @@ -1235,8 +1230,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64), OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32), OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32), - OUTOP(INDEX_op_ld32u_i64, TCGOutOpLoad, outop_ld32u), - OUTOP(INDEX_op_ld32s_i64, TCGOutOpLoad, outop_ld32s), + OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u), + OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s), #endif }; @@ -2443,6 +2438,11 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_brcond: case INDEX_op_deposit: case INDEX_op_extract: + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld: case INDEX_op_mov: case INDEX_op_movcond: case INDEX_op_negsetcond: @@ -2452,11 +2452,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_xor: return has_type; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: case INDEX_op_st8_i32: case INDEX_op_st16_i32: case INDEX_op_st_i32: @@ -2466,13 +2461,8 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: + case INDEX_op_ld32u: + case INDEX_op_ld32s: case INDEX_op_st8_i64: case INDEX_op_st16_i64: case INDEX_op_st32_i64: @@ -4428,10 +4418,7 @@ liveness_pass_2(TCGContext *s) arg_ts = arg_temp(op->args[i]); dir_ts = arg_ts->state_ptr; if (dir_ts && arg_ts->state == TS_DEAD) { - TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_ld_i32 - : INDEX_op_ld_i64); - TCGOp *lop = tcg_op_insert_before(s, op, lopc, + TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld, arg_ts->type, 3); lop->args[0] = temp_arg(dir_ts); @@ -5763,20 +5750,13 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - tcg_debug_assert(type == TCG_TYPE_I64); - /* fall through */ - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld32u: + case INDEX_op_ld32s: + case INDEX_op_ld: { const TCGOutOpLoad *out = container_of(all_outop[op->opc], TCGOutOpLoad, base); diff --git a/tcg/tci.c b/tcg/tci.c index a18478a07a..890ccbe85b 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -466,31 +466,30 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Load/store operations (32 bit). */ - CASE_32_64(ld8u) + case INDEX_op_ld8u: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(uint8_t *)ptr; break; - CASE_32_64(ld8s) + case INDEX_op_ld8s: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(int8_t *)ptr; break; - CASE_32_64(ld16u) + case INDEX_op_ld16u: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(uint16_t *)ptr; break; - CASE_32_64(ld16s) + case INDEX_op_ld16s: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(int16_t *)ptr; break; - case INDEX_op_ld_i32: - CASE_64(ld32u) + case INDEX_op_ld: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); - regs[r0] = *(uint32_t *)ptr; + regs[r0] = *(tcg_target_ulong *)ptr; break; CASE_32_64(st8) tci_args_rrs(insn, &r0, &r1, &ofs); @@ -716,16 +715,16 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, #if TCG_TARGET_REG_BITS == 64 /* Load/store operations (64 bit). */ - case INDEX_op_ld32s_i64: + case INDEX_op_ld32u: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(uint32_t *)ptr; + break; + case INDEX_op_ld32s: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(int32_t *)ptr; break; - case INDEX_op_ld_i64: - tci_args_rrs(insn, &r0, &r1, &ofs); - ptr = (void *)(regs[r1] + ofs); - regs[r0] = *(uint64_t *)ptr; - break; case INDEX_op_st_i64: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); @@ -970,18 +969,12 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) info->fprintf_func(info->stream, "%-12s", op_name); break; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld32u: + case INDEX_op_ld: case INDEX_op_st8_i32: case INDEX_op_st8_i64: case INDEX_op_st16_i32: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 2dcd561b77..d549dc90f5 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -339,18 +339,12 @@ static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base, intptr_t offset) { - switch (type) { - case TCG_TYPE_I32: - tcg_out_ldst(s, INDEX_op_ld_i32, val, base, offset); - break; -#if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_out_ldst(s, INDEX_op_ld_i64, val, base, offset); - break; -#endif - default: - g_assert_not_reached(); + TCGOpcode op = INDEX_op_ld; + + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + op = INDEX_op_ld32u; } + tcg_out_ldst(s, op, val, base, offset); } static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) @@ -1132,7 +1126,7 @@ static void tcg_out_br(TCGContext *s, TCGLabel *l) static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { - tcg_out_ldst(s, INDEX_op_ld8u_i32, dest, base, offset); + tcg_out_ldst(s, INDEX_op_ld8u, dest, base, offset); } static const TCGOutOpLoad outop_ld8u = { @@ -1143,7 +1137,7 @@ static const TCGOutOpLoad outop_ld8u = { static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { - tcg_out_ldst(s, INDEX_op_ld8s_i32, dest, base, offset); + tcg_out_ldst(s, INDEX_op_ld8s, dest, base, offset); } static const TCGOutOpLoad outop_ld8s = { @@ -1154,7 +1148,7 @@ static const TCGOutOpLoad outop_ld8s = { static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { - tcg_out_ldst(s, INDEX_op_ld16u_i32, dest, base, offset); + tcg_out_ldst(s, INDEX_op_ld16u, dest, base, offset); } static const TCGOutOpLoad outop_ld16u = { @@ -1165,7 +1159,7 @@ static const TCGOutOpLoad outop_ld16u = { static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { - tcg_out_ldst(s, INDEX_op_ld16s_i32, dest, base, offset); + tcg_out_ldst(s, INDEX_op_ld16s, dest, base, offset); } static const TCGOutOpLoad outop_ld16s = { @@ -1177,7 +1171,7 @@ static const TCGOutOpLoad outop_ld16s = { static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { - tcg_out_ldst(s, INDEX_op_ld32u_i64, dest, base, offset); + tcg_out_ldst(s, INDEX_op_ld32u, dest, base, offset); } static const TCGOutOpLoad outop_ld32u = { @@ -1188,7 +1182,7 @@ static const TCGOutOpLoad outop_ld32u = { static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, TCGReg base, ptrdiff_t offset) { - tcg_out_ldst(s, INDEX_op_ld32s_i64, dest, base, offset); + tcg_out_ldst(s, INDEX_op_ld32s, dest, base, offset); } static const TCGOutOpLoad outop_ld32s = { From 4a686aa9d9dcf8805de654ae09788c4e264c1439 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 22 Jan 2025 12:49:41 -0800 Subject: [PATCH 152/161] tcg: Convert st to TCGOutOpStore Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 52 +++++++------- tcg/arm/tcg-target.c.inc | 72 +++++++++---------- tcg/i386/tcg-target.c.inc | 114 ++++++++++++++----------------- tcg/loongarch64/tcg-target.c.inc | 50 +++++++------- tcg/mips/tcg-target.c.inc | 55 ++++++++------- tcg/ppc/tcg-target.c.inc | 52 +++++++------- tcg/riscv/tcg-target.c.inc | 52 +++++++------- tcg/s390x/tcg-target.c.inc | 60 ++++++++-------- tcg/sparc64/tcg-target.c.inc | 53 +++++++------- tcg/tcg.c | 37 ++++++++++ tcg/tci/tcg-target.c.inc | 56 ++++++++------- 11 files changed, 341 insertions(+), 312 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 903a95ad7e..39a44507d1 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2838,6 +2838,33 @@ static const TCGOutOpLoad outop_ld32s = { .out = tgen_ld32s, }; +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_STRB, data, base, offset, 0); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_STRH, data, base, offset, 1); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2848,22 +2875,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_ldst(s, I3312_STRB, a0, a1, a2, 0); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_ldst(s, I3312_STRH, a0, a1, a2, 1); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, I3312_STRW, a0, a1, a2, 2); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); - break; - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: tcg_out_qemu_ld(s, a0, a1, a2, ext); @@ -3331,15 +3342,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(rz, r); - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 2079dd3bdc..5b34f61ca1 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1084,26 +1084,6 @@ static void tcg_out_st32(TCGContext *s, ARMCond cond, tcg_out_st32_12(s, cond, rd, rn, offset); } -static void tcg_out_st16(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st16_8(s, cond, rd, rn, offset); -} - -static void tcg_out_st8(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st8_12(s, cond, rd, rn, offset); -} - /* * The _goto case is normally between TBs within the same code buffer, and * with the code buffer limited to 16MB we wouldn't need the long case. @@ -2548,21 +2528,48 @@ static const TCGOutOpLoad outop_ld16s = { .out = tgen_ld16s, }; +static void tgen_st8(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_st8_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_st8_12(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; + +static void tgen_st16(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_st16_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_st16_8(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_st8_i32: - tcg_out_st8(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - tcg_out_st16(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; @@ -2589,11 +2596,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - return C_O0_I2(r, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, q); case INDEX_op_qemu_ld_i64: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index d16ddcb940..52285bcd54 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3489,55 +3489,69 @@ static const TCGOutOpLoad outop_ld32s = { }; #endif +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, data, base, offset); +} + +static void tgen_st8_i(TCGContext *s, TCGType type, tcg_target_long data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, base, offset); + tcg_out8(s, data); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(qi, r), + .out_r = tgen_st8_r, + .out_i = tgen_st8_i, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, data, base, offset); +} + +static void tgen_st16_i(TCGContext *s, TCGType type, tcg_target_long data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, base, offset); + tcg_out16(s, data); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(ri, r), + .out_r = tgen_st16_r, + .out_i = tgen_st16_i, +}; + +static void tgen_st_i(TCGContext *s, TCGType type, tcg_target_long data, + TCGReg base, ptrdiff_t offset) +{ + bool ok = tcg_out_sti(s, type, data, base, offset); + tcg_debug_assert(ok); +} + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(re, r), + .out_r = tcg_out_st, + .out_i = tgen_st_i, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { TCGArg a0, a1, a2; -#if TCG_TARGET_REG_BITS == 64 -# define OP_32_64(x) \ - case glue(glue(INDEX_op_, x), _i64): \ - case glue(glue(INDEX_op_, x), _i32) -#else -# define OP_32_64(x) \ - case glue(glue(INDEX_op_, x), _i32) -#endif - /* Hoist the loads of the most common arguments. */ a0 = args[0]; a1 = args[1]; a2 = args[2]; switch (opc) { - OP_32_64(st8): - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2); - tcg_out8(s, a0); - } else { - tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2); - } - break; - OP_32_64(st16): - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2); - tcg_out16(s, a0); - } else { - tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2); - } - break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_st32_i64: -#endif - case INDEX_op_st_i32: - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2); - tcg_out32(s, a0); - } else { - tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2); - } - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I32); break; @@ -3569,25 +3583,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I128); break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_st_i64: - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2); - tcg_out32(s, a0); - } else { - tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2); - } - break; -#endif - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: g_assert_not_reached(); } - -#undef OP_32_64 } static int const umin_insn[4] = { @@ -4135,19 +4136,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - return C_O0_I2(qi, r); - - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - return C_O0_I2(ri, r); - - case INDEX_op_st_i64: - return C_O0_I2(re, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, L); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 66555b8982..73a1196d8b 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1983,6 +1983,33 @@ static const TCGOutOpLoad outop_ld32s = { .out = tgen_ld32s, }; +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_ST_B, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_ST_H, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1993,22 +2020,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a3 = args[3]; switch (opc) { - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_ldst(s, OPC_ST_B, a0, a1, a2); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_ldst(s, OPC_ST_H, a0, a1, a2); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, OPC_ST_W, a0, a1, a2); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, OPC_ST_D, a0, a1, a2); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); break; @@ -2530,13 +2541,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i32: - case INDEX_op_st_i64: case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 21ed11b78d..5e41729d88 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2342,12 +2342,38 @@ static const TCGOutOpLoad outop_ld32s = { }; #endif +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_SB, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_SH, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - MIPSInsn i1; TCGArg a0, a1, a2; a0 = args[0]; @@ -2355,24 +2381,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - i1 = OPC_SB; - goto do_ldst; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - i1 = OPC_SH; - goto do_ldst; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - i1 = OPC_SW; - goto do_ldst; - case INDEX_op_st_i64: - i1 = OPC_SD; - do_ldst: - tcg_out_ldst(s, i1, a0, a1, a2); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I32); break; @@ -2407,15 +2415,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(rz, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, r); case INDEX_op_qemu_st_i32: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 275c5a90a5..9cf24831df 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3746,28 +3746,39 @@ static const TCGOutOpLoad outop_ld32s = { }; #endif +static void tgen_st8(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, STB, STBX, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; + +static void tgen_st16(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, STH, STHX, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; @@ -4415,15 +4426,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(r, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, r); case INDEX_op_qemu_ld_i64: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 5b987c930f..bcfdb6c545 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2596,6 +2596,33 @@ static const TCGOutOpLoad outop_ld32s = { .out = tgen_ld32s, }; +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_SB, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_SH, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2606,22 +2633,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_ldst(s, OPC_SB, a0, a1, a2); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_ldst(s, OPC_SH, a0, a1, a2); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, OPC_SW, a0, a1, a2); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, OPC_SD, a0, a1, a2); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); break; @@ -2864,15 +2875,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(rz, r); - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index fe7665b21d..e266c19829 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -3100,29 +3100,39 @@ static const TCGOutOpLoad outop_ld32s = { .out = tgen_ld32s, }; -# define OP_32_64(x) \ - case glue(glue(INDEX_op_,x),_i32): \ - case glue(glue(INDEX_op_,x),_i64) +static void tgen_st8(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, RX_STC, RXY_STCY, data, base, TCG_REG_NONE, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; + +static void tgen_st16(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, RX_STH, RXY_STHY, data, base, TCG_REG_NONE, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - OP_32_64(st8): - tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], - TCG_REG_NONE, args[2]); - break; - - OP_32_64(st16): - tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1], - TCG_REG_NONE, args[2]); - break; - - case INDEX_op_st_i32: - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32); break; @@ -3142,13 +3152,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; - case INDEX_op_st32_i64: - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ @@ -3597,15 +3600,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(r, r); - case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 59a737dde4..a0efeee98c 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -2028,6 +2028,33 @@ static const TCGOutOpLoad outop_ld32s = { .out = tgen_ld32s, }; +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, data, base, offset, STB); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, data, base, offset, STH); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2041,21 +2068,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { -#define OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64) - - OP_32_64(st8): - tcg_out_ldst(s, a0, a1, a2, STB); - break; - OP_32_64(st16): - tcg_out_ldst(s, a0, a1, a2, STH); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, a0, a1, a2, STW); - break; - case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); break; @@ -2069,10 +2081,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); break; - case INDEX_op_st_i64: - tcg_out_ldst(s, a0, a1, a2, STX); - break; - case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ @@ -2089,13 +2097,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_ld_i64: return C_O1_I1(r, r); - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i64: return C_O0_I2(rz, r); diff --git a/tcg/tcg.c b/tcg/tcg.c index a9d62d9e17..28791c6567 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1090,6 +1090,14 @@ typedef struct TCGOutOpSetcond2 { TCGArg bl, bool const_bl, TCGArg bh, bool const_bh); } TCGOutOpSetcond2; +typedef struct TCGOutOpStore { + TCGOutOp base; + void (*out_r)(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, intptr_t offset); + void (*out_i)(TCGContext *s, TCGType type, tcg_target_long data, + TCGReg base, intptr_t offset); +} TCGOutOpStore; + typedef struct TCGOutOpSubtract { TCGOutOp base; void (*out_rrr)(TCGContext *s, TCGType type, @@ -1211,6 +1219,12 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), + OUTOP(INDEX_op_st_i32, TCGOutOpStore, outop_st), + OUTOP(INDEX_op_st_i64, TCGOutOpStore, outop_st), + OUTOP(INDEX_op_st8_i32, TCGOutOpStore, outop_st8), + OUTOP(INDEX_op_st8_i64, TCGOutOpStore, outop_st8), + OUTOP(INDEX_op_st16_i32, TCGOutOpStore, outop_st16), + OUTOP(INDEX_op_st16_i64, TCGOutOpStore, outop_st16), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi), OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio), @@ -1232,6 +1246,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32), OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u), OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s), + OUTOP(INDEX_op_st32_i64, TCGOutOpStore, outop_st), #endif }; @@ -5779,6 +5794,28 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_st32_i64: + /* Use tcg_op_st w/ I32. */ + type = TCG_TYPE_I32; + /* fall through */ + case INDEX_op_st_i32: + case INDEX_op_st_i64: + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + { + const TCGOutOpStore *out = + container_of(all_outop[op->opc], TCGOutOpStore, base); + + if (const_args[0]) { + out->out_i(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_r(s, type, new_args[0], new_args[1], new_args[2]); + } + } + break; + case INDEX_op_brcond: { const TCGOutOpBrcond *out = &outop_brcond; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d549dc90f5..be9270a861 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -40,15 +40,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(r, r); - case INDEX_op_qemu_ld_i32: return C_O1_I1(r, r); case INDEX_op_qemu_ld_i64: @@ -487,18 +478,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, tcg_out32(s, insn); } -#if TCG_TARGET_REG_BITS == 64 -# define CASE_32_64(x) \ - case glue(glue(INDEX_op_, x), _i64): \ - case glue(glue(INDEX_op_, x), _i32): -# define CASE_64(x) \ - case glue(glue(INDEX_op_, x), _i64): -#else -# define CASE_32_64(x) \ - case glue(glue(INDEX_op_, x), _i32): -# define CASE_64(x) -#endif - static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) { tcg_out_op_p(s, INDEX_op_exit_tb, (void *)arg); @@ -1191,20 +1170,39 @@ static const TCGOutOpLoad outop_ld32s = { }; #endif +static void tgen_st8(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_st8_i32, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; + +static void tgen_st16(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_st16_i32, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - CASE_32_64(st8) - CASE_32_64(st16) - case INDEX_op_st_i32: - CASE_64(st32) - CASE_64(st) - tcg_out_ldst(s, opc, args[0], args[1], args[2]); - break; - case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i64: if (TCG_TARGET_REG_BITS == 32) { From a28f151d61604feae1d6c75b79e67d1c6c6a8b18 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 22 Jan 2025 13:28:55 -0800 Subject: [PATCH 153/161] tcg: Merge INDEX_op_st*_{i32,i64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- include/tcg/tcg-opc.h | 15 ++++---------- tcg/optimize.c | 28 +++++++------------------ tcg/tcg-op.c | 14 ++++++------- tcg/tcg.c | 45 +++++++++++++--------------------------- tcg/tci.c | 36 +++++++++----------------------- tcg/tci/tcg-target.c.inc | 20 +++++++----------- 6 files changed, 50 insertions(+), 108 deletions(-) diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 6e8fcefaef..a22433d8b5 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -86,6 +86,10 @@ DEF(setcond, 1, 2, 1, TCG_OPF_INT) DEF(sextract, 1, 1, 2, TCG_OPF_INT) DEF(shl, 1, 2, 0, TCG_OPF_INT) DEF(shr, 1, 2, 0, TCG_OPF_INT) +DEF(st8, 0, 2, 1, TCG_OPF_INT) +DEF(st16, 0, 2, 1, TCG_OPF_INT) +DEF(st32, 0, 2, 1, TCG_OPF_INT) +DEF(st, 0, 2, 1, TCG_OPF_INT) DEF(sub, 1, 2, 0, TCG_OPF_INT) DEF(xor, 1, 2, 0, TCG_OPF_INT) @@ -99,20 +103,9 @@ DEF(subb1o, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) DEF(subbi, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN) DEF(subbio, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN | TCG_OPF_CARRY_OUT) -/* load/store */ -DEF(st8_i32, 0, 2, 1, 0) -DEF(st16_i32, 0, 2, 1, 0) -DEF(st_i32, 0, 2, 1, 0) - DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) -/* load/store */ -DEF(st8_i64, 0, 2, 1, 0) -DEF(st16_i64, 0, 2, 1, 0) -DEF(st32_i64, 0, 2, 1, 0) -DEF(st_i64, 0, 2, 1, 0) - /* size changing ops */ DEF(ext_i32_i64, 1, 1, 0, 0) DEF(extu_i32_i64, 1, 1, 0, 0) diff --git a/tcg/optimize.c b/tcg/optimize.c index d928a38e14..cfb407c7fc 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -30,14 +30,6 @@ #include "tcg-internal.h" #include "tcg-has.h" -#define CASE_OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64) - -#define CASE_OP_32_64_VEC(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64): \ - glue(glue(case INDEX_op_, x), _vec) typedef struct MemCopyInfo { IntervalTreeNode itree; @@ -2938,19 +2930,16 @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op) } switch (op->opc) { - CASE_OP_32_64(st8): + case INDEX_op_st8: lm1 = 0; break; - CASE_OP_32_64(st16): + case INDEX_op_st16: lm1 = 1; break; - case INDEX_op_st32_i64: - case INDEX_op_st_i32: + case INDEX_op_st32: lm1 = 3; break; - case INDEX_op_st_i64: - lm1 = 7; - break; + case INDEX_op_st: case INDEX_op_st_vec: lm1 = tcg_type_size(ctx->type) - 1; break; @@ -3138,13 +3127,12 @@ void tcg_optimize(TCGContext *s) case INDEX_op_ld_vec: done = fold_tcg_ld_memcopy(&ctx, op); break; - CASE_OP_32_64(st8): - CASE_OP_32_64(st16): - case INDEX_op_st32_i64: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st32: done = fold_tcg_st(&ctx, op); break; - case INDEX_op_st_i32: - case INDEX_op_st_i64: + case INDEX_op_st: case INDEX_op_st_vec: done = fold_tcg_st_memcopy(&ctx, op); break; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 680f752cf9..dfa5c38728 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1404,17 +1404,17 @@ void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_st8, arg1, arg2, offset); } void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_st16, arg1, arg2, offset); } void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_st, arg1, arg2, offset); } @@ -1540,7 +1540,7 @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_st8, arg1, arg2, offset); } else { tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); } @@ -1549,7 +1549,7 @@ void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_st16, arg1, arg2, offset); } else { tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); } @@ -1558,7 +1558,7 @@ void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_st32, arg1, arg2, offset); } else { tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); } @@ -1567,7 +1567,7 @@ void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_st, arg1, arg2, offset); } else if (HOST_BIG_ENDIAN) { tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); diff --git a/tcg/tcg.c b/tcg/tcg.c index 28791c6567..44b6b8319f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1219,12 +1219,9 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract), OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), - OUTOP(INDEX_op_st_i32, TCGOutOpStore, outop_st), - OUTOP(INDEX_op_st_i64, TCGOutOpStore, outop_st), - OUTOP(INDEX_op_st8_i32, TCGOutOpStore, outop_st8), - OUTOP(INDEX_op_st8_i64, TCGOutOpStore, outop_st8), - OUTOP(INDEX_op_st16_i32, TCGOutOpStore, outop_st16), - OUTOP(INDEX_op_st16_i64, TCGOutOpStore, outop_st16), + OUTOP(INDEX_op_st, TCGOutOpStore, outop_st), + OUTOP(INDEX_op_st8, TCGOutOpStore, outop_st8), + OUTOP(INDEX_op_st16, TCGOutOpStore, outop_st16), OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi), OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio), @@ -1246,7 +1243,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32), OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u), OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s), - OUTOP(INDEX_op_st32_i64, TCGOutOpStore, outop_st), + OUTOP(INDEX_op_st32, TCGOutOpStore, outop_st), #endif }; @@ -2464,24 +2461,19 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_or: case INDEX_op_setcond: case INDEX_op_sextract: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st: case INDEX_op_xor: return has_type; - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - return true; - case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; case INDEX_op_ld32u: case INDEX_op_ld32s: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: + case INDEX_op_st32: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: @@ -4494,10 +4486,7 @@ liveness_pass_2(TCGContext *s) arg_ts->state = 0; if (NEED_SYNC_ARG(0)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_st_i32 - : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, + TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, arg_ts->type, 3); TCGTemp *out_ts = dir_ts; @@ -4531,10 +4520,7 @@ liveness_pass_2(TCGContext *s) /* Sync outputs upon their last write. */ if (NEED_SYNC_ARG(i)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_st_i32 - : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, + TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, arg_ts->type, 3); sop->args[0] = temp_arg(dir_ts); @@ -5794,16 +5780,13 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; - case INDEX_op_st32_i64: + case INDEX_op_st32: /* Use tcg_op_st w/ I32. */ type = TCG_TYPE_I32; /* fall through */ - case INDEX_op_st_i32: - case INDEX_op_st_i64: - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: + case INDEX_op_st: + case INDEX_op_st8: + case INDEX_op_st16: { const TCGOutOpStore *out = container_of(all_outop[op->opc], TCGOutOpStore, base); diff --git a/tcg/tci.c b/tcg/tci.c index 890ccbe85b..b08288e7d3 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -325,18 +325,6 @@ static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val, } } -#if TCG_TARGET_REG_BITS == 64 -# define CASE_32_64(x) \ - case glue(glue(INDEX_op_, x), _i64): \ - case glue(glue(INDEX_op_, x), _i32): -# define CASE_64(x) \ - case glue(glue(INDEX_op_, x), _i64): -#else -# define CASE_32_64(x) \ - case glue(glue(INDEX_op_, x), _i32): -# define CASE_64(x) -#endif - /* Interpret pseudo code in tb. */ /* * Disable CFI checks. @@ -491,21 +479,20 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, ptr = (void *)(regs[r1] + ofs); regs[r0] = *(tcg_target_ulong *)ptr; break; - CASE_32_64(st8) + case INDEX_op_st8: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); *(uint8_t *)ptr = regs[r0]; break; - CASE_32_64(st16) + case INDEX_op_st16: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); *(uint16_t *)ptr = regs[r0]; break; - case INDEX_op_st_i32: - CASE_64(st32) + case INDEX_op_st: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); - *(uint32_t *)ptr = regs[r0]; + *(tcg_target_ulong *)ptr = regs[r0]; break; /* Arithmetic operations (mixed 32/64 bit). */ @@ -725,10 +712,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, ptr = (void *)(regs[r1] + ofs); regs[r0] = *(int32_t *)ptr; break; - case INDEX_op_st_i64: + case INDEX_op_st32: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); - *(uint64_t *)ptr = regs[r0]; + *(uint32_t *)ptr = regs[r0]; break; /* Arithmetic operations (64 bit). */ @@ -975,13 +962,10 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_ld16s: case INDEX_op_ld32u: case INDEX_op_ld: - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i32: - case INDEX_op_st_i64: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st32: + case INDEX_op_st: tci_args_rrs(insn, &r0, &r1, &s2); info->fprintf_func(info->stream, "%-12s %s, %s, %d", op_name, str_r(r0), str_r(r1), s2); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index be9270a861..1fb7575061 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -1173,7 +1173,7 @@ static const TCGOutOpLoad outop_ld32s = { static void tgen_st8(TCGContext *s, TCGType type, TCGReg data, TCGReg base, ptrdiff_t offset) { - tcg_out_ldst(s, INDEX_op_st8_i32, data, base, offset); + tcg_out_ldst(s, INDEX_op_st8, data, base, offset); } static const TCGOutOpStore outop_st8 = { @@ -1184,7 +1184,7 @@ static const TCGOutOpStore outop_st8 = { static void tgen_st16(TCGContext *s, TCGType type, TCGReg data, TCGReg base, ptrdiff_t offset) { - tcg_out_ldst(s, INDEX_op_st16_i32, data, base, offset); + tcg_out_ldst(s, INDEX_op_st16, data, base, offset); } static const TCGOutOpStore outop_st16 = { @@ -1232,18 +1232,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val, TCGReg base, intptr_t offset) { - switch (type) { - case TCG_TYPE_I32: - tcg_out_ldst(s, INDEX_op_st_i32, val, base, offset); - break; -#if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_out_ldst(s, INDEX_op_st_i64, val, base, offset); - break; -#endif - default: - g_assert_not_reached(); + TCGOpcode op = INDEX_op_st; + + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + op = INDEX_op_st32; } + tcg_out_ldst(s, op, val, base, offset); } static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, From bf7ca5fb3032b95fd83dbb9883e904ee28baa229 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 23 Jan 2025 09:46:57 -0800 Subject: [PATCH 154/161] tcg: Stash MemOp size in TCGOP_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will enable removing INDEX_op_qemu_st8_*_i32, by exposing the operand size to constraint selection. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/tcg-op-ldst.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c index 3b073b4ce0..9e4626e51d 100644 --- a/tcg/tcg-op-ldst.c +++ b/tcg/tcg-op-ldst.c @@ -91,11 +91,15 @@ static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st) static void gen_ldst(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh, TCGTemp *addr, MemOpIdx oi) { + TCGOp *op; + if (vh) { - tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi); + op = tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh), + temp_arg(addr), oi); } else { - tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi); + op = tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi); } + TCGOP_FLAGS(op) = get_memop(oi) & MO_SIZE; } static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) From 33aba058c8fcc9b1581b03a1fbac45d8d91baac6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 26 Jan 2025 17:34:19 -0800 Subject: [PATCH 155/161] tcg: Remove INDEX_op_qemu_st8_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The i386 backend can now check TCGOP_FLAGS to select the correct set of constraints. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- docs/devel/tcg-ops.rst | 6 ------ include/tcg/tcg-opc.h | 4 ---- tcg/aarch64/tcg-target-has.h | 1 - tcg/arm/tcg-target-has.h | 1 - tcg/i386/tcg-target-con-str.h | 2 +- tcg/i386/tcg-target-has.h | 3 --- tcg/i386/tcg-target.c.inc | 9 ++++----- tcg/loongarch64/tcg-target-has.h | 3 --- tcg/mips/tcg-target-has.h | 1 - tcg/optimize.c | 1 - tcg/ppc/tcg-target-has.h | 2 -- tcg/riscv/tcg-target-has.h | 1 - tcg/s390x/tcg-target-has.h | 1 - tcg/sparc64/tcg-target-has.h | 1 - tcg/tcg-op-ldst.c | 9 ++------- tcg/tcg.c | 4 ---- tcg/tci/tcg-target-has.h | 2 -- 17 files changed, 7 insertions(+), 44 deletions(-) diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index a7147407de..f26b837a30 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -744,8 +744,6 @@ QEMU specific operations qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx* - qemu_st8_i32 *t0*, *t1*, *flags*, *memidx* - - | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output register *t0* only. The address *t1* is always sized according to the guest, @@ -763,10 +761,6 @@ QEMU specific operations 64-bit memory access specified in *flags*. | | For qemu_ld/st_i128, these are only supported for a 64-bit host. - | - | For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of - the memory operation is known to be 8-bit. This allows the backend to - provide a different set of register constraints. Host vector operations diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index a22433d8b5..0ce8332aab 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -133,10 +133,6 @@ DEF(qemu_ld_i64, DATA64_ARGS, 1, 1, DEF(qemu_st_i64, 0, DATA64_ARGS + 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -/* Only used by i386 to cope with stupid register constraints. */ -DEF(qemu_st8_i32, 0, 1 + 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) - /* Only for 64-bit hosts at the moment. */ DEF(qemu_ld_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) DEF(qemu_st_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index b155e37639..69e83efb69 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -14,7 +14,6 @@ /* optional instructions */ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 /* * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load, diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index 187269e5bd..3bbbde5d59 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -24,7 +24,6 @@ extern bool use_neon_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 #define TCG_TARGET_HAS_tst 1 diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h index 52142ab121..dbedff1f54 100644 --- a/tcg/i386/tcg-target-con-str.h +++ b/tcg/i386/tcg-target-con-str.h @@ -20,7 +20,7 @@ REGS('r', ALL_GENERAL_REGS) REGS('x', ALL_VECTOR_REGS) REGS('q', ALL_BYTEL_REGS) /* regs that can be used as a byte operand */ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_ld/st */ -REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */ +REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st MO_8 data */ /* * Define constraint letters for constants: diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 628e736de7..42647fabbd 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -29,9 +29,6 @@ #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_qemu_st8_i32 0 -#else -#define TCG_TARGET_HAS_qemu_st8_i32 1 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 \ diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 52285bcd54..6c4c2ebd0e 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2457,7 +2457,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, switch (memop & MO_SIZE) { case MO_8: - /* This is handled with constraints on INDEX_op_qemu_st8_i32. */ + /* This is handled with constraints on INDEX_op_qemu_st_i32. */ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4); tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + h.seg, datalo, h.base, h.index, 0, h.ofs); @@ -3568,7 +3568,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, break; case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st8_i32: tcg_out_qemu_st(s, a0, -1, a1, a2, TCG_TYPE_I32); break; case INDEX_op_qemu_st_i64: @@ -4140,9 +4139,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) return C_O1_I1(r, L); case INDEX_op_qemu_st_i32: - return C_O0_I2(L, L); - case INDEX_op_qemu_st8_i32: - return C_O0_I2(s, L); + return (TCG_TARGET_REG_BITS == 32 && flags == MO_8 + ? C_O0_I2(s, L) + : C_O0_I2(L, L)); case INDEX_op_qemu_ld_i64: return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L); diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 9c118bd1f6..32abc6f457 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -9,9 +9,6 @@ #include "host/cpuinfo.h" -/* optional instructions */ -#define TCG_TARGET_HAS_qemu_st8_i32 0 - /* 64-bit operations */ #define TCG_TARGET_HAS_extr_i64_i32 1 diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index d8f9f7beef..b9eb338528 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -46,7 +46,6 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions detected at runtime */ -#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 #define TCG_TARGET_HAS_tst 0 diff --git a/tcg/optimize.c b/tcg/optimize.c index cfb407c7fc..4d2220664a 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -3192,7 +3192,6 @@ void tcg_optimize(TCGContext *s) case INDEX_op_qemu_ld_i128: done = fold_qemu_ld_2reg(&ctx, op); break; - case INDEX_op_qemu_st8_i32: case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i64: case INDEX_op_qemu_st_i128: diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index b978c91a62..81ec5aece7 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -17,8 +17,6 @@ #define have_vsx (cpuinfo & CPUINFO_VSX) /* optional instructions */ -#define TCG_TARGET_HAS_qemu_st8_i32 0 - #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 #endif diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 8cd099546f..aef10c2d9d 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -10,7 +10,6 @@ #include "host/cpuinfo.h" /* optional instructions */ -#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 1 #define TCG_TARGET_HAS_qemu_ldst_i128 0 #define TCG_TARGET_HAS_tst 0 diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index c04cc4e377..0aeb5ba01a 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -30,7 +30,6 @@ extern uint64_t s390_facilities[3]; /* optional instructions */ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 1 #define TCG_TARGET_HAS_tst 1 diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index d9f5ef3fc9..af6a949da3 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -14,7 +14,6 @@ extern bool use_vis3_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 #define TCG_TARGET_HAS_tst 1 diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c index 9e4626e51d..ac1af9f77c 100644 --- a/tcg/tcg-op-ldst.c +++ b/tcg/tcg-op-ldst.c @@ -270,7 +270,6 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, { TCGv_i32 swap = NULL; MemOpIdx orig_oi, oi; - TCGOpcode opc; tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); memop = tcg_canonicalize_memop(memop, 0, 1); @@ -293,12 +292,8 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, oi = make_memop_idx(memop, idx); } - if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) { - opc = INDEX_op_qemu_st8_i32; - } else { - opc = INDEX_op_qemu_st_i32; - } - gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi); + gen_ldst(INDEX_op_qemu_st_i32, TCG_TYPE_I32, + tcgv_i32_temp(val), NULL, addr, oi); plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); if (swap) { diff --git a/tcg/tcg.c b/tcg/tcg.c index 44b6b8319f..5c0cab205c 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2438,9 +2438,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_qemu_st_i64: return true; - case INDEX_op_qemu_st8_i32: - return TCG_TARGET_HAS_qemu_st8_i32; - case INDEX_op_qemu_ld_i128: case INDEX_op_qemu_st_i128: return TCG_TARGET_HAS_qemu_ldst_i128; @@ -3012,7 +3009,6 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) break; case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st8_i32: case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i64: case INDEX_op_qemu_ld_i128: diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index 497e8152b7..ab07ce1fcb 100644 --- a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -7,8 +7,6 @@ #ifndef TCG_TARGET_HAS_H #define TCG_TARGET_HAS_H -#define TCG_TARGET_HAS_qemu_st8_i32 0 - #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 #endif /* TCG_TARGET_REG_BITS == 64 */ From aae2456ac0b4eb91da7ee8a4b31052f2e8a77af8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 9 Feb 2025 12:55:15 -0800 Subject: [PATCH 156/161] tcg: Merge INDEX_op_{ld,st}_{i32,i64,i128} Merge into INDEX_op_{ld,st,ld2,st2}, where "2" indicates that two inputs or outputs are required. This simplifies the processing of i64/i128 depending on host word size. Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- include/tcg/tcg-opc.h | 16 ++----- tcg/aarch64/tcg-target.c.inc | 20 ++++----- tcg/arm/tcg-target.c.inc | 16 +++---- tcg/i386/tcg-target.c.inc | 50 ++++++--------------- tcg/loongarch64/tcg-target.c.inc | 28 +++++------- tcg/mips/tcg-target.c.inc | 38 +++++++--------- tcg/optimize.c | 15 ++----- tcg/ppc/tcg-target.c.inc | 47 ++++++++------------ tcg/riscv/tcg-target.c.inc | 20 +++------ tcg/s390x/tcg-target.c.inc | 28 +++++------- tcg/sparc64/tcg-target.c.inc | 20 +++------ tcg/tcg-op-ldst.c | 75 +++++++++++++++++--------------- tcg/tcg.c | 28 +++++++----- tcg/tci.c | 69 ++++++++++++----------------- tcg/tci/tcg-target.c.inc | 36 ++++++--------- 15 files changed, 200 insertions(+), 306 deletions(-) diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 0ce8332aab..995b79383e 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -124,18 +124,10 @@ DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) DEF(plugin_cb, 0, 0, 1, TCG_OPF_NOT_PRESENT) DEF(plugin_mem_cb, 0, 1, 1, TCG_OPF_NOT_PRESENT) -DEF(qemu_ld_i32, 1, 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_st_i32, 0, 1 + 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_ld_i64, DATA64_ARGS, 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_st_i64, 0, DATA64_ARGS + 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) - -/* Only for 64-bit hosts at the moment. */ -DEF(qemu_ld_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_st_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) +DEF(qemu_st, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) +DEF(qemu_ld2, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) +DEF(qemu_st2, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) /* Host vector support. */ diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 39a44507d1..a48cb46799 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2875,18 +2875,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_ld: tcg_out_qemu_ld(s, a0, a1, a2, ext); break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: + case INDEX_op_qemu_st: tcg_out_qemu_st(s, a0, a1, a2, ext); break; - case INDEX_op_qemu_ld_i128: + case INDEX_op_qemu_ld2: tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true); break; - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_st2: tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); break; @@ -3342,15 +3340,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_ld: return C_O1_I1(r, r); - case INDEX_op_qemu_ld_i128: + case INDEX_op_qemu_ld2: return C_O2_I1(r, r, r); - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: + case INDEX_op_qemu_st: return C_O0_I2(rz, r); - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_st2: return C_O0_I3(rz, rz, r); case INDEX_op_add_vec: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 5b34f61ca1..29fd82e9e0 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2570,17 +2570,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; - case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_ld2: tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); break; - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st: tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; - case INDEX_op_qemu_st_i64: + case INDEX_op_qemu_st2: tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); break; @@ -2596,13 +2596,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: return C_O1_I1(r, q); - case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_ld2: return C_O2_I1(e, p, q); - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st: return C_O0_I2(q, q); - case INDEX_op_qemu_st_i64: + case INDEX_op_qemu_st2: return C_O0_I3(Q, p, q); case INDEX_op_st_vec: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 6c4c2ebd0e..cb66f6c27f 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2457,7 +2457,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, switch (memop & MO_SIZE) { case MO_8: - /* This is handled with constraints on INDEX_op_qemu_st_i32. */ + /* This is handled with constraints on INDEX_op_qemu_st. */ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4); tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + h.seg, datalo, h.base, h.index, 0, h.ofs); @@ -3552,34 +3552,18 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I32); + case INDEX_op_qemu_ld: + tcg_out_qemu_ld(s, a0, -1, a1, a2, type); break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_ld(s, a0, a1, a2, args[3], TCG_TYPE_I64); - } - break; - case INDEX_op_qemu_ld_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_out_qemu_ld(s, a0, a1, a2, args[3], TCG_TYPE_I128); + case INDEX_op_qemu_ld2: + tcg_out_qemu_ld(s, a0, a1, a2, args[3], type); break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, a0, -1, a1, a2, TCG_TYPE_I32); + case INDEX_op_qemu_st: + tcg_out_qemu_st(s, a0, -1, a1, a2, type); break; - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_st(s, a0, -1, a1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I64); - } - break; - case INDEX_op_qemu_st_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I128); + case INDEX_op_qemu_st2: + tcg_out_qemu_st(s, a0, a1, a2, args[3], type); break; case INDEX_op_call: /* Always emitted via tcg_out_call. */ @@ -4135,25 +4119,17 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: return C_O1_I1(r, L); - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st: return (TCG_TARGET_REG_BITS == 32 && flags == MO_8 ? C_O0_I2(s, L) : C_O0_I2(L, L)); - case INDEX_op_qemu_ld_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L); - - case INDEX_op_qemu_st_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L); - - case INDEX_op_qemu_ld_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); + case INDEX_op_qemu_ld2: return C_O2_I1(r, r, L); - case INDEX_op_qemu_st_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); + case INDEX_op_qemu_st2: return C_O0_I3(L, L, L); case INDEX_op_ld_vec: diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 73a1196d8b..e4a8b43578 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -2020,22 +2020,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a3 = args[3]; switch (opc) { - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); + case INDEX_op_qemu_ld: + tcg_out_qemu_ld(s, a0, a1, a2, type); break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); - break; - case INDEX_op_qemu_ld_i128: + case INDEX_op_qemu_ld2: tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true); break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); + case INDEX_op_qemu_st: + tcg_out_qemu_st(s, a0, a1, a2, type); break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); - break; - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_st2: tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false); break; @@ -2541,18 +2535,16 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: + case INDEX_op_qemu_st: return C_O0_I2(rz, r); - case INDEX_op_qemu_ld_i128: + case INDEX_op_qemu_ld2: return C_N2_I1(r, r, r); - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_st2: return C_O0_I3(r, r, r); - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_ld: return C_O1_I1(r, r); case INDEX_op_ld_vec: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 5e41729d88..eaaf0f2024 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2381,26 +2381,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I32); + case INDEX_op_qemu_ld: + tcg_out_qemu_ld(s, a0, 0, a1, a2, type); break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_ld(s, a0, a1, a2, args[3], TCG_TYPE_I64); - } + case INDEX_op_qemu_ld2: + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + tcg_out_qemu_ld(s, a0, a1, a2, args[3], type); break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, a0, 0, a1, a2, TCG_TYPE_I32); + case INDEX_op_qemu_st: + tcg_out_qemu_st(s, a0, 0, a1, a2, type); break; - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_st(s, a0, 0, a1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I64); - } + case INDEX_op_qemu_st2: + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + tcg_out_qemu_st(s, a0, a1, a2, args[3], type); break; case INDEX_op_call: /* Always emitted via tcg_out_call. */ @@ -2415,14 +2409,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: return C_O1_I1(r, r); - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st: return C_O0_I2(rz, r); - case INDEX_op_qemu_ld_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); - case INDEX_op_qemu_st_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rz, r) : C_O0_I3(rz, rz, r); + case INDEX_op_qemu_ld2: + return TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O2_I1(r, r, r); + case INDEX_op_qemu_st2: + return TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O0_I3(rz, rz, r); default: return C_NotImplemented; diff --git a/tcg/optimize.c b/tcg/optimize.c index 4d2220664a..10a76c5461 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -3180,21 +3180,14 @@ void tcg_optimize(TCGContext *s) case INDEX_op_orc_vec: done = fold_orc(&ctx, op); break; - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: done = fold_qemu_ld_1reg(&ctx, op); break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - done = fold_qemu_ld_1reg(&ctx, op); - break; - } - QEMU_FALLTHROUGH; - case INDEX_op_qemu_ld_i128: + case INDEX_op_qemu_ld2: done = fold_qemu_ld_2reg(&ctx, op); break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_st: + case INDEX_op_qemu_st2: done = fold_qemu_st(&ctx, op); break; case INDEX_op_rems: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 9cf24831df..bb26769d53 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3779,35 +3779,27 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); + case INDEX_op_qemu_ld: + tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], type); break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64); - } else { + case INDEX_op_qemu_ld2: + if (TCG_TARGET_REG_BITS == 32) { tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); + break; } - break; - case INDEX_op_qemu_ld_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); + case INDEX_op_qemu_st: + tcg_out_qemu_st(s, args[0], -1, args[1], args[2], type); break; - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64); - } else { + case INDEX_op_qemu_st2: + if (TCG_TARGET_REG_BITS == 32) { tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); + break; } - break; - case INDEX_op_qemu_st_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; @@ -4426,20 +4418,17 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: return C_O1_I1(r, r); - case INDEX_op_qemu_ld_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); + case INDEX_op_qemu_ld2: + return TCG_TARGET_REG_BITS == 64 + ? C_N1O1_I1(o, m, r) : C_O2_I1(r, r, r); - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st: return C_O0_I2(r, r); - case INDEX_op_qemu_st_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); - - case INDEX_op_qemu_ld_i128: - return C_N1O1_I1(o, m, r); - case INDEX_op_qemu_st_i128: - return C_O0_I3(o, m, r); + case INDEX_op_qemu_st2: + return TCG_TARGET_REG_BITS == 64 + ? C_O0_I3(o, m, r) : C_O0_I3(r, r, r); case INDEX_op_add_vec: case INDEX_op_sub_vec: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index bcfdb6c545..89c7736f9a 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2633,17 +2633,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); + case INDEX_op_qemu_ld: + tcg_out_qemu_ld(s, a0, a1, a2, type); break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); + case INDEX_op_qemu_st: + tcg_out_qemu_st(s, a0, a1, a2, type); break; case INDEX_op_call: /* Always emitted via tcg_out_call. */ @@ -2875,11 +2869,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_ld: return C_O1_I1(r, r); - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: + case INDEX_op_qemu_st: return C_O0_I2(rz, r); case INDEX_op_st_vec: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index e266c19829..652ce9023e 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -3133,22 +3133,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32); + case INDEX_op_qemu_ld: + tcg_out_qemu_ld(s, args[0], args[1], args[2], type); break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64); + case INDEX_op_qemu_st: + tcg_out_qemu_st(s, args[0], args[1], args[2], type); break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64); - break; - case INDEX_op_qemu_ld_i128: + case INDEX_op_qemu_ld2: tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); break; - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_st2: tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; @@ -3600,15 +3594,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_ld: return C_O1_I1(r, r); - case INDEX_op_qemu_st_i64: - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st: return C_O0_I2(r, r); - case INDEX_op_qemu_ld_i128: + case INDEX_op_qemu_ld2: return C_O2_I1(o, m, r); - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_st2: return C_O0_I3(o, m, r); case INDEX_op_st_vec: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index a0efeee98c..bf27b6b54b 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -2068,17 +2068,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); + case INDEX_op_qemu_ld: + tcg_out_qemu_ld(s, a0, a1, a2, type); break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); + case INDEX_op_qemu_st: + tcg_out_qemu_st(s, a0, a1, a2, type); break; case INDEX_op_call: /* Always emitted via tcg_out_call. */ @@ -2093,12 +2087,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_ld: return C_O1_I1(r, r); - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: + case INDEX_op_qemu_st: return C_O0_I2(rz, r); default: diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c index ac1af9f77c..fa9e52277b 100644 --- a/tcg/tcg-op-ldst.c +++ b/tcg/tcg-op-ldst.c @@ -88,28 +88,40 @@ static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st) return op; } -static void gen_ldst(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh, - TCGTemp *addr, MemOpIdx oi) +static void gen_ldst1(TCGOpcode opc, TCGType type, TCGTemp *v, + TCGTemp *addr, MemOpIdx oi) { - TCGOp *op; - - if (vh) { - op = tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh), - temp_arg(addr), oi); - } else { - op = tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi); - } + TCGOp *op = tcg_gen_op3(opc, type, temp_arg(v), temp_arg(addr), oi); TCGOP_FLAGS(op) = get_memop(oi) & MO_SIZE; } -static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) +static void gen_ldst2(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh, + TCGTemp *addr, MemOpIdx oi) +{ + TCGOp *op = tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh), + temp_arg(addr), oi); + TCGOP_FLAGS(op) = get_memop(oi) & MO_SIZE; +} + +static void gen_ld_i64(TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) { if (TCG_TARGET_REG_BITS == 32) { - TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v)); - TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v)); - gen_ldst(opc, TCG_TYPE_I64, vl, vh, addr, oi); + gen_ldst2(INDEX_op_qemu_ld2, TCG_TYPE_I64, + tcgv_i32_temp(TCGV_LOW(v)), tcgv_i32_temp(TCGV_HIGH(v)), + addr, oi); } else { - gen_ldst(opc, TCG_TYPE_I64, tcgv_i64_temp(v), NULL, addr, oi); + gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I64, tcgv_i64_temp(v), addr, oi); + } +} + +static void gen_st_i64(TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) +{ + if (TCG_TARGET_REG_BITS == 32) { + gen_ldst2(INDEX_op_qemu_st2, TCG_TYPE_I64, + tcgv_i32_temp(TCGV_LOW(v)), tcgv_i32_temp(TCGV_HIGH(v)), + addr, oi); + } else { + gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I64, tcgv_i64_temp(v), addr, oi); } } @@ -236,8 +248,7 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr, } copy_addr = plugin_maybe_preserve_addr(addr); - gen_ldst(INDEX_op_qemu_ld_i32, TCG_TYPE_I32, - tcgv_i32_temp(val), NULL, addr, oi); + gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I32, tcgv_i32_temp(val), addr, oi); plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); @@ -292,8 +303,7 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, oi = make_memop_idx(memop, idx); } - gen_ldst(INDEX_op_qemu_st_i32, TCG_TYPE_I32, - tcgv_i32_temp(val), NULL, addr, oi); + gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I32, tcgv_i32_temp(val), addr, oi); plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); if (swap) { @@ -340,7 +350,7 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr, } copy_addr = plugin_maybe_preserve_addr(addr); - gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi); + gen_ld_i64(val, addr, oi); plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); @@ -407,7 +417,7 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr, oi = make_memop_idx(memop, idx); } - gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi); + gen_st_i64(val, addr, oi); plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); if (swap) { @@ -546,8 +556,8 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, hi = TCGV128_HIGH(val); } - gen_ldst(INDEX_op_qemu_ld_i128, TCG_TYPE_I128, tcgv_i64_temp(lo), - tcgv_i64_temp(hi), addr, oi); + gen_ldst2(INDEX_op_qemu_ld2, TCG_TYPE_I128, tcgv_i64_temp(lo), + tcgv_i64_temp(hi), addr, oi); if (need_bswap) { tcg_gen_bswap64_i64(lo, lo); @@ -575,8 +585,7 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, y = TCGV128_LOW(val); } - gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, - make_memop_idx(mop[0], idx)); + gen_ld_i64(x, addr, make_memop_idx(mop[0], idx)); if (need_bswap) { tcg_gen_bswap64_i64(x, x); @@ -592,8 +601,7 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, addr_p8 = tcgv_i64_temp(t); } - gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, - make_memop_idx(mop[1], idx)); + gen_ld_i64(y, addr_p8, make_memop_idx(mop[1], idx)); tcg_temp_free_internal(addr_p8); if (need_bswap) { @@ -657,8 +665,8 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, hi = TCGV128_HIGH(val); } - gen_ldst(INDEX_op_qemu_st_i128, TCG_TYPE_I128, - tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi); + gen_ldst2(INDEX_op_qemu_st2, TCG_TYPE_I128, + tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi); if (need_bswap) { tcg_temp_free_i64(lo); @@ -685,8 +693,7 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, x = b; } - gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, - make_memop_idx(mop[0], idx)); + gen_st_i64(x, addr, make_memop_idx(mop[0], idx)); if (tcg_ctx->addr_type == TCG_TYPE_I32) { TCGv_i32 t = tcg_temp_ebb_new_i32(); @@ -700,12 +707,10 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, if (b) { tcg_gen_bswap64_i64(b, y); - gen_ldst_i64(INDEX_op_qemu_st_i64, b, addr_p8, - make_memop_idx(mop[1], idx)); + gen_st_i64(b, addr_p8, make_memop_idx(mop[1], idx)); tcg_temp_free_i64(b); } else { - gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, - make_memop_idx(mop[1], idx)); + gen_st_i64(y, addr_p8, make_memop_idx(mop[1], idx)); } tcg_temp_free_internal(addr_p8); } else { diff --git a/tcg/tcg.c b/tcg/tcg.c index 5c0cab205c..6c0866d446 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2432,14 +2432,20 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_exit_tb: case INDEX_op_goto_tb: case INDEX_op_goto_ptr: - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_ld_i64: - case INDEX_op_qemu_st_i64: return true; - case INDEX_op_qemu_ld_i128: - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + tcg_debug_assert(type <= TCG_TYPE_REG); + return true; + + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: + if (TCG_TARGET_REG_BITS == 32) { + tcg_debug_assert(type == TCG_TYPE_I64); + return true; + } + tcg_debug_assert(type == TCG_TYPE_I128); return TCG_TARGET_HAS_qemu_ldst_i128; case INDEX_op_add: @@ -3007,12 +3013,10 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) } i = 1; break; - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_ld_i64: - case INDEX_op_qemu_st_i64: - case INDEX_op_qemu_ld_i128: - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: { const char *s_al, *s_op, *s_at; MemOpIdx oi = op->args[k++]; diff --git a/tcg/tci.c b/tcg/tci.c index b08288e7d3..700e672616 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -789,46 +789,33 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tb_ptr = ptr; break; - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr); break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - tci_args_rrm(insn, &r0, &r1, &oi); - taddr = regs[r1]; - } else { - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - taddr = regs[r2]; - oi = regs[r3]; - } - tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr); - if (TCG_TARGET_REG_BITS == 32) { - tci_write_reg64(regs, r1, r0, tmp64); - } else { - regs[r0] = tmp64; - } - break; - - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st: tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr); break; - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 64) { - tci_args_rrm(insn, &r0, &r1, &oi); - tmp64 = regs[r0]; - taddr = regs[r1]; - } else { - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - tmp64 = tci_uint64(regs[r1], regs[r0]); - taddr = regs[r2]; - oi = regs[r3]; - } + case INDEX_op_qemu_ld2: + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + taddr = regs[r2]; + oi = regs[r3]; + tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr); + tci_write_reg64(regs, r1, r0, tmp64); + break; + + case INDEX_op_qemu_st2: + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + tmp64 = tci_uint64(regs[r1], regs[r0]); + taddr = regs[r2]; + oi = regs[r3]; tci_qemu_st(env, taddr, tmp64, oi, tb_ptr); break; @@ -1056,23 +1043,21 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) str_r(r2), str_r(r3)); break; - case INDEX_op_qemu_ld_i64: - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 32) { - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", - op_name, str_r(r0), str_r(r1), - str_r(r2), str_r(r3)); - break; - } - /* fall through */ - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: tci_args_rrm(insn, &r0, &r1, &oi); info->fprintf_func(info->stream, "%-12s %s, %s, %x", op_name, str_r(r0), str_r(r1), oi); break; + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", + op_name, str_r(r0), str_r(r1), + str_r(r2), str_r(r3)); + break; + case 0: /* tcg_out_nop_fill uses zeros */ if (insn == 0) { diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 1fb7575061..6b8f71f49e 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -40,14 +40,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: return C_O1_I1(r, r); - case INDEX_op_qemu_ld_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st: return C_O0_I2(r, r); - case INDEX_op_qemu_st_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); + case INDEX_op_qemu_ld2: + return TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O2_I1(r, r, r); + case INDEX_op_qemu_st2: + return TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O0_I3(r, r, r); default: return C_NotImplemented; @@ -1203,22 +1203,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_qemu_ld_i64: - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[3]); - tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP); - break; - } - /* fall through */ - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_st_i32: - if (TCG_TARGET_REG_BITS == 64 && s->addr_type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_TMP, args[1]); - tcg_out_op_rrm(s, opc, args[0], TCG_REG_TMP, args[2]); - } else { - tcg_out_op_rrm(s, opc, args[0], args[1], args[2]); - } + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + tcg_out_op_rrm(s, opc, args[0], args[1], args[2]); + break; + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[3]); + tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP); break; case INDEX_op_call: /* Always emitted via tcg_out_call. */ From 3bedb9d3e2855606f2b4982ce07f8ae399957c3d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 11 Feb 2025 13:41:42 -0800 Subject: [PATCH 157/161] tcg: Convert qemu_ld{2} to TCGOutOpLoad{2} Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 30 +++++++++------ tcg/arm/tcg-target.c.inc | 63 +++++++++++++++++++++++--------- tcg/i386/tcg-target.c.inc | 47 ++++++++++++++++-------- tcg/loongarch64/tcg-target.c.inc | 37 ++++++++++--------- tcg/mips/tcg-target.c.inc | 57 +++++++++++++++++++++-------- tcg/ppc/tcg-target.c.inc | 45 ++++++++++++++--------- tcg/riscv/tcg-target.c.inc | 22 ++++++----- tcg/s390x/tcg-target.c.inc | 32 +++++++++------- tcg/sparc64/tcg-target.c.inc | 21 ++++++----- tcg/tcg.c | 32 +++++++++++++++- tcg/tci/tcg-target.c.inc | 30 ++++++++++++--- 11 files changed, 283 insertions(+), 133 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index a48cb46799..feda64afa3 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1806,8 +1806,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType data_type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -1822,6 +1822,11 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, } } +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { @@ -1940,6 +1945,17 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, } } +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, true); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_O2_I1(r, r, r), + .out = tgen_qemu_ld2, +}; + static const tcg_insn_unit *tb_ret_addr; static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) @@ -2875,15 +2891,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld: - tcg_out_qemu_ld(s, a0, a1, a2, ext); - break; case INDEX_op_qemu_st: tcg_out_qemu_st(s, a0, a1, a2, ext); break; - case INDEX_op_qemu_ld2: - tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true); - break; case INDEX_op_qemu_st2: tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); break; @@ -3340,10 +3350,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld: - return C_O1_I1(r, r); - case INDEX_op_qemu_ld2: - return C_O2_I1(r, r, r); case INDEX_op_qemu_st: return C_O0_I2(rz, r); case INDEX_op_qemu_st2: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 29fd82e9e0..681ecc3d7a 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1586,8 +1586,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) { MemOp opc = get_memop(oi); TCGLabelQemuLdst *ldst; @@ -1595,7 +1595,41 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, ldst = prepare_host_addr(s, &h, addr, oi, true); if (ldst) { - ldst->type = data_type; + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; + + /* + * This a conditional BL only to load a pointer within this + * opcode into LR for the slow path. We will not be using + * the value for a tail call. + */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_bl_imm(s, COND_NE, 0); + } + + tcg_out_qemu_ld_direct(s, opc, data, -1, h); + + if (ldst) { + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, q), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, true); + if (ldst) { + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; @@ -1606,14 +1640,20 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, */ ldst->label_ptr[0] = s->code_ptr; tcg_out_bl_imm(s, COND_NE, 0); + } - tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); + tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); + + if (ldst) { ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); - } else { - tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); } } +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_O2_I1(e, p, q), + .out = tgen_qemu_ld2, +}; + static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, HostAddress h) { @@ -2570,13 +2610,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_qemu_ld: - tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld2: - tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); - break; - case INDEX_op_qemu_st: tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); break; @@ -2596,10 +2629,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld: - return C_O1_I1(r, q); - case INDEX_op_qemu_ld2: - return C_O2_I1(e, p, q); case INDEX_op_qemu_st: return C_O0_I2(q, q); case INDEX_op_qemu_st2: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index cb66f6c27f..7ec06f57ee 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2422,23 +2422,50 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; ldst = prepare_host_addr(s, &h, addr, oi, true); - tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi)); + tcg_out_qemu_ld_direct(s, data, -1, h, type, get_memop(oi)); if (ldst) { - ldst->type = data_type; + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, L), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, true); + tcg_out_qemu_ld_direct(s, datalo, datahi, h, type, get_memop(oi)); + + if (ldst) { + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_O2_I1(r, r, L), + .out = tgen_qemu_ld2, +}; + static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, HostAddress h, MemOp memop) { @@ -3552,13 +3579,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld: - tcg_out_qemu_ld(s, a0, -1, a1, a2, type); - break; - case INDEX_op_qemu_ld2: - tcg_out_qemu_ld(s, a0, a1, a2, args[3], type); - break; - case INDEX_op_qemu_st: tcg_out_qemu_st(s, a0, -1, a1, a2, type); break; @@ -4119,16 +4139,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld: - return C_O1_I1(r, L); - case INDEX_op_qemu_st: return (TCG_TARGET_REG_BITS == 32 && flags == MO_8 ? C_O0_I2(s, L) : C_O0_I2(L, L)); - case INDEX_op_qemu_ld2: - return C_O2_I1(r, r, L); case INDEX_op_qemu_st2: return C_O0_I3(L, L, L); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index e4a8b43578..e2f0b7f894 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1167,22 +1167,27 @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; ldst = prepare_host_addr(s, &h, addr_reg, oi, true); - tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h); + tcg_out_qemu_ld_indexed(s, get_memop(oi), type, data_reg, h); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, TCGReg rd, HostAddress h) { @@ -1270,6 +1275,17 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi } } +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, true); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_N2_I1(r, r, r), + .out = tgen_qemu_ld2, +}; + /* * Entry-points */ @@ -2020,12 +2036,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a3 = args[3]; switch (opc) { - case INDEX_op_qemu_ld: - tcg_out_qemu_ld(s, a0, a1, a2, type); - break; - case INDEX_op_qemu_ld2: - tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true); - break; case INDEX_op_qemu_st: tcg_out_qemu_st(s, a0, a1, a2, type); break; @@ -2537,16 +2547,9 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) switch (op) { case INDEX_op_qemu_st: return C_O0_I2(rz, r); - - case INDEX_op_qemu_ld2: - return C_N2_I1(r, r, r); - case INDEX_op_qemu_st2: return C_O0_I3(r, r, r); - case INDEX_op_qemu_ld: - return C_O1_I1(r, r); - case INDEX_op_ld_vec: case INDEX_op_dupm_vec: case INDEX_op_dup_vec: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index eaaf0f2024..14bffcd404 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1387,8 +1387,8 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) { MemOp opc = get_memop(oi); TCGLabelQemuLdst *ldst; @@ -1397,19 +1397,56 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, ldst = prepare_host_addr(s, &h, addr, oi, true); if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) { - tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type); + tcg_out_qemu_ld_direct(s, data, 0, h.base, opc, type); } else { - tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type); + tcg_out_qemu_ld_unalign(s, data, 0, h.base, opc, type); } if (ldst) { - ldst->type = data_type; + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = 0; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; + HostAddress h; + + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + ldst = prepare_host_addr(s, &h, addr, oi, true); + + if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) { + tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, type); + } else { + tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, type); + } + + if (ldst) { + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + /* Ensure that the mips32 code is compiled but discarded for mips64. */ + .base.static_constraint = + TCG_TARGET_REG_BITS == 32 ? C_O2_I1(r, r, r) : C_NotImplemented, + .out = + TCG_TARGET_REG_BITS == 32 ? tgen_qemu_ld2 : NULL, +}; + static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc) { @@ -2381,14 +2418,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld: - tcg_out_qemu_ld(s, a0, 0, a1, a2, type); - break; - case INDEX_op_qemu_ld2: - tcg_debug_assert(TCG_TARGET_REG_BITS == 32); - tcg_out_qemu_ld(s, a0, a1, a2, args[3], type); - break; - case INDEX_op_qemu_st: tcg_out_qemu_st(s, a0, 0, a1, a2, type); break; @@ -2409,8 +2438,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld: - return C_O1_I1(r, r); case INDEX_op_qemu_st: return C_O0_I2(rz, r); case INDEX_op_qemu_ld2: diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index bb26769d53..e4e6b7b2d9 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2695,6 +2695,33 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, } } +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_qemu_ld(s, data, -1, addr, oi, type); +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_qemu_ld(s, datalo, datahi, addr, oi, type); + } else { + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr, oi, true); + } +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 64 ? C_N1O1_I1(o, m, r) : C_O2_I1(r, r, r), + .out = tgen_qemu_ld2, +}; + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { int i; @@ -3779,18 +3806,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_qemu_ld: - tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], type); - break; - case INDEX_op_qemu_ld2: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_qemu_ld(s, args[0], args[1], args[2], - args[3], TCG_TYPE_I64); - break; - } - tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); - break; - case INDEX_op_qemu_st: tcg_out_qemu_st(s, args[0], -1, args[1], args[2], type); break; @@ -4418,12 +4433,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld: - return C_O1_I1(r, r); - case INDEX_op_qemu_ld2: - return TCG_TARGET_REG_BITS == 64 - ? C_N1O1_I1(o, m, r) : C_O2_I1(r, r, r); - case INDEX_op_qemu_st: return C_O0_I2(r, r); case INDEX_op_qemu_st2: diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 89c7736f9a..94e6f04fa6 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1833,22 +1833,31 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; TCGReg base; ldst = prepare_host_addr(s, &base, addr_reg, oi, true); - tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type); + tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), type); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val, TCGReg base, MemOp opc) { @@ -2633,9 +2642,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, TCGArg a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld: - tcg_out_qemu_ld(s, a0, a1, a2, type); - break; case INDEX_op_qemu_st: tcg_out_qemu_st(s, a0, a1, a2, type); break; @@ -2869,8 +2875,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld: - return C_O1_I1(r, r); case INDEX_op_qemu_st: return C_O0_I2(rz, r); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 652ce9023e..bf99b765cf 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2081,8 +2081,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, return ldst; } -static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -2091,12 +2091,17 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi, TCGType data_type) { @@ -2187,6 +2192,17 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, } } +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, true); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_O2_I1(o, m, r), + .out = tgen_qemu_ld2, +}; + static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { /* Reuse the zeroing that exists for goto_ptr. */ @@ -3133,15 +3149,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_qemu_ld: - tcg_out_qemu_ld(s, args[0], args[1], args[2], type); - break; case INDEX_op_qemu_st: tcg_out_qemu_st(s, args[0], args[1], args[2], type); break; - case INDEX_op_qemu_ld2: - tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); - break; case INDEX_op_qemu_st2: tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); break; @@ -3594,12 +3604,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld: - return C_O1_I1(r, r); case INDEX_op_qemu_st: return C_O0_I2(r, r); - case INDEX_op_qemu_ld2: - return C_O2_I1(o, m, r); case INDEX_op_qemu_st2: return C_O0_I3(o, m, r); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index bf27b6b54b..4426168354 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1186,8 +1186,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, return ldst; } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) { static const int ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = { [MO_UB] = LDUB, @@ -1219,12 +1219,21 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, ld_opc[get_memop(oi) & (MO_BSWAP | MO_SSIZE)]); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, MemOpIdx oi, TCGType data_type) { @@ -2068,9 +2077,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, a2 = args[2]; switch (opc) { - case INDEX_op_qemu_ld: - tcg_out_qemu_ld(s, a0, a1, a2, type); - break; case INDEX_op_qemu_st: tcg_out_qemu_st(s, a0, a1, a2, type); break; @@ -2087,9 +2093,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld: - return C_O1_I1(r, r); - case INDEX_op_qemu_st: return C_O0_I2(rz, r); diff --git a/tcg/tcg.c b/tcg/tcg.c index 6c0866d446..f338deb019 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1071,6 +1071,18 @@ typedef struct TCGOutOpMul2 { TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3); } TCGOutOpMul2; +typedef struct TCGOutOpQemuLdSt { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dest, + TCGReg addr, MemOpIdx oi); +} TCGOutOpQemuLdSt; + +typedef struct TCGOutOpQemuLdSt2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dlo, TCGReg dhi, + TCGReg addr, MemOpIdx oi); +} TCGOutOpQemuLdSt2; + typedef struct TCGOutOpUnary { TCGOutOp base; void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); @@ -1210,6 +1222,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), + OUTOP(INDEX_op_qemu_ld, TCGOutOpQemuLdSt, outop_qemu_ld), + OUTOP(INDEX_op_qemu_ld2, TCGOutOpQemuLdSt2, outop_qemu_ld2), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl), @@ -2446,7 +2460,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return true; } tcg_debug_assert(type == TCG_TYPE_I128); - return TCG_TARGET_HAS_qemu_ldst_i128; + goto do_lookup; case INDEX_op_add: case INDEX_op_and: @@ -2558,6 +2572,7 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return false; } + do_lookup: outop = all_outop[op]; tcg_debug_assert(outop != NULL); @@ -5799,6 +5814,21 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } break; + case INDEX_op_qemu_ld: + { + const TCGOutOpQemuLdSt *out = &outop_qemu_ld; + out->out(s, type, new_args[0], new_args[1], new_args[2]); + } + break; + + case INDEX_op_qemu_ld2: + { + const TCGOutOpQemuLdSt2 *out = &outop_qemu_ld2; + out->out(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } + break; + case INDEX_op_brcond: { const TCGOutOpBrcond *out = &outop_brcond; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 6b8f71f49e..f69e35e6ce 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -40,12 +40,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_ld: - return C_O1_I1(r, r); case INDEX_op_qemu_st: return C_O0_I2(r, r); - case INDEX_op_qemu_ld2: - return TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O2_I1(r, r, r); case INDEX_op_qemu_st2: return TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O0_I3(r, r, r); @@ -1197,17 +1193,39 @@ static const TCGOutOpStore outop_st = { .out_r = tcg_out_st, }; +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_op_rrm(s, INDEX_op_qemu_ld, data, addr, oi); +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, oi); + tcg_out_op_rrrr(s, INDEX_op_qemu_ld2, datalo, datahi, addr, TCG_REG_TMP); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O2_I1(r, r, r), + .out = + TCG_TARGET_REG_BITS == 64 ? NULL : tgen_qemu_ld2, +}; static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { switch (opc) { - case INDEX_op_qemu_ld: case INDEX_op_qemu_st: tcg_out_op_rrm(s, opc, args[0], args[1], args[2]); break; - case INDEX_op_qemu_ld2: case INDEX_op_qemu_st2: tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[3]); tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP); From 86fe5c2597ca165228ee9cd082886846de4c9ece Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 16 Feb 2025 14:02:00 -0800 Subject: [PATCH 158/161] tcg: Convert qemu_st{2} to TCGOutOpLdSt{2} Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 45 ++++++++----------- tcg/arm/tcg-target.c.inc | 61 ++++++++++++++++--------- tcg/i386/tcg-target.c.inc | 73 ++++++++++++++++-------------- tcg/loongarch64/tcg-target.c.inc | 47 +++++++++---------- tcg/mips/tcg-target.c.inc | 77 +++++++++++++++++--------------- tcg/ppc/tcg-target.c.inc | 47 +++++++++++-------- tcg/riscv/tcg-target.c.inc | 34 ++++++-------- tcg/s390x/tcg-target.c.inc | 42 +++++++++-------- tcg/sparc64/tcg-target.c.inc | 42 ++++++----------- tcg/tcg.c | 12 ++++- tcg/tci/tcg-target.c.inc | 51 +++++++++++---------- 11 files changed, 272 insertions(+), 259 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index feda64afa3..59e7992a5f 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1827,8 +1827,8 @@ static const TCGOutOpQemuLdSt outop_qemu_ld = { .out = tgen_qemu_ld, }; -static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType data_type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -1843,6 +1843,11 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addr_reg, MemOpIdx oi, bool is_ld) { @@ -1956,6 +1961,17 @@ static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { .out = tgen_qemu_ld2, }; +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, false); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(rz, rz, r), + .out = tgen_qemu_st2, +}; + static const tcg_insn_unit *tb_ret_addr; static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) @@ -2885,25 +2901,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - /* Hoist the loads of the most common arguments. */ - TCGArg a0 = args[0]; - TCGArg a1 = args[1]; - TCGArg a2 = args[2]; - - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_qemu_st(s, a0, a1, a2, ext); - break; - case INDEX_op_qemu_st2: - tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, @@ -3350,11 +3348,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_st: - return C_O0_I2(rz, r); - case INDEX_op_qemu_st2: - return C_O0_I3(rz, rz, r); - case INDEX_op_add_vec: case INDEX_op_sub_vec: case INDEX_op_mul_vec: diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 681ecc3d7a..014a441420 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1711,8 +1711,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) { MemOp opc = get_memop(oi); TCGLabelQemuLdst *ldst; @@ -1720,7 +1720,37 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, ldst = prepare_host_addr(s, &h, addr, oi, false); if (ldst) { - ldst->type = data_type; + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; + + h.cond = COND_EQ; + tcg_out_qemu_st_direct(s, opc, data, -1, h); + + /* The conditional call is last, as we're going to return here. */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_bl_imm(s, COND_NE, 0); + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } else { + tcg_out_qemu_st_direct(s, opc, data, -1, h); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(q, q), + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, false); + if (ldst) { + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; @@ -1736,6 +1766,11 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, } } +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(Q, p, q), + .out = tgen_qemu_st2, +}; + static void tcg_out_epilogue(TCGContext *s); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) @@ -2609,31 +2644,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); - break; - case INDEX_op_qemu_st2: - tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_st: - return C_O0_I2(q, q); - case INDEX_op_qemu_st2: - return C_O0_I3(Q, p, q); - case INDEX_op_st_vec: return C_O0_I2(w, r); case INDEX_op_ld_vec: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 7ec06f57ee..9f294f28ed 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2484,7 +2484,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, switch (memop & MO_SIZE) { case MO_8: - /* This is handled with constraints on INDEX_op_qemu_st. */ + /* This is handled with constraints in cset_qemu_st(). */ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4); tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + h.seg, datalo, h.base, h.index, 0, h.ofs); @@ -2576,8 +2576,38 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, false); + tcg_out_qemu_st_direct(s, data, -1, h, get_memop(oi)); + + if (ldst) { + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static TCGConstraintSetIndex cset_qemu_st(TCGType type, unsigned flags) +{ + return flags == MO_8 ? C_O0_I2(s, L) : C_O0_I2(L, L); +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 32 ? C_Dynamic : C_O0_I2(L, L), + .base.dynamic_constraint = + TCG_TARGET_REG_BITS == 32 ? cset_qemu_st : NULL, + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -2586,13 +2616,18 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi)); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(L, L, L), + .out = tgen_qemu_st2, +}; + static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { /* Reuse the zeroing that exists for goto_ptr. */ @@ -3571,27 +3606,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0, a1, a2; - - /* Hoist the loads of the most common arguments. */ - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_qemu_st(s, a0, -1, a1, a2, type); - break; - case INDEX_op_qemu_st2: - tcg_out_qemu_st(s, a0, a1, a2, args[3], type); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } static int const umin_insn[4] = { @@ -4139,14 +4154,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_st: - return (TCG_TARGET_REG_BITS == 32 && flags == MO_8 - ? C_O0_I2(s, L) - : C_O0_I2(L, L)); - - case INDEX_op_qemu_st2: - return C_O0_I3(L, L, L); - case INDEX_op_ld_vec: case INDEX_op_dupm_vec: return C_O1_I1(x, r); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index e2f0b7f894..c74ddee644 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1212,8 +1212,8 @@ static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -1222,12 +1222,17 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi, TCGReg addr_reg, MemOpIdx oi, bool is_ld) { @@ -1286,6 +1291,17 @@ static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { .out = tgen_qemu_ld2, }; +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, false); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(r, r, r), + .out = tgen_qemu_st2, +}; + /* * Entry-points */ @@ -2030,25 +2046,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0 = args[0]; - TCGArg a1 = args[1]; - TCGArg a2 = args[2]; - TCGArg a3 = args[3]; - - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_qemu_st(s, a0, a1, a2, type); - break; - case INDEX_op_qemu_st2: - tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, @@ -2545,11 +2543,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_st: - return C_O0_I2(rz, r); - case INDEX_op_qemu_st2: - return C_O0_I3(r, r, r); - case INDEX_op_ld_vec: case INDEX_op_dupm_vec: case INDEX_op_dup_vec: diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 14bffcd404..1f12500344 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1510,8 +1510,8 @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) { MemOp opc = get_memop(oi); TCGLabelQemuLdst *ldst; @@ -1519,6 +1519,35 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, ldst = prepare_host_addr(s, &h, addr, oi, false); + if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) { + tcg_out_qemu_st_direct(s, data, 0, h.base, opc); + } else { + tcg_out_qemu_st_unalign(s, data, 0, h.base, opc); + } + + if (ldst) { + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = 0; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; + HostAddress h; + + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + ldst = prepare_host_addr(s, &h, addr, oi, false); + if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) { tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc); } else { @@ -1526,13 +1555,21 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, } if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + /* Ensure that the mips32 code is compiled but discarded for mips64. */ + .base.static_constraint = + TCG_TARGET_REG_BITS == 32 ? C_O0_I3(rz, rz, r) : C_NotImplemented, + .out = + TCG_TARGET_REG_BITS == 32 ? tgen_qemu_st2 : NULL, +}; + static void tcg_out_mb(TCGContext *s, unsigned a0) { static const MIPSInsn sync[] = { @@ -2411,43 +2448,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0, a1, a2; - - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_qemu_st(s, a0, 0, a1, a2, type); - break; - case INDEX_op_qemu_st2: - tcg_debug_assert(TCG_TARGET_REG_BITS == 32); - tcg_out_qemu_st(s, a0, a1, a2, args[3], type); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { - switch (op) { - case INDEX_op_qemu_st: - return C_O0_I2(rz, r); - case INDEX_op_qemu_ld2: - return TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O2_I1(r, r, r); - case INDEX_op_qemu_st2: - return TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O0_I3(rz, rz, r); - - default: - return C_NotImplemented; - } + return C_NotImplemented; } static const int tcg_target_callee_save_regs[] = { diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index e4e6b7b2d9..824cced94a 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2722,6 +2722,33 @@ static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { .out = tgen_qemu_ld2, }; +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_qemu_st(s, data, -1, addr, oi, type); +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(r, r), + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_qemu_st(s, datalo, datahi, addr, oi, type); + } else { + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr, oi, false); + } +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 64 ? C_O0_I3(o, m, r) : C_O0_I3(r, r, r), + .out = tgen_qemu_st2, +}; + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { int i; @@ -3805,25 +3832,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_qemu_st(s, args[0], -1, args[1], args[2], type); - break; - case INDEX_op_qemu_st2: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_qemu_st(s, args[0], args[1], args[2], - args[3], TCG_TYPE_I64); - break; - } - tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 94e6f04fa6..eca1283742 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1882,8 +1882,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; TCGReg base; @@ -1892,12 +1892,21 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi)); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_NotImplemented, +}; + static const tcg_insn_unit *tb_ret_addr; static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) @@ -2637,21 +2646,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0 = args[0]; - TCGArg a1 = args[1]; - TCGArg a2 = args[2]; - - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_qemu_st(s, a0, a1, a2, type); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, @@ -2875,9 +2870,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_st: - return C_O0_I2(rz, r); - case INDEX_op_st_vec: return C_O0_I2(v, r); case INDEX_op_dup_vec: diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index bf99b765cf..a316c8de41 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2102,8 +2102,8 @@ static const TCGOutOpQemuLdSt outop_qemu_ld = { .out = tgen_qemu_ld, }; -static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -2112,12 +2112,17 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(r, r), + .out = tgen_qemu_st, +}; + static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addr_reg, MemOpIdx oi, bool is_ld) { @@ -2203,6 +2208,17 @@ static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { .out = tgen_qemu_ld2, }; +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, false); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(o, m, r), + .out = tgen_qemu_st2, +}; + static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { /* Reuse the zeroing that exists for goto_ptr. */ @@ -3148,20 +3164,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_qemu_st(s, args[0], args[1], args[2], type); - break; - case INDEX_op_qemu_st2: - tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, @@ -3604,11 +3607,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_qemu_st: - return C_O0_I2(r, r); - case INDEX_op_qemu_st2: - return C_O0_I3(o, m, r); - case INDEX_op_st_vec: return C_O0_I2(v, r); case INDEX_op_ld_vec: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 4426168354..d1dd0fa33c 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1234,8 +1234,8 @@ static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { .base.static_constraint = C_NotImplemented, }; -static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) { static const int st_opc[(MO_SIZE | MO_BSWAP) + 1] = { [MO_UB] = STB, @@ -1258,12 +1258,21 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, st_opc[get_memop(oi) & (MO_BSWAP | MO_SIZE)]); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { if (check_fit_ptr(a0, 13)) { @@ -2069,36 +2078,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - TCGArg a0, a1, a2; - - /* Hoist the loads of the most common arguments. */ - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_qemu_st(s, a0, a1, a2, type); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { - switch (op) { - case INDEX_op_qemu_st: - return C_O0_I2(rz, r); - - default: - return C_NotImplemented; - } + return C_NotImplemented; } static void tcg_target_init(TCGContext *s) diff --git a/tcg/tcg.c b/tcg/tcg.c index f338deb019..302f7025e7 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1224,6 +1224,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = { OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), OUTOP(INDEX_op_qemu_ld, TCGOutOpQemuLdSt, outop_qemu_ld), OUTOP(INDEX_op_qemu_ld2, TCGOutOpQemuLdSt2, outop_qemu_ld2), + OUTOP(INDEX_op_qemu_st, TCGOutOpQemuLdSt, outop_qemu_st), + OUTOP(INDEX_op_qemu_st2, TCGOutOpQemuLdSt2, outop_qemu_st2), OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl), @@ -5815,15 +5817,21 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: { - const TCGOutOpQemuLdSt *out = &outop_qemu_ld; + const TCGOutOpQemuLdSt *out = + container_of(all_outop[op->opc], TCGOutOpQemuLdSt, base); + out->out(s, type, new_args[0], new_args[1], new_args[2]); } break; case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: { - const TCGOutOpQemuLdSt2 *out = &outop_qemu_ld2; + const TCGOutOpQemuLdSt2 *out = + container_of(all_outop[op->opc], TCGOutOpQemuLdSt2, base); + out->out(s, type, new_args[0], new_args[1], new_args[2], new_args[3]); } diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index f69e35e6ce..50e205211d 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -39,15 +39,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { - switch (op) { - case INDEX_op_qemu_st: - return C_O0_I2(r, r); - case INDEX_op_qemu_st2: - return TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O0_I3(r, r, r); - - default: - return C_NotImplemented; - } + return C_NotImplemented; } static const int tcg_target_reg_alloc_order[] = { @@ -1218,25 +1210,36 @@ static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { TCG_TARGET_REG_BITS == 64 ? NULL : tgen_qemu_ld2, }; +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_op_rrm(s, INDEX_op_qemu_st, data, addr, oi); +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(r, r), + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, oi); + tcg_out_op_rrrr(s, INDEX_op_qemu_st2, datalo, datahi, addr, TCG_REG_TMP); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O0_I3(r, r, r), + .out = + TCG_TARGET_REG_BITS == 64 ? NULL : tgen_qemu_st2, +}; + static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) { - switch (opc) { - case INDEX_op_qemu_st: - tcg_out_op_rrm(s, opc, args[0], args[1], args[2]); - break; - case INDEX_op_qemu_st2: - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[3]); - tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP); - break; - - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - default: - g_assert_not_reached(); - } + g_assert_not_reached(); } static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val, TCGReg base, From eafecf08059953a2d1d06b6a6ded3c56916cbdd4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 16 Feb 2025 14:22:48 -0800 Subject: [PATCH 159/161] tcg: Remove tcg_out_op MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All integer opcodes are now converted to TCGOutOp. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 7 ------- tcg/arm/tcg-target.c.inc | 7 ------- tcg/i386/tcg-target.c.inc | 7 ------- tcg/loongarch64/tcg-target.c.inc | 7 ------- tcg/mips/tcg-target.c.inc | 7 ------- tcg/ppc/tcg-target.c.inc | 7 ------- tcg/riscv/tcg-target.c.inc | 7 ------- tcg/s390x/tcg-target.c.inc | 7 ------- tcg/sparc64/tcg-target.c.inc | 7 ------- tcg/tcg.c | 12 +++--------- tcg/tci/tcg-target.c.inc | 7 ------- 11 files changed, 3 insertions(+), 79 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 59e7992a5f..4cb647cb34 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -2897,13 +2897,6 @@ static const TCGOutOpStore outop_st = { .out_r = tcg_out_st, }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg args[TCG_MAX_OP_ARGS], diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 014a441420..447e43583e 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2640,13 +2640,6 @@ static const TCGOutOpStore outop_st = { .out_r = tcg_out_st, }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 9f294f28ed..09fce27b06 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3602,13 +3602,6 @@ static const TCGOutOpStore outop_st = { .out_i = tgen_st_i, }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - static int const umin_insn[4] = { OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ }; diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index c74ddee644..e5580d69a8 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -2042,13 +2042,6 @@ static const TCGOutOpStore outop_st = { .out_r = tcg_out_st, }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, TCGReg rs) { diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 1f12500344..2c0457e588 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2444,13 +2444,6 @@ static const TCGOutOpStore outop_st = { }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 824cced94a..2e94778104 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3828,13 +3828,6 @@ static const TCGOutOpStore outop_st = { }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) { switch (opc) { diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index eca1283742..f9417d15f7 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2642,13 +2642,6 @@ static const TCGOutOpStore outop_st = { }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg args[TCG_MAX_OP_ARGS], diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index a316c8de41..7ca0071f24 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -3160,13 +3160,6 @@ static const TCGOutOpStore outop_st = { }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg src) { diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index d1dd0fa33c..83167aa29d 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -2074,13 +2074,6 @@ static const TCGOutOpStore outop_st = { }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { diff --git a/tcg/tcg.c b/tcg/tcg.c index 302f7025e7..c4e866e9c3 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -138,9 +138,6 @@ static void tcg_out_mb(TCGContext *s, unsigned bar); static void tcg_out_br(TCGContext *s, TCGLabel *l); static void tcg_out_set_carry(TCGContext *s); static void tcg_out_set_borrow(TCGContext *s); -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]); #if TCG_TARGET_MAYBE_vec static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg src); @@ -5920,12 +5917,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; default: - if (def->flags & TCG_OPF_VECTOR) { - tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, - TCGOP_VECE(op), new_args, const_args); - } else { - tcg_out_op(s, op->opc, type, new_args, const_args); - } + tcg_debug_assert(def->flags & TCG_OPF_VECTOR); + tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, + TCGOP_VECE(op), new_args, const_args); break; } diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 50e205211d..35c66a4836 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -1235,13 +1235,6 @@ static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { TCG_TARGET_REG_BITS == 64 ? NULL : tgen_qemu_st2, }; -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - g_assert_not_reached(); -} - static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val, TCGReg base, intptr_t offset) { From 33b6c61cce2403193f9fc3a221f8a8656ba9cc37 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 25 Apr 2025 11:59:58 -0700 Subject: [PATCH 160/161] tcg/sparc64: Unexport use_vis3_instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This variable is no longer used outside tcg-target.c.inc. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target-has.h | 6 ------ tcg/sparc64/tcg-target.c.inc | 6 ++++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index af6a949da3..b29fd177f6 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -7,12 +7,6 @@ #ifndef TCG_TARGET_HAS_H #define TCG_TARGET_HAS_H -#if defined(__VIS__) && __VIS__ >= 0x300 -#define use_vis3_instructions 1 -#else -extern bool use_vis3_instructions; -#endif - /* optional instructions */ #define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 83167aa29d..260dd461bd 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -274,8 +274,10 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) #define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#ifndef use_vis3_instructions -bool use_vis3_instructions; +#if defined(__VIS__) && __VIS__ >= 0x300 +#define use_vis3_instructions 1 +#else +static bool use_vis3_instructions; #endif static bool check_fit_i64(int64_t val, unsigned int bits) From 70ab4f4ed9bbe9bcfdb105681291b4695f151522 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 25 Apr 2025 12:57:11 -0700 Subject: [PATCH 161/161] tcg/sparc64: Implement CTPOP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target.c.inc | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 260dd461bd..9e004fb511 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -210,6 +210,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) #define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) #define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) +#define ARITH_POPC (INSN_OP(2) | INSN_OP3(0x2e)) #define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) #define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) @@ -274,6 +275,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) #define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) +static bool use_popc_instructions; #if defined(__VIS__) && __VIS__ >= 0x300 #define use_vis3_instructions 1 #else @@ -1511,8 +1513,23 @@ static const TCGOutOpBinary outop_clz = { .base.static_constraint = C_NotImplemented, }; +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_arith(s, a0, TCG_REG_G0, a1, ARITH_POPC); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + if (use_popc_instructions && type == TCG_TYPE_I64) { + return C_O1_I1(r, r); + } + return C_NotImplemented; +} + static const TCGOutOpUnary outop_ctpop = { - .base.static_constraint = C_NotImplemented, + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, }; static const TCGOutOpBinary outop_ctz = { @@ -2084,15 +2101,15 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) static void tcg_target_init(TCGContext *s) { + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + /* * Only probe for the platform and capabilities if we haven't already * determined maximum values at compile time. */ + use_popc_instructions = (hwcap & HWCAP_SPARC_POPC) != 0; #ifndef use_vis3_instructions - { - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; - } + use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; #endif tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;