Skip to content

Commit

Permalink
Merge pull request #103 from vacantron/opt
Browse files Browse the repository at this point in the history
Implement CSE and peephole optimization
  • Loading branch information
jserv authored Jan 7, 2024
2 parents 8344804 + 3b7995e commit 83609c0
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 35 deletions.
3 changes: 0 additions & 3 deletions src/arm-codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,6 @@ void cfg_flatten()

ph2_ir_t *insn;
for (insn = bb->ph2_ir_list.head; insn; insn = insn->next) {
if (insn->op == OP_assign && insn->dest == insn->src0)
continue;

flatten_ir = add_ph2_ir(OP_generic);
memcpy(flatten_ir, insn, sizeof(ph2_ir_t));

Expand Down
3 changes: 2 additions & 1 deletion src/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#define MAX_FIELDS 32
#define MAX_FUNCS 256
#define MAX_FUNC_TRIES 1950
#define MAX_BLOCKS 1050
#define MAX_BLOCKS 1150
#define MAX_TYPES 64
#define MAX_IR_INSTR 36864
#define MAX_BB_PRED 128
Expand Down Expand Up @@ -299,6 +299,7 @@ typedef struct phi_operand phi_operand_t;

struct insn {
struct insn *next;
struct insn *prev;
int idx;
opcode_t opcode;
var_t *rd;
Expand Down
1 change: 1 addition & 0 deletions src/globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,7 @@ void add_insn(block_t *block,
else
bb->insn_list.tail->next = n;

n->prev = bb->insn_list.tail;
bb->insn_list.tail = n;
}

Expand Down
8 changes: 8 additions & 0 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
/* Register allocator */
#include "reg-alloc.c"

/* Peephole optimization */
#include "peephole.c"

/* Machine code generation. Supports ARMv7-A and RISC-V32I. */
#include "codegen.c"

Expand Down Expand Up @@ -83,12 +86,17 @@ int main(int argc, char *argv[])

ssa_build(dump_ir);

/* SSA-based optimization */
optimize();

/* SSA-based liveness analyses */
liveness_analysis();

/* allocate register from IR */
reg_alloc();

peephole();

/* flatten CFG to linear instruction */
cfg_flatten();

Expand Down
73 changes: 73 additions & 0 deletions src/peephole.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* shecc - Self-Hosting and Educational C Compiler.
*
* shecc is freely redistributable under the BSD 2 clause license. See the
* file "LICENSE" for information on usage and redistribution of this file.
*/

/* Tell whether an instruction is eligible to absorb a following move.
 *
 * @ph2_ir: instruction whose opcode is inspected.
 *
 * Returns 1 for the arithmetic, shift, bitwise, logical, negate and load
 * opcodes listed below, and 0 for every other opcode.
 */
int is_fusible_insn(ph2_ir_t *ph2_ir)
{
    int fusible = 0;

    switch (ph2_ir->op) {
    /* arithmetic */
    case OP_add:
    case OP_sub:
    case OP_mul:
    case OP_div:
    case OP_mod:
    case OP_negate:
    /* shifts and bitwise */
    case OP_lshift:
    case OP_rshift:
    case OP_bit_and:
    case OP_bit_or:
    case OP_bit_xor:
    /* logical */
    case OP_log_and:
    case OP_log_or:
    case OP_log_not:
    /* loads */
    case OP_load:
    case OP_global_load:
    case OP_load_data_address:
        fusible = 1;
        break;
    default:
        break;
    }

    return fusible;
}

/* Try to merge an instruction with the one that directly follows it.
 *
 * @ph2_ir: first instruction of the candidate pair; it is modified in place
 *          when a fusion happens.
 *
 * The only pattern handled so far is a fusible instruction (see
 * is_fusible_insn()) followed by an OP_assign whose source is the first
 * instruction's destination. In that case the first instruction is
 * retargeted to the move's destination and the move is unlinked from the
 * list. Nothing happens when there is no following instruction.
 */
void insn_fusion(ph2_ir_t *ph2_ir)
{
    ph2_ir_t *succ = ph2_ir->next;

    if (succ && succ->op == OP_assign) {
        /* eliminate {ALU rn, rs1, rs2; mv rd, rn;} */
        if (is_fusible_insn(ph2_ir) && ph2_ir->dest == succ->src0) {
            /* write straight into rd and skip over the move */
            ph2_ir->dest = succ->dest;
            ph2_ir->next = succ->next;
        }
    }
    /* other insn fusions */
}

/* Peephole pass over the phase-2 IR of every function.
 *
 * Walks each function in FUNC_LIST and each of its basic blocks in
 * 'rpo_next' order. For every instruction it
 *   1. unlinks all directly following no-op moves (OP_assign whose
 *      destination equals its source), and then
 *   2. hands the instruction to insn_fusion() so it can be merged with
 *      whatever instruction now follows it.
 *
 * Step 1 loops until the successor is no longer a no-op move. Unlinking
 * only one and advancing would step onto the removed node's successor
 * without ever examining it, leaving runs of redundant moves in place.
 *
 * NOTE(review): only successors are examined, so a no-op move that is the
 * very first instruction of a block is not removed here.
 */
/* FIXME: release detached basic blocks */
void peephole()
{
    fn_t *fn;
    for (fn = FUNC_LIST.head; fn; fn = fn->next) {
        basic_block_t *bb;
        for (bb = fn->bbs; bb; bb = bb->rpo_next) {
            ph2_ir_t *ph2_ir;
            for (ph2_ir = bb->ph2_ir_list.head; ph2_ir; ph2_ir = ph2_ir->next) {
                ph2_ir_t *next = ph2_ir->next;

                /* drop every consecutive {mv rd, rd} that follows */
                while (next && next->op == OP_assign &&
                       next->dest == next->src0) {
                    next = next->next;
                    ph2_ir->next = next;
                }

                /* nothing left to pair this instruction with */
                if (!next)
                    continue;

                insn_fusion(ph2_ir);
            }
        }
    }
}
50 changes: 36 additions & 14 deletions src/reg-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@ void spill_var(basic_block_t *bb, var_t *var, int idx)
REGS[idx].polluted = 0;
}

/* Look up the register that currently holds the given variable.
 *
 * @var: variable to search for.
 *
 * Scans REGS[0] .. REGS[REG_CNT - 1] in ascending order and returns the
 * index of the first entry whose 'var' field equals @var, or -1 when no
 * entry matches.
 */
int find_in_regs(var_t *var)
{
    int reg = 0;

    while (reg < REG_CNT) {
        if (REGS[reg].var == var)
            return reg;
        reg++;
    }

    return -1;
}

void load_var(basic_block_t *bb, var_t *var, int idx)
{
ph2_ir_t *ir = var->is_global ? bb_add_ph2_ir(bb, OP_global_load)
Expand All @@ -85,11 +96,9 @@ void load_var(basic_block_t *bb, var_t *var, int idx)

int prepare_operand(basic_block_t *bb, var_t *var, int operand_0)
{
int i;
for (i = 0; i < REG_CNT; i++) {
if (REGS[i].var == var)
return i;
}
int i = find_in_regs(var);
if (i > -1)
return i;

for (i = 0; i < REG_CNT; i++) {
if (!REGS[i].var) {
Expand Down Expand Up @@ -125,12 +134,11 @@ int prepare_operand(basic_block_t *bb, var_t *var, int operand_0)

int prepare_dest(basic_block_t *bb, var_t *var, int operand_0, int operand_1)
{
int i;
for (i = 0; i < REG_CNT; i++)
if (REGS[i].var == var) {
REGS[i].polluted = 1;
return i;
}
int i = find_in_regs(var);
if (i > -1) {
REGS[i].polluted = 1;
return i;
}

for (i = 0; i < REG_CNT; i++) {
if (!REGS[i].var) {
Expand Down Expand Up @@ -328,7 +336,7 @@ void reg_alloc()
func_t *func;
ph2_ir_t *ir;
int dest, src0, src1;
int i, sz;
int i, sz, clear_reg;

refresh(bb, insn);

Expand Down Expand Up @@ -404,9 +412,19 @@ void reg_alloc()
ir->dest = dest;
break;
case OP_assign:
src0 = prepare_operand(bb, insn->rs1, -1);
src0 = find_in_regs(insn->rs1);

/* If operand is loaded from stack, clear the original slot
* after moving.
*/
if (src0 > -1)
clear_reg = 0;
else {
clear_reg = 1;
src0 = prepare_operand(bb, insn->rs1, -1);
}
dest = prepare_dest(bb, insn->rd, src0, -1);
ir = bb_add_ph2_ir(bb, insn->opcode);
ir = bb_add_ph2_ir(bb, OP_assign);
ir->src0 = src0;
ir->dest = dest;

Expand All @@ -417,6 +435,10 @@ void reg_alloc()
ir->src1 = insn->rd->offset;
REGS[dest].polluted = 0;
}

if (clear_reg)
REGS[src0].var = NULL;

break;
case OP_read:
src0 = prepare_operand(bb, insn->rs1, -1);
Expand Down
12 changes: 5 additions & 7 deletions src/riscv-codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,6 @@ void cfg_flatten()

ph2_ir_t *insn;
for (insn = bb->ph2_ir_list.head; insn; insn = insn->next) {
if (insn->op == OP_assign && insn->dest == insn->src0)
continue;

flatten_ir = add_ph2_ir(OP_generic);
memcpy(flatten_ir, insn, sizeof(ph2_ir_t));

Expand All @@ -146,6 +143,7 @@ void emit(int code)

void emit_ph2_ir(ph2_ir_t *ph2_ir)
{
func_t *func;
int rd = ph2_ir->dest + 10;
int rs1 = ph2_ir->src0 + 10;
int rs2 = ph2_ir->src1 + 10;
Expand Down Expand Up @@ -249,16 +247,16 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
emit(__jal(__zero, ph2_ir->next_bb->elf_offset - elf_code_idx));
return;
case OP_call:
emit(__jal(__ra, find_func(ph2_ir->func_name)->fn->bbs->elf_offset -
elf_code_idx));
func = find_func(ph2_ir->func_name);
emit(__jal(__ra, func->fn->bbs->elf_offset - elf_code_idx));
return;
case OP_load_data_address:
emit(__lui(rd, rv_hi(elf_data_start + ph2_ir->src0)));
emit(__addi(rd, rd, rv_lo(elf_data_start + ph2_ir->src0)));
return;
case OP_address_of_func:
ofs =
elf_code_start + find_func(ph2_ir->func_name)->fn->bbs->elf_offset;
func = find_func(ph2_ir->func_name);
ofs = elf_code_start + func->fn->bbs->elf_offset;
emit(__lui(__t0, rv_hi(ofs)));
emit(__addi(__t0, __t0, rv_lo(ofs)));
emit(__sw(__t0, rs1, 0));
Expand Down
96 changes: 86 additions & 10 deletions src/ssa.c
Original file line number Diff line number Diff line change
Expand Up @@ -651,18 +651,16 @@ void append_unwound_phi_insn(basic_block_t *bb, var_t *dest, var_t *rs)
} else {
/* insert it before branch instruction */
if (tail->opcode == OP_branch) {
insn_t *prev = bb->insn_list.head;
if (!prev->next) {
if (tail->prev) {
tail->prev->next = n;
n->prev = tail->prev;
} else
bb->insn_list.head = n;
n->next = prev;
} else {
while (prev->next != tail)
prev = prev->next;
prev->next = n;
n->next = tail;
}

n->next = tail;
tail->prev = n;
} else {
bb->insn_list.tail->next = n;
tail->next = n;
bb->insn_list.tail = n;
}
}
Expand Down Expand Up @@ -1051,6 +1049,84 @@ void ssa_build(int dump_ir)
unwind_phi();
}

/* Common Subexpression Elimination (CSE) */
/* TODO: simplify with def-use chain */
/* TODO: release detached insns node */
/* Replace a repeated {add addr, base, idx; read val, addr} pair with a copy
 * of the value produced by an earlier identical pair.
 *
 * @insn: candidate instruction; only an OP_read whose address operand is
 *        produced by the directly preceding OP_add is considered.
 * @bb:   basic block that contains @insn.
 *
 * The search walks backwards through @bb starting before the add, then
 * through the tail-to-head instruction lists of the blocks reached by
 * following 'idom' links. It looks for another add of the same two operands
 * that is immediately followed by a read of that add's result. Within a
 * block the scan does not stop at the first hit, so the match closest to
 * the block head is the one used.
 *
 * On success the add is unlinked from @bb, @insn becomes
 * {OP_assign rd, def} and 1 is returned. Otherwise nothing is modified and
 * 0 is returned.
 *
 * NOTE(review): no check is made for stores or calls between the earlier
 * read and @insn, and the add is dropped without checking whether its
 * result has other users - confirm both are acceptable for the IR that
 * reaches this pass.
 */
int cse(insn_t *insn, basic_block_t *bb)
{
    if (insn->opcode != OP_read)
        return 0;

    insn_t *prev = insn->prev;

    if (!prev)
        return 0;
    if (prev->opcode != OP_add)
        return 0;
    if (prev->rd != insn->rs1)
        return 0;

    var_t *def = NULL, *base = prev->rs1, *idx = prev->rs2;
    basic_block_t *b;
    insn_t *i = prev;
    for (b = bb;; b = b->idom) {
        /* entering a dominator: start from its last instruction */
        if (!i)
            i = b->insn_list.tail;

        for (; i; i = i->prev) {
            if (i == prev)
                continue;
            if (i->opcode != OP_add)
                continue;
            if (!i->next)
                continue;
            if (i->next->opcode != OP_read)
                continue;
            /* the read must dereference the address this add computed */
            if (i->next->rs1 != i->rd)
                continue;
            if (i->rs1 != base || i->rs2 != idx)
                continue;
            def = i->next->rd;
        }
        if (def)
            break;
        /* stop at the root, or at a block without dominator information */
        if (!b->idom || b->idom == b)
            break;
    }

    if (!def)
        return 0;

    /* Unlink the now-dead add in both directions. */
    if (prev->prev) {
        insn->prev = prev->prev;
        prev->prev->next = insn;
    } else {
        bb->insn_list.head = insn;
        insn->prev = NULL;
    }

    insn->opcode = OP_assign;
    insn->rs1 = def;
    return 1;
}

/* Run the SSA-level optimizations over the whole program.
 *
 * Visits every function in FUNC_LIST, every basic block of the function in
 * 'rpo_next' order, and every instruction of the block in list order, and
 * applies cse() to each instruction. Whether any rewrite happened is
 * accumulated in 'changed', which is not consumed yet.
 */
void optimize()
{
    int changed = 0;
    fn_t *fn = FUNC_LIST.head;

    while (fn) {
        /* basic block level (control flow) optimizations */

        basic_block_t *bb = fn->bbs;
        while (bb) {
            /* instruction level optimizations */
            insn_t *insn = bb->insn_list.head;
            while (insn) {
                changed |= cse(insn, bb);
                /* more optimizations */
                insn = insn->next;
            }
            bb = bb->rpo_next;
        }
        fn = fn->next;
    }
}

void bb_index_reversed_rpo(fn_t *fn, basic_block_t *bb)
{
bb->rpo_r = fn->bb_cnt++;
Expand Down

0 comments on commit 83609c0

Please sign in to comment.