Skip to content

Commit

Permalink
Reduce generated code size with -Os
Browse files Browse the repository at this point in the history
  • Loading branch information
drmortalwombat committed Sep 27, 2024
1 parent 1fb68c1 commit 57d8747
Show file tree
Hide file tree
Showing 12 changed files with 339 additions and 78 deletions.
43 changes: 37 additions & 6 deletions include/crt.c
Original file line number Diff line number Diff line change
Expand Up @@ -2858,6 +2858,11 @@ __asm freg

__asm faddsub
{
fsub:
lda tmp + 3
eor #$80
sta tmp + 3
fadd:
lda #$ff
cmp tmp + 4
beq INF
Expand Down Expand Up @@ -3008,7 +3013,7 @@ L2: lsr
__asm inp_binop_add_f32
{
jsr freg.split_exp
jsr faddsub
jsr faddsub.fadd
jmp startup.exec
}

Expand All @@ -3017,10 +3022,7 @@ __asm inp_binop_add_f32
__asm inp_binop_sub_f32
{
jsr freg.split_exp
lda tmp + 3
eor #$80
sta tmp + 3
jsr faddsub
jsr faddsub.fsub
jmp startup.exec
}

Expand Down Expand Up @@ -3983,11 +3985,38 @@ W3: sta accu + 2
jmp freg.merge_aexp
}

// store32: copy the four bytes accu + 0 .. accu + 3 to the four consecutive
// locations $00 + X .. $03 + X, i.e. the destination is selected by the X
// register on entry.
// X is not modified by this routine. A is overwritten; on return it holds the
// last byte copied (accu + 3), and the flags reflect that final lda.
// NOTE(review): the one-byte operands $00..$03 suggest zero-page,X addressing,
// which would make X the zero-page address of the destination - confirm
// against the assembler's operand size selection.
__asm store32
{
lda accu + 0
sta $00, x
lda accu + 1
sta $01, x
lda accu + 2
sta $02, x
lda accu + 3
sta $03, x
rts
}

// load32: copy four consecutive bytes from $00 + X .. $03 + X into
// accu + 0 .. accu + 3, i.e. the source is selected by the X register on
// entry. This is the mirror image of a store of accu to the same locations.
// X is not modified by this routine. A is overwritten; on return it holds the
// last byte copied (the one read from $03 + X), and the flags reflect that
// final lda.
// NOTE(review): the one-byte operands $00..$03 suggest zero-page,X addressing,
// which would make X the zero-page address of the source - confirm against
// the assembler's operand size selection.
__asm load32
{
lda $00, x
sta accu + 0
lda $01, x
sta accu + 1
lda $02, x
sta accu + 2
lda $03, x
sta accu + 3
rts
}

#pragma runtime(fsplita, freg.split_aexp)
#pragma runtime(fsplitt, freg.split_texp)
#pragma runtime(fsplitx, freg.split_xexp)
#pragma runtime(fmergea, freg.merge_aexp)
#pragma runtime(faddsub, faddsub)
#pragma runtime(fadd, faddsub.fadd)
#pragma runtime(fsub, faddsub.fsub)
#pragma runtime(fmul, fmul)
#pragma runtime(fdiv, fdiv)
#pragma runtime(fcmp, fcmp)
Expand All @@ -4001,6 +4030,8 @@ W3: sta accu + 2
#pragma runtime(ffromlu, uint32_to_float)
#pragma runtime(ftoli, f32_to_i32)
#pragma runtime(ftolu, f32_to_u32)
#pragma runtime(store32, store32)
#pragma runtime(load32, load32)

__asm inp_op_floor_f32
{
Expand Down
6 changes: 5 additions & 1 deletion oscar64/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,8 @@ bool Compiler::GenerateCode(void)
RegisterRuntime(loc, Ident::Unique("fsplitt"));
RegisterRuntime(loc, Ident::Unique("fsplitx"));
RegisterRuntime(loc, Ident::Unique("fsplita"));
RegisterRuntime(loc, Ident::Unique("faddsub"));
RegisterRuntime(loc, Ident::Unique("fadd"));
RegisterRuntime(loc, Ident::Unique("fsub"));
RegisterRuntime(loc, Ident::Unique("fmul"));
RegisterRuntime(loc, Ident::Unique("fdiv"));
RegisterRuntime(loc, Ident::Unique("mul16"));
Expand Down Expand Up @@ -971,6 +972,9 @@ bool Compiler::GenerateCode(void)
RegisterRuntime(loc, Ident::Unique("divu32"));
RegisterRuntime(loc, Ident::Unique("modu32"));

RegisterRuntime(loc, Ident::Unique("store32"));
RegisterRuntime(loc, Ident::Unique("load32"));

RegisterRuntime(loc, Ident::Unique("malloc"));
RegisterRuntime(loc, Ident::Unique("free"));
RegisterRuntime(loc, Ident::Unique("breakpoint"));
Expand Down
2 changes: 1 addition & 1 deletion oscar64/CompilerTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ static const uint64 COPT_DEFAULT = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE |

static const uint64 COPT_OPTIMIZE_DEFAULT = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_CONST_EXPRESSIONS;

static const uint64 COPT_OPTIMIZE_SIZE = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_CONST_EXPRESSIONS | COPT_OPTIMIZE_CODE_SIZE | COPT_OPTIMIZE_CONST_PARAMS | COPT_OPTIMIZE_MERGE_CALLS | COPT_OPTIMIZE_GLOBAL;
static const uint64 COPT_OPTIMIZE_SIZE = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_CONST_EXPRESSIONS | COPT_OPTIMIZE_CODE_SIZE | COPT_OPTIMIZE_CONST_PARAMS | COPT_OPTIMIZE_MERGE_CALLS | COPT_OPTIMIZE_GLOBAL;

static const uint64 COPT_OPTIMIZE_SPEED = COPT_OPTIMIZE_BASIC | COPT_OPTIMIZE_INLINE | COPT_OPTIMIZE_AUTO_INLINE | COPT_OPTIMIZE_AUTO_UNROLL | COPT_OPTIMIZE_CONST_EXPRESSIONS | COPT_OPTIMIZE_ASSEMBLER | COPT_OPTIMIZE_CONST_PARAMS | COPT_OPTIMIZE_MERGE_CALLS | COPT_OPTIMIZE_GLOBAL;

Expand Down
1 change: 1 addition & 0 deletions oscar64/Errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ enum ErrorID
EWARN_UNDEFINED_POINTER_ARITHMETIC,
EWARN_INVALID_VALUE_RANGE,
EWARN_DEFAULT_COPY_DEPRECATED,
EWARN_INSUFFICIENT_MEMORY,

EERR_GENERIC = 3000,
EERR_FILE_NOT_FOUND,
Expand Down
37 changes: 31 additions & 6 deletions oscar64/GlobalAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,25 @@ void GlobalAnalyzer::TopoSort(Declaration* procDec)
}
}

// Sum, over every entry in called->mCallers, the invocation count that the
// two-argument overload attributes to that caller. Returns 0 when the
// function has no recorded callers.
int GlobalAnalyzer::CallerInvokes(Declaration* called)
{
	int total = 0;
	const int numCallers = called->mCallers.Size();

	for (int idx = 0; idx < numCallers; idx++)
		total += CallerInvokes(called->mCallers[idx], called);

	return total;
}

// Number of invocations contributed by a single caller; never less than 1.
// A caller counts once unless it is a constant function that is marked
// DTF_INLINE or DTF_REQUEST_INLINE and carries none of DTF_PREVENT_INLINE,
// DTF_FUNC_RECURSIVE, DTF_FUNC_VARIABLE or DTF_EXPORT. In that case the
// caller's own invocation count (one-argument overload) is used instead.
// The `called` argument is not read by this function.
int GlobalAnalyzer::CallerInvokes(Declaration* caller, Declaration* called)
{
	if (caller->mType != DT_CONST_FUNCTION)
		return 1;

	// Must be inlined (or requested to be) ...
	if (!(caller->mFlags & (DTF_INLINE | DTF_REQUEST_INLINE)))
		return 1;

	// ... and must not carry any flag that excludes it from this propagation.
	if (caller->mFlags & (DTF_PREVENT_INLINE | DTF_FUNC_RECURSIVE | DTF_FUNC_VARIABLE | DTF_EXPORT))
		return 1;

	const int viaCaller = CallerInvokes(caller);
	if (viaCaller > 1)
		return viaCaller;
	return 1;
}

void GlobalAnalyzer::AutoInline(void)
{
for (int i = 0; i < mFunctions.Size(); i++)
Expand Down Expand Up @@ -181,33 +200,39 @@ void GlobalAnalyzer::AutoInline(void)
dec = dec->mNext;
}

int invokes = CallerInvokes(f);
int cost = (f->mComplexity - 20 * nparams - 10);

// printf("CHECK INLINING %s (%d) %d * (%d - 1)\n", f->mIdent->mString, f->mComplexity, cost, f->mCallers.Size());
// printf("CHECK INLINING %s (%d) %d * (%d - 1)\n", f->mIdent->mString, f->mComplexity, cost, invokes);

bool doinline = false;
if ((f->mCompilerOptions & COPT_OPTIMIZE_INLINE) && (f->mFlags & DTF_REQUEST_INLINE))
doinline = true;
if (f->mLocalSize < 100)
{
if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE) && ((cost - 20) * (f->mCallers.Size() - 1) <= 20))
if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE) && ((cost - 20) * (invokes - 1) <= 20))
{
if (f->mCallers.Size() == 1 && f->mComplexity > 100)
if (f->mCompilerOptions & COPT_OPTIMIZE_CODE_SIZE)
{
if (invokes == 1 && f->mSection == f->mCallers[0]->mSection || cost < 0)
doinline = true;
}
else if (invokes == 1 && f->mComplexity > 100)
{
// printf("CHECK INLINING2 %s <- %s %d\n", f->mIdent->mString, f->mCallers[0]->mIdent->mString, f->mCallers[0]->mCalled.Size());
if (cost < 0 || f->mCallers[0]->mComplexity + cost < 1000 || f->mCallers[0]->mCalled.Size() == 1)
doinline = true;
}
else
else
doinline = true;
}
if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE_ALL) && (cost * (f->mCallers.Size() - 1) <= 10000))
if ((f->mCompilerOptions & COPT_OPTIMIZE_AUTO_INLINE_ALL) && (cost * (invokes - 1) <= 10000))
doinline = true;
}

if (doinline)
{
// printf("INLINING %s %d * (%d - 1)\n", f->mIdent->mString, cost, f->mCallers.Size());
// printf("INLINING %s %d * (%d - 1)\n", f->mIdent->mString, cost, invokes);

f->mFlags |= DTF_INLINE;
for (int j = 0; j < f->mCallers.Size(); j++)
Expand Down
2 changes: 2 additions & 0 deletions oscar64/GlobalAnalyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ class GlobalAnalyzer
GrowingArray<Declaration*> mGlobalVariables;

void AnalyzeInit(Declaration* mdec);
int CallerInvokes(Declaration* called);
int CallerInvokes(Declaration* caller, Declaration* called);

Declaration* Analyze(Expression* exp, Declaration* procDec, bool lhs, bool aliasing);

Expand Down
83 changes: 70 additions & 13 deletions oscar64/InterCode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23479,7 +23479,7 @@ void InterCodeProcedure::Close(void)

DisassembleDebug("mapped variabled");

ReduceTemporaries();
ReduceTemporaries(true);

DisassembleDebug("Reduced Temporaries");

Expand Down Expand Up @@ -24224,12 +24224,20 @@ void InterCodeProcedure::HoistCommonConditionalPath(void)
}


void InterCodeProcedure::ReduceTemporaries(void)
void InterCodeProcedure::ReduceTemporaries(bool final)
{
NumberSet* collisionSet;
int i, j, numRenamedTemps;
int numTemps = mTemporaries.Size();

NumberSet callerSaved(numTemps);

if (final)
{
ResetVisited();
mEntryBlock->BuildCallerSaveTempSet(callerSaved);
}

ResetVisited();
mEntryBlock->BuildLocalTempSets(numTemps);

Expand Down Expand Up @@ -24261,24 +24269,73 @@ void InterCodeProcedure::ReduceTemporaries(void)

NumberSet usedTemps(numTemps);

for (i = 0; i < numTemps; i++)
if (final)
{
usedTemps.Clear();

for (j = 0; j < numTemps; j++)
for (int sz = 4; sz > 0; sz >>= 1)
{
if (mRenameTable[j] >= 0 && (collisionSet[i][j] || InterTypeSize[mTemporaries[j]] != InterTypeSize[mTemporaries[i]]))
for (i = 0; i < numTemps; i++)
{
usedTemps += mRenameTable[j];
if (InterTypeSize[mTemporaries[i]] == sz && !callerSaved[i])
{
usedTemps.Clear();

for (j = 0; j < numTemps; j++)
{
if (mRenameTable[j] >= 0 && collisionSet[i][j])
usedTemps += mRenameTable[j];
}

j = 0;
while (usedTemps[j])
j++;

mRenameTable[i] = j;
if (j >= numRenamedTemps) numRenamedTemps = j + 1;
}
}
for (i = 0; i < numTemps; i++)
{
if (InterTypeSize[mTemporaries[i]] == sz && callerSaved[i])
{
usedTemps.Clear();

for (j = 0; j < numTemps; j++)
{
if (mRenameTable[j] >= 0 && collisionSet[i][j])
usedTemps += mRenameTable[j];
}

j = 0;
while (usedTemps[j])
j++;

mRenameTable[i] = j;
if (j >= numRenamedTemps) numRenamedTemps = j + 1;
}
}
}
}
else
{
for (i = 0; i < numTemps; i++)
{
usedTemps.Clear();

j = 0;
while (usedTemps[j])
j++;
for (j = 0; j < numTemps; j++)
{
if (mRenameTable[j] >= 0 && (collisionSet[i][j] || InterTypeSize[mTemporaries[j]] != InterTypeSize[mTemporaries[i]]))
{
usedTemps += mRenameTable[j];
}
}

j = 0;
while (usedTemps[j])
j++;

mRenameTable[i] = j;
if (j >= numRenamedTemps) numRenamedTemps = j + 1;
mRenameTable[i] = j;
if (j >= numRenamedTemps) numRenamedTemps = j + 1;
}
}

mTemporaries.SetSize(numRenamedTemps, true);
Expand Down
2 changes: 1 addition & 1 deletion oscar64/InterCode.h
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ friend class InterCodeBasicBlock;
bool ModifiesGlobal(int varindex);

void MapVariables(void);
void ReduceTemporaries(void);
void ReduceTemporaries(bool final = false);
void Disassemble(FILE* file);
void Disassemble(const char* name, bool dumpSets = false);
protected:
Expand Down
8 changes: 6 additions & 2 deletions oscar64/Linker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,8 @@ void LinkerRegion::PlaceStackSection(LinkerSection* stackSection, LinkerSection*

void Linker::CopyObjects(bool inlays)
{
bool errors = false;

for (int i = 0; i < mObjects.Size(); i++)
{
LinkerObject* obj = mObjects[i];
Expand Down Expand Up @@ -809,7 +811,9 @@ void Linker::CopyObjects(bool inlays)
{
if (!obj->mRegion)
{
mErrors->Error(obj->mLocation, ERRR_INSUFFICIENT_MEMORY, "Could not place object", obj->mIdent);
mErrors->Error(obj->mLocation, errors ? EWARN_INSUFFICIENT_MEMORY : ERRR_INSUFFICIENT_MEMORY, "Could not place object", obj->mIdent);
if (mCompilerOptions & COPT_ERROR_FILES)
errors = true;

int avail = 0;
for (int i = 0; i < mRegions.Size(); i++)
Expand Down Expand Up @@ -1645,7 +1649,7 @@ bool Linker::WriteCrtFile(const char* filename, uint16 id)

bool Linker::WriteMapFile(const char* filename)
{
bool banked = mCartridgeBankUsed[0];
bool banked = mCartridgeBankUsed[0] || mCartridgeBankUsed[1];

FILE* file;
fopen_s(&file, filename, "wb");
Expand Down
Loading

0 comments on commit 57d8747

Please sign in to comment.