diff --git a/45gs02_optimization_guide.md b/45gs02_optimization_guide.md index 0cd5478..d07d680 100644 --- a/45gs02_optimization_guide.md +++ b/45gs02_optimization_guide.md @@ -8,7 +8,6 @@ This guide details the specialized optimization techniques for the 45GS02 CPU fo - **Z Register**: A general-purpose 8-bit register, similar to A, X, Y. - **Q Register**: A 32-bit composite register `[Z:Y:X:A]`. Operations on Q affect all four underlying 8-bit registers. - **New Instructions**: `LDZ`, `STZ`, `NEG`, `ASR`, `LDQ`, `STQ`, `ADCQ`, `SBCQ`, `CMPQ`, `ASRQ`, `RORQ`, `ROLQ`, `INC16`, `DEC16`, `PHW`, `PLW`, `BRA` (always). -- **Extended NOP**: `NOP #cycles` allows for precise multi-cycle delays. ## 1. Z Register for Repeated Stores @@ -98,27 +97,6 @@ The 45GS02 includes an `ASR` instruction, which performs an arithmetic shift rig ``` **Benefit**: Saves 2 bytes and 2 cycles. -## 5. Extended NOP -The 45GS02 `NOP` instruction can take an operand to specify a delay in cycles, making it ideal for precise timing loops or replacing multiple `NOP` instructions. - -**Pattern**: Multiple consecutive `NOP` instructions. - -**Before:** -```asm - NOP - NOP - NOP - NOP -``` - -**After:** -```asm - NOP #8 ; Four NOPs (2 cycles each) replaced with NOP for 8 cycles -; OPT: NOP removed -; OPT: NOP removed -; OPT: NOP removed -``` -**Benefit**: Saves bytes by consolidating multiple `NOP`s into a single instruction with a cycle count. (Each NOP is 2 cycles; so NOP #8 replaces 4 NOP instructions). This guide covers the major 45GS02-specific optimizations within `opt6502`. Utilizing these features effectively can lead to highly performant and compact code on the MEGA65. diff --git a/README.md b/README.md index 0264eb8..b2766b6 100644 --- a/README.md +++ b/README.md @@ -243,9 +243,7 @@ When `-cpu 45gs02` is specified: - Saves 2 bytes, 2 cycles - Preserves sign bit -28. **Extended NOP** - - Multiple NOPs → Single NOP with cycle count - - NOP, NOP, NOP → NOP #6 + ## CPU-Specific Optimization Summary @@ -305,7 +303,7 @@ When `-cpu 45gs02` is specified: - **STZ stores Z register, NOT zero** (critical difference!) - Z register for repeated value stores (any value, not just zero) - Q register composite [Z:Y:X:A] for 32-bit operations -- NEG, ASR, extended NOP instructions +- NEG, ASR instructions **Passes Applied**: 1. Subroutine inlining @@ -313,7 +311,6 @@ When `-cpu 45gs02` is specified: 3. Z register repeated value optimization 4. Q register 32-bit operation detection 5. NEG/ASR pattern replacement -6. Extended NOP consolidation **Special Handling**: - Never converts LDA #0, STA to STZ (would store Z register!) diff --git a/opt6502.c b/opt6502.c index 45fd7c2..c0e996d 100644 --- a/opt6502.c +++ b/opt6502.c @@ -620,7 +620,8 @@ void optimize_dead_code(Program *prog) { // Unconditional jump followed by unreachable code if ((strcmp(prog->lines[i].opcode, "JMP") == 0 || strcmp(prog->lines[i].opcode, "RTS") == 0 || - strcmp(prog->lines[i].opcode, "RTI") == 0) && + strcmp(prog->lines[i].opcode, "RTI") == 0 || + (prog->is_45gs02 && strcmp(prog->lines[i].opcode, "BRA") == 0) ) && !prog->lines[i+1].is_branch_target && !prog->lines[i+1].is_label) { @@ -655,6 +656,10 @@ void optimize_jumps(Program *prog) { // Branch to next instruction (remove) if ((strstr(prog->lines[i].opcode, "BEQ") || + strstr(prog->lines[i].opcode, "BNE") || + ( prog->is_45gs02 && + strstr(prog->lines[i].opcode, "BRA") != NULL + ) || strstr(prog->lines[i].opcode, "BNE") || strstr(prog->lines[i].opcode, "BCC") || strstr(prog->lines[i].opcode, "BCS")) && i + 1 < prog->count) { @@ -1625,42 +1630,7 @@ void optimize_45gs02_instructions(Program *prog) { // MAP instruction allows memory banking // Pattern: Multiple bank switches could be optimized - // ===== Extended NOP modes ===== - // 45GS02 NOP takes operand for variable-cycle delays - // Pattern: Multiple NOPs -> single NOP with cycle count - if (i + 2 < prog->count && - strcmp(prog->lines[i].opcode, "NOP") == 0 && - strcmp(prog->lines[i+1].opcode, "NOP") == 0 && - strcmp(prog->lines[i+2].opcode, "NOP") == 0 && - !prog->lines[i+1].no_optimize && !prog->lines[i+2].no_optimize && - !prog->lines[i+1].is_branch_target && !prog->lines[i+2].is_branch_target) { - - // Three NOPs (6 cycles) -> NOP with operand - // Count consecutive NOPs - int nop_count = 1; - for (int j = i + 1; j < prog->count && j < i + 10; j++) { - if (strcmp(prog->lines[j].opcode, "NOP") == 0 && - !prog->lines[j].no_optimize && !prog->lines[j].is_branch_target) { - nop_count++; - } else { - break; - } - } - - if (nop_count >= 3) { - // Replace with extended NOP - int cycles = nop_count * 2; - char nop_operand[16]; - snprintf(nop_operand, 16, "#%d", cycles); - strcpy(prog->lines[i].operand, nop_operand); - - // Mark subsequent NOPs as dead - for (int j = 1; j < nop_count; j++) { - prog->lines[i+j].is_dead = true; - } - prog->optimizations++; - } - } + } // Note: Q register optimization requires careful register tracking