/*
 * Apple II Elite string extractor.
 *
 * Expects the single-file descrambled binary used in the Apple II
 * disassembly.  Attempts to emulate some of the functions and
 * upper/lower case transformation.
 *
 * Thanks: https://xania.org/201406/elites-crazy-string-format
 *         https://github.com/Kroc/elite-harmless/tree/master/src/text
 *
 * TODO: leading spaces should be "&nbsp;" so they're visible in the browser.
 *
 * Copyright 2020 faddenSoft.  Licensed under the Apache License, Version 2.0.
 */
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

// Address that corresponds to file offset 0.  load() subtracts this from
// the *_START addresses below to get each chunk's position in the file.
const int START = 0x0a00;

// Start address and byte length (end address minus start address) of the
// flight, docked, and "ext" string tables.
const size_t FLIGHT_START = 0x0b60;
const size_t FLIGHT_LEN = 0x0f1b - FLIGHT_START;
const size_t DOCKED_START = 0x0f40;
const size_t DOCKED_LEN = 0x1a5e - DOCKED_START;
const size_t EXT_START = 0x1a92;
const size_t EXT_LEN = 0x1cf9 - EXT_START;

// Start address and byte length of the digram table.  Each entry is two
// bytes; the flight digrams begin part-way in (see printFlightDigram()).
const size_t DIGRAM_START = 0x4d26;
const size_t DIGRAM_LEN = 0x4d80 - DIGRAM_START;

// Every flight-table byte is XORed with FLIGHT_XOR before being decoded.
// Docked/ext bytes are XORed with DOCKED_XOR; a raw byte equal to
// DOCKED_XOR (i.e. a decoded zero) terminates a docked/ext message.
const uint8_t FLIGHT_XOR = 0x23;
const uint8_t DOCKED_XOR = 0x57;

// Raw (still XOR-encoded) copies of the tables, allocated and filled by
// load().  gDigrams is stored as-is and is not XOR-decoded when used.
uint8_t* gFlightData;
uint8_t* gDockedData;
uint8_t* gExtData;
uint8_t* gDigrams;

//
// Reads "len" bytes located "offset" bytes from the start of "fp" into
// "addr".  "msgStr" names the chunk in the error message written to stderr
// when the seek or the read fails.
//
// Returns true only if the full "len" bytes were read.
//
bool readChunk(FILE* fp, uint8_t* addr, size_t offset, size_t len,
        const char* msgStr) {
    const bool positioned = (fseek(fp, offset, SEEK_SET) == 0);
    if (!positioned) {
        fprintf(stderr, "Failed to seek to %s\n", msgStr);
        return false;
    }

    const size_t actual = fread(addr, 1, len, fp);
    if (actual == len) {
        return true;
    }

    // Short read: hit EOF or an I/O error before getting everything.
    fprintf(stderr, "Failed to read %s\n", msgStr);
    return false;
}

bool load(const char* fileName) {
    FILE* fp = fopen(fileName, "rb");
    if (fp == NULL) {
        fprintf(stderr, "Unable to open '%s': %s\n", fileName, strerror(errno));
        return false;
    }

    gFlightData = new uint8_t[FLIGHT_LEN];
    if (!readChunk(fp, gFlightData, FLIGHT_START - START, FLIGHT_LEN,
            "flight data")) {
        fclose(fp);
        return false;
    }

    gDockedData = new uint8_t[DOCKED_LEN];
    if (!readChunk(fp, gDockedData, DOCKED_START - START, DOCKED_LEN,
            "docked data")) {
        fclose(fp);
        return false;
    }

    gExtData = new uint8_t[EXT_LEN];
    if (!readChunk(fp, gExtData, EXT_START - START, EXT_LEN,
            "ext data")) {
        fclose(fp);
        return false;
    }

    gDigrams = new uint8_t[DIGRAM_LEN];
    if (!readChunk(fp, gDigrams, DIGRAM_START - START, DIGRAM_LEN,
            "digram data")) {
        fclose(fp);
        return false;
    }

    return true;
}

//
// Writes one character to "out", replacing the HTML-significant characters
// '&', '<', and '>' with their entity equivalents.  Every other value is
// written unchanged.
//
void printHtml(FILE* out, uint8_t ch) {
    switch (ch) {
    case '&':
        fputs("&amp;", out);
        break;
    case '<':
        fputs("&lt;", out);
        break;
    case '>':
        fputs("&gt;", out);
        break;
    default:
        fputc(ch, out);
        break;
    }
}

void printFlightToken(FILE* out, uint8_t tok);

//
// Prints flight message number "index" from gFlightData.
//
// Messages are stored back to back, each terminated by a raw 0x00 byte.
// Every message byte is XORed with FLIGHT_XOR and handed to
// printFlightToken(), which may recurse back into this function.
//
// All scanning is confined to the FLIGHT_LEN bytes of the buffer.  If the
// requested message does not exist, a diagnostic is written to stderr and
// nothing is printed.  If the last message's terminator lies beyond the end
// of the buffer, the message is taken to end at the end of the buffer.
//
void printFlightMessage(FILE* out, uint8_t index) {
    //printf("<%02x>", index);
    const uint8_t* const dataEnd = gFlightData + FLIGHT_LEN;
    const uint8_t* cursor = gFlightData;

    // Find the Nth message (0-145) by stepping over N terminators.  Each
    // search starts one byte past the cursor: past the first byte of the
    // table on the first pass, past the previous terminator afterwards.
    for (uint8_t remaining = index; remaining > 0; remaining--) {
        const uint8_t* scanFrom = cursor + 1;
        const uint8_t* terminator = NULL;
        if (scanFrom < dataEnd) {
            terminator = (const uint8_t*) memchr(scanFrom, 0x00,
                (size_t) (dataEnd - scanFrom));
        }
        if (terminator == NULL) {
            fprintf(stderr,
                "WHOOPS!  flight message %u not found, bufLen=%zu\n",
                (unsigned) index, FLIGHT_LEN);
            return;
        }
        cursor = terminator;
    }
    if (index != 0) {
        // move past the '\0'
        cursor++;
    }

    // Find the end of the message without leaving the buffer.
    const uint8_t* msgEnd = NULL;
    if (cursor < dataEnd) {
        msgEnd = (const uint8_t*) memchr(cursor, 0x00,
            (size_t) (dataEnd - cursor));
    }
    if (msgEnd == NULL) {
        msgEnd = dataEnd;
    }

    while (cursor < msgEnd) {
        printFlightToken(out, *cursor++ ^ FLIGHT_XOR);
    }
}

void printFlightDigram(FILE* out, uint8_t index) {
    const uint8_t* flightDigrams = gDigrams + (0x4d40 - 0x4d26);

    printFlightToken(out, flightDigrams[index * 2]);
    if (flightDigrams[index * 2 + 1] != '?') {
        printFlightToken(out, flightDigrams[index * 2 + 1]);
    }
}

// Case-conversion state for flight text, read and updated by
// printFlightToken():
// bit 7 set if we allow lower-case conversions; bit 6 set if
// the next A-Z should be converted to lower case
uint8_t gTextFlags = 0;

//
// Prints a flight token.  This might be a single letter, a digram, a
// magic function, or a recursive message expansion.
//
//  $00-0d: special values, e.g. current cash on hand
//  $0e-1f: canned messages 128-145
//  $20-5f: ASCII
//  $60-7f: canned messages 96-127
//  $80-9f: flight digram
//  $a0-ff: canned messages 0-95
//
// Values that only exist at run time in the game (cash, planet names, ...)
// are printed as bracketed or '$' placeholders.  ASCII output goes through
// printHtml(), and its case is adjusted according to gTextFlags, which
// this function also updates.
//
void printFlightToken(FILE* out, uint8_t tok) {
    //printf("{%02x}", tok);

    switch (tok) {
    case 0:
        fprintf(out, "$$$$$$$.0 Cr ");
        break;
    case 1:
        fprintf(out, " [gal#]");
        break;
    case 2:
        fprintf(out, "[cur-planet]");
        break;
    case 3:
        fprintf(out, "[sel-planet]");
        break;
    case 4:
        fprintf(out, "[cmdr-nam]");
        break;
    case 5:
        // same placeholder text as token 0
        fprintf(out, "$$$$$$$.0 Cr ");
        break;
    case 6:
        // set flag to $80 (enable lower-case conversion)
        gTextFlags = 0x80;
        break;
    case 8:
        // set flag to $00 (force caps)
        gTextFlags = 0x00;
        break;
    case 9:
        fprintf(out, "    :");      // should tab to column 21
        break;
    default:
        if ((tok & 0x80) != 0) {
            if (tok >= 0xa0) {
                // $a0-ff --> messages 0-95
                printFlightMessage(out, tok - 0xa0);
            } else {
                // $80-9f --> digram 0-31
                printFlightDigram(out, tok & 0x7f);
            }
        } else if (tok >= 0x60) {
            // $60-7f --> messages 96-127 (the token value is the index)
            printFlightMessage(out, tok);
        } else if (tok < 0x0e) {
            // 7/a/b/c/d: print as text (e.g. Ctrl+G beeps); the other
            // values below $0e were handled by the outer switch
            switch (tok) {
            case 0x07:
                fprintf(out, "[beep]");
                break;
            case 0x0a:
                fprintf(out, "[nl] ");
                break;
            case 0x0c:
                fprintf(out, "[lf] ");
                // not in original, but this is the behavior
                if (gTextFlags != 0) {
                    gTextFlags = 0x80;
                }
                break;
            default:
                // remaining control codes shown in caret notation
                fprintf(out, "^%c", tok + 0x40);
            }
        } else if (tok < 0x20) {
            // $0e-1f --> messages 128-145 ($0e + $72 = 128)
            printFlightMessage(out, tok + 0x72);
        } else {
            // $20-5f
            if (gTextFlags == 0) {
                // no change
            } else if ((gTextFlags & 0x80) != 0) {
                if ((gTextFlags & 0x40) != 0) {
                    // both flags set
                    if (tok >= 'A' && tok < '[') {
                        tok += 0x20;    // convert to lower
                    } else {
                        // a non-letter ends the lower-case run
                        gTextFlags &= 0xbf;     // clear bit 6
                    }
                } else {
                    // only bit 7 set
                    if (tok >= 'A') {
                        // print as-is, but make next char lower
                        gTextFlags |= 0x40;
                    }
                }
            } else if ((gTextFlags & 0x40) != 0) {
                // only bit 6 set
                gTextFlags &= 0xbf;
            }
            printHtml(out, tok);
        }
    }
}

//
// Emits one HTML table row for each flight token in [first, last].
//
// Each row holds a running message number (starting at "index"), the token
// value in hex, and the expanded text of the token.
//
void dumpFlightRange(FILE* out, uint8_t first, uint8_t last, int index) {
    // Count in an int: a uint8_t counter wraps after 0xff, and a wrap guard
    // of the form "tok >= first" can never stop the loop when first is 0.
    for (int tok = first; tok <= last; tok++, index++) {
        // reset lower-case enable flag
        gTextFlags = 0x80;

        // Some things are shown in all caps.  Setting gTextFlags to 0x00
        // for specific tokens (e.g. 0xff) would force that here, but the
        // same string might have different handling in different contexts.

        fprintf(out, "  <tr><td>%d</td><td>$%02x</td><td>", index,
            (unsigned) tok);
        printFlightToken(out, (uint8_t) tok);
        fprintf(out, "</td></tr>\n");
    }
}

//
// Dumps the 147 "canned" flight messages as HTML table rows.
//
// Messages 0-145 are reached through three token ranges; message 146 has
// no token of its own.
//
void dumpFlight(FILE* out) {
    static const struct {
        uint8_t firstTok;
        uint8_t lastTok;
        int firstIndex;
    } kTokenRanges[] = {
        { 0xa0, 0xff, 0 },
        { 0x60, 0x7f, 96 },
        { 0x0e, 0x1f, 128 },
    };
    const size_t rangeCount = sizeof(kTokenRanges) / sizeof(kTokenRanges[0]);

    for (size_t i = 0; i < rangeCount; i++) {
        dumpFlightRange(out, kTokenRanges[i].firstTok,
            kTokenRanges[i].lastTok, kTokenRanges[i].firstIndex);
    }

    // GAME OVER is printed by calling the print-message function directly
    fputs("  <tr><td>146</td><td></td><td>", out);
    printFlightMessage(out, 146);
    fputs("</td></tr>\n", out);
}


void printDockedToken(FILE* out, uint8_t token);

//
// Prints message number "index" from a docked-style string table.
//
// "data" points to "dataLen" bytes of raw table data.  Messages are stored
// back to back, each terminated by a raw byte equal to DOCKED_XOR (zero
// once decoded).  Every message byte is XORed with DOCKED_XOR and handed
// to printDockedToken(), which may recurse back into this function.
//
// Index 0 prints nothing.  All scanning is confined to the buffer: if the
// requested message does not exist, a diagnostic is written to stderr and
// nothing is printed.  If the last message's terminator lies beyond the end
// of the buffer, the message is taken to end at the end of the buffer.
//
void printDockedCommon(FILE* out, uint8_t index, const uint8_t* data,
        size_t dataLen) {
    //printf("<%02x>", index);
    if (index == 0) {
        return;
    }

    const uint8_t* const dataEnd = data + dataLen;
    const uint8_t* cursor = data;

    // Find the Nth message by stepping over N terminators.
    for (uint8_t remaining = index; remaining > 0; remaining--) {
        const uint8_t* terminator = (const uint8_t*) memchr(cursor,
            DOCKED_XOR, (size_t) (dataEnd - cursor));
        if (terminator == NULL) {
            fprintf(stderr, "WHOOPS!  message %u not found, bufLen=%zu\n",
                (unsigned) index, dataLen);
            return;
        }
        // move past the terminator
        cursor = terminator + 1;
    }

    // Find the end of the message without leaving the buffer.
    const uint8_t* msgEnd = (const uint8_t*) memchr(cursor, DOCKED_XOR,
        (size_t) (dataEnd - cursor));
    if (msgEnd == NULL) {
        msgEnd = dataEnd;
    }

    while (cursor < msgEnd) {
        printDockedToken(out, *cursor++ ^ DOCKED_XOR);
    }
}

// Prints message "index" from the docked string table (gDockedData).
// As with any printDockedCommon() lookup, index 0 prints nothing.
void printDockedMessage(FILE* out, uint8_t index) {
    printDockedCommon(out, index, gDockedData, DOCKED_LEN);
}
// Prints message "index" from the "ext" string table (gExtData), which is
// stored and decoded the same way as the docked table.
void printExtMessage(FILE* out, uint8_t index) {
    printDockedCommon(out, index, gExtData, EXT_LEN);
}

//
// Prints docked digram number "index".  Entries are two bytes each,
// counted from the start of gDigrams; each byte is printed as a docked
// token, except that a second byte of '?' is skipped.
//
void printDockedDigram(FILE* out, uint8_t index) {
    const uint8_t* pair = gDigrams + index * 2;
    const uint8_t second = pair[1];

    printDockedToken(out, pair[0]);
    if (second == '?') {
        return;
    }
    printDockedToken(out, second);
}

// State used by printDockedToken().

// While true, tokens >= $20 are passed to printFlightToken() instead of
// being decoded as docked tokens.  Token $06 sets it, token $05 clears it.
bool gFlightTokenMode = false;
// OR-ed into letters ('A' and above) while gTextUcaseFlag is non-zero.
uint8_t gTextUcaseMask = 0x00;
// Non-zero selects the "(tok | gTextUcaseMask) & gTextLcaseMask" transform
// for letters.
uint8_t gTextUcaseFlag = 0;
// AND-ed into letters when either flag is set; put back to $ff after every
// character printed from the $20-5a range.
uint8_t gTextLcaseMask = 0xff;
// Set to $ff by token $08 and after '.', ':' or ' ' is printed; cleared
// after any other character printed from the $20-5a range.
uint8_t gTextLcaseFlag = 0;

//
// Prints a docked token.  This might be a single letter, a digram, a
// magic function, or a recursive message expansion.
//
//  $00: (invalid)
//  $01-1f: (invokes function)
//  $20-5a: ASCII
//  $5b-80: planet description tokens
//  $81-d6: message index 0-55
//  $d7-ff: digrams 0-40
//
// NOTE(review): for $81-d6 the code passes the token value itself to
// printDockedMessage() rather than an index starting at 0 -- confirm which
// of the table above or the code reflects the intended numbering.
//
// Functions that only make sense inside the game (key waits, buffering,
// random names, ...) are printed as bracketed placeholders.  Case handling
// for ASCII output is driven by gTextUcaseMask/gTextUcaseFlag and
// gTextLcaseMask/gTextLcaseFlag, which this function also updates.
//
void printDockedToken(FILE* out, uint8_t tok) {
    //printf("{%02x}", tok);
    if (tok < 0x20) {
        // special functions; see table at $4ce6
        switch (tok) {
        case 0x01:
            // clear ucase mask
            gTextUcaseMask = 0x00;
            gTextUcaseFlag = 0x00;
            break;
        case 0x02:
            // set ucase mask
            gTextUcaseMask = 0x20;
            gTextUcaseFlag = 0x00;
            break;
        case 0x03:
        case 0x04:
            // same meaning as flight tokens 3 and 4
            printFlightToken(out, tok);
            break;
        case 0x05:
            // back to docked-token decoding
            gFlightTokenMode = false;
            break;
        case 0x06:
            // decode following tokens >= $20 as flight tokens
            gFlightTokenMode = true;
            break;
        case 0x07:
            fprintf(out, "[beep]");
            break;
        case 0x08:
            // htab 6, set lcase flag
            gTextLcaseFlag = 0xff;
            break;
        case 0x09:
            // htab 1, print HUD flight text?
            break;
        case 0x0a:
            fprintf(out, "[lf]");
            break;
        case 0x0b:
            fprintf(out, "[clr]");
            break;
        case 0x0c:
            fprintf(out, "[ff]");
            break;
        case 0x0d:
            gTextUcaseFlag = 0x80;
            gTextUcaseMask = 0x20;
            break;
        case 0x0e:
            // enable text buffering
            fprintf(out, "[buffer ON]");
            break;
        case 0x0f:
            // disable text buffering
            fprintf(out, "[buffer OFF]");
            break;
        case 0x10:
            fprintf(out, "A");
            break;
        case 0x11:
            // clears bit 6 of the flight-text flags
            gTextFlags &= 0xbf;
            fprintf(out, "[species-ian]");
            break;
        case 0x12:
            fprintf(out, "[rnd-name]");
            break;
        case 0x13:
            // applies to the next letter only; the mask is put back to
            // $ff after the next ASCII character is printed
            gTextLcaseMask = 0xdf;
            break;
        case 0x14:
            fprintf(out, "[^T]");
            break;
        case 0x15:
            // erases stuff near bottom of text/hi-res screen
            fprintf(out, "[func15]");
            break;
        case 0x16:
            // wait for key, draw HUD text
            fprintf(out, "[waitkey]");
            break;
        case 0x17:
            // sets vposn to 9
            gTextUcaseFlag = 0x80;
            gTextUcaseMask = 0x20;
            break;
        case 0x18:
            // wait for key
            fprintf(out, "[waitkey]");
            break;
        case 0x19:
            printDockedMessage(out, 0xd8);  // INCOMING MESSAGE
            // pause
            break;
        case 0x1a:
            // input commander name
            fprintf(out, "[input-name]");
            break;
        case 0x1b:
            fprintf(out, "[navy-cap-name]");
            break;
        case 0x1c:
            fprintf(out, "[believed-to-have-jumped]");
            break;
        case 0x1d:
            // vposn 5
            gTextUcaseFlag = 0x80;
            gTextUcaseMask = 0x20;
            break;
        case 0x1e:
            fprintf(out, "[disk]");
            break;
        case 0x1f:
            fprintf(out, "[tape]");
            break;
        default:
            // shouldn't be here
            fprintf(out, "[???]");
            break;
        }
    } else if (gFlightTokenMode) {
       printFlightToken(out, tok);
    } else if (tok < '[') {
        // $20-5a: ASCII; only 'A' and above is case-adjusted
        if (tok >= 'A') {
            if (gTextUcaseFlag) {
                tok = (tok | gTextUcaseMask) & gTextLcaseMask;
            } else if (gTextLcaseFlag) {
                tok = tok & gTextLcaseMask;
            }
        }
        printHtml(out, tok);

        // the lcase flag stays set only while '.', ':' or ' ' was the
        // last character printed
        if (tok == '.' || tok == ':' || tok == ' ') {
            gTextLcaseFlag = 0xff;
        } else {
            gTextLcaseFlag = 0;
        }
        gTextLcaseMask = 0xff;
    } else if (tok < 0x81) {
        // $5b-80: planet description token, shown as a placeholder
        fprintf(out, "[planet%02x]", tok);
    } else if (tok < 0xd7) {
        // $81-d6: recursive message expansion
        printDockedMessage(out, tok);
    } else {
        // $d7-ff: digram
        printDockedDigram(out, tok - 0xd7);
    }
}

//
// Dumps the 256 docked messages as HTML table rows.
//
// Each row holds the message number, the matching token value in hex when
// the number falls in the $81-d6 range (blank otherwise), and the expanded
// text.  The docked text state is reset before every message.
//
void dumpDocked(FILE* out) {
    for (int msgNum = 0; msgNum < 256; msgNum++) {
        // start every message from the same decoding state
        gFlightTokenMode = false;
        gTextLcaseFlag = 0;
        gTextUcaseFlag = 0;
        gTextUcaseMask = 0;
        gTextLcaseMask = 0xff;

        fprintf(out, "  <tr><td>%d</td><td>", msgNum);

        const bool hasToken = !(msgNum < 0x81 || msgNum > 0xd6);
        if (hasToken) {
            fprintf(out, "$%02x</td><td>", msgNum);
        } else {
            fputs("</td><td>", out);
        }

        printDockedMessage(out, msgNum);
        fputs("</td></tr>\n", out);
    }
}

void dumpExt(FILE* out) {
    for (int index = 0; index < 27; index++) {
        gTextUcaseMask = gTextUcaseFlag = gTextLcaseFlag = 0;
        gTextLcaseMask = 0xff;
        fprintf(out, "  <tr><td>%d</td><td></td><td>", index);
        printExtMessage(out, index);
        fprintf(out, "</td></tr>\n");
    }
}

// Writes one docked-style string table as hex: each message on its own
// line, prefixed with its index, bytes shown after XOR with DOCKED_XOR.
// A decoded zero ends the line.
static void dumpRawStringTable(FILE* out, const uint8_t* data,
        size_t dataLen) {
    int index = 0;
    bool atLineStart = true;

    for (const uint8_t* ptr = data; ptr < data + dataLen; ptr++) {
        if (atLineStart) {
            fprintf(out, "%-3d: ", index);
            atLineStart = false;
        }
        const uint8_t val = *ptr ^ DOCKED_XOR;
        if (val == 0) {
            fprintf(out, "\n");
            atLineStart = true;
            index++;
        } else {
            fprintf(out, "%02x ", val);
        }
    }
}

//
// Dumps raw decrypted data, for debugging: the docked and ext string
// tables as hex, followed by the digram table.  All output, including the
// section headings, goes to "out".
//
void dumpRawDocked(FILE* out) {
    fprintf(out, "ALL STRINGS:\n");
    dumpRawStringTable(out, gDockedData, DOCKED_LEN);

    fprintf(out, "\nEXT STRINGS:\n");
    dumpRawStringTable(out, gExtData, EXT_LEN);

    // Table entries from this one onward are also listed with a second,
    // zero-based number in the left-hand column.
    const int FLIGHT_OFF = 13;

    fprintf(out, "\nDIGRAMS:\n");
    int index = 0;
    // Entries are two bytes each; stop before a trailing partial entry.
    for (size_t off = 0; off + 1 < DIGRAM_LEN; off += 2, index++) {
        const uint8_t* pair = gDigrams + off;
        if (pair[0] < 0x20) {
            // control-code entry: show both bytes in caret notation
            fprintf(out, "         %-3d($%02x) ^%c^%c\n", index, index,
                pair[0] + 0x40, pair[1] + 0x40);
        } else if (index < FLIGHT_OFF) {
            fprintf(out, "         %-3d($%02x) %c%c\n", index, index,
                pair[0], pair[1]);
        } else {
            fprintf(out, "%-3d($%02x) %-3d($%02x) %c%c\n",
                index - FLIGHT_OFF, index - FLIGHT_OFF, index, index,
                pair[0], pair[1]);
        }
    }
}

//
// Entry point.  Takes the path of the Elite binary as the only argument
// and writes the flight, docked, and ext string tables to stdout as HTML
// table rows, each group preceded by an HTML comment.
//
// Exit status: 0 on success, 1 if the file could not be loaded, 2 on a
// usage error.
//
int main(int argc, char** argv) {
    if (argc != 2) {
        fprintf(stderr, "Usage: extract <Elite>\n");
        return 2;
    }

    const char* inputPath = argv[1];
    if (!load(inputPath)) {
        return 1;
    }

    // For debugging, dumpRawDocked(stdout) can be called here.

    fputs("<!-- flight strings -->\n", stdout);
    dumpFlight(stdout);

    fputs("<!-- docked strings -->\n", stdout);
    dumpDocked(stdout);

    fputs("<!-- ext strings -->\n", stdout);
    dumpExt(stdout);

    return 0;
}
