/*
 * ambpack: packs, unpacks and lists AMB archives (Ancient Machine Book)
 *
 * http://ambook.sourceforge.net
 *
 * Copyright (C) 2020 Mateusz Viste
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <dirent.h>

#include <sys/stat.h>   /* mkdir */


#define PVER "20201217"
#define PDATE "2020"

#ifdef SLASHPATHSEP
#define PATHSEP "/"
#else
#define PATHSEP "\\"
#endif


/* portable front-end to the directory creation call, whose prototype is not
 * the same on every OS: on WIN32 builds _mkdir() is used, everywhere else
 * mkdir() is called with read/write/execute rights for the owner only.
 * returns whatever the underlying call returned (0 on success). */
static int makedirectory(const char *d) {
  int result;
  #ifdef WIN32
  result = _mkdir(d);
  #else
  result = mkdir(d, S_IRWXU);
  #endif
  return(result);
}


static void amb_help(void) {
  puts("ambpack is an archiver that packs/unpacks AMB (Ancient Machine Book) files.");
  puts("");
  puts("usage: ambpack <action> <input_file|input_dir> [<output_file|output_dir>]");
  puts("");
  puts("actions:");
  puts("");
  puts("  c   creates an AMB file by packing the content of a directory");
  puts("  cc  same as 'c', but also converts AMA files from UTF-8 to target codepage");
  puts("  u   unpacks an AMB file to a directory of the same name");
  puts("  uc  same as 'u', but also converts AMA files to UTF-8");
  puts("  l   lists the content of an AMB file");
  puts("  t   tests an AMB file (checksums verification)");
  puts("");
  puts("NOTE: the cc action requires an unicode.map file to be included in the AMB set");
  puts("      so it can map unicode glyphs to the target codepage. Such file can be");
  puts("      generated by the utf8tocp program.");
  puts("");
  puts("ambpack ver " PVER " -- Copyright (C) " PDATE " Mateusz Viste");
}


/* computes the 16-bit BSD checksum of the first flen bytes of buff: for each
 * byte the running sum is rotated right by one bit, then the byte is added
 * (modulo 65536). returns 0 for an empty buffer. */
static unsigned short bsd_sum(const void *buff, unsigned short flen) {
  const unsigned char *cur = buff;
  unsigned short remaining = flen;
  unsigned short acc = 0;
  while (remaining != 0) {
    /* 16-bit rotate right: bit 0 wraps around to bit 15 */
    acc = (unsigned short)((acc >> 1) | ((acc & 1) << 15));
    acc = (unsigned short)(acc + *cur);
    cur++;
    remaining--;
  }
  return(acc);
}


/* scans the first listlen entries of list for a name equal to fname,
 * ignoring case. returns the index of the first match, or -1 if there is
 * none (which is always the case when listlen is zero or negative) */
static int findfileinlist(char * const *list, int listlen, const char *fname) {
  int pos = 0;
  while (pos < listlen) {
    if (strcasecmp(list[pos], fname) == 0) {
      return(pos);
    }
    pos++;
  }
  return(-1);
}


/* decodes the UTF-8 sequence starting at s into a 16-bit unicode value and
 * stores in *blen the number of bytes the sequence occupies (1 to 4).
 * the unicode "replacement" character (0xFFFD) is returned for illegal lead
 * bytes (10xxxxxx and 11111xxx, blen = 1) and for 4-bytes sequences, which
 * do not fit in 16 bits (blen = 4). continuation bytes are not validated,
 * only their 6 low bits are used. */
static unsigned short readutf8char(unsigned char *s, int *blen) {
  const unsigned char lead = s[0];
  unsigned short wc;
  int extra, k;

  if (lead < 128) { /* low-ascii, encoded as itself */
    *blen = 1;
    return(lead);
  }
  if ((lead < 192) || (lead >= 248)) { /* not a valid lead byte */
    *blen = 1;
    return(0xfffdu);
  }
  if (lead >= 240) { /* 4-bytes codes (unsupported) */
    *blen = 4;
    return(0xfffdu);
  }

  if (lead < 224) { /* 110xxxxx: one continuation byte */
    extra = 1;
    wc = lead & 31;
  } else {          /* 1110xxxx: two continuation bytes */
    extra = 2;
    wc = lead & 15;
  }
  /* append 6 bits of payload per continuation byte */
  for (k = 1; k <= extra; k++) {
    wc = (unsigned short)((wc << 6) | (s[k] & 63));
  }
  *blen = extra + 1;
  return(wc);
}


/* looks up a unicode value in the 128-entries unicodemap table, where entry
 * n describes codepage byte 128 + n. returns the codepage byte (128..255) of
 * the first matching entry, or -1 if the table does not contain the value */
static int unicode2cp(unsigned short unicode, const unsigned short *unicodemap) {
  const unsigned short *entry = unicodemap;
  const unsigned short *mapend = unicodemap + 128;
  while (entry != mapend) {
    if (*entry == unicode) {
      return((int)(entry - unicodemap) + 128);
    }
    entry++;
  }
  return(-1);
}


/* packs all the files found in directory dirname into a new AMB archive
 * named outfile (which must not exist yet). the directory must contain an
 * index.ama file, must not contain subdirectories, and every file must have
 * an ASCII name of at most 12 characters and be at most 65535 bytes long.
 * if utfflag is non-zero, the content of every file except unicode.map is
 * converted from UTF-8 to the codepage described by the unicode.map file
 * found in the directory.
 * returns 0 on success, 1 on failure */
static int amb_create(const char *dirname, const char *outfile, int utfflag) {
  DIR *d = NULL;
  struct dirent *de = NULL;
  FILE *fd_amb = NULL;
  unsigned short i, fcount = 0;
  char *flist[65536];
  int ecode = 1; /* assume failure */
  unsigned short unicodemap[128];
  int unicodemapid = -1;

  /* try opening the output file (should fail, file is expected not to exist) */
  fd_amb = fopen(outfile, "rb");
  if (fd_amb != NULL) {
    printf("ERROR: file '%s' already exists", outfile);
    puts("");
    goto ERR_QUIT;
  }

  /* open directory */
  d = opendir(dirname);
  if (d == NULL) {
    printf("ERROR: failed to open directory '%s'", dirname);
    puts("");
    goto ERR_QUIT;
  }

  puts("Computing the list of files...");

  /* get the list of files */
  for (de = readdir(d); de != NULL; de = readdir(d)) {
    /* filter out unwanted things */
    if (de->d_name[0] == '.') continue; /* skip '.', '..' and hidden files */
    if (de->d_type == DT_DIR) {
      printf("ERROR: a subdirectory has been found (%s)", de->d_name);
      puts("");
      goto ERR_QUIT;
    }
    /* check filename length and look for non-ascii characters in filename */
    for (i = 0; de->d_name[i] != 0; i++) {
      if (i > 11) {
        printf("ERROR: filename too long (%s), max len is 12 characters", de->d_name);
        puts("");
        goto ERR_QUIT;
      }
      if (de->d_name[i] & 128) {
        printf("ERROR: filename '%s' contains invalid (non-ASCII) characters", de->d_name);
        puts("");
        goto ERR_QUIT;
      }
    }
    /* the AMB header stores the amount of files on 16 bits */
    if (fcount == 65535) {
      puts("ERROR: too many files in directory (max: 65535)");
      goto ERR_QUIT;
    }
    flist[fcount] = strdup(de->d_name);
    if (flist[fcount] == NULL) {
      puts("ERROR: out of memory");
      goto ERR_QUIT;
    }
    fcount++;
  }

  /* is index.ama present? */
  if (findfileinlist(flist, fcount, "index.ama") < 0) {
    puts("ERROR: no 'index.ama' file found");
    goto ERR_QUIT;
  }

  /* look for unicode.map, maybe it will be needed */
  unicodemapid = findfileinlist(flist, fcount, "unicode.map");

  /* if UTF-8 conversion required, look for unicode.map */
  if (utfflag != 0) {
    unsigned char buff[258]; /* must be more than 256 */
    FILE *fd;
    if (unicodemapid < 0) {
      puts("ERROR: cannot perform UTF-8 conversion without a unicode.map file");
      goto ERR_QUIT;
    }
    /* load unicode map (could be named UNIcode.MaP or any other case!) */
    snprintf((char *)buff, sizeof(buff), "%s" PATHSEP "%s", dirname, flist[unicodemapid]);
    fd = fopen((char *)buff, "rb");
    if (fd == NULL) {
      puts("ERROR: failed to open the unicode.map file");
      goto ERR_QUIT;
    }
    /* ask for more than 256 bytes so an oversized file gets detected */
    if (fread(buff, 1, sizeof(buff), fd) != 256) {
      puts("ERROR: invalid unicode.map file");
      fclose(fd);
      goto ERR_QUIT;
    }
    fclose(fd);
    /* load the table, file contains little-endian values */
    for (i = 0; i < 128; i++) {
      unicodemap[i] = buff[i * 2];
      unicodemap[i] |= buff[i * 2 + 1] << 8;
    }
  }

  /* look for duplicate filenames */
  for (i = 1; i < fcount; i++) {
    int dup = findfileinlist(flist, i, flist[i]);
    if (dup >= 0) {
      printf("ERROR: duplicate filenames found ('%s' and '%s')", flist[i], flist[dup]);
      puts("");
      goto ERR_QUIT;
    }
  }

  puts("Packing files...");

  fd_amb = fopen(outfile, "wb");
  if (fd_amb == NULL) {
    printf("ERROR: failed to create output file '%s'", outfile);
    puts("");
    goto ERR_QUIT;
  }

  /* write the AMB header */
  fprintf(fd_amb, "AMB1%c%c", fcount & 255, fcount >> 8);

  /* write a dummy (all-spaces) list of files */
  for (i = 0; i < fcount; i++) {
    if (fwrite("                    ", 1, 20, fd_amb) != 20) {
      puts("ERROR: write failure");
      goto ERR_QUIT;
    }
  }

  /* process each file and write it to the archive */
  for (i = 0; i < fcount; i++) {
    FILE *fd;
    uint16_t bsum;
    long offset;
    size_t bytes, t;
    uint8_t buff[65540]; /* 65535 + a few spare bytes */
    printf("  %s --> %s", flist[i], outfile);
    if (utfflag != 0) printf("  (conversion from UTF-8)");
    puts("");
    /* open file to be inserted into amb */
    snprintf((char *)buff, sizeof(buff), "%s" PATHSEP "%s", dirname, flist[i]);
    fd = fopen((char *)buff, "rb");
    if (fd == NULL) {
      printf("ERROR: failed to open file '%s'", (char *)buff);
      puts("");
      goto ERR_QUIT;
    }
    /* move amb file pointer to end of file and remember this position */
    fseek(fd_amb, 0, SEEK_END);
    offset = ftell(fd_amb);

    /* load the file to buffer */
    bytes = fread(buff, 1, sizeof(buff), fd);
    fclose(fd); /* close handler - no longer needed */
    if (bytes > 0xffff) {
      puts("ERROR: file too big (max allowed size = 65535 bytes)");
      goto ERR_QUIT;
    }

    /* UTF-8 conversion? of course unicode.map shall never be "converted" */
    if ((utfflag != 0) && (i != unicodemapid)) {
      size_t src;
      size_t srclen = bytes;
      /* conversion is done in place: the output (t) can never get ahead of
       * the input (src) since every sequence yields at most one byte */
      t = 0;
      for (src = 0; src < srclen; src++) {
        unsigned short wc;
        int blen, cpchar;
        if (buff[src] < 128) { /* low-ascii chars are copied as-is */
          buff[t++] = buff[src];
          continue;
        }
        wc = readutf8char(buff + src, &blen);
        /* a sequence cut short by the end of the file cannot be trusted
         * (its missing bytes were read from beyond the file's data) */
        if ((size_t)blen > (srclen - src)) wc = 0xfffdu;
        src += blen - 1; /* skip the extra bytes used by utf-8 */
        /* special treatment for the BOM mark (ignore it) */
        if (wc == 0xFEFFu) continue;
        /* output */
        cpchar = unicode2cp(wc, unicodemap);
        if (cpchar < 0) {
          printf("WARNING: unicode glyph 0x%04X is not present in the target codepage", wc);
          puts("");
          cpchar = '?';
        }
        buff[t++] = cpchar;
      }
      /* the converted length is exactly what has been output. deriving it by
       * subtracting sequence lengths from the original size would underflow
       * on a truncated trailing sequence. */
      bytes = t;
    }

    /* compute bsum */
    bsum = 0;
    for (t = 0; t < bytes; t++) {
      uint16_t bsum_ror = (bsum & 1) << 15;
      bsum >>= 1;
      bsum |= bsum_ror;
      bsum += buff[t];
    }

    /* insert file to amb */
    if (fwrite(buff, 1, bytes, fd_amb) != bytes) {
      puts("ERROR: write failure");
      goto ERR_QUIT;
    }

    /* build the file's entry of the amb file list (buff is free by now) */
    /* fname, zero-padded to 12 bytes */
    memset(buff, 0, 12);
    memcpy(buff, flist[i], strlen(flist[i]));
    /* offset (little-endian) */
    buff[12] = offset;
    buff[13] = offset >> 8;
    buff[14] = offset >> 16;
    buff[15] = offset >> 24;
    /* file len */
    buff[16] = bytes;
    buff[17] = bytes >> 8;
    /* bsum */
    buff[18] = bsum;
    buff[19] = bsum >> 8;
    /* overwrite the dummy entry that was written earlier */
    if ((fseek(fd_amb, 4 + 2 + (i * 20L), SEEK_SET) != 0) || (fwrite(buff, 1, 20, fd_amb) != 20)) {
      puts("ERROR: write failure");
      goto ERR_QUIT;
    }
  }

  ecode = 0;

  /* was a title present? */
  if (findfileinlist(flist, fcount, "title") < 0) {
    puts("WARNING: AMB archive contains no 'title' file");
  }

  ERR_QUIT:
  if (fd_amb != NULL) fclose(fd_amb);
  if (d != NULL) closedir(d);
  for (i = 0; i < fcount; i++) {
    free(flist[i]);
  }
  return(ecode);
}


/* prints the name and size of every file stored in the AMB archive fname,
 * one per line. returns 0 on success, 1 if the archive cannot be opened, is
 * not an AMB file or ends before all announced entries could be read */
static int amb_list(const char *fname) {
  unsigned char entry[20];
  unsigned short remaining;
  int result = 1; /* assume failure */
  FILE *amb = fopen(fname, "rb");

  if (amb == NULL) {
    printf("ERROR: failed to open file '%s'", fname);
    puts("");
    return(1);
  }

  /* header: "AMB1" signature followed by a little-endian files counter */
  if ((fread(entry, 1, 6, amb) != 6) || (memcmp(entry, "AMB1", 4) != 0)) {
    puts("ERROR: not an AMB archive");
    goto DONE;
  }
  remaining = (unsigned short)(entry[4] | (entry[5] << 8));

  /* walk through the 20-bytes file entries */
  for (; remaining != 0; remaining--) {
    if (fread(entry, 1, 20, amb) != 20) {
      puts("ERROR: archive is corrupted");
      goto DONE;
    }
    /* filename is not necessarily terminated, hence the 12 chars limit */
    printf("%.12s  (%u bytes)", (char *)entry, entry[16] | (entry[17] << 8));
    puts("");
  }
  result = 0;

  DONE:
  fclose(amb);
  return(result);
}


/* writes into dest the UTF-8 encoding of a 16-bit unicode value, followed by
 * a NUL terminator. dest must have room for up to 4 bytes (3 bytes of
 * encoding + terminator). */
static void utf8gen(char *dest, unsigned short unicodevalue) {
  int n = 0;
  if (unicodevalue >= 0x800) {
    /* 3-bytes encoding (a 16-bit value never needs more than that) */
    dest[n++] = 0xE0 | (unicodevalue >> 12);
    dest[n++] = 0x80 | ((unicodevalue >> 6) & 0x3F);
    dest[n++] = 0x80 | (unicodevalue & 0x3F);
  } else if (unicodevalue >= 0x80) {
    /* 2-bytes encoding */
    dest[n++] = 0xC0 | (unicodevalue >> 6);
    dest[n++] = 0x80 | (unicodevalue & 0x3F);
  } else {
    /* low-ascii: single byte */
    dest[n++] = unicodevalue;
  }
  dest[n] = 0;
}


/* reads the 20-bytes entry number id (0-based) of the file list of the AMB
 * archive fd and decodes it:
 *   filename  receives the 12-bytes name plus a NUL terminator (the buffer
 *             must be at least 13 bytes long)
 *   offset    receives the 32-bit little-endian offset field
 *   flen      receives the 16-bit little-endian length field
 *   bsum      receives the 16-bit little-endian checksum field
 * returns 0 on success, -1 if the entry could not be read (outputs are left
 * untouched then). the position of fd is changed by this call. */
static int get_amahdr(char *filename, long *offset, unsigned short *flen, unsigned short *bsum, unsigned short id, FILE *fd) {
  unsigned char hdr[20];
  unsigned long off;
  /* entries follow the 6-bytes header ("AMB1" + files count). the position
   * is computed as a long so it cannot overflow a narrow int, and a failed
   * seek is reported instead of reading from wherever the stream was. */
  if (fseek(fd, 4 + 2 + (20L * id), SEEK_SET) != 0) return(-1);
  if (fread(hdr, 1, 20, fd) != 20) return(-1);
  memcpy(filename, hdr, 12);
  filename[12] = 0;
  /* assemble the little-endian fields using unsigned arithmetic, so no
   * signed left-shift overflow can occur where long is 32 bits wide */
  off = ((unsigned long)hdr[15] << 24) | ((unsigned long)hdr[14] << 16) | ((unsigned long)hdr[13] << 8) | hdr[12];
  *offset = (long)off;
  *flen = (unsigned short)(((unsigned)hdr[17] << 8) | hdr[16]);
  *bsum = (unsigned short)(((unsigned)hdr[19] << 8) | hdr[18]);
  return(0);
}


/* extracts all the files of the AMB archive fname into the directory dirname
 * (which gets created, and hence must not exist yet), verifying the checksum
 * of every file on the way. if dirname is NULL nothing is written: the
 * archive is only tested. if utfflag is non-zero, every extracted file but
 * unicode.map is converted to UTF-8 (with a leading BOM) using the
 * unicode.map file stored in the archive.
 * returns 0 on success, 1 on failure */
static int amb_unpack(const char *fname, const char *dirname, int utfflag) {
  FILE *fd = NULL;
  FILE *fdout = NULL; /* file being extracted, NULL when there is none */
  int ecode = 1;
  unsigned char buff[65535];
  unsigned short i, fcount;
  unsigned short unicodemap[128];
  int unicodemapid = -1;

  fd = fopen(fname, "rb");
  if (fd == NULL) {
    printf("ERROR: failed to open file '%s'", fname);
    puts("");
    goto ERR_QUIT;
  }

  /* read header */
  if ((fread(buff, 1, 6, fd) != 6) || (memcmp(buff, "AMB1", 4) != 0)) {
    puts("ERROR: not an AMB archive");
    goto ERR_QUIT;
  }
  fcount = buff[5];
  fcount <<= 8;
  fcount |= buff[4];

  /* if UTF-encoding is required, locate the unicode.map file and load it */
  if (utfflag != 0) {
    long offset;
    unsigned short flen, bsum;
    for (i = 0; i < fcount; i++) {
      char filename[16];
      if (get_amahdr(filename, &offset, &flen, &bsum, i, fd) != 0) {
        puts("ERROR: archive is corrupted");
        goto ERR_QUIT;
      }
      if (strcasecmp(filename, "unicode.map") == 0) break;
    }
    if (i >= fcount) {
      puts("ERROR: UTF-8 conversion not possible because AMB does not contain a unicode.map");
      goto ERR_QUIT;
    }
    unicodemapid = i;
    /* load the file (+bsum check) */
    if ((flen != 256) || (fseek(fd, offset, SEEK_SET) != 0) || (fread(buff, 1, 256, fd) != 256) || (bsd_sum(buff, 256) != bsum)) {
      puts("ERROR: UTF-8 conversion not possible because unicode.map seems to be corrupted");
      goto ERR_QUIT;
    }
    /* load the unicodemap table, file contains little-endian values */
    for (i = 0; i < 128; i++) {
      unicodemap[i] = buff[i * 2];
      unicodemap[i] |= buff[i * 2 + 1] << 8;
    }
  }

  /* create the target directory (unless I run in NULL mode) */
  if ((dirname != NULL) && (makedirectory(dirname) != 0)) {
    printf("ERROR: failed to create directory '%s'", dirname);
    puts("");
    goto ERR_QUIT;
  }

  /* iterate over file entries */
  for (i = 0; i < fcount; i++) {
    long offset;
    unsigned short flen, bsum, mybsum;
    char filename[16];
    if (get_amahdr(filename, &offset, &flen, &bsum, i, fd) != 0) {
      puts("ERROR: archive is corrupted");
      goto ERR_QUIT;
    }
    /* output a log */
    printf(" --> %s (%u bytes, bsum=0x%04X)", filename, flen, bsum);
    if ((utfflag != 0) && (i != unicodemapid)) printf(" (conversion to UTF-8)");
    puts("");
    /* create dst file (unless null mode) */
    if (dirname != NULL) {
      /* the name comes from the archive, ie. it is untrusted: refuse any
       * name that could make the file land outside of the target directory */
      if ((strchr(filename, '/') != NULL) || (strchr(filename, '\\') != NULL)) {
        printf("ERROR: illegal file name in archive ('%s')", filename);
        puts("");
        goto ERR_QUIT;
      }
      /* buff is used to build the path, it gets overwritten with data later */
      snprintf((char *)buff, sizeof(buff), "%s" PATHSEP "%s", dirname, filename);
      fdout = fopen((char *)buff, "wb");
      if (fdout == NULL) {
        puts("ERROR: failed to create output file");
        goto ERR_QUIT;
      }
    }
    /* read the file from archive (fdout, if any, is closed at ERR_QUIT) */
    if ((fseek(fd, offset, SEEK_SET) != 0) || (fread(buff, 1, flen, fd) != flen)) {
      puts("ERROR: read failure from archive");
      goto ERR_QUIT;
    }
    /* compute and match the BSD sum */
    mybsum = bsd_sum(buff, flen);
    if (mybsum != bsum) {
      printf("ERROR: checksum mistmatch (expected=%04X != computed=%04X), the archive file appears to be corrupted!", bsum, mybsum);
      puts("");
      goto ERR_QUIT;
    }
    /* write the data to dest (unless NULL mode) */
    if (fdout != NULL) {
      unsigned short bi;
      char utf8buff[8];
      int closeres;
      /* add a UTF-8 BOM (U+FEFF) if utf conversion is asked */
      if ((utfflag != 0) && (i != unicodemapid)) {
        utf8gen(utf8buff, 0xFEFFu);
        if (fwrite(utf8buff, 1, strlen(utf8buff), fdout) != strlen(utf8buff)) {
          puts("ERROR: write failure to dest file during utf8 output");
          goto ERR_QUIT;
        }
      }
      /* write the file byte by byte, and UTF-8 encode if asked to */
      for (bi = 0; bi < flen; bi++) {
        if ((utfflag != 0) && (i != unicodemapid) && (buff[bi] & 128)) {
          utf8gen(utf8buff, unicodemap[buff[bi] - 128]);
          if (fwrite(utf8buff, 1, strlen(utf8buff), fdout) != strlen(utf8buff)) {
            puts("ERROR: write failure to dest file during utf8 output");
            goto ERR_QUIT;
          }
        } else {
          if (fwrite(buff + bi, 1, 1, fdout) != 1) {
            puts("ERROR: write failure to dest file");
            goto ERR_QUIT;
          }
        }
      }
      /* buffered data is flushed by fclose(), so it may fail as well */
      closeres = fclose(fdout);
      fdout = NULL;
      if (closeres != 0) {
        puts("ERROR: write failure to dest file");
        goto ERR_QUIT;
      }
    }
  }

  ecode = 0;

  ERR_QUIT:
  if (fdout != NULL) fclose(fdout);
  if (fd != NULL) fclose(fd);
  return(ecode);
}


/* program entry point: picks the action to run from the first argument
 * (matched case-insensitively) and the amount of arguments, prints the help
 * screen if no action matches. prints "OK" when the action succeeds.
 * returns the action's result (0 on success), or 1 if help was displayed */
int main(int argc, char **argv) {
  int ecode = 1;
  int recognized = 1;
  const char *action = (argc > 1) ? argv[1] : "";

  if (argc == 4) { /* actions with an input and an output */
    if (strcasecmp(action, "c") == 0) {
      ecode = amb_create(argv[2], argv[3], 0);
    } else if (strcasecmp(action, "cc") == 0) {
      ecode = amb_create(argv[2], argv[3], 1);
    } else if (strcasecmp(action, "u") == 0) {
      ecode = amb_unpack(argv[2], argv[3], 0);
    } else if (strcasecmp(action, "uc") == 0) {
      ecode = amb_unpack(argv[2], argv[3], 1);
    } else {
      recognized = 0;
    }
  } else if (argc == 3) { /* actions with an input only */
    if (strcasecmp(action, "l") == 0) {
      ecode = amb_list(argv[2]);
    } else if (strcasecmp(action, "t") == 0) {
      ecode = amb_unpack(argv[2], NULL, 0); /* NULL dir = test mode */
    } else {
      recognized = 0;
    }
  } else {
    recognized = 0;
  }

  if (recognized == 0) {
    amb_help();
    ecode = 1;
  }

  if (ecode == 0) puts("OK");
  return(ecode);
}
