/* Align converts tabs to spaces, and aligns columns.

usage : Align Myfile.TXT
        - aligns comma or space-separated data in columns.
        - expands tabs to spaces
        - trims off ^z Eof char
        - ensures each line ends in CrLf
        - inserts space to separate columns, removing commas.
        - handles spaces embedded in C-style comments or quotes as
          part of the same column.

          compiled with Borland C++ 5.01, small DOS model.

Align 1.1 copyright 1993, 1998 by:
Roedy Green
Canadian Mind Products
#208 - 525 Ninth Street
New Westminster BC Canada
V5H 2N6
tel:(604) 777-1804
mailto:roedy@mindprod.com
http://mindprod.com

version 1.1 1998 November 8
   - embed new address

   version 1.0 1998 Sept 20


*/

/* ==================================== */

#include <stdlib.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <conio.h>
#include <dos.h>
#define Esc '\x1B'

/* ==================================== */

/* Classification that Categorize() assigns to every input character. */
typedef enum Category
  {
  comment,      /* character belongs to a comment */
  quote,        /* character belongs to a quoted string or char literal */
  code,         /* ordinary non-blank character outside comments/quotes */
  whitespace,   /* blank, tab or ^Z outside comments and quotes */
  newline,      /* line end that terminates the current line */
  eof           /* end of file; also returned when the state is reset */
  } Category;

/* ==================================== */

/* use all global variables and no parameter passing for simplicity. */

int             Padding = 2;    /* how many blanks to put between
                                 * adjacent columns; main adds this
                                 * to every column width after
                                 * pass 1 */

FILE           *Before;         /* input file, opened by main and
                                 * read once per pass.  The author
                                 * assumes it contains no ^Z chars,
                                 * except possibly at the end */

FILE           *After;          /* temporary output file that
                                 * receives the aligned text, with
                                 * tabs expanded to spaces */

char           *BFilename;      /* pointer to string name of
                                 * file we will convert; points
                                 * at argv[1], it is not a copy */

char           *AFilename;      /* pointer to string name of the
                                 * temporary output file, set by
                                 * BldAName */

int             BiggestWidth[40];       /* width of widest field
                                         * in given column in
                                         * any record; there is
                                         * room for 40 columns */

int             ColIndex = -1;  /* which field/column we are
                                 * working on. 0 is first, -1
                                 * means no field has started on
                                 * the current line yet */

int             Width = 0;      /* width of current column, i.e.
                                 * chars seen so far in the
                                 * current field */

int             Cols = 0;       /* how many columns there are:
                                 * the largest field count found
                                 * on any line during pass 1 */

int             Pass = 1;       /* pass=1 when deciding col
                                 * widths, and pass=2 when
                                 * outputting */

/* ==================================== */

/* P R O T O T Y P E S */

/* program entry point */
int      main(int argc, char *argv[]);

/* file name handling */
void     SafeFilename(char *fname);
void     BldAName(void);

/* the two-pass scanner and its per-event callbacks */
void     DoAPass(void);
Category Categorize(int c);
void     StartLine(void);
void     EndLine(void);
void     StartField(void);
void     InField(int c);
void     EndField(void);

/* console helpers */
void     Banner(void);
void     Honk(void);
void     Die(void);

/* ==================================== */

int
  main(int argc, char *argv[])
/* main Align */
{



  if ( _osmajor < 3 )
    {
    printf("Oops!  DOS 3.0 or later needed\n\a");
    /* \a=alert beep.  Can't use die since files not
     * open */
    exit(1);

    }
  if ( argc != 2 /* 0=Align.Exe 1=MyFile.Txt    */ )
    {
    printf("Oops!  usage:  ALIGN    Myfile.TXT\n\a");
    exit(1);
    }

  BFilename = argv[1];        /* Want first arg */
  /* grab just a pointer. */
  /* Don't copy the string. */
  SafeFilename(BFilename);    /* check before we open */
  BldAName();                 /* Go build a temp after name */

  if ( (Before = fopen(BFilename, "rt")) == NULL )
    {
    printf("Oops! Cannot open file %s\n\a", BFilename);
    exit(1);
    }
  setvbuf(Before,NULL,_IOFBF,40*512);

  if ( (After = fopen(AFilename, "wt")) == NULL )
    {
    printf("Oops! Cannot open file %s\n\a", AFilename);
    exit(1);
    }
  setvbuf(After,NULL,_IOFBF,40*512);

  /* clear all column widths */
  for ( ColIndex = 0; ColIndex < 40; ColIndex++ )
    BiggestWidth[ColIndex] = 0;

  Pass = 1;
  DoAPass();
  /* calculate how wide each column is and store it in
   * ColIndex */

  /* pad the column widths to put a little space between
   * the columns */
  for ( ColIndex = 0; ColIndex < Cols; ColIndex++ )
    BiggestWidth[ColIndex] += Padding;

  /* reset input file to beginning */
  fseek(Before, 0, SEEK_SET);

  Pass = 2;
  DoAPass();                  /* repass the file, this time
                               * copying to the output file */

  /* Rename output to input */
  fclose(Before);
  fclose(After);
  remove(BFilename);
  rename(AFilename, BFilename);

  return (0);
}                               /* main Align */

/* ==================================== */

void            DoAPass(void)

/* Calculate how wide each column is and store it in
 * BiggestWidth[ColIndex] on pass1.  First column in index
 * 0.  On pass2, generate the output. */
{
  int             c;          /* char just read */

  int             blankState = 0;
  /** blankState - we implement the algorithm as a finite state machine.
   =0 reading leading blanks on a field.
   =1 reading non-blanks or quotes in middle of a field.
   =2 reading trailing blanks.

     when call EndField
         v      v     v
 ____xxxx____,__,__xxx___
     ^          ^  ^
     when call StartField
     */

  Categorize(EOF);            /* reset */
  StartLine();

  while ( (c = getc(Before)) != EOF )
    {
    switch ( Categorize(c) )
      {
      case whitespace:        /* blanks */
        switch ( blankState )
          {
          case 0:
            blankState = 0;
            break;
          case 1:
            EndField();
            blankState = 2;
            break;
          case 2:
            blankState = 2;
            break;
          }
        break;

      case code:
        if ( c == ',' )
          {
          switch ( blankState )  /* comma */
            {
            case 0:
              StartField();       /* null field */
              EndField();
              blankState = 0;
              break;
            case 1:
              EndField();
              blankState = 0;
              break;
            case 2:
              blankState = 0;
              break;
            }
          } else
          {
          switch ( blankState )  /* ordinary non-blank */
            {
            case 0:
              StartField();
              InField(c);
              blankState = 1;
              break;
            case 1:
              InField(c);
              blankState = 1;
              break;
            case 2:
              StartField();
              InField(c);
              blankState = 1;
              break;
            }
          }                   /* end else */
        break;

      case comment:           /* treat comments like a
                               * non-blank */
      case quote:             /* something in quoted string */
        switch ( blankState )
          {
          case 0:
            StartField();
            InField(c);
            blankState = 1;
            break;
          case 1:
            InField(c);
            blankState = 1;
            break;
          case 2:
            StartField();
            InField(c);
            blankState = 1;
            break;
          }
        break;

      case newline:           /* new line */
        switch ( blankState )
          {
          case 0:
            break;
          case 1:
            EndField();
            break;
          case 2:
            break;
          }
        EndLine();
        StartLine();
        blankState = 0;
        break;
      }                       /* end switch on char */
    }                           /* end while */
}                               /* end DoAPass */

/* ==================================== */

void StartLine(void)

/* Called at the start of every input line.  Both passes prepare
 * the same way: no field has been seen yet and nothing has been
 * counted. */

{
  if ( Pass == 1 || Pass == 2 )
    {
    Width = 0;
    ColIndex = -1;            /* StartField will bump this to 0 */
    }
}

/* ==================================== */

void EndLine(void)

/* Called when an input line is complete.  Pass 1 only measures,
 * so there is nothing to do; pass 2 terminates the output line. */

{
  if ( Pass != 2 )
    return;

  putc('\n', After);
}

/* ==================================== */

void StartField(void)

/* Field may have lead and trail spaces on it.  We have just
 * hit the first non-blank. */

{
  switch ( Pass )
    {
    case 1:
      Width = 0;
      if ( ++ColIndex > (Cols - 1) )
        Cols = ColIndex + 1;
      break;
    case 2:
      Width = 0;
      ++ColIndex;
      break;
    }
  return;
}

/* ==================================== */

void  InField(int c)

/* Handles one character that belongs to the current field (the
 * field itself may be surrounded by blanks).
 *
 * c is the character.  Pass 1 only counts it and remembers the
 * widest field seen in this column; pass 2 counts it and copies it
 * to the output file. */

{
  if ( Pass == 1 )
    {
    Width++;
    if ( Width > BiggestWidth[ColIndex] )
      BiggestWidth[ColIndex] = Width;
    }
  else if ( Pass == 2 )
    {
    Width++;
    putc(c, After);
    }
}

/* ==================================== */

void EndField(void)

/* Called when the current field is complete, i.e. on the first
 * blank, comma or line end after its last non-blank.
 *
 * Nothing happens on pass 1.  On pass 2 every column except the
 * last one is filled out with blanks up to the width recorded in
 * BiggestWidth[], which is what lines the columns up. */

{
  if ( Pass != 2 )
    return;

  if ( ColIndex >= (Cols - 1) )
    return;                   /* last column is never padded */

  /* Width is reused as the count of blanks still to write */
  Width = BiggestWidth[ColIndex] - Width;
  while ( Width )
    {
    putc(' ', After);
    Width--;
    }
}

/* ==================================== */

Category Categorize(int c)

/** accept a character and categorize it.
 *
 * comment -- inside // or /* comment
 * quote   -- inside single or double quote string
 * code    -- normal code
 * whitespace -- whitespace in code.
 *              Whitespace in comments and quotes counts as comment or
 *              quote.
 * newline -- newline character.  Newline inside comment counts
 *              as comment.
 * eof     -- end of file. also resets state.
 *
 * Comments require two chars to start them.  The first char will be
 * considered code, and only the second as comment. */

{
  static int      commentState = 0;

  /** commentState remembered between calls.  We implement this as yet another
      finite state automaton.
   =0 normal C code
   =1 inside a " "
   =2 inside a ' '
   =3 just seen "\
   =4 just seen '\
   =5 inside |* *|
   =6 just seen |
   =7 just seen |* ... *
   =8 inside ||    */

  switch ( c )
    {

    case EOF:                   /* end of file or reset */
      commentState = 0;
      return (eof);

    case '\n':                  /* new line */
      switch ( commentState )
        {
        case 0:         /* normal code */
          commentState = 0;
          return (newline);
        case 1:         /* inside a " " */
          commentState = 0;
          return (newline);
        case 2:         /* inside a ' ' */
          commentState = 0;
          return (newline);
        case 3:         /* just seen "\ */
          commentState = 0;
          return (newline);
        case 4:         /* just seen '\ */
          commentState = 0;
          return (newline);
        case 5:         /* inside |*   *| comment */
          commentState = 5;
          return (comment);
        case 6:         /* just seen / */
          commentState = 0;
          return (newline);
        case 7:         /* just seen |* ... *    */
          commentState = 5;
          return (comment);
        case 8:         /* inside || C++ style comment */
          commentState = 0;
          return (newline);
        }

    case ' ':                   /* blanks */
    case '\t':                  /* treat tab as white space. */
    case '\x1a':                /* treat ^Z as white space. */
      switch ( commentState )
        {
        case 0:         /* normal code */
          commentState = 0;
          return (whitespace);
        case 1:         /* inside a " " */
          commentState = 1;
          return (quote);
        case 2:         /* inside a ' ' */
          commentState = 2;
          return (quote);
        case 3:         /* just seen "\ */
          commentState = 1;
          return (quote);
        case 4:         /* just seen '\ */
          commentState = 2;
          return (quote);
        case 5:         /* inside |*   *| comment */
          commentState = 5;
          return (comment);
        case 6:         /* just seen / */
          commentState = 0;
          return (whitespace);
        case 7:         /* just seen |* ... *    */
          commentState = 5;
          return (comment);
        case 8:         /* inside || C++ style comment */
          commentState = 8;
          return (comment);
        }

    case '\"':                  /* double quote */
      switch ( commentState )
        {
        case 0:         /* normal code */
          commentState = 1;
          return (quote);
        case 1:         /* inside a " " */
          commentState = 0;
          return (quote);
        case 2:         /* inside a ' ' */
          commentState = 2;
          return (quote);
        case 3:         /* just seen "\ */
          commentState = 1;
          return (quote);
        case 4:         /* just seen '\ */
          commentState = 2;
          return (quote);
        case 5:         /* inside |*   *| comment */
          commentState = 5;
          return (comment);
        case 6:         /* just seen / */
          commentState = 1;
          return (quote);
        case 7:         /* just seen |* ... *    */
          commentState = 5;
          return (comment);
        case 8:         /* inside || C++ style comment */
          commentState = 8;
          return (comment);
        }

    case '\'':                  /* singlequote */
      switch ( commentState )
        {
        case 0:         /* normal code */
          commentState = 2;
          return (quote);
        case 1:         /* inside a " " */
          commentState = 1;
          return (quote);
        case 2:         /* inside a ' ' */
          commentState = 0;
          return (quote);
        case 3:         /* just seen "\ */
          commentState = 1;
          return (quote);
        case 4:         /* just seen '\ */
          commentState = 2;
          return (quote);
        case 5:         /* inside |*   *| comment */
          commentState = 5;
          return (comment);
        case 6:         /* just seen / */
          commentState = 1;
          return (quote);
        case 7:         /* just seen |* ... *    */
          commentState = 5;
          return (comment);
        case 8:         /* inside || C++ style comment */
          commentState = 8;
          return (comment);
        }
    case '*':                   /* star */
      switch ( commentState )
        {
        case 0:         /* normal code */
          commentState = 0;
          return (code);
        case 1:         /* inside a " " */
          commentState = 1;
          return (quote);
        case 2:         /* inside a ' ' */
          commentState = 2;
          return (quote);
        case 3:         /* just seen "\ */
          commentState = 1;
          return (quote);
        case 4:         /* just seen '\ */
          commentState = 2;
          return (quote);
        case 5:         /* inside |*   *| comment */
          commentState = 7;
          return (comment);
        case 6:         /* just seen |   */
          commentState = 5;
          return (comment);
        case 7:         /* just seen |* ... *    */
          commentState = 7;
          return (comment);
        case 8:         /* inside || C++ style comment */
          commentState = 8;
          return (comment);
        }

    case '\\':                  /* backslash */
      switch ( commentState )
        {
        case 0:         /* normal code */
          commentState = 0;
          return (code);
        case 1:         /* inside a " " */
          commentState = 3;
          return (quote);
        case 2:         /* inside a ' ' */
          commentState = 4;
          return (quote);
        case 3:         /* just seen "\ */
          commentState = 1;
          return (quote);
        case 4:         /* just seen '\ */
          commentState = 2;
          return (quote);
        case 5:         /* inside |*   *| comment */
          commentState = 5;
          return (comment);
        case 6:         /* just seen |   */
          commentState = 0;
          return (code);
        case 7:         /* just seen |* ... *    */
          commentState = 5;
          return (comment);
        case 8:         /* inside || C++ style comment */
          commentState = 8;
          return (comment);
        }

    case '/':                   /* forwardslash */
      switch ( commentState )
        {
        case 0:         /* normal code */
          commentState = 6;
          return (code);      /* might be comment, but don't
                               * know that yet */
        case 1:         /* inside a " " */
          commentState = 1;
          return (quote);
        case 2:         /* inside a ' ' */
          commentState = 2;
          return (quote);
        case 3:         /* just seen "\ */
          commentState = 1;
          return (quote);
        case 4:         /* just seen '\ */
          commentState = 2;
          return (quote);
        case 5:         /* inside |*   *| comment */
          commentState = 5;
          return (comment);
        case 6:         /* just seen |   */
          commentState = 8;
          return (comment);
        case 7:         /* just seen |* ... *    */
          commentState = 0;
          return (comment);
        case 8:         /* inside || C++ style comment */
          commentState = 8;
          return (comment);
        }

    default:                    /* non blank */

      switch ( commentState )
        {
        case 0:         /* normal code */
          commentState = 0;
          return (code);
        case 1:         /* inside a " " */
          commentState = 1;
          return (quote);
        case 2:         /* inside a ' ' */
          commentState = 2;
          return (quote);
        case 3:         /* just seen "\ */
          commentState = 1;
          return (quote);
        case 4:         /* just seen '\ */
          commentState = 2;
          return (quote);
        case 5:         /* inside |*   *| comment */
          commentState = 5;
          return (comment);
        case 6:         /* just seen |   */
          commentState = 0;
          return (code);
        case 7:         /* just seen |* ... *    */
          commentState = 7;
          return (comment);
        case 8:         /* inside || C++ style comment */
          commentState = 8;
          return (comment);
        }
    }                           /* end switch(c) */
  exit(2);                    /* should never happen */
  return (eof);
}                               /* end Categorize */

/* ==================================== */

/* Report whether ext equals one of the entries of the NULL-terminated
 * list.  Returns 1 on a match, 0 otherwise. */
static int ExtensionListed(const char *ext, const char *const *list)
{
  while ( *list != NULL )
    {
    if ( strcmp(ext, *list) == 0 )
      return 1;
    list++;
    }
  return 0;
}

void SafeFilename(char* BFilename)
{
  /* Decide from the file name extension whether it is sensible to
     align the file.
        known text extensions (.ASM .PAS etc) - accepted silently
        .EXE .COM .OBJ                        - refused, program ends
        anything else                         - the user is asked
     Returns only when the file may be processed. */

  static const char *const harmless[] =
  { ".C", ".CPP", ".H", ".HPP", ".RH", ".IH", ".TXT",
    ".ASM", ".PAS", ".BAT", ".CTL", ".CMD",
    ".LST", ".MAC", ".ANS",
    ".USE", ".KEY", NULL};    /* NULL marks the end of the list */

  static const char *const forbidden[] =
  { ".EXE", ".COM", ".OBJ", NULL};

  char ext[_MAX_EXT];         /* extension of BFilename, upper case */
  int  key;                   /* keystroke answering the prompt */

  /* only the extension part is wanted */
  _splitpath(BFilename, NULL, NULL, NULL, ext);
  strupr(ext);                /* the lists are all upper case */

  if ( ExtensionListed(ext, harmless) )
    return;

  if ( ExtensionListed(ext, forbidden) )
    {
    Banner();
    printf("Oops!  Align cannot be used on EXE COM or OBJ files such as %s\n",
           BFilename);
    Die();
    }

  /* unknown extension: warn and let the user decide.  The warning
     starts on its own line to leave room for a long file name. */
  printf("Warning!\nAlign is not usually used on %s files such as %s\n",
         ext, BFilename);
  printf("Do you want to align the file anyway?  (Y)es (N)o  ");

  for ( ;; )                  /* until the user answers Y, N or Esc */
    {
    Honk();
    /* the key is not echoed: Tab or Enter would mess up the screen */
    key = getch();
    /* done as a separate step since toupper may be a macro */
    key = toupper(key);

    if ( key == 'Y' )
      {
      printf("Yes\n");
      return;
      }
    if ( key == 'N' )
      printf("No\n");
    if ( key == 'N' || key == Esc )
      {
      printf("\nAlign aborted\n");
      Die();
      }
    /* any other key: ask again */
    }
}  /* SafeFileName */

/* ==================================== */


void
  BldAName(void)
/* Build the name of the temporary output file and leave it in
 * AFilename.
 *
 * The temporary file is requested in the directory of the input
 * file (BFilename), or in the current directory when the input name
 * carries no drive or directory.  If no name can be generated the
 * program prints a message and exits with errorlevel 1. */
{
  char            drive[_MAX_DRIVE];
  char            dir[_MAX_DIR];

  char            filepath[_MAX_DRIVE + _MAX_DIR];
  size_t          len;

  /* the name and extension parts are not needed */
  _splitpath(BFilename, drive, dir, NULL, NULL);

  strcpy(filepath, drive);
  strcat(filepath, dir);

  len = strlen(filepath);
  if ( len == 0 )
    {
    /* Force to current directory if empty */
    strcpy(filepath, ".");
    }
  else if ( filepath[len - 1] == '\\' || filepath[len - 1] == '/' )
    {
    /* drop the separator that ends the directory part */
    filepath[len - 1] = 0;
    }
  /* else the path is a bare drive such as "C:" (input given as
   * C:FILE.TXT); it is kept whole, removing its last char would
   * turn it into a directory called "C" */

  putenv("TMP=");             /* Sets TMP just for this and
                               * any "children" */
  /* processes -- doesn't change parents TMP */

  if ( (AFilename = tempnam(filepath, "")) == NULL )
    {
    printf("Oops!  Cannot create the temporary work file\n\a");
    exit(1);
    }
}

void Banner(void)
{
  /* Print the copyright banner on standard output.  It is normally
     not shown; SafeFilename displays it before refusing a file. */

  static const char text[] =
    "\n Align 1.1 ۲\n"
    "\n"
    "Freeware to align columns.\n"
    "Copyright (c) 1990, 1998 Canadian Mind Products\n"
    "#208 - 525 Ninth Street, New Westminster, BC Canada V3M 5T9\n"
    "tel:(604) 777-1804   mailto:roedy@mindprod.com   http://mindprod.com\n"
    "May be used freely for non-military use only\n"
    "\n";

  fputs(text, stdout);

} /* Banner */
/* ==================================== */


void Honk (void)
{
  /* sound the terminal bell by writing the alert character */
  putchar('\a');
}

/* =================================== */
void Die (void)
{
  /* Abort the program: beep, close whichever of the two files are
     open, and exit with errorlevel = 1.  Does not return.

     Die can be reached from SafeFilename before main has opened
     anything, when Before and After are still NULL, so each stream
     is closed only if it exists. */
  Honk();
  if ( Before != NULL )
    fclose (Before);
  if ( After != NULL )
    fclose (After);
  exit(1);   /* exit with errorlevel = 1 */
} /* Die */

/* ==================================== */


/* -30- */
