/*
 *   Simple recode of text files:
 *     - unix to dos (^J --> ^M^J)
 *     - dos to unix (^M^J --> ^J)
 *     - unix manual pages to text (a^Hb -> b)
 *     - russian letters: alt (cp-866), win (cp-1251), koi-8r, GOST, bulgarian
 *     - fix russian HTML-files produced by WinWord95 ( &#nnn; --> sym )
 *
 *   Compilers & systems:
 *     - DOS: Turbo C 2.0, Borland C++ 3.1
 *     - Win32: Visual C++
 *     - Linux (GNU C), Convex, HP-UX, AIX (IBM C Set), and so on...
 *
 *   History (minor changes are omitted):
 *     - Aug 96  initial release. unix2dos, dos2unix, man2text
 *     - Nov 96  improved performance (faster)
 *     - May 97  recoding koi8r, alt866, win1251, GOST, bulg
 *     - Apr 98  fix HTML-output of WinWord
 *     - Nov 98  fix setting of permissions of target file
 *
 *   Version 4.02 at 18 Nov 1998.
 *   Copyright(c) EBCEEB,inc. aLL rIghtS pReserved.
 *   Public domain, yeah..;) Absolutely no warranties.
 *   Contact e-mail: 2:5030/445.8@fidonet.org, evseev@csa.ru
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>

#if defined( __MSDOS__ ) || defined( _WIN32 )

#   include <sys\stat.h>
#   include <io.h>

#   define SLASH '\\'
#   define CREAT_MODE S_IREAD|S_IWRITE

#else

#   include <sys/types.h>
#   include <sys/stat.h>
#   include <unistd.h>

#   define SLASH '/'
#   define CREAT_MODE 0600    /* "I can read & write" */
#   ifndef O_BINARY
#       define O_BINARY 0
#   endif

#endif

/* Size in bytes of each I/O buffer (one for input, one for output). */
enum { BUF_SZ = 8192 };

/* Process exit codes passed to stop(). */
enum {
    STOP_SYNTAX     = 100,   /* bad command line, usage was printed */
    STOP_OPEN_SRC   = 101,   /* source file could not be opened */
    STOP_CREAT_DEST = 102,   /* target file could not be created */
    STOP_WRITE_DEST = 103    /* writing the target file failed */
};

/* Control characters the converter looks for. */
enum {
    UNIX_EOF = 4,    /* Ctrl-D */
    BS       = 8,    /* backspace */
    LF       = 10,   /* line feed, ^J */
    CR       = 13,   /* carriage return, ^M */
    DOS_EOF  = 26    /* Ctrl-Z */
};

/*
 * Message helpers.  Both write the string to stdout without appending a
 * newline; outE() is the one used for error messages.
 */
#define out(x)  fputs( (x), stdout )   /* info */
#define outE(x) fputs( (x), stdout )   /* error */

/* Recoding tables */

/*
 * Every recoding table has TBL_SIZE entries and describes the source bytes
 * TBL_BASE .. TBL_BASE+TBL_SIZE-1.  Entry [byte - TBL_BASE] holds the
 * matching Alt (CP-866) code; 0 means "no mapping, leave the byte as is".
 * The same table is searched backwards (first match wins) to convert from
 * Alt to the other charset, so values >= TBL_BASE should not repeat.
 */
#define TBL_BASE 128
#define TBL_SIZE 128

/* KOI-8r -> Alt (CP-866). */
unsigned char koi_table[TBL_SIZE] = {
    /* 0x80-0x8F */
    192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
    /* 0x90-0x9F */
    208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
    /* 0xA0-0xAF: not mapped */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0xB0-0xBF: not mapped */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0xC0-0xCF: lowercase letters */
    238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
    /* 0xD0-0xDF: lowercase letters */
    175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
    /* 0xE0-0xEF: uppercase letters */
    158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
    /*
     * 0xF0-0xFF: uppercase letters.  0xF1 (capital YA) maps to 159, the
     * uppercase counterpart of the 239 used for 0xD1; repeating 239 here
     * would lowercase the letter and break the reverse lookup of 159.
     *
     * The last entry is stored as 255.
     * NOTE(review): KOI-8r 0xFF is the capital hard sign, which is 154 in
     * CP-866.  255 keeps the byte 0xFF unchanged in both directions and may
     * be deliberate (it avoids generating 0xFF in KOI-8r output) - confirm
     * before changing.
     */
    143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,255};

/*
 * Win (CP-1251) -> Alt (CP-866).  Entry [byte - TBL_BASE] is the Alt code
 * for that source byte, 0 means "leave the byte unchanged".  Symbols that
 * have no Alt equivalent are mapped to ASCII or control-code look-alikes
 * (values below 128); those are one-way because the reverse lookup only
 * handles codes >= TBL_BASE.
 *
 * Values >= 128 must be unique: the reverse lookup returns the first
 * matching index.  0xB1 (plus-minus) is therefore left unmapped instead of
 * sharing 241 with 0xB8 (small io); with the duplicate, Alt 241 was
 * converted to 0xB1 rather than 0xB8.
 */
unsigned char win_table[TBL_SIZE] = {
    /* 0x80-0x8F */
      0,  0, 44,  0,  0,  0,  0,  0,  0,  0,  0, 60,  0,  0,  0,  0,
    /* 0x90-0x9F */
      0, 96, 39, 34, 34,249, 45, 45,  0,  0,  0, 62,  0,  0,  0,  0,
    /* 0xA0-0xAF */
      0,  0,  0, 74,253,  0,124, 21,240,  0,  0, 17,  0,196,  0, 73,
    /* 0xB0-0xBF */
    248,  0, 73,105, 39,  0, 20,250,241,252,  0, 16,106, 83,115,105,
    /* 0xC0-0xCF */
    128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
    /* 0xD0-0xDF */
    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
    /* 0xE0-0xEF */
    160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
    /* 0xF0-0xFF */
    224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239};

/*
 * GOST -> Alt (CP-866).  Entry [byte - TBL_BASE] is the Alt code for that
 * source byte; 0 means the byte is passed through unchanged.  The last two
 * rows are all zero, so source bytes 0xE0-0xFF keep their value.
 */
unsigned char gost_table[TBL_SIZE] = {
    207,208,209,181,182,183,184,210,211,212,213,189,190,198,199,214, /* 0x80-0x8F */
    201,187,188,200,205,186,203,185,202,204,206,176,177,178,215,216, /* 0x90-0x9F */
    218,191,217,192,196,179,194,180,193,195,197,219,220,221,222,223, /* 0xA0-0xAF */
    128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, /* 0xB0-0xBF */
    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, /* 0xC0-0xCF */
    160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, /* 0xD0-0xDF */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 0xE0-0xEF */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0};/* 0xF0-0xFF */

/*
 * Bulgarian -> Alt (CP-866).  Only source bytes 0xB0-0xBF are remapped
 * (to Alt 224..239); every other entry is 0, i.e. the byte is passed
 * through unchanged.
 */
unsigned char bulg_table[TBL_SIZE] = {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 0x80-0x8F */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 0x90-0x9F */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 0xA0-0xAF */
    224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, /* 0xB0-0xBF */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 0xC0-0xCF */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 0xD0-0xDF */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* 0xE0-0xEF */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0};/* 0xF0-0xFF */

/*
 * Charset identifiers.  The value doubles as the index into table[] and
 * charset[] below, so the three lists must stay in the same order.
 * Both selections start out as ALT (zero-initialized).
 */
enum { ALT, KOI, WIN, GOST, BULG } charset_in, charset_out;

/* Recoding table of each charset; ALT is the pivot and needs none. */
unsigned char *table[] = {
    NULL,
    koi_table,
    win_table,
    gost_table,
    bulg_table
};

/* Display name of each charset, used in the progress message. */
char *charset[] = {
    "Alt",
    "Koi8",
    "Win",
    "GOST",
    "Bulgar"
};

/* Input buffer and output buffer. */
unsigned char buf_in[BUF_SZ];
unsigned char buf_out[BUF_SZ];

/*
 * used_in - number of valid bytes in buf_in
 * pos_in  - next byte of buf_in to hand out
 * pos_out - number of bytes collected in buf_out
 * used_in and pos_in both start at BUF_SZ so that the very first
 * char_in() call sees an exhausted buffer and reads from the file.
 */
int used_in = BUF_SZ;
int pos_in = BUF_SZ;
int pos_out;

/* Source and target file descriptors. */
int h1;
int h2;

/* Non-zero when the '#' switch (WinWord HTML fix) was given. */
int word_htm;

/*
 * Print msg through outE() and terminate the program.
 *   code - process exit status (one of the STOP_* values)
 *   msg  - text to print; no newline is appended
 * Does not return.
 */
void stop( int code, const char *msg )
{
    outE( msg );

    exit( code );
}

/*
 * Print the usage text and exit with STOP_SYNTAX.  Does not return.
 *
 * NOTE(review): the banner below says 3.02 while the header comment of
 * this file says "Version 4.02" - confirm which one is current.
 */
void syntax( void )
{
    static const char usage[] =
        "Recode 3.02 (c) 1998 by EBCEEB,inc.\n\n"
        "Syntax: recode [-switch[es]] srcfile [destfile]\n"
        "Switches:\n"
        "  +  (unix -> dos) add ^M for each ^J\n"
        "  -  (dos -> unix) truncates ^M\n"
        "  m  (man -> text) convert c1^Hc2 to c2\n"
        "Russian letters conversion:\n"
        "  a  source is Alt (CP-866) - default\n"
        "  A  target is Alt (CP-866) - default\n"
        "  k  source is KOI-8r\n"
        "  K  target is KOI-8r\n"
        "  w  source is Win (CP-1251)\n"
        "  W  target is Win (CP-1251)\n"
        "  g  source is GOST\n"
        "  G  target is GOST\n"
        "  b  source is Bulgarian\n"
        "  B  target is Bulgarian\n"
        "  #  source is HTML produced by WinWord\n";

    stop( STOP_SYNTAX, usage );
}

/*
 * Convert one character from a foreign charset to Alt.
 *   c     - character code to convert
 *   table - recoding table of the source charset (TBL_SIZE entries)
 * Returns the table entry for c, or c itself when c lies outside
 * TBL_BASE .. TBL_BASE+TBL_SIZE-1 or its entry is 0 (no mapping).
 */
int sym2alt( int c, unsigned char *table )
{
    int mapped;

    if( c < TBL_BASE || c >= TBL_BASE + TBL_SIZE )
        return c;

    mapped = table[ c - TBL_BASE ];
    if( mapped == 0 )
        return c;
    return mapped;
}

/*
 * Convert one character from Alt to a foreign charset by looking the Alt
 * code up in that charset's table.
 *   c     - Alt character code
 *   table - recoding table of the target charset (TBL_SIZE entries)
 * Returns TBL_BASE plus the index of the first entry equal to c, or c
 * itself when c is outside TBL_BASE .. TBL_BASE+TBL_SIZE-1 or is not
 * present in the table.
 */
int alt2sym( int c, unsigned char *table )
{
    int idx;

    if( c < TBL_BASE || c >= TBL_BASE + TBL_SIZE )
        return c;

    for( idx = 0; idx < TBL_SIZE; idx++ ) {
        if( table[ idx ] == c )
            return TBL_BASE + idx;
    }
    return c;
}

/*
 * Map a numeric character reference value to an Alt code.
 * Only valid for the russian part of ANSI.
 *   c - the number taken from "&#nnn;"
 * Returns:
 *   c unchanged        for c < 128
 *   128..175           for 1040..1087
 *   224..239           for 1088..1103
 *   240 / 241          for 1025 / 1105
 *   -1                 for anything else
 */
int ansi2alt( int c )
{
    if( c < 128 )
        return c;

    switch( c ) {
    case 1025: return 240;
    case 1105: return 241;
    default:   break;
    }

    /* 1040..1071 -> 128..159 and 1072..1087 -> 160..175 share one offset */
    if( c >= 1040 && c <= 1087 )
        return c - 912;

    if( c >= 1088 && c <= 1103 )
        return c - 864;

    return -1;
}

int char_out( int c, int flush );

int htm2alt( int c )
{
    static enum { Normal, Amp, Dies, Digit } state;
    static int code, level;
    int n;
    if( level )
        return c;
    if( c < 0 && state == Digit )
        c = ';';
    if( ( n = c - '0' ) < 0 || n > 9 )
        n = -1;
    switch( state )
    {
    case Digit:
        if( c == ';' ) {
            state = Normal;
            c = ansi2alt( code );
        } else if( n >= 0 ) {
            code = code * 10 + n;
            c = -1;
        } else {
            puts("WARNING: \';\' expected");
            state = Normal;
            c = ansi2alt( code ); }
        break;
    case Dies:
        if( n >= 0 ) {
            code = n;
            state = Digit;
            c = -1;
        } else {
            puts("WARNING: digit omitted");
            state = Normal; }
        break;
    case Amp:
        if( ( state = ( c == '#' ) ? Dies : Normal ) == Normal ) {
            ++level;
            char_out( '&', 0 );
            --level;
        } else c = -1;
        break;
    default:
        if( c == '&' ) {
            state = Amp;
            code = 0;
            c = -1; }
    } /* switch */
    return c;
} /* htm2alt */

int char_in( void )
{
    if ( pos_in >= used_in ) {
        if( ( used_in = read( h1, buf_in, BUF_SZ ) ) <= 0 )
            return -1;
        pos_in = 0; }
    return buf_in[ pos_in++ ];
}

/*
 * Recode one character and append it to the output buffer.
 *   c     - character to output (in the source charset)
 *   flush - non-zero forces the buffer to be written to h2 first
 * The buffer is also written when it is full.  Writing happens BEFORE c is
 * processed, so a character passed together with flush != 0 is stored in
 * the (now empty) buffer and is not written by this call.
 * Returns the byte stored, or a negative value when htm2alt() produced no
 * output character for c.
 * Stops the program with STOP_WRITE_DEST when the data cannot be written.
 */
int char_out( int c, int flush )
{
    if( flush || pos_out >= BUF_SZ ) {
        int done = 0;
        int n;
        /* write() may accept only part of the buffer (e.g. when the disk
           fills up), so keep going until everything is out.  A return of 0
           for a non-empty request is treated as an error to avoid looping
           forever. */
        while( done < pos_out ) {
            n = (int)write( h2, buf_out + done, pos_out - done );
            if( n <= 0 )
                stop( STOP_WRITE_DEST, "\nERROR: cannot write, disk full?" );
            done += n;
        }
        pos_out = 0;
    }
    if( charset_in != charset_out || word_htm ) {
        /* source charset -> Alt -> (HTML fix) -> target charset */
        if( charset_in != ALT )
            c = sym2alt( c, table[ charset_in ] );
        if( word_htm )
            if( ( c = htm2alt( c ) ) < 0 )
                return c;       /* nothing to output for this character */
        if( charset_out != ALT )
            c = alt2sym( c, table[ charset_out ] );
    }
    return buf_out[ pos_out++ ] = ( unsigned char )c;
}

/* Conversion state and options shared by putx() and main(). */
int ch;             /* character currently being processed */
int pred_ch = BS;   /* character held back by putx() in "skip BS" mode;
                       BS means nothing is pending */
int pred_cr;        /* non-zero when the previous input character was CR */
int bsoff;          /* 'm' switch: remove c^H sequences */
int d2u = -1;       /* 0: '+' switch (add CR), 1: '-' switch (skip CR),
                       -1: leave line ends alone */
int over;           /* non-zero when the source is replaced in place */
int comma;          /* a separator is needed before the next item of the
                       parameter list printed by main() */

char *src;          /* source file name */
char *dest;         /* target file name */

/*
 * Output one character, optionally removing backspace overstrikes.
 *   ch - character to output
 * Without the 'm' switch the character goes straight to char_out().
 * With it, output runs one character behind: the previously held
 * character is emitted only if neither it nor the new one is a BS, which
 * turns "a^Hb" into "b".  The new character is then held in pred_ch.
 */
void putx( int ch )
{
    if( !bsoff ) {
        char_out( ch, 0 );
        return;
    }

    if( pred_ch != BS && ch != BS )
        char_out( pred_ch, 0 );
    pred_ch = ch;
}

/*
 * Parse the command line into the global option variables and the
 * src / dest file names.  Calls syntax() (which exits) on an unknown
 * switch or on more than two file names.
 */
static void parse_args( int argc, char **argv )
{
    int i;
    const char *opt;

    /* Index loop: also terminates when argc is 0. */
    for( i = 1; i < argc; i++ ) {
        if( argv[i][0] == '-' ) {
            /* every character after the leading '-' is one switch */
            for( opt = argv[i] + 1; *opt; opt++ ) {
                switch( *opt ) {
                case '+': d2u = 0; break;
                case '-': d2u = 1; break;
                case 'm': case 'M': bsoff = 1; break;
                case 'a': charset_in  = ALT;  break;
                case 'k': charset_in  = KOI;  break;
                case 'w': charset_in  = WIN;  break;
                case 'g': charset_in  = GOST; break;
                case 'b': charset_in  = BULG; break;
                case 'A': charset_out = ALT;  break;
                case 'K': charset_out = KOI;  break;
                case 'W': charset_out = WIN;  break;
                case 'G': charset_out = GOST; break;
                case 'B': charset_out = BULG; break;
                case '#': word_htm = 1; break;
                default: syntax();
                }
            }
        } else if( src == NULL ) {
            src = argv[i];
        } else if( dest == NULL ) {
            dest = argv[i];
        } else {
            syntax();
        }
    }
}

/*
 * Build the name of the temporary target used for in-place recoding:
 * a copy of src in which every character of the file name except '.' is
 * replaced by '!'.  The directory part, including the separator, is kept
 * so that the temporary file is created next to the source.
 *   buf, size - where to store the name
 */
static void build_temp_name( char *buf, size_t size )
{
    size_t len = strlen( src );
    char *p;

    /* Refuse names that do not fit instead of working on a truncated path. */
    if( len >= size )
        stop( STOP_CREAT_DEST, "\nERROR: Source name too long\n" );
    memcpy( buf, src, len + 1 );

    p = strrchr( buf, SLASH );
    p = ( p != NULL ) ? p + 1 : buf;
    for( ; *p; p++ )
        if( *p != '.' )
            *p = '!';
}

/* Print the list of conversions that are about to be applied. */
static void print_params( void )
{
    out( " (" );
    if( charset_in != charset_out ) {
        out( charset[charset_in] );
        out("->");
        out( charset[charset_out] );
        comma = 1;
    }
    if( d2u >= 0 ) {
        if( comma ) out(", ");
        out( d2u ? "skip" : "add" );
        out(" CR");
        comma = 1;
    }
    if( bsoff ) {
        if( comma ) out(", ");
        out("skip BS");
        comma = 1;
    }
    if( word_htm ) {
        if( comma ) out(", ");
        out("fix Word-95 HTML");
        comma = 1;
    }
    if( !comma ) out( "nothing to do??" );
    out( ")\n" );
}

/*
 * Copy the whole source to the target, applying the line-end conversion
 * selected by d2u.  Charset, BS and HTML handling happen inside putx() /
 * char_out().
 */
static void recode_stream( void )
{
    while( ( ch = char_in() ) >= 0 ) {
        switch( d2u ) {
            case 0:     /* unix -> dos */
                /* the break only leaves the switch: a Ctrl-D is dropped
                   wherever it occurs and copying continues */
                if( ch == UNIX_EOF ) break;
                if( ch == LF && !pred_cr ) putx(CR);
                putx(ch);
                pred_cr = ( ch == CR );
                break;
            case 1:     /* dos -> unix */
                /* likewise a Ctrl-Z is dropped and copying continues */
                if( ch == DOS_EOF ) break;
                /* a CR is held back until the next character shows
                   whether it belongs to a CR LF pair; a lone CR is kept.
                   NOTE(review): a CR that is the last byte of the file is
                   still pending when the loop ends and is not written. */
                if( ch != LF && pred_cr ) putx(CR);
                if( ( pred_cr = ( ch == CR ) ) == 0 )
                    putx( ch );
                break;
            default:
                putx(ch);
        } /* switch */
    } /* while */
}

/*
 * Program entry point: recode srcfile into destfile, or in place (through
 * a temporary file) when no destfile is given.
 * Returns 0 on success; failures exit through stop() with a STOP_* code.
 */
int main( int argc, char** argv )
{
    char destbuf[256];

/**  parsing command line  **/

    parse_args( argc, argv );
    if( src == NULL )
        syntax();

/**  build destination filename  **/

    if( dest == NULL ) {
        build_temp_name( destbuf, sizeof( destbuf ) );
        dest = destbuf;
        over = 1;
    }

/**  print recoding filenames  **/

    out( "(recode) " );
    out( src );
    if( !over ) {
        out(" -> ");
        out( dest );
    }

/**  open source file, create destination file  **/

    if(( h1 = open( src, O_RDONLY|O_BINARY ) ) < 0 )
        stop( STOP_OPEN_SRC, "\nERROR: Cannot open source\n" );

    if(( h2 = open( dest, O_WRONLY|O_BINARY|O_TRUNC|O_CREAT, CREAT_MODE )) < 0 )
        stop( STOP_CREAT_DEST, "\nERROR: Cannot create target\n" );

/**  print recoding params  **/

    print_params();

/**  do recoding  **/

    recode_stream();

/**  flush unsaved chars, close files and exit  **/

    if( bsoff && pred_ch != BS )
        putx( pred_ch );        /* emit the character still held by putx() */
    char_out( 0, 1 );           /* write out the buffer; the 0 is not written */
    /* a deferred write error can surface at close time */
    if( close( h2 ) < 0 )
        stop( STOP_WRITE_DEST, "\nERROR: cannot write, disk full?" );
#if !defined( __MSDOS__ ) && !defined( _WIN32 )
    {
        /* give the target the same permissions as the source */
        struct stat stbuf;
        if( fstat( h1, &stbuf ) >= 0 )
            chmod( dest, stbuf.st_mode );
    }
#endif
    close( h1 );
    if( over ) {
#if defined( __MSDOS__ ) || defined( _WIN32 )
        /* rename() does not replace an existing file here */
        unlink( src );
#endif
        /* elsewhere rename() replaces src itself, so the source is never
           missing even if the rename fails */
        if( rename( dest, src ) != 0 )
            stop( STOP_WRITE_DEST,
                "\nERROR: Cannot replace source, result left in temporary file\n" );
    }
    return 0;
}
