// ------------------------------- //
// -------- Start of File -------- //
// ------------------------------- //
// ----------------------------------------------------------- // 
// C++ Source Code File Name: txfilter.cpp
// Compiler Used: MSVC, BCC32, GCC, HPUX aCC, SOLARIS CC
// Produced By: glNET Software
// File Creation Date: 09/17/1997
// Date Last Modified: 06/12/2001
// Copyright (c) 2001 glNET Software
// ----------------------------------------------------------- // 
// ------------- Program Description and Details ------------- // 
// ----------------------------------------------------------- // 
/*
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
 
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  
USA

Text file filter program used to filter out unwanted characters
from ASCII text files. This program can also be used to convert
DOS text files to a UNIX format or UNIX text files to a DOS
format.
*/
// ----------------------------------------------------------- // 
#include <iostream.h>
#include <fstream.h>
#include <string.h>
#include <stdlib.h> 
#include <stdio.h>
#include <ctype.h>
#include "ustring.h"
#include "gxlist.h"
#include "futils.h"

#ifdef __MSVC_DEBUG__
#include "leaktest.h"
#endif

// Program identity: the name printed in the usage line and the
// release number printed in the help banner
const char *ProgramName = "txfilter";
const double TXFilterVersionNumber = 4000.101;

// Fixed program limits and character codes
const int MAX_LINE = 1024; // Size of the per-line working buffers
const int txLF = 0x0A;     // ASCII line feed
const int txCR = 0x0D;     // ASCII carriage return

// Bookkeeping for the file currently being worked on
char *open_file = 0;       // Name of the file being processed
unsigned num_files = 0;    // Count of files processed so far

// Line ending written after each output line
int UNIX_TEXT_FORMAT = 1;  // Non-zero: terminate lines with LF
int DOS_TEXT_FORMAT = 0;   // Non-zero: terminate lines with CR/LF

// Where the result goes
int write_to_stdio = 1;    // Non-zero: print to stdout, zero: rewrite file

// Character filtering options
int filter_text = 1;       // Non-zero: apply the character set filter
int printable_chars = 1;   // Keep printable characters
int alpha_chars = 0;       // Keep alphabetic characters
int alpha_num_chars = 0;   // Keep alphanumeric characters
int numeric_chars = 0;     // Keep numeric characters
int filter_spaces = 0;     // Non-zero: also remove spaces
int filter_tabs = 0;       // Non-zero: also remove tabs

// Forward declarations, listed in the order the functions are defined

// Print the usage screen and exit
void HelpMessage(const char *program_name, const double version_number);

// Apply one "-x" command line switch; returns 0 for an unknown switch
int ProcessArgs(char *arg);

// Remove every occurrence of character c from a line buffer
void FilterChar(char OutputBuffer[MAX_LINE], char c);

// Read, filter and write out one already opened text file
int ConvertTextFile(fstream &iofile);

void HelpMessage(const char *program_name, const double version_number)
// Print the version banner, the usage line and the list of switches
// to stdout, then end the program with exit status 0. This function
// does not return to its caller.
{
  // One entry per line of the switch summary, in display order
  static const char *switch_help[] = {
    "Switches:  -?      = Display this help message.",
    "           -A      = Output alphabetic characters only.",
    "           -d      = Output text file in DOS CR/LF format.",
    "           -D      = Do not insert line feeds.",
    "           -M      = Output alphanumeric characters only.",
    "           -n      = No text filtering (defaults to filtered).",
    "           -N      = Output numeric characters only.",
    "           -o      = Write output to the existing file (defaults to stdout).",
    "           -P      = Output printable characters only (default).",
    "           -t      = Filter tabs.",
    "           -s      = Filter spaces.",
    "           -u      = Output text file in UNIX format (default)."
  };
  const int num_help_lines = sizeof(switch_help) / sizeof(switch_help[0]);

  // Render the version with three decimal places
  char version_text[255];
  sprintf(version_text, "%.3f", version_number);

  cout << endl;
  cout << "ASCII Text file filter program version " << version_text << endl;
  cout << "Usage: " << program_name << " [switches] infile.txt " << endl;

  for(int line = 0; line < num_help_lines; line++)
    cout << switch_help[line] << endl;

  cout << endl;
  exit(0);
}

int ProcessArgs(char *arg)
// Apply a single command line switch. Only the character following
// the leading '-' is examined. Returns 1 after updating the matching
// program globals, or 0 after reporting an unrecognized switch on
// stderr. A recognized switch has its first character overwritten
// with a null so the argument reads as an empty string afterwards.
{
  const char option = arg[1];

  switch(option) {
    case '?' :
      HelpMessage(ProgramName, TXFilterVersionNumber);
      break;

    // Line ending selection: 'd' = DOS CR/LF, 'u' = UNIX LF,
    // 'D' = neither flag set
    case 'd' :
    case 'D' :
    case 'u' :
      DOS_TEXT_FORMAT = (option == 'd') ? 1 : 0;
      UNIX_TEXT_FORMAT = (option == 'u') ? 1 : 0;
      break;

    case 'n' :
      filter_text = 0;
      break;

    case 'o' :
      write_to_stdio = 0; // Overwrite existing file
      break;

    case 's' :
      filter_spaces = 1;
      break;

    case 't' :
      filter_tabs = 1;
      break;

    // Character set selection: exactly one of the four flags is
    // turned on and the other three are turned off
    case 'P' :
    case 'A' :
    case 'M' :
    case 'N' :
      printable_chars = (option == 'P') ? 1 : 0;
      alpha_chars = (option == 'A') ? 1 : 0;
      alpha_num_chars = (option == 'M') ? 1 : 0;
      numeric_chars = (option == 'N') ? 1 : 0;
      break;

    default:
      cerr << endl;
      cerr << "Unknown switch " << arg << endl;
      cerr << "Exiting..." << endl;
      cerr << endl;
      return 0;
  }

  arg[0] = '\0'; // Mark the switch as consumed
  return 1;      // The switch was valid
}

void FilterChar(char OutputBuffer[MAX_LINE], char c)
// Remove every occurrence of character c from the text stored in
// OutputBuffer. The remaining characters are moved down in place,
// keeping their order, and the rest of the buffer up to MAX_LINE is
// filled with nulls.
//
// Only the text before the first null (or the first MAX_LINE bytes
// when there is no null) is examined, so the function never reads or
// writes outside the MAX_LINE bytes of the buffer and needs no
// temporary copy of the line.
{
  int src;     // Next character to examine
  int dst = 0; // Next free position for a character that is kept

  for(src = 0; src < MAX_LINE && OutputBuffer[src] != '\0'; src++)
    if(OutputBuffer[src] != c) OutputBuffer[dst++] = OutputBuffer[src];

  // Clear whatever is left behind the compacted text
  while(dst < MAX_LINE) OutputBuffer[dst++] = '\0';
}

int ConvertTextFile(fstream &iofile)
{
  char LineBuffer[MAX_LINE];    // Unfiltered line with no line feeds
  char OutputBuffer[MAX_LINE];  // Filtered line of text
  char rawLineBuffer[MAX_LINE]; // Lines read from the file
  UString LineData;
  UString appendLineBuffer;
  gxList<UString> list;
  int i, j = 0;
  
  while(!iofile.eof()) { // Read in the file line by line

    // Clear the buffers
    for(i = 0; i < MAX_LINE; i++) LineBuffer[i] = '\0';
    for(i = 0; i < MAX_LINE; i++) rawLineBuffer[i] = '\0';
    for(i = 0; i < MAX_LINE; i++) OutputBuffer[i] = '\0';
    LineData.DeleteAt(0, LineData.length());
    
    iofile.getline(rawLineBuffer, MAX_LINE);
    
    // Strip off the end of line sequence
    for(i = 0, j = 0; i < MAX_LINE; i++) {
      if((rawLineBuffer[i] == '\r') || (rawLineBuffer[i] == '\n')) break;
      LineBuffer[j++] = rawLineBuffer[i];
    }

    // Filter each line of text allowing tabs and spaces
    if(filter_text) { // Output specified character sets only
      for(i = 0, j = 0; i < MAX_LINE; i++) {
      if(printable_chars)
	if(isgraph(LineBuffer[i]) || LineBuffer[i] == ' ' ||
	   LineBuffer[i] == '\t')
	  OutputBuffer[j++] = LineBuffer[i];
	
      if(alpha_chars)
	if(isalpha(LineBuffer[i]) || LineBuffer[i] == ' '  ||
	   LineBuffer[i] == '\t')
	  OutputBuffer[j++] = LineBuffer[i];
      
      if(alpha_num_chars)
	if(isalnum(LineBuffer[i]) || LineBuffer[i] == ' '  ||
	   LineBuffer[i] == '\t')
	  OutputBuffer[j++] = LineBuffer[i];
      
      if(numeric_chars)
	if(isdigit(LineBuffer[i]) || LineBuffer[i] == ' ' ||
	   LineBuffer[i] == '\t')
	  OutputBuffer[j++] = LineBuffer[i];
      }

      // Filter tabs or 
      if(filter_tabs) FilterChar(OutputBuffer, '\t');
      if(filter_spaces) FilterChar(OutputBuffer, ' ');
      LineData = OutputBuffer;
    }
    else
      LineData = LineBuffer;

    // Get rid of EOF marker
    if((strcmp(LineData.c_str(), "\0") == 0) && (iofile.eof())) break;

    list.Add(LineData); // Store the file line by line 
  } 

  iofile.close();

  char line_feed = (char)txLF;
  char carriage_return = (char)txCR;

  if(write_to_stdio) {
    gxListNode<UString> *list_ptr = list.GetHead();
    while(list_ptr) {
      cout << list_ptr->data.c_str();
      if(DOS_TEXT_FORMAT) {
	cout.write(&carriage_return, 1);
	cout.write(&line_feed, 1);
      }
      if(UNIX_TEXT_FORMAT)
	cout.write(&line_feed, 1);
      list_ptr = list_ptr->next; 
    }
  }
  else {
    iofile.close(); 
#if defined(__DOS__) || defined(__WIN32__)
    // In DOS/Windows there are two file types, text and binary
    fstream iofile(open_file, ios::out|ios::trunc|ios::binary);
#else 
    // In UNIX there is only one file type
    fstream iofile(open_file, ios::out|ios::in|ios::trunc);
#endif
    
    if(!iofile) {
      cerr << endl;
      cerr << "Cannot write to: " << open_file << endl;
      cerr << "Exiting..." << endl;
      cerr << endl;
      exit(0);
    }

    gxListNode<UString> *list_ptr = list.GetHead();
    while(list_ptr) {
      iofile.write(list_ptr->data.c_str(), list_ptr->data.length());
      if(DOS_TEXT_FORMAT) {
	iofile.write(&carriage_return, 1);
	iofile.write(&line_feed, 1);
      }
      if(UNIX_TEXT_FORMAT)
	iofile.write(&line_feed, 1);
    
      list_ptr = list_ptr->next; 
    }
    
    iofile.close();
  }
  
  list.ClearList();
  return 1;
}

// Program's main thread of execution.
// ----------------------------------------------------------- 
int main(int argc,     // Number of strings in array argv.
         char *argv[]) // Array of command-line argument strings.
// Walk the command line from left to right. An argument starting
// with '-' is handed to ProcessArgs; any other argument is treated as
// a file name and converted at once, so a switch only affects the
// files that follow it. Arguments for which futils_isfile() returns
// zero are skipped without a message. Empty arguments are skipped.
// Every path out of this function returns 0.
//
// NOTE: None of the MSVC compilers will expand wildcard characters
// used in command-line arguments unless linked with the setargv.obj
// library. All the UNIX compilers will expand wildcard characters
// by default.
{
#ifdef __MSVC_DEBUG__
  InitLeakTest();
#endif

  // If no argument is given print the usage message to the screen
  if(argc < 2) {
    HelpMessage(ProgramName, TXFilterVersionNumber);
    return(0);
  }

  // Process command line arguments and files
  for(int narg = 1; narg < argc; narg++) {
    char *arg = argv[narg];

    // The index advances for every argument, including empty ones,
    // so an empty string on the command line cannot stall the loop
    if(arg[0] == '\0') continue;

    if(arg[0] == '-') { // Look for command line arguments
      if(!ProcessArgs(arg)) return 0; // Exit if argument is not valid
      continue;
    }

    if(!futils_isfile((const char *)arg)) continue;

    open_file = arg; // Update the open file name pointer
#if defined(__DOS__) || defined(__WIN32__)
    // In MS-DOS there are two file types, text and binary
#ifdef __BCC32__
    // The BCC 32 ios class does not have an enumeration for "nocreate"
    fstream iofile(open_file, ios::in|ios::binary);
#else
    fstream iofile(open_file, ios::in|ios::nocreate|ios::binary);
#endif // __BCC32__

#else
    // In UNIX there is only one file type
    fstream iofile(open_file, ios::in|ios::nocreate);
#endif
    if(!iofile) {
      cerr << endl;
      cerr << "Cannot open file: " << open_file << endl;
      cerr << "Exiting..." << endl;
      cerr << endl;
      return 0;
    }

    num_files++;
    if(!write_to_stdio) cout << "Processing: " << open_file << endl;
    ConvertTextFile(iofile);
  }

  if(num_files == 0) {
    cerr << endl;
    cerr << "You must enter a file name." << endl;
    cerr << "Exiting..." << endl;
    cerr << endl;
    return 0;
  }

  // The summary is only printed when stdout is not carrying file data
  if(!write_to_stdio) {
    cout << endl;
    cout << "Processed " << num_files << " files." << endl;
    cout << endl;
  }

  return 0;
}
// ----------------------------------------------------------- //
// ------------------------------- //
// --------- End of File --------- //
// ------------------------------- //
