/*
From: cloister@milton.u.washington.edu (cloister bell)
Newsgroups: alt.sources
Subject: ps2txt version 2.1
Summary: postscript to ascii converter.
Keywords: postscript, ascii, convert
Date: 23 Feb 92 01:47:15 GMT
Organization: University of Washington
Followups-To: alt.sources.d
Archive-Name: ps2txt.c

due to popular request, here is my new version of this utility.  This version
supports regular postscript files via Iqbal Qazi's original algorithm, and
files generated by dvitps via my algorithm and the -dvi option.  If you're not
sure whether or not to use -dvi, try it both ways; the difference is marked.

improvements over the original:
-- you no longer need to use the filename "-" for stdin.  stdin is the default.
-- -dvi option added
-- cleaned up the code and control structures a *lot*
-- comments!  what a concept.

the following may be compiled with cc.  no special options are needed that i
know of.  -o is nice, however...

------------------------------ cut here ------------------------------
*/
/*  Jason Black, Feb 22 1992

Input can come from stdin, from '-' or from a file named on the command line:
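Flags: -dvi for use with dvitps PostScript files.
       -n   (ordinary PostScript only; not available when compiled with
            `orig` defined) end the output line at an input newline that
            follows a string.
Usage:
                ps2txt [-dvi] [-n] [-] [input_file.ps]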

ps2txt.c extracts strings from a PostScript file.  This version has been 
modified to correctly deal with the oddities of PostScript files generated by 
dvi-to-PostScript converters, so if you keep this and the original program 
around, you might want to rename one of them.

VERSION:  1.1	Fixed bug dealing with comments.
	  1.2	By popular demand:  put spaces between strings.
	  2.0   Fixed most problems of extraneous spaces and newlines
		between strings.
		added support for the ligatures ff, fi, fl, ffi, & ffl.
		re-designed the control structures, and otherwise cleaned
		up the code.
	  2.1   Put Qazi's original algorithm back in, and added -dvi flag to
	        use my more specific algorithm.  Also by popular demand.
		Re-wrote the command line parsing yet again.
		
History:  Modified Qazi's program on Feb. 18 1992 so that it could do dvitps
          files well.  Posted to alt.sources.  Got feedback requesting support
	  for regular PostScript files as well.  Retrieved Qazi's original
	  source code, and put it back in on Feb. 22 1992.  While the original
	  program concept and source code is from Iqbal Qazi, this version
	  has had enough modifications that I am claiming it as my own.  Qazi's
	  sections are well marked if you want to see them.

Comments/suggestions to cloister@u.washington.edu
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Shorthand for putchar() that keeps some lines from exceeding 80 chars.
   The expansion carries no trailing semicolon, so the macro can be used as
   an expression and "if (x) Putc(a); else Putc(b);" parses as one if/else. */
#define  Putc(x)  putchar(x)
#define  TRUE     1
#define  FALSE    0

/* The two string extractors; each reads `source` and writes to stdout. */
void dviparse(FILE *source);   /* selected by -dvi */
void psparse(FILE *source);    /* the default */

#ifndef	orig
/* Set to 1 by main() when -n is given.  psparse() then ends the output line
   at an input newline that follows a string. */
int	new_line	= 0;
#endif

/*
 * Program entry point: parse the command line, select the input stream and
 * hand it to the matching extractor.
 *
 * Arguments, in any order:
 *   -dvi    use dviparse() instead of psparse()
 *   -n      set new_line (only recognised when built without `orig`)
 *   -       read from stdin (which is also the default)
 *   other   taken as the input file name; if several are given, the last
 *           one is used
 *
 * Returns 0 once the input has been processed.  If an input file cannot be
 * opened, an error and a usage line are written to stderr and the program
 * exits with status 1.
 */
int main(int argc, char *argv[])
{
  int i;                  /* index of the argument being examined */
  int dvi_file = FALSE;   /* TRUE once -dvi has been seen */
  FILE *source = stdin;   /* input stream; stdin unless a file is named */

  for (i = 1; i < argc; i++)
    {
      if (strcmp(argv[i], "-dvi") == 0)
        {
          dvi_file = TRUE;
        }
      else if (strcmp(argv[i], "-") == 0)
        {
          /* explicit request for stdin: drop any file opened earlier */
          if (source != stdin)
            fclose(source);
          source = stdin;
        }
#ifndef	orig
      else if (strcmp(argv[i], "-n") == 0)
        {
          new_line = 1;
        }
#endif
      else
        {
          /* anything that is not a known flag must be the input file name */
          FILE *file = fopen(argv[i], "r");

          if (file == NULL)
            {
              fprintf(stderr, "ps2txt: error opening file %s\n", argv[i]);
#ifndef	orig
              fprintf(stderr, "usage:  ps2txt [-dvi] [-n] [-] [input_file.ps]\n");
#else
              fprintf(stderr, "usage:  ps2txt [-dvi] [-] [input_file.ps]\n");
#endif
              exit(1);
            }
          /* a later file name replaces an earlier one; do not leak it */
          if (source != stdin)
            fclose(source);
          source = file;
        }
    }

  if (dvi_file)
    dviparse(source);
  else
    psparse(source);

  if (source != stdin)
    fclose(source);
  return 0;
}

/* Write ch to stdout unless it is EOF, so that a read which ran off the end
   of the input is never echoed as a byte. */
static void dvi_put(int ch)
{
  if (ch != EOF)
    putchar(ch);
}

/* Discard input up to and including the next newline, or up to EOF.  Unlike
   a fixed-size fgets() this also swallows comment lines of any length. */
static void dvi_skip_comment(FILE *source)
{
  int ch;

  do
    ch = fgetc(source);
  while (ch != EOF && ch != '\n');
}

/* Handle the escape that follows a backslash inside a string and write its
   replacement to stdout.  Returns the last character read (possibly EOF). */
static int dvi_escape(FILE *source)
{
  int ch = fgetc(source);

  switch (ch)
    {
    case EOF:                                    /* backslash at end of input */
      break;
    case 't':
      putchar('\t');
      break;
    case 'n':
      putchar('\n');
      break;
    case '\\':                                   /* written as open quotes */
      putchar('"');
      break;
    case '0':
      ch = fgetc(source);
      if (ch != '1')
        {
          fputs("\\0", stdout);                  /* unknown code: echo it */
          dvi_put(ch);
          break;
        }
      ch = fgetc(source);
      switch (ch)                                /* \013 .. \017: ligatures */
        {
        case '3': fputs("ff", stdout); break;
        case '4': fputs("fi", stdout); break;
        case '5': fputs("fl", stdout); break;
        case '6': fputs("ffi", stdout); break;
        case '7': fputs("ffl", stdout); break;
        default:
          fputs("\\01", stdout);                 /* unknown code: echo it */
          dvi_put(ch);
          break;
        }
      break;
    case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      putchar('\\');                             /* unknown code: echo it */
      putchar(ch);
      break;
    default:                                     /* \( \) and anything else */
      putchar(ch);
      break;
    }
  return ch;
}

/*
 * Extract the parenthesised strings from `source` and write them to stdout.
 * This is the extractor main() selects for -dvi.
 *
 * Inside parentheses, characters are copied to stdout and backslash escapes
 * are handled by dvi_escape().  Outside parentheses nothing is copied; '%'
 * discards the rest of the line, and the character that immediately precedes
 * the next '(' decides what separates the two strings:
 *   a c d e f g h i j k x   a space
 *   b                       a space, unless a '-' directly followed the
 *                           preceding ')'
 *   y                       a newline
 *   anything else           nothing
 *
 * A newline in the input is dropped and the character after it is processed
 * in its place, so only the second of two consecutive newlines is seen by
 * the code below.
 */
void dviparse(FILE *source)
{
  int ch;               /* current character */
  int prev_ch = '\n';   /* character processed on the previous iteration */
  int in_paren = 0;     /* non-zero while inside a string */
  int b_flag = 0;       /* non-zero while the last significant char is ')' */
  int b_space = 1;      /* non-zero if a 'b' before '(' produces a space */

  while ((ch = fgetc(source)) != EOF)
    {
      if (ch == '\n')
        {
          ch = fgetc(source);
          if (ch == EOF)      /* input ended on a newline: nothing to process */
            break;
        }

      if (in_paren)
        {
          switch (ch)
            {
            case ')':
              in_paren = 0;
              b_flag = 1;
              break;
            case '\n':
              putchar(' ');
              break;
            case '\\':
              ch = dvi_escape(source);
              break;
            default:
              putchar(ch);
              break;
            }
        }
      else
        {
          switch (ch)
            {
            case '%':
              dvi_skip_comment(source);
              break;
            case '\n':
              break;
            case '-':
              if (b_flag)
                {
                  b_flag = 0;     /* the previous char is no longer ')' */
                  b_space = 0;    /* '-' right after ')': a following 'b' */
                                  /* gets no space                        */
                }
              break;
            case '(':
              in_paren = 1;
              switch (prev_ch)
                {
                case 'y':
                  putchar('\n');
                  break;
                case 'b':
                  if (b_space)
                    putchar(' ');
                  break;
                case 'a': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i':
                case 'j': case 'k': case 'x':
                  putchar(' ');
                  break;
                default:          /* includes l m n o q r s t: no separator */
                  break;
                }
              b_space = 1;        /* back to the default for the next string */
              break;
            default:
              b_flag = 0;
              break;
            }
        }
      prev_ch = ch;   /* consulted when the next character is '(' */
    }
}

/* Discard input up to and including the next newline, or up to EOF.  Unlike
   a fixed-size fgets() this also swallows comment lines of any length. */
static void ps_skip_comment(FILE *source)
{
  int ch;

  do
    ch = fgetc(source);
  while (ch != EOF && ch != '\n');
}

#ifndef	orig
/* Handle an octal escape whose first digit, `first`, has already been read.
   Up to two further octal digits are consumed; the first character that is
   not an octal digit is pushed back so that it is processed normally (it may
   be the ')' that closes the string).  Codes 013..017 are written as the
   ligatures ff, fi, fl, ffi, ffl, 031 as "ss" and 0177 as "~"; for any other
   code the digits themselves are written. */
static void ps_octal(int first, FILE *source)
{
  char digits[4] = { '\0', '\0', '\0', '\0' };
  int count = 0;
  int code = 0;
  int ch = first;

  for (;;)
    {
      digits[count++] = (char)ch;
      code = code * 8 + (ch - '0');
      if (count == 3)
        break;
      ch = fgetc(source);
      if (ch < '0' || ch > '7')
        {
          if (ch != EOF)
            ungetc(ch, source);
          break;
        }
    }

  switch (code)
    {
    case 013:  fputs("ff", stdout);  break;
    case 014:  fputs("fi", stdout);  break;
    case 015:  fputs("fl", stdout);  break;
    case 016:  fputs("ffi", stdout); break;
    case 017:  fputs("ffl", stdout); break;
    case 031:  fputs("ss", stdout);  break;
    case 0177: fputs("~", stdout);   break;
    default:   fputs(digits, stdout); break;
    }
}
#endif

/* Handle the escape that follows a backslash inside a string and write its
   replacement to stdout. */
static void ps_escape(FILE *source)
{
  int ch = fgetc(source);

  switch (ch)
    {
    case EOF:                         /* backslash at end of input */
      break;
    case 't':
      putchar('\t');
      break;
    case 'n':
      putchar('\n');
      break;
#ifndef	orig
    case '\n':                        /* backslash-newline: omit both */
      break;
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      ps_octal(ch, source);
      break;
#else
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      putchar('\\');                  /* echo the escape unchanged */
      putchar(ch);
      break;
#endif
    default:                          /* \( \) \\ and anything else */
      putchar(ch);
      break;
    }
}

/*
 * Extract the parenthesised strings from `source` and write them to stdout.
 * This is the default extractor (Iqbal Qazi's algorithm, with the additions
 * that are compiled in unless `orig` is defined).
 *
 * Text inside parentheses is copied; nested parentheses are copied too, and
 * the ')' that closes a string is written as a space.  Outside strings '%'
 * discards the rest of the line.  An input newline is never copied; it ends
 * the output line only if a ')' has been seen since the last line end and,
 * in the non-`orig` build, only when new_line is set.
 */
void psparse(FILE *source)
{
  int ch;         /* current character */
  int para = 0;   /* parenthesis nesting depth; > 0 while inside a string */
  int last = 0;   /* 1 if a ')' was seen since the last emitted line end */

  while ((ch = fgetc(source)) != EOF)
    {
      switch (ch)
        {
        case '%':
          if (para == 0)
            ps_skip_comment(source);
          else
            putchar(ch);
#ifndef	orig
          break;
#endif
          /* with `orig` defined, control falls through to the newline case */
        case '\n':
#ifndef	orig
          if (!new_line)
            break;
#endif
          if (last == 1)
            {
              puts("");
              last = 0;
            }
          break;
        case '(':
          if (para > 0)
            putchar(ch);
          para++;
          break;
        case ')':
          if (para > 1)
            putchar(ch);
          else
            putchar(' ');
          /* a stray ')' must not push the depth below zero, or the next
             string would be treated as being outside parentheses */
          if (para > 0)
            para--;
          last = 1;
          break;
        case '\\':
          if (para > 0)
            ps_escape(source);
          break;
        default:
          if (para > 0)
            putchar(ch);
          break;
        }
    }
}
/*
------------------------------ cut here ------------------------------
*/
