/* Conversion of files between different charsets and usages.
   Copyright (C) 1990 Free Software Foundation, Inc.
   Francois Pinard <pinard@iro.umontreal.ca>, 1990.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/* Global declarations and definitions.  */

#include <stdio.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef USG
#include <string.h>
#else
#include <strings.h>
#define strchr index
#define strrchr rindex
#endif

#include "common.h"
#include "steps.h"

#ifdef MSDOS

#include <dir.h>
#define unlink dummy1
#include <io.h>
#undef unlink
#include <fcntl.h>

#endif

/* Standard declarations.  */

#ifdef STDC_HEADERS
#include <stdlib.h>
#endif

/* Some systems do not define EXIT_*, even with STDC_HEADERS.  */
#ifndef EXIT_SUCCESS
#define EXIT_SUCCESS 0
#endif
#ifndef EXIT_FAILURE
#define EXIT_FAILURE 1
#endif

#ifdef __STDC__

extern int	fstat (int, struct stat *);
extern int	getopt (int, const char **, const char *);
#ifndef OS2EMX
extern int	link (const char *, const char *);
#endif
extern int	unlink (const char *);
extern void	perror (const char *);
#ifndef MSDOS
extern int	utime (const char *, time_t[2]);
#endif

#endif /* __STDC__ */

/* Prototypes specific to the program.  */

#ifndef __STDC__
void print_version ();
void print_copyright ();
#else
void print_version (void);
void print_copyright (void);
#endif

/* In the `BEFORE:AFTER' parameter, there is a default supplied whenever
   `:AFTER' or `BEFORE:' are used.  */

#ifdef MSDOS
#define DEFAULT_CODE "ibmpc"
#else
#define DEFAULT_CODE "latin1"
#endif

/* OS/2 additions (lebeld@jsp.umontreal.ca) */
#ifdef OS2EMX
#ifndef HAVE_RENAME
#define HAVE_RENAME 1
#endif
#endif

/* It is expected that no conversion sequence will need more than this
   number of steps.  */

#define MAX_CONVERSIONS 10

/* Program name.  */

const char *program_name;

/* If the recoding yields some problems in reversibility, the replacement is
   normally not completed and the file is left unrecoded.  The following
   option forces the replacement even in the case the recoding is not
   reversible.  But if recode is used as a mere filter, there is no file
   replacement and this option is then irrelevant.  */

int force_option = 0;

/* By selecting the following option, the program will echo to stderr the
   sequence of elementary recoding steps which will be taken to effect
   the requested recoding.  */

int verbose_option = 0;

/* When a file is recoded over itself, precautions are taken to move the
   timestamps of the original file into the recoded file, so as to make the
   recoding as transparent as possible to `make' and other tools.
   However, selecting the following option inhibits the timestamps handling,
   thus effectively `touching' the file.  */

int touch_option = 0;

/* In `texte' charset, some countries use double quotes to mark diaeresis,
   while other countries prefer colons.  The following variable contains the
   diaeresis character for `texte' charset.  Nominally set to a double
   quote, it can be forced to a colon by an option on recode command.  */

char diaeresis_char = '"';

/* For `latex' charset, it is often convenient to convert the diacritics
   only, while letting other LaTeX code using backslashes unconverted.
   In the other charset, one can edit text as well as LaTeX directives.  */

int diacritics_only = 0;

/* Tells how the passes of a multi-step recoding are interconnected.  The
   strategy is set by the `-i', `-o' and `-p' options; when it is still
   undecided after option decoding, main selects intermediate files if FILE
   arguments were given and pipes otherwise.  */

enum sequence_strategy
{
  STRATEGY_UNDECIDED,		/* sequencing strategy is undecided yet */
  SEQUENCE_WITH_FILES,		/* do not fork, use intermediate files */
  SEQUENCE_WITH_POPEN,		/* use `popen(3)' to fork processes */
  SEQUENCE_WITH_PIPE		/* fork processes connected with `pipe(2)' */
};

/* Strategy consulted by execute_sequence when more than one step is
   needed.  */
enum sequence_strategy sequence_strategy = STRATEGY_UNDECIDED;


/* Sequence manipulation routines.  */

TYPE_of_step
  *sequence[MAX_CONVERSIONS];	/* sequence of conversions */
int length_of_sequence;		/* length of conversion sequence */

/*-------------------------------------------------------------------------.
| Look up KEYWORD, of which only the first LENGTH characters are	   |
| significant, in code_keywords.  An entry matches when either its `s' or  |
| its `l' name (presumably the short and long spellings) has exactly	   |
| LENGTH characters and they all agree.  Return the index of the first	   |
| matching entry, or -1 if there is none.  When DEFAULT_CODE is defined,   |
| a LENGTH of zero stands for that default code.			   |
`-------------------------------------------------------------------------*/

int
code_index (const char *keyword, int length)
{
  int candidate;		/* entry of code_keywords being tried */

#ifdef DEFAULT_CODE
  /* An empty keyword selects the default code.  */

  if (length == 0)
    {
      keyword = DEFAULT_CODE;
      length = strlen (DEFAULT_CODE);
    }
#endif /* DEFAULT_CODE */

  /* Try each entry in turn, the `s' name before the `l' name.  */

  candidate = 0;
  while (candidate < NUMBER_OF_KEYWORDS)
    {
      if (strlen (code_keywords[candidate].s) == length
          && strncmp (keyword, code_keywords[candidate].s, length) == 0)
        return candidate;

      if (strlen (code_keywords[candidate].l) == length
          && strncmp (keyword, code_keywords[candidate].l, length) == 0)
        return candidate;

      candidate++;
    }

  return -1;
}

/*-------------------------------------------------------------------------.
| Find the cheapest chain of single_steps leading from START to GOAL and   |
| store it into `sequence', setting `length_of_sequence'.  Steps having no |
| routine are followed but not recorded.  The program exits with a	   |
| diagnostic if GOAL cannot be reached from START, or if more than	   |
| MAX_CONVERSIONS steps would have to be recorded.			   |
`-------------------------------------------------------------------------*/

void
find_sequence (TYPE_code start, TYPE_code goal)
{
  /* For each code, the first step of the cheapest known route to GOAL,
     together with the total cost of that route.  */

  struct
    {
      TYPE_of_step *step;
      int cost;
    }
  critical_tree[NUMBER_OF_KEYWORDS];

  TYPE_of_step *candidate;	/* step being considered for the tree */
  TYPE_of_step *chosen;		/* step selected while walking the route */
  int node;			/* code currently looked at */
  int through_cost;		/* cost of reaching GOAL through candidate */
  int changed;			/* != 0 if the last sweep improved the tree */

  /* Initially, only GOAL itself is known to reach GOAL.  */

  node = 0;
  while (node < NUMBER_OF_KEYWORDS)
    {
      critical_tree[node].step = NULL;
      critical_tree[node].cost = NOWAY;
      node++;
    }
  critical_tree[(int) goal].cost = ALREADY;

  /* Sweep over all single steps, again and again, until a whole sweep
     brings no improvement.  */

  do
    {
      changed = 0;
      for (candidate = single_steps;
           candidate < single_steps + NUMBER_OF_SINGLE_STEPS;
           candidate++)
        {
          through_cost = critical_tree[(int) candidate->code_after].cost;
          if (through_cost == NOWAY)
            continue;

          through_cost += candidate->conversion_cost;
          node = (int) candidate->code_before;
          if (through_cost < critical_tree[node].cost)
            {
              critical_tree[node].step = candidate;
              critical_tree[node].cost = through_cost;
              changed = 1;
            }
        }
    }
  while (changed);

  if (critical_tree[(int) start].cost == NOWAY)
    {
      fprintf (stderr, "recode: no way to convert from %s to %s.\n",
               code_keywords[(int) start].l, code_keywords[(int) goal].l);
      exit (EXIT_FAILURE);
    }

  /* Walk the tree from START to GOAL, recording the steps which really
     have some recoding to do.  */

  length_of_sequence = 0;
  node = (int) start;
  while (node != (int) goal)
    {
      chosen = critical_tree[node].step;
      if (chosen->routine != NULL)
        {
          if (length_of_sequence >= MAX_CONVERSIONS)
            {
              fprintf (stderr, "recode: conversion is too complex.\n");
              exit (EXIT_FAILURE);
            }
          sequence[length_of_sequence++] = chosen;
        }
      node = (int) chosen->code_after;
    }
}

/* Open NAME with MODE for one pass, or report the failure and exit.  */

static FILE *
open_pass_file (const char *name, const char *mode)
{
  FILE *file = fopen (name, mode);

  if (file == NULL)
    {
      perror (name);
      exit (EXIT_FAILURE);
    }
  return file;
}

/* Close the pass output FILE known as NAME, exiting with a diagnostic if
   some data could not be written.  */

static void
close_pass_output (FILE *file, const char *name)
{
  int failed = ferror (file);

  if (fclose (file) != 0)
    failed = 1;
  if (failed)
    {
      fprintf (stderr, "recode: write error on %s\n", name);
      exit (EXIT_FAILURE);
    }
}

/*-------------------------------------------------------------------.
| Execute the conversion sequence, using several passes with two     |
| alternating intermediate files.  This routine assumes at least one |
| needed recoding step.  A NULL INPUT_NAME means stdin, a NULL	     |
| OUTPUT_NAME means stdout.  Any file which cannot be opened, or any |
| named output which cannot be fully written, is reported and makes  |
| the program exit with EXIT_FAILURE.				     |
`-------------------------------------------------------------------*/

void
execute_pass_sequence (const char *input_name, const char *output_name)
{
  int sequence_index;		/* index into sequence */
  int is_first;			/* != 0 while executing the first step */
  int is_last;			/* != 0 while executing the last step */
  char *temp_input_name;	/* step input file name */
  char *temp_output_name;	/* step output file name */
#ifdef MSDOS
  char temp_name_1[13];		/* one temporary file name */
  char temp_name_2[13];		/* another temporary file name */
#endif
  FILE *input_file;		/* input file to recoding step */
  FILE *output_file;		/* output file from recoding step */
  char *exchange_temp;		/* for exchanging temporary names */

  /* Choose names for intermediate files.  */

#ifdef MSDOS
  strcpy (temp_name_1, "recodex1.tmp");
  strcpy (temp_name_2, "recodex2.tmp");
  temp_input_name = temp_name_1;
  temp_output_name = temp_name_2;
#else
#ifdef OS2EMX
  /* The temporary name prefix must not be longer than five characters
     on EMX/gcc (as per the documentation).  */
  temp_input_name = tempnam (NULL, "reci.");
  temp_output_name = tempnam (NULL, "reco.");
#else /* Any other platform */
  /* NOTE(review): both names are requested before either file exists;
     this relies on tempnam never handing out the same name twice.  */
  temp_input_name = tempnam (NULL, "recode.");
  temp_output_name = tempnam (NULL, "recode.");
#endif /* OS2EMX */
  if (temp_input_name == NULL || temp_output_name == NULL)
    {
      perror ("tempnam");
      exit (EXIT_FAILURE);
    }
#endif /* MSDOS */

  /* Execute one pass for each step of the sequence.  */

  for (sequence_index = 0;
       sequence_index < length_of_sequence;
       sequence_index++)
    {
      is_first = sequence_index == 0;
      is_last = sequence_index == length_of_sequence - 1;

      /* Select the input file for this step: the real input for the
         first step, the previous step's temporary output otherwise.  */

      if (!is_first)
        input_file = open_pass_file (temp_input_name, "r");
      else if (input_name)
        input_file = open_pass_file (input_name, "r");
      else
        input_file = stdin;

      /* Select the output file for this step: the real output for the
         last step, a temporary file otherwise.  */

      if (!is_last)
        output_file = open_pass_file (temp_output_name, "w");
      else if (output_name)
        output_file = open_pass_file (output_name, "w");
      else
        output_file = stdout;

      /* Execute one recoding step.  */

      (*sequence[sequence_index]->routine) (input_file, output_file);

      /* Close the input file, unlink it if it was temporary.  The
         standard input is never closed.  */

      if (!is_first)
        {
          fclose (input_file);
          unlink (temp_input_name);
        }
      else if (input_name)
        fclose (input_file);

      /* Close the output file, exchange names for subsequent step.  The
         standard output is never closed.  */

      if (!is_last)
        {
          close_pass_output (output_file, temp_output_name);

          exchange_temp = temp_input_name;
          temp_input_name = temp_output_name;
          temp_output_name = exchange_temp;
        }
      else if (output_name)
        close_pass_output (output_file, output_name);
    }

#ifndef MSDOS
  /* The names returned by tempnam were allocated for us.  */

  free (temp_input_name);
  free (temp_output_name);
#endif
}

/*-------------------------------------------------------------------------.
| Execute the conversion sequence, using a chain of invocations of the	   |
| program through popen.  This routine assumes that more than one recoding |
| step is needed.							   |
`-------------------------------------------------------------------------*/

#ifdef HAVE_POPEN

void
execute_popen_sequence (const char *input_name, const char *output_name)
{
  FILE *input_file;		/* input file to recoding step */
  FILE *output_file;		/* output file from recoding step */
  char popen_command[80];	/* to receive command string */
  int status;			/* status to be asserted */

  /* Construct a `recode' command for all recoding steps but the first.  */

  sprintf (popen_command, "%s -o %s %s:%s %s%s",
	   program_name,
	   diaeresis_char == ':' ? " -c" : "",
	   code_keywords[(int) sequence[1]->code_before].l,
	   code_keywords[(int) sequence[length_of_sequence-1]->code_after].l,
	   output_name ? "> " : "",
	   output_name ? output_name : "");

  /* Execute the first recoding step.  */

  if (!input_name)
    input_file = stdin;
  else if ((input_file = fopen (input_name, "r")) == NULL)
    {
      perror (input_name);
      exit (EXIT_FAILURE);
    }

  if ((output_file = popen (popen_command, "w")) == NULL)
    {
      perror (popen_command);
      exit (EXIT_FAILURE);
    }

  (*sequence[0]->routine) (input_file, output_file);

  if (input_name)
    fclose (input_file);
  status = pclose (output_file);
  assert (status == 0);
}

#endif /* HAVE_POPEN */

/*-------------------------------------------------------------------------.
| Execute the conversion sequence, forking the program many times for all  |
| elementary steps, interconnecting them with pipes.  This routine assumes |
| at least one recoding step is needed.					   |
`-------------------------------------------------------------------------*/

#ifndef HAVE_DUP2
#undef HAVE_PIPE
#endif

#ifdef HAVE_PIPE

/* INPUT_NAME is the file to read, or NULL for stdin; OUTPUT_NAME is the
   file to write, or NULL for stdout.  One child is forked for each step
   but the first, from the last step backwards; the first step runs in
   this process once the whole chain is set up.  Any failure while opening
   files or creating the chain is reported and makes the program exit with
   EXIT_FAILURE.  */

void
execute_pipe_sequence (const char *input_name, const char *output_name)
{
  int sequence_index;		/* index into sequence */
  FILE *input_file;		/* input file to recoding step */
  FILE *output_file;		/* output file from recoding step */
  int pipe_pair[2];		/* pair of file descriptors for a pipe */
  int child_process;		/* child process number, zero if child */
  int output_redirected;	/* != 0 once output_file feeds a pipe */

  /* Prepare the final output file.  */

  if (!output_name)
    output_file = stdout;
  else if ((output_file = fopen (output_name, "w")) == NULL)
    {
      perror (output_name);
      exit (EXIT_FAILURE);
    }

  /* Create all subprocesses and interconnect them.  */

  output_redirected = 0;
  for (sequence_index = length_of_sequence - 1;
       sequence_index > 0;
       sequence_index--)
    {
      if (pipe (pipe_pair) != 0)
        {
          perror ("pipe");
          exit (EXIT_FAILURE);
        }

      child_process = fork ();
      if (child_process < 0)
        {
          perror ("fork");
          exit (EXIT_FAILURE);
        }

      if (child_process == 0)
        {

          /* The child executes its recoding step, reading from the pipe
             and writing to the current output file; then it exits.  */

          if (close (pipe_pair[1]) != 0
              || (input_file = fdopen (pipe_pair[0], "r")) == NULL)
            {
              perror ("pipe");
              exit (EXIT_FAILURE);
            }

          (*sequence[sequence_index]->routine) (input_file, output_file);

          fclose (input_file);
          if (sequence_index < length_of_sequence - 1 || output_name)
            fclose (output_file);
          exit (EXIT_SUCCESS);
        }

      /* The parent redirects the current output file to the pipe, then
         gets rid of its own copies of the pipe descriptors.  */

      if (dup2 (pipe_pair[1], fileno (output_file)) == -1
          || close (pipe_pair[0]) != 0
          || close (pipe_pair[1]) != 0)
        {
          perror ("pipe");
          exit (EXIT_FAILURE);
        }
      output_redirected = 1;
    }

  /* All the children are created, blocked on read.  Now, feed the whole
     chain of processes with the output of the first recoding step.  */

  if (!input_name)
    input_file = stdin;
  else if ((input_file = fopen (input_name, "r")) == NULL)
    {
      perror (input_name);
      exit (EXIT_FAILURE);
    }

  (*sequence[0]->routine) (input_file, output_file);

  if (input_name)
    fclose (input_file);

  /* The output has to be closed whenever it feeds a pipe, even if it is
     stdout: this flushes pending data and lets the next process see the
     end of file.  Otherwise, the wait below would never return.  */

  if (output_name || output_redirected)
    fclose (output_file);

  /* Wait on all children, mainly to avoid synchronisation problems on
     output file contents, but also to reduce the number of zombie
     processes in case the user recodes many files at once.  */

  while (wait (NULL) > 0)
    ;
}

#endif /* HAVE_PIPE */

/*-----------------------------------------------------------------------.
| Execute the conversion sequence, using the selected strategy whenever	 |
| more than one conversion step is needed.  If no conversion are needed, |
| merely copy the input onto the output.  A NULL INPUT_NAME means stdin, |
| a NULL OUTPUT_NAME means stdout.					 |
`-----------------------------------------------------------------------*/

/* If some sequencing strategies are missing, this routine automatically
   uses fallback strategies: pipes fall back to popen, and popen falls back
   to intermediate files.  For the single step and mere copy cases, which
   are handled right here, a file which cannot be opened or an output which
   cannot be fully written is reported and makes the program exit with
   EXIT_FAILURE, so the caller never replaces a file with truncated
   contents.  */

void
execute_sequence (const char *input_name, const char *output_name)
{
  FILE *input_file;		/* input file to recoding step */
  FILE *output_file;		/* output file from recoding step */
  int character;		/* the whole file will go through */
  int write_failed;		/* != 0 if output could not be written */

#ifdef MSDOS
  if (!input_name)
    setmode (fileno (stdin), O_BINARY);
  if (!output_name)
    setmode (fileno (stdout), O_BINARY);
  _fmode = O_BINARY;
#endif

  if (verbose_option && input_name)
    {
      fprintf (stderr, "Recoding %s...", input_name);
      fflush (stderr);
    }

  if (length_of_sequence > 1)
    switch (sequence_strategy)
      {
      case STRATEGY_UNDECIDED:
        /* The caller is expected to have decided already.  */
        assert (0);
        /* Fall through.  */

      case SEQUENCE_WITH_PIPE:
#ifdef HAVE_PIPE
        execute_pipe_sequence (input_name, output_name);
        break;
#endif
        /* Fall through when pipes are not available.  */

      case SEQUENCE_WITH_POPEN:
#ifdef HAVE_POPEN
        execute_popen_sequence (input_name, output_name);
        break;
#endif
        /* Fall through when popen is not available.  */

      case SEQUENCE_WITH_FILES:
        execute_pass_sequence (input_name, output_name);
        break;
      }
  else
    {

      /* This is a single-step recoding or a mere copy.  Do it.  */

      if (!input_name)
        input_file = stdin;
      else if ((input_file = fopen (input_name, "r")) == NULL)
        {
          perror (input_name);
          exit (EXIT_FAILURE);
        }

      if (!output_name)
        output_file = stdout;
      else if ((output_file = fopen (output_name, "w")) == NULL)
        {
          perror (output_name);
          exit (EXIT_FAILURE);
        }

      if (length_of_sequence == 1)
        (*sequence[0]->routine) (input_file, output_file);
      else
        while ((character = getc (input_file)) != EOF)
          putc (character, output_file);

      if (input_name)
        fclose (input_file);

      /* Pending data is pushed out before deciding whether the output is
         complete.  The standard output is flushed but never closed.  */

      write_failed = ferror (output_file);
      if (output_name)
        {
          if (fclose (output_file) != 0)
            write_failed = 1;
        }
      else if (fflush (output_file) != 0)
        write_failed = 1;

      if (write_failed)
        {
          fprintf (stderr, "recode: write error on %s\n",
                   output_name ? output_name : "standard output");
          exit (EXIT_FAILURE);
        }
    }

  if (verbose_option && input_name)
    {
      fprintf (stderr, " done\n");
      fflush (stderr);
    }
}


/* Main program.  */

/* Describe on stderr the recoding steps held in `sequence', as a single
   line of BEFORE:AFTER pairs separated by ` | ', or tell that the recoding
   is a mere copy when the sequence is empty.  */

void
echo_sequence (void)
{
  int position;			/* position within sequence */
  TYPE_of_step *current;	/* step at that position */
  const char *separator;	/* text to print ahead of the step */

  if (length_of_sequence == 0)
    {
      fprintf (stderr, "Recoding by mere copying\n");
      return;
    }

  fprintf (stderr, "Recoding through ");

  /* Nothing comes ahead of the first step.  */

  separator = "";
  for (position = 0; position < length_of_sequence; position++)
    {
      current = sequence[position];
      fprintf (stderr, "%s%s:%s",
               separator,
               code_keywords[(int) current->code_before].l,
               code_keywords[(int) current->code_after].l);
      separator = " | ";
    }

  fprintf (stderr, "\n");
}

/* Print the program version, the summary of options, the table of known
   codes (four per line, both names of each) and a few explanations on
   stderr, then exit with EXIT_FAILURE.  */

void
usage_and_exit (void)
{
  int keyword_index;		/* index into code_keywords */

  print_version ();

  /* Options available everywhere.  */

  fputs ("usage: recode [OPTION]... [BEFORE]:[AFTER] [FILE]...\n"
         "  -C\tdisplay Copyright and copying conditions, then exit\n"
         "  -c\tuse colons instead of double quotes for diaeresis\n"
         "  -d\tlimit conversion to diacritics or alike for LaTeX\n"
         "  -f\tforce file replacement even if non reversible\n"
         "  -i\tuse intermediate files for sequencing passes\n",
         stderr);

  /* Options whose meaning depends on what this system offers.  */

#ifdef HAVE_POPEN
  fputs ("  -o\tuse popen machinery for sequencing passes\n", stderr);
#else
  fputs ("  -o\tinterpreted as -i on this system\n", stderr);
#endif

#ifdef HAVE_PIPE
  fputs ("  -p\tuse pipe machinery for sequencing passes\n", stderr);
#else
  fputs ("  -p\tinterpreted as -o on this system\n", stderr);
#endif

  fputs ("  -t\ttouch the recoded files after replacement\n"
         "  -v\tbe verbose, tell elementary steps sequence\n",
         stderr);

  /* Table of codes: the first line is introduced by a title, and a new
     line is started after each group of four codes.  */

  keyword_index = 0;
  while (keyword_index < NUMBER_OF_KEYWORDS)
    {
      if (keyword_index == 0)
        fputs ("  CODE", stderr);
      else if (keyword_index % 4 == 0)
        fputs ("\n", stderr);

      fprintf (stderr, "\t%s %s",
               code_keywords[keyword_index].s,
               code_keywords[keyword_index].l);
      keyword_index++;
    }
  fputs ("\n", stderr);

  fputs ("Each FILE is recoded over itself, destroying the original.  If no\n"
         "FILE is specified, then act as a filter and recode stdin to stdout.\n"
         "If none of -i, -o and -p are given, presume -p if no FILE, else -i.\n"
         "Beware that option `-f' is always selected, even if not given.\n"
         "\n",
         stderr);

  exit (EXIT_FAILURE);
}

/* Decode the options and the BEFORE:AFTER argument, establish the sequence
   of recoding steps, then either recode each FILE argument over itself or,
   when there is no FILE, recode stdin to stdout.

   Each FILE is first recoded into `recodeXX.TMP' in the same directory,
   which then takes the place of the original; unless `-t' was given, the
   time stamps of the original are put back on the result.  If the recoded
   file cannot be moved over the original, the recoded text is left in the
   temporary file and the program exits with EXIT_FAILURE.  */

int
main (int argc, const char *argv[])
{
  extern int optind;		/* index of argument */
  int option_char;		/* option character */
  int start_index;		/* index of starting code */
  int end_index;		/* index of end code */
  int status;			/* result of the file replacement */
  const char *separator;	/* cursor into the BEFORE:AFTER argument */
  const char *input_name;	/* input file name */
  char *output_name;		/* output file name, allocated per file */
  FILE *file;			/* file to check or stat */
  char *cursor;			/* cursor into output_name */
#ifdef MSDOS
  struct ftime stamp_stat;	/* input file time stamps */
#else
  struct stat stamp_stat;	/* input file time stamps */
  time_t stamp_utime[2];	/* recoded file time stamps */
#endif

  /* Decode command options.  */

  program_name = argv[0];

  while ((option_char = getopt (argc, argv, "Ccdfioptv")) != EOF)
    switch (option_char)
      {
      case 'C':
        print_version ();
        print_copyright ();
        exit (EXIT_SUCCESS);

      case 'c':
        diaeresis_char = ':';
        break;

      case 'd':
        diacritics_only = 1;
        break;

      case 'f':
        force_option = 1;
        break;

      case 'i':
        sequence_strategy = SEQUENCE_WITH_FILES;
        break;

      case 'o':
        sequence_strategy = SEQUENCE_WITH_POPEN;
        break;

      case 'p':
        sequence_strategy = SEQUENCE_WITH_PIPE;
        break;

      case 't':
        touch_option = 1;
        break;

      case 'v':
        verbose_option = 1;
        break;

      default:
        usage_and_exit ();
      }

  /* Decode the BEFORE:AFTER argument.  Either side may be empty, it is
     up to code_index to supply a default then.  */

  if (optind + 1 > argc)
    usage_and_exit ();

  separator = strchr (argv[optind], ':');
  if (!separator)
    usage_and_exit ();
  start_index = code_index (argv[optind], separator - argv[optind]);
  if (start_index < 0)
    usage_and_exit ();
  separator++;
  end_index = code_index (separator, strlen (separator));
  if (end_index < 0)
    usage_and_exit ();
  optind++;

  /* Establish the sequence of recoding steps.  */

  length_of_sequence = 0;
  find_sequence (start_index, end_index);

  if (verbose_option)
    echo_sequence ();

  /* If there is no input file, act as a filter.  Else, recode all files
     over themselves.  */

  if (optind < argc)
    {

      /* When reading and writing files, unless the user selected otherwise,
         avoid forking and use intermediate files.  */

      if (sequence_strategy == STRATEGY_UNDECIDED)
        sequence_strategy = SEQUENCE_WITH_FILES;

      /* In case files are recoded over themselves and there is no
         recoding step at all, do not even try to touch the files.  */

      if (length_of_sequence > 0)

        /* Process files, one at a time.  */

        for (; optind < argc; optind++)
          {
            input_name = argv[optind];

            /* Check if the file can be read and rewritten.  */

            if ((file = fopen (input_name, "r+")) == NULL)
              {
                perror (input_name);
                exit (EXIT_FAILURE);
              }

            /* Save the input file time stamp.  */

            if (!touch_option)
              {
#ifdef MSDOS
                getftime (fileno (file), &stamp_stat);
#else
                fstat (fileno (file), &stamp_stat);
#endif
              }

            fclose (file);

            /* Choose an output file in the same directory.  The directory
               part is never longer than the whole input name, so the
               buffer below always has room for the temporary name and its
               final NUL, whatever the length of the input name.  */

            output_name = malloc (strlen (input_name) + sizeof "recodeXX.TMP");
            if (output_name == NULL)
              {
                fprintf (stderr, "recode: memory exhausted\n");
                exit (EXIT_FAILURE);
              }

            strcpy (output_name, input_name);
            for (cursor = output_name + strlen (output_name);
                 cursor > output_name && cursor[-1] != '/'
#if defined (MSDOS) || defined (OS2EMX)
                 && cursor[-1] != '\\' && cursor[-1] != ':'
#endif
                 ; cursor--)
              ;
            strcpy (cursor, "recodeXX.TMP");

            /* Recode the file.  */

            execute_sequence (input_name, output_name);

            /* Move the new file over the original.  The original is
               removed first, for systems on which a file cannot be given
               the name of an existing one.  */

            if (unlink (input_name) == -1)
              perror (input_name);

#ifdef HAVE_RENAME
            status = rename (output_name, input_name);
#else
            status = link (output_name, input_name);
#endif

            /* If the move failed, the temporary file holds the only
               remaining copy: keep it and tell the user where it is.  */

            if (status != 0)
              {
                perror (input_name);
                fprintf (stderr, "recode: recoded text left in %s\n",
                         output_name);
                exit (EXIT_FAILURE);
              }
            unlink (output_name);

            /* Adjust the time stamp for the new file.  */

            if (!touch_option)
              {
#ifdef MSDOS
                if ((file = fopen (input_name, "r")) == NULL)
                  {
                    perror (input_name);
                    exit (EXIT_FAILURE);
                  }
                setftime (fileno (file), &stamp_stat);
                fclose (file);
#else
                stamp_utime[0] = stamp_stat.st_atime;
                stamp_utime[1] = stamp_stat.st_mtime;
                utime (input_name, stamp_utime);
#endif
              }

            free (output_name);
          }
    }
  else
    {

      /* When reading stdin and writing stdout, unless the user selected
         otherwise, fork processes interconnected with pipes.  */

      if (sequence_strategy == STRATEGY_UNDECIDED)
        sequence_strategy = SEQUENCE_WITH_PIPE;

      execute_sequence (NULL, NULL);
    }
  exit (EXIT_SUCCESS);
}
