#define RKFREQ_C TRUE
/*========================================================================
 *
 * Name - %M%
 *
 * Version:    %I%
 *
 * ccsid:      %W% - %G% %U%
 * from:       %F%
 * date:       %H% %T%
 *
 * %Q%
 *
 * Description:
 *
 *     Written by John Darragh, Calgary Alberta, revised 3-89.
 *
 * rkfreq.c: tabulates ASCII character frequencies from stdin and outputs
 *           freq sorted ASCII chars on the stdout in a number of formats.
 *
 *     intended as a utility for rk_button to create its zero_freq
 *     startup FREQ_DATA_FILE and/or default zero_freq[] char array.
 *
 *     this info is used by rk to predict chars it hasn't yet added
 *     to its model (either by file priming or user command input).
 *
 *     it puts any unseen chars in ASCII sequence, and puts any un-
 *     represented control codes (including DEL) at the end.
 *
 * defaults:  outputs 256 chars as 128 ASCII char/NL pairs.
 *    (the NLs are just for readability's sake, rk ignores them.)
 *
 * options:
 *         -c   write output as a C array for inclusion in programs.
 *         -h   print help
 * <filename>   read data from file other than stdin.
 *========================================================================
 */

#include "rkglobal.h"
#include "functions.h"
#include "system.h"

/*
 * File-scope option state: written by get_args(), read by main().
 *
 * NOTE(review): a file-scope initializer has to be a constant expression,
 * and the C standard does not promise that `stdin` is one.  Confirm this
 * builds on every target; where it does not, infile has to be assigned
 * at run time instead.
 */
static FILE    * infile = stdin;  /* input stream: stdin unless a <filename> arg is opened */
static char    c_array  = FALSE;  /* set TRUE by the -c option: emit a C array */

/*
 * Frequency table: one entry per character code, MAX_SET entries in all.
 */
struct ch_table_struct {
  int label;    /* the character code this entry stands for */
  int count;    /* times seen; main() keeps it negative while the code is unseen */
};

struct ch_table_struct ch_table[MAX_SET];

/*
 * print_usage: write the rkfreq command synopsis and option summary
 * to the standard output.
 */
static void print_usage()
{
/* the usage text, one output line per entry, NULL terminated */
static char *usage_text[] = {
  "usage: rkfreq [-c] [-h] [<input file>]\n",
  "               -c  Produce C output to recompile rkfreq\n",
  "               -h  Print this help\n",
  "     <input file>  Read from file instead of stdin\n",
  "\nSee \"man rk\" for more information on this program.\n",
  NULL
  };
char **line;

for (line = usage_text; *line != NULL; line++)
  printf("%s", *line);
}

/*
 * get_args: parse the command line and set the file-scope option state.
 *
 *   argc, argv  - argument count and vector, exactly as handed to main().
 *
 * Leading arguments that start with '-' are options; only the character
 * right after the '-' is examined:
 *     -c / -C   set c_array to TRUE
 *     -h / -H   print the usage text and exit(0)
 *     other     print the usage text and exit(1)
 * After the options at most one argument may remain.  It is opened for
 * reading and stored in infile; if it cannot be opened a diagnostic is
 * written to stderr, the usage text is printed and the program exits
 * with status 1.  More than one remaining argument also prints the usage
 * text and exits with status 1.
 *
 * Returns 0; every failure path exits instead of returning.
 */
static int get_args (argc, argv)   /* may change infile */
int argc;
char **argv;
{
while (argc > 1 && argv[1][0] == '-') {
  switch (argv[1][1]) {
  case 'c':
  case 'C':               /* turn on C char array output */
    c_array = TRUE;
    break;

  case 'h':
  case 'H':
    print_usage();
    exit(0);
/*NOTREACHED*/
    break;

  default:
    print_usage();
    exit(1);
/*NOTREACHED*/
    break;
    } /*switch*/
  argc--;                   /* shift, so argv[1] is always the next arg */
  argv++;
  }
if (argc == 2) {            /* file argument present */
  if ((infile = fopen(argv[1], "r")) == NULL) {
    /* the trailing blank keeps the file name that perror() prints */
    /* from running into this prefix                               */
    fprintf(stderr, "rkfreq:  cannot open input file ");
    fflush(stderr);
    perror(argv[1]);        /* perror wants the string, not its first char */
    print_usage();
    exit(1);
    }
  }
else if (argc != 1) {       /* more than one non-option argument */
  print_usage();
  exit(1);
  }
return 0;
}/*get_args*/

int main (argc, argv)
int argc;
char **argv;
{
int chr;  register int i, j;  int gap;
struct ch_table_struct temp;

get_args (argc, argv);  /* may change infile, c_array      */

/* init table labels and do the freq count */
for (i = 0; i < MAX_SET; i++) {
  ch_table[i].label = i;   /* the ASCII code       */
  ch_table[i].count = -(i + 1); /* ensure ASCII order for 0-freqs */
  }            /* by intially -ve freq counts */

while ((chr = getc(infile)) != EOF) {
  chr = STRIP_PARITY(chr);
  if (ch_table[chr].count < 0)  /* seen, so fix -ve freqs */
    ch_table[chr].count = 0;
  ch_table[chr].count++;
  }
for (i = 0; i < ' '; i++) {
  if (ch_table[i].count < 0)     /* ensure 0-freq control codes */
    ch_table[i].count -= MAX_SET;       /* end up at the end */
  }
if (ch_table[ASCII_RUBOUT].count < 0)     /* ensure same for DEL char   */
    ch_table[ASCII_RUBOUT].count -= MAX_SET;

/* frequency sort the thing (shell sort)   */
for (gap = MAX_SET / 2; gap > 0; gap /= 2)
  for (i = gap; i < MAX_SET; i++)
    for (j = i - gap; j >= 0; j -= gap) {
      if (ch_table[j].count <= ch_table[j + gap].count)
        break;
      temp = ch_table[j];
      ch_table[j] = ch_table[j + gap];
      ch_table[j + gap] = temp;
      }

/* output a compilable "C" character array */
if (c_array) {
  printf(
"static char zero_freq[MAX_SET] = { /* by a user supplied $home/file */\n");
  for (i = MAX_SET - 1; i >= 0; i--) {
    switch (chr = STRIP_PARITY((char)ch_table[i].label)) {
    case '\n':
      printf("  \'\\n\',");
      break;
    case '\t':
      printf("  \'\\t\',");
      break;
    case '\b':
      printf("  \'\\b\',");
      break;
    case '\r':
      printf("  \'\\r\',");
      break;
    case '\f':
      printf("  \'\\f\',");
      break;
    case '\\':
      printf("  \'\\\\\',");
      break;
    case '\'':
      printf("  \'\\\'\',");
      break;
    case  ASCII_RUBOUT:
      printf("\'\\%03o\',", (int)chr);
      break;
    default:
      if (chr < ' ')
        printf(" \'\\%02o\',", (int)chr);
      else
        printf("   \'%1c\',", chr);
      break;
      }
    if ((i % 8) == 0 && i != 0)
      printf("\n");
    }
  printf("};\n");

/* otherwise just put out sorted chars */
  }
else
  for (i = MAX_SET - 1; i >= 0; i--)
      printf("%c\n", (char)ch_table[i].label);

exit(0);
}
