#ifndef lint
static char *RCSid = "$Header: /m/webster/src/c/misc/RCS/makeindex.c,v 1.2 86/12/26 22:04:18 davy Exp $";
#endif

/*
 * makeindex - make an index of the words in the dictionary
 *
 * We read in all the words from the dictionary files, and write them out
 * to wordfileindex.  We also create the DBM files wordfileindex.{dir,pag}
 * to hold the data about each word.  For each word we store the file it is
 * in and the seek position to get to the start of the first definition for
 * that word.  We also create wordfilehdr, which contains the header info,
 * namely a set of seek positions into wordfileindex for the start of each
 * letter (wordfileindex is sorted).
 *
 * David A. Curry
 * Purdue University
 * Engineering Computer Network
 * Original: April, 1986
 * Revised: December, 1986
 *
 * $Log:	makeindex.c,v $
 * Revision 1.2  86/12/26  22:04:18  davy
 * Changed to create a DBM file index.
 * 
 */
#include <sys/types.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <ctype.h>
#include <stdio.h>
#include <ndbm.h>

#include "../h/index.h"
#include "../h/wordfiles.h"

/*
 * Shared state.  idx is filled in by main() for every new word and is
 * stored verbatim as the DBM content for that word.  hdr accumulates
 * the word count (main) and the per-letter start offsets and index
 * size (setstarts) before being written to the header file.
 */
struct index idx;			/* index for current word	*/
struct header hdr;			/* the header			*/

char word[BUFSIZ];			/* current word, as stored by saveword() */

/*
 * main - build the word index, the DBM database and the header file.
 *
 * Every word file is scanned line by line.  Lines whose first character
 * is 'F' introduce a word; the first such line for each distinct word
 * is recorded (file number and byte offset of the line) in the DBM
 * database and the word itself is appended to the index file.  After
 * all files are processed the index is sorted by setstarts() and the
 * header is written out.  Any failure prints a message and exits with
 * status 1; success exits with status 0.
 *
 * NOTE(review): the database is opened without O_TRUNC, so keys left
 * over from an earlier run are presumably kept -- confirm that this is
 * intended.
 */
int
main()
{
	DBM *db;
	register int i;
	char buf[BUFSIZ];
	struct rlimit rlim;
	datum key, content;
	register FILE *fp, *gp;
	register daddr_t addr, len;

	/*
	 * Kick our limits.  This is best effort only: raising the hard
	 * limit may be refused, in which case we simply run with whatever
	 * limits we already have.
	 */
	rlim.rlim_max = rlim.rlim_cur = RLIM_INFINITY;
	(void) setrlimit(RLIMIT_FSIZE, &rlim);
	(void) setrlimit(RLIMIT_STACK, &rlim);
	(void) setrlimit(RLIMIT_DATA, &rlim);

	/*
	 * Create wordfileindex and the database files.
	 */
	sprintf(buf, "%s/%s", wordfiledir, wordfileindex);

	if ((gp = fopen(buf, "w")) == NULL) {
		fprintf(stderr, "makeindex: cannot create \"%s\".\n", buf);
		exit(1);
	}

	if ((db = dbm_open(buf, O_WRONLY | O_CREAT, 0644)) == NULL) {
		fprintf(stderr, "makeindex: cannot create index database.\n");
		exit(1);
	}

	/*
	 * No word seen yet.
	 */
	word[0] = '\0';
	hdr.h_nwords = 0;

	/*
	 * For each word file...
	 */
	for (i=0; i < NWORDFILES; i++) {
		/*
		 * Open the word file.
		 */
		sprintf(buf, "%s/%s", wordfiledir, wordfiles[i]);

		if ((fp = fopen(buf, "r")) == NULL) {
			fprintf(stderr, "makeindex: cannot open \"%s\".\n", buf);
			exit(1);
		}

		/*
		 * Read lines; words are in "F:" lines.  addr tracks the
		 * byte offset of the line currently held in buf.
		 */
		addr = 0L;
		while (fgets(buf, BUFSIZ, fp) != NULL) {
			/*
			 * Take the length now: saveword() truncates buf.
			 */
			len = strlen(buf);

			/*
			 * Not a word line.
			 */
			if (*buf != 'F') {
				addr += len;
				continue;
			}

			/*
			 * If this is another definition of the same word,
			 * keep going.
			 */
			if (sameword(word, buf)) {
				addr += len;
				continue;
			}

			/*
			 * Save file number, file position,
			 * and buffer position.
			 */
			idx.i_file = i;
			idx.i_filepos = addr;
			saveword(word, buf);

			/*
			 * Store the info in the database.
			 */
			key.dptr = word;
			key.dsize = strlen(word);
			content.dptr = (char *) &idx;
			content.dsize = sizeof(struct index);

			if (dbm_store(db, key, content, DBM_REPLACE) < 0) {
				fprintf(stderr, "makeindex: cannot store to database.\n");
				exit(1);
			}

			/*
			 * Put the word into the file.
			 */
			fprintf(gp, "%s\n", word);
			hdr.h_nwords++;
			addr += len;
		}

		fclose(fp);
	}

	/*
	 * Make sure every word really reached the index file before it
	 * is sorted, and close the database so that anything it has
	 * buffered is written out.
	 */
	if (ferror(gp) || fclose(gp) == EOF) {
		fprintf(stderr, "makeindex: error writing index file.\n");
		exit(1);
	}

	dbm_close(db);

	/*
	 * Mark start of each letter.
	 */
	setstarts();

	/*
	 * Write the header.
	 */
	sprintf(buf, "%s/%s", wordfiledir, wordfilehdr);

	if ((fp = fopen(buf, "w")) == NULL) {
		fprintf(stderr, "makeindex: cannot create \"%s\".\n", buf);
		exit(1);
	}

	if (fwrite(&hdr, sizeof(struct header), 1, fp) != 1) {
		fprintf(stderr, "makeindex: header write failed.\n");
		exit(1);
	}

	/*
	 * The data may still be sitting in the stdio buffer; a failed
	 * close means the header did not make it to disk.
	 */
	if (fclose(fp) == EOF) {
		fprintf(stderr, "makeindex: header write failed.\n");
		exit(1);
	}

	exit(0);
}

/*
 * saveword - save the word from buf into word.
 *
 * buf is a dictionary line.  Its first two characters are skipped and
 * the word is everything from there up to the first ';'.  If the line
 * has no ';' the word ends at the newline or at the end of the string
 * instead.  The word is copied to "word" with upper case folded to
 * lower case and is NUL-terminated.
 *
 * Side effect: buf is truncated at the end of the word.
 *
 * "word" must have room for the result; a buffer as large as the one
 * holding buf is always enough.  The return value is always 0 and is
 * present only because callers declare this function implicitly.
 */
int
saveword(word, buf)
register char *word;
register char *buf;
{
	register char *end;
	register int c;

	/*
	 * Step over the two-character line tag without running past
	 * the terminator of a short line.
	 */
	if (*buf != '\0')
		buf++;
	if (*buf != '\0')
		buf++;

	/*
	 * Extract the word.
	 */
	for (end = buf; *end != ';' && *end != '\n' && *end != '\0'; end++)
		continue;
	*end = '\0';

	/*
	 * Copy the word.  The character goes through unsigned char so
	 * that the ctype macros never see a negative value.
	 */
	while (*buf != '\0') {
		c = (unsigned char) *buf++;
		*word++ = (isupper(c) ? tolower(c) : c);
	}

	*word = '\0';
	return(0);
}

/*
 * setstarts - find starts of words.
 *
 * Sorts the index file in place, then reads it back and records in
 * hdr.h_starts[] the byte offset of the first line beginning with each
 * distinct leading character, and in hdr.h_idxsize the total number of
 * bytes read.  Entries of h_starts[] for leading characters that never
 * occur are left untouched.  An empty index yields h_idxsize == 0 and
 * no start entries.  Exits with status 1 if the sort fails or the
 * index cannot be opened.  The return value is always 0.
 *
 * NOTE(review): SUBSCRIPT() is applied to whatever character starts a
 * line; presumably it copes with non-letters -- confirm in index.h.
 */
int
setstarts()
{
	FILE *fp;
	daddr_t addr;
	char buf[BUFSIZ];
	register char lastc;

	/*
	 * Sort from dictionary order to ASCII collating sequence.  The
	 * locale is forced to "C" so that sort really compares bytes;
	 * otherwise lines sharing a first character need not end up
	 * adjacent, which the scan below depends on.
	 */
	sprintf(buf, "LC_ALL=C sort -T /tmp -o %s/%s %s/%s", wordfiledir, wordfileindex, wordfiledir, wordfileindex);

	if (system(buf) != 0) {
		fprintf(stderr, "makeindex: cannot sort \"%s/%s\".\n", wordfiledir, wordfileindex);
		exit(1);
	}

	/*
	 * Open the index.
	 */
	sprintf(buf, "%s/%s", wordfiledir, wordfileindex);

	if ((fp = fopen(buf, "r")) == NULL) {
		fprintf(stderr, "makeindex: cannot open \"%s\".\n", buf);
		exit(1);
	}

	/*
	 * The first line starts the first letter at offset zero.  If
	 * there is no first line, buf still holds the pathname and must
	 * not be mistaken for a word.
	 */
	if (fgets(buf, BUFSIZ, fp) == NULL) {
		hdr.h_idxsize = 0;
		fclose(fp);
		return(0);
	}

	lastc = *buf;
	addr = strlen(buf);
	hdr.h_starts[SUBSCRIPT(lastc)] = 0;

	/*
	 * Save the address of the start of each new letter in the file.
	 * addr is the offset of the line just read until it is advanced.
	 */
	while (fgets(buf, BUFSIZ, fp) != NULL) {
		if (*buf != lastc) {
			lastc = *buf;
			hdr.h_starts[SUBSCRIPT(lastc)] = addr;
		}

		addr += strlen(buf);
	}

	hdr.h_idxsize = addr;

	fclose(fp);
	return(0);
}

/*
 * sameword - return non-zero if word and buf have the same word in them.
 *
 * word is a NUL-terminated word, compared exactly as given.  buf is a
 * dictionary line: its first two characters are skipped and the word
 * runs from there to the first ';' (or to the newline or end of string
 * if there is no ';').  Upper case letters in buf are folded to lower
 * case before comparing.  Neither argument is modified.
 *
 * The comparison is done in place, so there is no limit on the length
 * of the word.
 */
int
sameword(word, buf)
register char *word;
register char *buf;
{
	register int c;

	/*
	 * Step over the two-character line tag without running past
	 * the terminator of a short line.
	 */
	if (*buf != '\0')
		buf++;
	if (*buf != '\0')
		buf++;

	for (;;) {
		/*
		 * Go through unsigned char so the ctype macros never
		 * see a negative value.
		 */
		c = (unsigned char) *buf;

		if (c == ';' || c == '\n' || c == '\0')
			break;

		if (isupper(c))
			c = tolower(c);

		/*
		 * Also catches word being the shorter of the two, as
		 * c cannot be NUL here.
		 */
		if ((unsigned char) *word != c)
			return(0);

		word++;
		buf++;
	}

	/*
	 * The line's word is used up; they match only if ours is too.
	 */
	return(*word == '\0');
}
