/*
 * Copyright (c) 1990 Paul Pomes
 * Copyright (c) 1990 University of Illinois Board of Trustees
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of Illinois, Urbana.  In addition, redistribution
 * and use must conform to the terms listed in the Copying file in
 * this directory.
 *
 * The name of the University may not be used to endorse or promote products
 * derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

/*
 * RCS "Header" identification string for this file.  It is compiled in
 * unless the preprocessor symbol "lint" is defined.
 */
#ifndef lint
static char rcsid[] = "@(#)$Header: /usr/local/src/cso/oed2/RCS/FindSymbol.c,v 2.0.0.1 90/03/30 16:31:54 paul Exp $";
#endif /* lint */

#include	"oed2.h"

/*
 * FindSymbol -- Locate a symbol in the master SymList
 *
 *	Given a character string of a symbol found in the text, do a binary
 *	search of the lexicographically sorted master SymList for the
 *	matching symbol.  Associated with each symbol in SymList is its
 *	printable string representation.  Some symbols, particularly those
 *	used for pronunciation purposes, don't have nroff/troff equivalents.
 *	For these the string returned is the argument string.
 *
 *	parameters:
 *		Str -- char pointer to symbol desired
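 *	returns:
 *		Pointer to a copy of the selected SYM struct in SymList or
 *		SYMNULL if no match is found.  The copy is kept in static
 *		storage and is overwritten by the next call.
 *	side effects:
 *		Prints the symbol being searched for when tTd(18,5) is
 *		true, and reports symbols that fall outside the range of
 *		the list when tTd(18,2) is true.  Unknown symbols that fall
 *		inside the range are not reported.
 *	deficiencies:
 *		Not reentrant: every result is returned through the same
 *		static buffer.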
 */

/*
 * SymList -- master table of symbol names and their printable forms.
 *
 * Each entry is four strings: the symbol name as it appears in the text,
 * followed by its F_ROFF, F_SIMPLE and F_VT200 representations.
 *
 * FindSymbol() binary-searches this table with strcmp(), so the entries
 * must stay sorted in strcmp() order on the first (symbol name) field.
 * In ASCII that puts digits first, then upper case, then lower case.
 * Use sort with the C/POSIX collating sequence when adding entries.
 *
 * Keep every trailing comment closed on its own line: an unterminated
 * comment runs on to the next line's closing delimiter and silently
 * removes that entry from the table.
 *
 * NOTE(review): the F_VT200 strings look like a one-letter prefix
 * (A, M, T or S) followed by the text to show -- confirm the prefix
 * meanings against the code that consumes vt200Str.
 * NOTE(review): the F_ROFF string for "bbar" ends in an apostrophe that
 * its siblings ("hbar", "edh") do not have, and "gamma"/"vvf" use "Tn"
 * where "ig" uses "Tg" -- confirm whether these are intentional.
 */
static struct SYM	SymList[] = {
/*
Argument	F_ROFF		F_SIMPLE	F_VT200
 */
"0",		"(omitted)",	"(omitted)",	"Sa",	/* numeric symbol */
"AE",		"\\(AE",	"AE",		"MF",	/* OE vowel */
"Aacu",		"\\*'\\s-2A\\s0", "A'",		"MA",	/* A topped w. ' */
"Acirc",	"\\*^\\s-2A\\s0", "A",		"MB",	/* A topped w. ^ */
"Ae",		"\\(AE",	"Ae",		"MF",	/* OE vowel */
"Agrave",	"\\*`\\s-2A\\s0", "A",		"M@",	/* A topped w. ` */
"Alpha",	"\\(*A",	"A",		"AA",	/* Greek Alpha */
"Asg",		"&Asg.",	"&Asg.",	"A&Asg.", /* ??? OE letter */
"Auml",		"\\*:\\s-2A\\s0", "A",		"MD",	/* A topped w. " */
"Beta",		"\\(*B",	"B",		"AB",	/* Greek Beta */
"Ccdil",	"\\*,C",	"C",		"MG",	/* C cedilla */
"Chi",		"\\(*X",	"X",		"AX",	/* Greek Chi */
"Delta",	"\\(*D",	"Delta",	"TD",	/* Greek Delta */
"Eacu",		"\\*'\\s-2E\\s0", "E",		"MI",	/* E topped w. ' */
"Ecirc",	"\\*^\\s-2E\\s0", "E",		"MJ",	/* E topped w. ^ */
"Edh",		"\\*-D",	"D",		"AD",	/* OE crossed d */
"Egrave",	"\\*`\\s-2E\\s0", "E",		"MH",	/* E topped w. ` */
"Epsilon",	"\\(*E",	"E",		"AE",	/* Greek Epsilon */
"Eta",		"\\(*Y",	"H",		"AH",	/* Greek Eta */
"Eth",		"&Eth.",	"&Eth.",	"A&Eth.", /* ??? */
"Euml",		"\\*:\\s-2E\\s0", "E",		"MK",	/* E topped w. " */
"Gamma",	"\\(*G",	"Gamma",	"TG",	/* Greek Gamma */
"Iacu",		"\\*'\\s-2I\\s0", "I",		"MM",	/* I topped w. ' */
"Icirc",	"\\*^\\s-2I\\s0", "I",		"MN",	/* I topped w. ^ */
"Igrave",	"\\*`\\s-2I\\s0", "I",		"ML",	/* I topped w. ` */
"Iota",		"\\(*I",	"I",		"AI",	/* Greek Iota */
"Iuml",		"\\*:\\s-2I\\s0", "I",		"MO",	/* I topped w. " */
"Kappa",	"\\(*K",	"K",		"AK",	/* Greek Kappa */
"Lambda",	"\\(*L",	"Lambda",	"TL",	/* Greek Lambda */
"Mu",		"\\(*M",	"M",		"AM",	/* Greek Mu */
"Nu",		"\\(*N",	"N",		"AN",	/* Greek Nu */
"OE",		"OE",		"OE",		"MW",	/* OE vowel */
"Oacu",		"\\*'\\s-2O\\s0", "O",		"MS",	/* O topped w. ' */
"Obar",		"\\o'/O'",	"O",		"MX",	/* O crossed w. / */
"Ocirc",	"\\*^\\s-2O\\s0", "O",		"MT",	/* O topped w. ^ */
"Oe",		"Oe",		"Oe",		"MW",	/* OE vowel */
"Ograve",	"\\*`\\s-2O\\s0", "O",		"MR",	/* O topped w. ` */
"Omega",	"\\(*W",	"Omega",	"TW",	/* Greek Omega */
"Omicron",	"\\(*O",	"O",		"AO",	/* Greek Omicron */
"Ouml",		"\\*:\\s-2O\\s0", "O",		"MV",	/* O topped w. " */
"Page",		"p. ",		"p. ",		"Ap.",	/* Page */
"Phi",		"\\(*F",	"Phi",		"TF",	/* Greek Phi */
"Pi",		"\\(*P",	"Pi",		"TP",	/* Greek Pi */
"Psi",		"\\(*Q",	"Psi",		"TQ",	/* Greek Psi */
"Rho",		"\\(*R",	"P",		"AP",	/* Greek Rho */
"Sigma",	"\\(*S",	"Sigma",	"TS",	/* Greek Sigma */
"Tau",		"\\(*T",	"T",		"AT",	/* Greek Tau */
"Th",		"P",		"Th",		"ATh",	/* ??? OE consonant */
"Theta",	"\\(*H",	"Theta",	"TJ",	/* Greek Theta */
"Uacu",		"\\*'\\s-2U\\s0", "U",		"MZ",	/* U topped w. ' */
"Ucirc",	"\\*^\\s-2U\\s0", "U",		"M[",	/* U topped w. ^ */
"Ugrave",	"\\*`\\s-2U\\s0", "U",		"MY",	/* U topped w. ` */
"Upsilon",	"\\(*U",	"Upsilon",	"TY",	/* Greek Upsilon */
"Uuml",		"\\*:\\s-2U\\s0", "U",		"M\\",	/* U topped w. " */
"Xi",		"\\(*C",	"Xi",		"TX",	/* Greek Xi */
"Yacu",		"\\*'\\s-2Y\\s0", "Y",		"AY'",	/* Y topped w. ' */
"Ygh",		"3",		"3",		"A3",	/* OE letter */
"Zeta",		"\\(*Z",	"Z",		"AZ",	/* Greek Zeta */
"aacu",		"\\*'a",	"a",		"Ma",	/* a topped w. ' */
"acirc",	"\\*^a",	"a",		"Mb",	/* a topped w. ^ */
"acu",		"\\h'-\\w'e'u'\\z\\(aa\\h'\\w'e'u'", "", "", /* ' over prev */
"ae",		"\\(ae",	"ae",		"Mf",	/* OE vowel */
"agrave",	"\\*`a",	"a",		"M`",	/* a topped w. ` */
"alpha",	"\\(*a",	"alpha",	"Ta",	/* Greek alpha */
"amp",		"&",		"&",		"A&",	/* Ampersand */
"ang",		"\\z\\(dea",	"a",		"Me",	/* a topped with o */
"asg",		"&asg.",	"&asg.",	"A&asg.", /* OE letter */
"asper",	"\\h'-\\w'e'u'\\z\\(ga\\h'\\w'e'u'", "", "", /* ` over prev */
"auml",		"\\*:a",	"a",		"Md",	/* a topped w. " */
"ayin",		"c",		"c",		"Ac",	/* arabic ??? */
"b1",		"\\-",		"-",		"A-",	/* single bond */
"b2",		"=",		"=",		"A=",	/* double bond */
"bbar",		"\\*-b'",	"b",		"Ab",	/* b crossed w. - */
"bbc1",		"&bbc1.",	"&bbc1.",	"A&bbc1.", /* ??? chemistry */
"bbc2",		"&bbc2.",	"&bbc2.",	"A&bbc2.", /* ??? chemistry */
"bbl1",		"&bbl1.",	"&bbl1.",	"A&bbl1.", /* ??? chemistry */
"bbr1",		"&bbr1.",	"&bbr1.",	"A&bbr1.", /* ??? chemistry */
"bbr2",		"&bbr2.",	"&bbr2.",	"A&bbr2.", /* ??? chemistry */
"beta",		"\\(*b",	"beta",		"Tb",	/* Greek beta */
"breve",	"\\h'-\\w'e'u'\\z\\(aa\\z`\\h'\\w'e'u'", "", "", /* `' prev */
"btc1",		"&btc1.",	"&btc1.",	"A&btc1.", /* ??? chemistry */
"btc2",		"&btc2.",	"&btc2.",	"A&btc2.", /* ??? chemistry */
"ccdil",	"\\*,c",	"c",		"Mg",	/* c cedilla */
"cdil",		"\\h'-\\w'e'u'\\z,\\h'\\w'e'u'", "",	"", /* cedilla prev */
"cdl",		"\\o',o'",	"o",		"Ao",	/* o cedilla w. bar */
"cent",		"\\(ct",	"cent",		"M\"",	/* cents */
"chi",		"\\(*x",	"x",		"Tc",	/* Greek chi */
"circ",		"\\h'-\\w'e'u'\\z^\\h'\\w'e'u'", "", "", /* ^ over prev */
"circbl",	"",		"",		"",	/* circ below prev */
"dag",		"\\(dg",	"+",		"T\014", /* Dagger */
"dbar",		"||",		"||",		"A||",	/* Double bar */
"deg",		"\\(de",	"deg.",		"M0",	/* Degrees */
"delta",	"\\(*d",	"delta",	"Td",	/* Greek delta */
"div",		"\\(di",	"/",		"TC",	/* divide sign */
"dollar",	"$",		"$",		"A$",	/* $ */
"dotab",	"",		"",		"",	/* dot above prev */
"dotbl",	"",		"",		"",	/* dot below prev */
"dubh",		"-",		"-",		"A-",	/* line break in orig */
"eacu",		"\\*'e",	"e",		"Mi",	/* e topped w. ' */
"ecirc",	"\\*^e",	"e",		"Mj",	/* e topped w. ^ */
"edh",		"\\*-d",	"d",		"Ad",	/* d crossed w - */
"egrave",	"\\*`e",	"e",		"Mh",	/* e topped w. ` */
"epsilon",	"\\(*e",	"epsilon",	"Te",	/* Greek epsilon */
"eta",		"\\(*y",	"n",		"Th",	/* Greek eta */
"eth",		"&eth.",	"&eth.",	"A&eth.", /* OE ??? */
"euml",		"\\*:e",	"e",		"Mk",	/* e topped w. " */
"fata",		"\\*-a",	"a",		"Aa",	/* a topped w. - */
"fatax",	"\\*-a",	"a",		"Aa",	/* a topped w. - */
"fatpara",	"P",		"P",		"M6",	/* paragraph symbol */
"frown",	"\\h'-\\w'e'u'\\z^\\h'\\w'e'u'", "", "", /* ^ over prev */
"gamma",	"\\(*g",	"gamma",	"Tn",	/* Greek gamma */
"ge",		"\\(>=",	">=",		"T>",	/* Greater than|equal */
"grave",	"\\(ga",	"",		"",	/* Grave accent */
"gt",		">",		">",		"A>",	/* Greater than */
"hacek",	"\\h'-\\w'e'u'\\z\\(rn\\h'\\w'e'u'",	"", "", /* _ ov prev */
"hash",		"#",		"#",		"A#",	/* Hash mark */
"hbar",		"\\*-h",	"h",		"Ah",	/* h crossed w. - */
"hgz",		"\\d3\\u",	"3",		"A3",	/* Funky char */
"hook",		"&hook.",	"&hook.",	"A&hook.", /* only used MPR */
"ia",		"\\fI\\(*a\\fP", "alpha",	"Ta",	/* Italic greek alpha */
"iacu",		"\\*'i",	"i",		"Mm",	/* i topped w. ' */
"ib",		"\\fI\\(*b\\fP", "beta",	"Tb",	/* Italic greek beta */
"icirc",	"\\*^i",	"i",		"Mn",	/* i topped w. ^ */
"id",		"\\fI\\(*d\\fP", "delta",	"Td",	/* Italic greek delta */
"ie",		"\\fI\\(*e\\fP", "epsilon",	"Te",	/* Italic gk. epsilon */
"ig",		"\\fI\\(*g\\fP", "gamma",	"Tg",	/* Italic greek gamma */
"igrave",	"\\*`i",	"i",		"Ml",	/* i topped w. ` */
"index",	"index",	"index",	"Aindex", /* index ??? */
"infin",	"\\(if",	"oo",		"TB",	/* Infinity */
"iota",		"\\(*i",	"i",		"Ti",	/* Greek iota */
"isub",		"\\h'-\\w'e'u'\\z,\\h'\\w'e'u'", "", "", /* , under prev */
"iuml",		"\\*:i",	"i",		"Mo",	/* i topped w. " */
"kappa",	"\\(*k",	"k",		"Tk",	/* Greek kappa */
"lambda",	"\\(*l",	"lambda",	"Tl",	/* Greek lambda */
"le",		"\\(<=",	"<=",		"T<",	/* Less than or equal */
"lenis",	"\\h'-\\w'e'u'\\z\\(aa\\h'\\w'e'u'", "", "", /* ' over prev */
"lm",		":",		":",		"A:",	/* Length mark */
"lt",		"<",		"<",		"A<",	/* Less than */
"mac",		"\\h'-\\w'e'u'\\z\\(rn\\h'\\w'e'u'",	"", "", /* _ ov prev */
"min",		"min",		"min",		"Amin",	/* minima */
"mu",		"\\(*m",	"u",		"Tm",	/* Greek mu */
"ng",		"\\o'nj'",	"n",		"An",	/* n combined w. j */
"ngx",		"\\o'nj'",	"n",		"An",	/* n combined w. j */
"nu",		"\\(*n",	"v",		"Av",	/* Greek nu */
"oacu",		"\\*'o",	"o",		"Ms",	/* o topped w. ' */
"obar",		"\\*-o",	"o",		"Ao",	/* o crossed w. - */
"ocirc",	"\\*^o",	"o",		"Mt",	/* o topped w. ^ */
"oe",		"oe",		"oe",		"Mw",	/* OE vowel */
"ograve",	"\\*`o",	"o",		"Mr",	/* o topped w. ` */
"omega",	"\\(*w",	"w",		"Tw",	/* Greek omega */
"omicron",	"\\(*o",	"o",		"Ao",	/* Greek omicron */
"ope",		"e",		"e",		"Ae",	/* funky char */
"ouml",		"\\*:o",	"o",		"Mv",	/* o topped w. " */
"p",		"'",		"'",		"A'",	/* ??? */
"pa",		"\\(pd",	"d",		"To",	/* partial derivative */
"page",		"\\s-1P\\s0 ",	"P ",		"AP ",	/* Page */
"pall",		"l",		"l",		"Al",	/* ??? */
"paln",		"n",		"n",		"An",	/* ??? */
"para",		"P",		"P",		"M6",	/* paragraph symbol */
"phi",		"\\(*f",	"phi",		"Tf",	/* Greek phi */
"pi",		"\\(*p",	"pi",		"Tp",	/* Greek pi */
"pm",		"&pm.",		"&pm.",		"A&pm.", /* ??? */
"pmil",		"ppm",		"ppm",		"Appm",	/* Parts per million */
"pp",		"\"",		"\"",		"A\"",	/* ??? */
"psi",		"\\(*q",	"psi",		"Tq",	/* Greek psi */
"pstlg",	"L",		"L",		"M#",	/* Pounds sterling */
"rar",		"&rar.",	"&rar.",	"A&rar.", /* ??? */
"rdot",		".",		".",		"M7",	/* raised dot ??? */
"reva",		"A",		"A",		"AA",	/* Reverse a */
"revc",		"O",		"O",		"AO",	/* Reverse c */
"revope",	"3",		"3",		"A3",	/* ??? */
"revr",		"r",		"r",		"Ar",	/* Reverse r */
"revrx",	"r",		"r",		"Ar",	/* Reverse r */
"revsc",	"\\o'\\(aa.'",	";",		"A;",	/* Reverse ; */
"revv",		"\\s-2\\(*L\\s0", "&revv.",	"T^",	/* Upside dn small V */
"rfa",		"\\*,o",	"o",		"Ao",	/* o w. cedilla */
"rho",		"\\(*r",	"p",		"Tr",	/* Greek rho */
"schwa",	"&schwa.",	"&schwa.",	"A&schwa.", /* Upside down e */
"schwax",	"\\*-e",	"e",		"Ae",	/* e topped w. - */
"sd",		"\\u.\\d",	".",		"M7",	/* ??? */
"sdd",		":",		":",		"A:",	/* ??? */
"sdx",		"\\u.\\d",	".",		"M7",	/* ??? */
"sect",		"\\(sc",	"Sect.",	"M'",	/* Section */
"sh",		"\\(is",	"S",		"T?",	/* integral */
"shti",		"\\s-3I\\s0",	"I",		"AI",	/* Short I */
"shtu",		"\\s-3U\\s0",	"U",		"AU",	/* Short U */
"shty",		"\\s-3Y\\s0",	"Y",		"AY",	/* Short Y */
"shx",		"&shx.",	"&shx.",	"A&shx.", /* ??? */
"sigma",	"\\(*s",	"o",		"Ts",	/* Greek sigma */
"sm",		"\\(fm",	"'",		"T\032", /* Stress mark */
"smm",		",",		",",		"S\030", /* Sec. stress mark */
"sqrt",		"\\(sr",	"sq root",	"TV",	/* Square root */
"sylab",	",",		",",		"S\030", /* ??? */
"sylam",	",",		",",		"S\030", /* ??? */
"tau",		"\\(*t",	"t",		"Tt",	/* Greek tau */
"th",		"p",		"p",		"Ap",	/* OE consonant */
"thbar",	"\\*-p",	"p",		"Ap",	/* OE consonant */
"theta",	"\\(*h",	"theta",	"Tj",	/* Greek theta */
"tilde",	"\\h'-\\w'e'u'\\z~\\h'\\w'e'u'","", "", /* ~ over prev */
"times",	"\\(mu",	"x",		"T9",	/* Math multiply */
"trli",		"||",		"||",		"A||",	/* ??? */
"uacu",		"\\*'u",	"u",		"Mz",	/* u topped w. ' */
"ucirc",	"\\*^u",	"u",		"M{",	/* u topped w. ^ */
"udtr",		"\\(gr",	"del",		"TE",	/* math del */
"ugrave",	"\\*`u",	"u",		"My",	/* u topped w. ` */
"uml",		"\\h'-\\w'e'u'\\z\"\\h'\\w'e'u'", "",	"", /* Umlaut ov prev */
"undl",		"",		"",		"",	/* dot under prev */
"upsilon",	"\\(*u",	"upsilon",	"Ty",	/* Greek upsilon */
"uuml",		"\\*:u",	"u",		"M|",	/* u topped w. " */
"vb",		"|",		"|",		"A|",	/* Vertical bar */
"vvf",		"\\(*g",	"gamma",	"Tn",	/* Looks like gamma */
"xi",		"\\(*c",	"E",		"Tx",	/* Greek xi */
"yacu",		"\\*'y",	"y",		"Ay",	/* y topped w. ' */
"ygh",		"\\d3\\u",	"3",		"A3",	/* OE character */
"yuml",		"\\*:y",	"y",		"M}",	/* y topped w. " */
"zeta",		"\\(*z",	"zeta",		"Tz",	/* Greek zeta */
"zh",		"\\d3\\u",	"3",		"A3",	/* ??? */
"zhx",		"&zhx.",	"&zhx.",	"A&zhx." /* ??? */
};

/* Number of entries in SymList (total array size over one element's size). */
#define		SYM_TOP	(sizeof (SymList) / sizeof (SymList[0]))

/*
 * FindSymbol -- look up a symbol name in SymList.
 *
 *	parameters:
 *		Str -- NUL-terminated symbol name.  A name that starts with
 *		       a digit is looked up as "0".
 *	returns:
 *		Pointer to a static copy of the matching SymList entry, or
 *		SYMNULL if Str is a null pointer or no entry matches.  The
 *		copy is overwritten by the next successful call, so callers
 *		must use or save it before calling again.
 *	side effects:
 *		Prints the symbol being searched for when tTd(18,5) is true
 *		and reports out-of-range symbols when tTd(18,2) is true.
 */
struct SYM *
FindSymbol (Str)
char	*Str;
{
	int			top, bot, mid;	/* binary search variables */
	int			result;		/* compare result in search */
	static struct SYM	SymCopy;	/* copy of matching symbol */

	/* For verifying the SYM struct */

#ifdef notdef
	struct SYM	*Spnt;

	for (Spnt = SymList; Spnt < (SymList + SYM_TOP); Spnt++)
		printf ("%s\t%s\n", Spnt->String, Spnt->vt200Str);
	Finis ();
#endif /* notdef */

	/* Nothing to look up; also keeps a null pointer out of printf/strcmp */
	if (Str == (char *) 0)
		return (SYMNULL);

	if (tTd(18,5))
		printf ("FindSymbol: Searching for symbol %s\n", Str);

	/*
	 * Numeric values refer to one or few times used symbols.
	 * Handle by setting Str to "0" which will return the (omitted)
	 * string.  The cast keeps isdigit() defined for characters with
	 * the high bit set on machines where plain char is signed.
	 */
	if (isdigit ((unsigned char) *Str))
		Str = "0";

	bot = 0;
	top = (int) SYM_TOP - 1;

	/* Symbols falling below or above the list are unknown */
	if (strcmp (Str, SymList[bot].String) < 0
	 || strcmp (Str, SymList[top].String) > 0) {
		if (tTd(18,2))
			printf ("FindSymbol: Symbol %s out of list range\n", Str);
		return (SYMNULL);
	}

	/*
	 * Binary search over the closed interval [bot, top].  Each miss
	 * discards the probed entry as well, so the interval always
	 * shrinks and the loop ends when it becomes empty.
	 */
	while (bot <= top) {
		mid = bot + (top - bot) / 2;
		result = strcmp (Str, SymList[mid].String);
		if (result == 0) {
			/* Hand back a copy so callers cannot alter the table */
			SymCopy = SymList[mid];
			return (&SymCopy);
		}
		if (result > 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}

	/* In range, but not in the table */
	return (SYMNULL);
} /* FindSymbol */
