/*  File: dnacpt.c
 *  Author: Jean Thierry-Mieg (mieg@mrc-lmba.cam.ac.uk)
 *  Copyright (C) J Thierry-Mieg and R Durbin, 1991
 *-------------------------------------------------------------------
 * This file is part of the ACEDB genome database package, written by
 * 	Richard Durbin (MRC LMB, UK) rd@mrc-lmba.cam.ac.uk, and
 *	Jean Thierry-Mieg (CRBM du CNRS, France) mieg@frmop11.bitnet
 *
 * Description:
 * Exported functions:
 * HISTORY:
 * Last edited: Apr  2 18:34 1992 (mieg)
 * * Jan 15 17:50 1992 (mieg): splice site concensus
 * * Oct 24 17:45 1991 (mieg): wobble on first letter
 * Created: Thu Oct 24 17:45:14 1991 (mieg)
 *-------------------------------------------------------------------
 */


#include "acedb.h"
#include "array.h"
#include "graph.h"
#include "sysclass.wrm"
#include "systags.wrm"
#include "tags.wrm"
#include "classes.wrm"
#include "a.h"
#include "bs.h"
#include "dna.h"
#include "lex.h"
#include "plot.h"
#include "pick.h"
#include "keyset.h"

typedef struct DNACPTSTUFF
  { int   magic;        /* == MAGIC*/
    Graph graph ;
    int restrictionBox ;
    char* restriction ;
    int curr, line , frame ;
    BOOL useKeySet , complement ;
  } *DNACPT ;


#define MAGIC  93452
#define MAGIC_CODON_USAGE  93453
static Graph dnacptGraph = 0 ;

#include "display.h"	/* must come after DNACPT definition */

static void dnacptDisplay(void) ;
void dnaAnalyse (void) ;
				/* from fmapdisp.c */

#define STATUS_PROTEIN 0x02

#define DNACPTGET(name)     DNACPT dnacpt ;\
                                       \
                          if (!graphAssFind (dnaAnalyse,&dnacpt)) \
		            messcrash ("(%s) can't find graph",name) ; \
			  if (!dnacpt) \
                            messcrash ("(%s) received a null pointer",name) ; \
                          if (dnacpt->magic != MAGIC) \
                            messcrash ("(%s) received a wrong pointer",name)  ;\
                          displayPreserve()  

static void dnacptPick (int k, double x) ;
static char dnacptComplementBase (char dna)  ;
static BOOL dnacptReverseComplement (Array dna)  ;
static int  useKeySetButtonBox = 0 ;

#define CODE_FILE  /* writes out coding sequences found in "Codon Usage" option to soding.seq */

#ifdef CODE_FILE
static FILE *codeFile = 0, *exonFile = 0 ;
#endif

/************************************************************************/
/*************************************************************************/

void dnaRepaint(Array colors, int color)
{ register int i = arrayMax(colors) ;
  while(i--)
    arr(colors,i,int) = color ;
}

/************************************************************************/

static void dnacptTotalLength(void)
{
  Array dna ;
  register int n =0, t = 0 ;
  Array ll = arrayCreate(50,int) ;
  KEY key = 0 ;
  DNACPTGET("dnacptTotalLength") ;

  dnacptDisplay() ;
  while(lexNext(_VDNA,&key))
    if(dna = dnaGet(key))
      { n += arrayMax(dna) ;
	t++ ;
	array(ll,arrayMax(dna)/1000,int)++ ;
	arrayDestroy(dna) ;
      }
  graphText(messprintf("Found %d sequences, total length %d",t,n),
	    4,dnacpt->line++) ;
  graphTextBounds (80,dnacpt->line += 2) ;
  graphRedraw() ;
  plotHisto("Sequences lengths in kilobase", ll) ;
}

/***********************************************/

static int intOrder(void *a, void *b)
{
  return
    (int)  ( (*(int *) a) - (*(int *) b) ) ;
}

/*************************************************************************/

static void dnacptFingerPrint(void)
{
  Array dna, protein ;  Array colors ; KEY key ;
  void *look ;
  Array hind3, sau3a , fp ; 
  register int i , j , n , t = 0 , nfp ,
     h, u, s, ih, is ;
  int  frame ;
  register char *cp , *cp0 ;
  BOOL lastIsHind3 = FALSE ;
  char h3[]={'*',A_,A_,G_,C_,T_,T_,'*',0} ;
  char s3a[]={'*',G_,A_,T_,C_,'*',0} ;
  char dir[80], fil[24] ;
  FILE *f ;
  DNACPTGET("dnacptFingerPrint") ;

  if(!fMapActive(&dna,&protein,&colors,&key,&frame,&look))
    { messout("Please, first select a dna window") ;
      return ;
    }

  /* HindIII == AAGCTT */
  /* Sau3AI = GATC */

          /* note that pickMatch returns pos + 1 or zero */

  strncpy(fil, name(key), 22) ;
  f = filqueryopen(dir, fil, "fp", "w") ;


      /* Find all hind3 sites */
  hind3 = arrayCreate(30,int) ;
  t = 0 ;
  n = arrayMax(dna) ;
  dnaRepaint(colors,BLACK) ;
  cp = cp0 = arrp(dna,0,char) ;
  while(i = pickMatch(cp , h3))
    { 
      if(i>n)
	break ;
      cp += i ; i = cp - cp0 - 1 ;
      array(hind3,t++,int) = i ;
      j = 6 ;
      while(j--)
	arr(colors,i + j,int) = LIGHTGREEN ;
    }

  graphText(messprintf("Found %d hind3 sites",t = arrayMax(hind3)),
	    2, dnacpt->line++) ;
  if (f) fprintf(f,"\n\nFound %d hind3 sites\n",t = arrayMax(hind3)) ;
  for(i=0;i<t;i++)
   { graphText(messprintf("(%d)",arr(hind3,i,int)),
	       3, dnacpt->line ++) ;
     if (f) fprintf(f, "(%d)\n",arr(hind3,i,int)) ;
   }
  dnacpt->line += 3 ;

      /* Find all sau3a sites */
  sau3a = arrayCreate(30,int) ;
  t = 0 ;
  cp = cp0 = arrp(dna,0,char) ;
  while(i = pickMatch(cp , s3a))
    { 
      if(i>n)
	break ;
      cp += i ; i = cp - cp0 - 1 ;
      array(sau3a,t++,int) = i ;
      j = 4 ;
      while(j--)
	arr(colors,i + j,int) = LIGHTBLUE ;
    }

	
  graphText(messprintf("Position and length of the finger printing bands"),
	    2, dnacpt->line++) ;
  if (f)
    fprintf(f, "\n\n\nPosition and length of the finger printing bands\n"),

  /* Now we measure from hind3 towards sau3a sites */
  nfp = 0;
  fp = arrayCreate(30,int) ;

  ih = arrayMax(hind3) - 1 ;
  is = arrayMax(sau3a) - 1 ;
  u  = arrayMax(dna) ;
  
  h = ih>=0 ? array(hind3,ih--,int) : -1 ;
  s = is>=0 ? array(sau3a,is--,int) : 0 ;
  lastIsHind3 = FALSE ;
  while(u)
    if(h<s) /* I am on a sua3a site */
      { if(lastIsHind3)
	  { array(fp,nfp++,int) = u - s ;
	    { graphText(messprintf("s-h %8d %8d %8d",s, u , u-s),
			3, dnacpt->line ++) ;
	      if (f) fprintf(f, "s-h %8d %8d %8d\n",s, u , u-s) ;
	    }
	  }
	
	u = s ;
	s = is>=0 ?  array(sau3a,is--,int) : 0 ;
	lastIsHind3 = FALSE ;
      }
    else /* I am on a hind3 site */
      { array(fp,nfp++,int) = u - h ;
	 if(lastIsHind3)
	   { graphText(messprintf("h-h %8d %8d %8d",h, u , u-h),
		       3, dnacpt->line ++) ;
	     if (f) fprintf(f, "h-h %8d %8d %8d\n",h, u , u-h) ;
	   }
	else
	   { graphText(messprintf("h-s %8d %8d %8d",h, u , u-h),
		       3, dnacpt->line ++) ;
	     if (f) fprintf(f,"h-s %8d %8d %8d\n",h, u , u-h) ;
	   }
	u = h ;
	h = ih>=0 ? array(hind3,ih--,int) : -1 ;
	lastIsHind3 = TRUE ;
      }

  dnacpt->line += 3 ;
  arraySort(fp,intOrder) ;
  graphText(messprintf("%d Ordered Bands",t = arrayMax(fp)),
	    2, dnacpt->line++) ;
  if (f) fprintf(f, "\n\n%d Ordered Bands\n",t = arrayMax(fp)) ;
  for(i=0;i<t;i++)
   { graphText(messprintf("(%d)",arr(fp,i,int)),
	  3, dnacpt->line++) ;
     if (f) fprintf(f, "(%d)\n",arr(fp,i,int)) ;
   }
  
  graphTextBounds (80,dnacpt->line += 2) ;
  graphRedraw() ;
  
  {
    Array histo ;
    if(fp)
      { i = arrayMax(fp) ;
	if(i)
	  { histo = arrayCreate(2000,int) ;
	    while(i--)
	      array(histo,array(fp,i,int),int)++ ;
	    plotHisto
	      (messprintf("%s Finger Print",
			  name(key)), 
	       histo) ;
	  }
      }
  }
  arrayDestroy(fp) ;
  arrayDestroy(hind3) ;
  arrayDestroy(sau3a) ;    
  if (f) fclose(f) ;
  fMapDisplayDNA(look) ;
}

/*************************************************************************/

static void dnacptDoTranslate(Array dna, Array protein, int frame)
{
  register char *cp, *cp0 , *cpmax , *pp , *pp0 , trueFrame = frame ;

  if (!arrayExists(dna) || ! arrayExists(protein))
    messcrash ("dnacptDoTranslate received a bad dna/protein array") ;

  if (arrayMax(dna) < 3)
    return  ;
  
  cp = cp0 = arrp(dna,0,char) ;
  
  cpmax = cp0 + arrayMax(dna) ;
  protein = arrayReCreate(protein,arrayMax(dna)/3+ 3,char) ;
  arrayMax(protein) = arrayMax(dna)/3 + 2 ;
  pp = pp0 = arrp(protein,0,char) ;
  
  if(frame<0)
      { dnacptReverseComplement(dna) ;
	frame = (- frame ) % 3 ;
      }

  if(frame >= 0 )
    for (cp =  cp0 + (frame % 3) ;
	 cp + 2 < cpmax ;
	 cp +=3 )
      {
	switch(*cp)
	  { 
	  case T_:
	    switch(*(cp+1))
	      {
	      case T_:
		switch(*(cp+2))
		  { 
		  case T_: case C_: case Y_:
		    *pp++ = 'F';
		    break;
		  case A_: case G_: case R_:
		    *pp++ = 'L';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      case C_:
		*pp++ = 'S' ;
		break ;
	      case A_:
		switch(*(cp+2))
		  { 
		  case T_: case C_: case Y_:
		    *pp++ = 'Y';
		    break;
		  case A_: case G_: case R_:
		    *pp++ = '*';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      case G_:
		switch(*(cp+2))
		  { 
		  case T_: case C_: case Y_:
		    *pp++ = 'C';
		    break;
		  case A_: 
		    *pp++ = '*';
		    break;
		  case G_: 
		    *pp++ = 'W';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      default:
		*pp++ = 'X' ;
		break ;
	      }
	    break ;
	  case C_:
	    switch(*(cp+1))
	      {
	      case T_:
		*pp++ = 'L' ;
		break ;
	      case C_:
		*pp++ = 'P' ;
		break ;
	      case A_:
		switch(*(cp+2))
		  {
		  case T_: case C_: case Y_:
		    *pp++ = 'H';
		    break;
		  case A_: case G_: case R_:
		    *pp++ = 'Q';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      case G_:
		*pp++ = 'R' ;
		break ;
	      default:
		*pp++ = 'X' ;
		break ;
	      }
	    break ;
	  case A_:
	    switch(*(cp+1))
	      {
	      case T_:
		switch(*(cp+2))
		  {
		  case T_: case C_: case A_: 
		  case Y_: case M_: case W_:
		  case H_:
		    *pp++ = 'I';
		    break;
		  case G_:
		    *pp++ = 'M';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      case C_:
		*pp++ = 'T' ;
		break ;
	      case A_:
		switch(*(cp+2))
		  {
		  case T_: case C_: case Y_:
		    *pp++ = 'N';
		    break;
		  case A_: case G_: case R_:
		    *pp++ = 'K';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      case G_:
		switch(*(cp+2))
		  {
		  case T_: case C_: case Y_:
		    *pp++ = 'S';
		    break;
		  case A_: case G_: case R_:
		    *pp++ = 'R';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      default:
		*pp++ = 'X' ;
		break ;
	      }
	    break ;
	  case G_:
	    switch(*(cp+1))
	      {
	      case T_:
		*pp++ = 'V' ;
		break ;
	      case C_:
		*pp++ = 'A' ;
		break ;
	      case A_:
		switch(*(cp+2))
		  {
		  case T_: case C_: case Y_: 
		    *pp++ = 'D';
		    break;
		  case A_: case G_: case R_:
		    *pp++ = 'E';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      case G_:
		*pp++ = 'G' ;
		break ;
	      default:
		*pp++ = 'X' ;
		break ;
	      }
	    break ;
  /***************** Wobble on first Letter *******************/
	  case Y_:  /* T or C */
	    switch(*(cp+1))
	      {
	      case U_:
		switch(*(cp+2))
		  {
		  case A_: case G_: case R_: 
		    *pp++ = 'L';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      default:
		*pp++ = 'X' ;
		break ;
	      }
	    break ;
	  case M_:  /* A or C */
	    switch(*(cp+1))
	      {
	      case G_:
		switch(*(cp+2))
		  {
		  case A_: case G_: case R_: 
		    *pp++ = 'R';
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      default:
		*pp++ = 'X' ;
		break ;
	      }
	    break ;
	  case R_:
	    switch(*(cp+1))
	      {
	      case A_:
		switch(*(cp+2))
		  {
		  case T_: case C_: case Y_: 
		    *pp++ = 'B'; /*Asp, Asn */
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      default:
		*pp++ = 'X' ;
		break ;
	      }
	    break ;
	  case S_:
	    switch(*(cp+1))
	      {
	      case A_:
		switch(*(cp+2))
		  {
		  case A_: case G_: case R_: 
		    *pp++ = 'Z'; /* Glu, Gln */
		    break;
		  default:
		    *pp++ = 'X' ;
		    break ;
		  }
		break ;
	      default:
		*pp++ = 'X' ;
		break ;
	      }
	    break ;
	  default:
	    *pp++ = 'X' ;
	    break ;
	  }
      }
  arrayMax(protein) = pp - pp0 ;
  if(3*arrayMax(protein) > arrayMax(dna))
    messcrash("Program error in DoTranslate, probably a missing break, sorry") ;

  if(trueFrame<0)
      { dnacptReverseComplement(dna) ;  /* restore dna */
	/* Now reverse protein */
	{ char c, *cp = arrp(protein,0,char) , *cq = cp + arrayMax(protein) - 1 ;
	  while(cp < cq)
	    { c = *cp ; *cp++ = *cq ; *cq-- = c ; }
	}
      }
}

/*************************************************************************/

static void dnacptStopSites(Array dna, Array colors, int frame)
{ register  i , j ;
  register char *cp, *cp0 , *cpmax ;
  int color = LIGHTRED ;

  cp = cp0 = arrp(dna,0,char) ;
  cpmax = cp0 + arrayMax(dna) ;
  if(arrayMax(dna) < 3)
    return ;
  
   if(frame >= 0 )
	for (cp =  cp0 + (frame % 3) ;
	     cp + 2 < cpmax ;
	     cp +=3 )
	    /* search UAG UAA UGA */
	{
	    /*
	     *    R_ is A_|G_
	     */
	    if (((*cp & T_) && (*(cp+1) & A_) && (*(cp+2) & R_))
		|| ((*(cp+1) & G_) && (*(cp+2) & A_)))
	    {
		i = cp - cp0 ;
		j = 3 ;
		while(j--)
		    arr(colors,i + j,int) = color ;
	    }
	}
  else /* look backwords for complement */
	for (cp =  cpmax - ((-frame) % 3) - 1; /* -1 since i start on zero */
	     cp - 2 >= cp0 ;
	     cp -=3 )
	    /* search UAG UAA UGA */
	{
	    /*
	     *	  Y is C|T
	     */
	    if (((*cp & A_) && (*(cp-1) & T_) && (*(cp-2) & Y_ ))
	       || ((*(cp-1) & C_) &&  (*(cp-2) & T_)))
	    {
		i = cp - cp0 ;
		j = 3 ;
		while(j--)
		    arr(colors,i - j,int) = color ;
	  }
      }

}

/*************************************************************************/
/* copied from pickMatch and modified to use bit masks */
/* match to template
   
   returns 0 if not found
           1 + pos of first sigificant match  if found
*/

int dnacptPickMatch (char *cp,char *tp)
{
  char *c=cp, *t=tp,  *cs=cp ;
 
  if(!*cp || !*tp)
    return 0 ;

  while (*c)
    switch(*t)
      {
      case '\0':
        return  cs - cp + 1 ;
      default  :
        if (!(*t++ & *c++))
	  { t = tp; c = ++cs ; }
	break;
      }
  
  return *t ? 0 : cs - cp + 1 ;
}

/*************************************************************************/

static void dnacptColorMatches(Array sites, int n, int col,
			       Array colors, int len, int from)
{ int  t , i, j , color ;

  switch(col)
    {
    case 1:
      color = LIGHTBLUE ;
      break ;
    case 2:
      color = LIGHTRED ;
      break ;
    case 3:
      color = MAGENTA ;
      break ;
    case 4:
      color = LIGHTGREEN ;
      break ;
    }
  for(t = n ; t < arrayMax(sites) ; t++)
    {
      i = array(sites,t,int) + from ;
      if (i<0 || i > arrayMax(colors))
	messcrash("dnacptColorMatches over flow") ;
      j = len ; 
      while(j--)
	arr(colors,i + j,int) = color ;
    }
}

/*************************************************************************/

static int dnacptMatch(Array dna, Array sites, char *cq, int from, int length)
{ register int   max = arrayMax(dna) , n = max, t = 0 , i ;
  register char *cp, *cp0 , c ;

  
   if (n > from + length)
    n = from + length ;
  if(from<0)
    from = 0 ;
  if (from >n)
    from = n ;
  cp = cp0 = arrp(dna,from,char) ;
  c = array(dna,n,char) ;
  array(dna,n,char) = 0 ;  /* neccesary in pickMatch */

          /* note that pickMatch returns pos + 1 or zero */
  t = arrayMax(sites)  ;
  while(i = dnacptPickMatch(cp , cq))
    { 
      if(i>n)
	break ;
      cp += i ; 
      array(sites,t++,int) = cp - cp0 - 1 ;
    }
/*  array(sites,t++,int) = cp + n - cp0 - 1 ; last fragment i think */
  arrayMax(dna) = max ;  /* restore the correct values */
  array(dna,n,char) = c ;  
  return arrayMax(sites) ;
}

/*************************************************************************/

Array dnacptMultipleMatch(Array dna, Array colors, Stack s, int from, int length)
{
  Array   sites = arrayCreate(20,int) ;
  int n, col = 0 ;
  char *cp ;

  stackCursor(s, 0) ;
  while (cp  = stackNextText(s))
    {
      n = arrayMax(sites) ;
      dnacptMatch(dna, sites, cp, from, length) ; 
      if(colors)  /* i.e. not if from keyset */
	dnacptColorMatches(sites, n , ++col,  colors, strlen(cp), from) ;
     }
  return sites ;
}

/*************************************************************************/

static int naturalOrder(void *a, void *b)
{ return *(int*)a - *(int*) b ;
}

static void dnacptShowMatches(KEY seqKey, Array sites)
{ register int  t, i, nrow ;

  DNACPTGET("dnacptShowMatches") ;
  arraySort(sites, naturalOrder) ;
  t = arrayMax(sites) ;
  graphText(messprintf("Found %d sites in %s",t, name(seqKey)),
	    2,dnacpt->line++) ;
  if (!t)
    return ;
  dnacpt->line++ ;
  nrow = (t+1)/2 ;
  for(i=0;i<t-1;i++)
    graphText (messprintf ("%8d  (%d)",
			   arr(sites,i,int) + 1 , /* No ZERO in biology */
			   arr(sites,i+1,int)-arr(sites,i,int)),
	       (i >= nrow) ? 22 : 1, dnacpt->line + i%nrow) ;
  graphText (messprintf("%8d",arr(sites,i,int)+1),
	     (i >= nrow) ? 22 : 1, dnacpt->line + i%nrow) ;
  dnacpt->line += nrow + 1 ;
}

/*************************************************************************/
#define BOX_LENGTH 40
Stack dnacptAnalyseRestrictionBox(char *text)
{
  OBJ obj ;
  KEY key ;
  char *cname ,*cp;
  static Stack s = 0 , s1 = 0 , s2 = 0 ;
  BOOL error = FALSE ;
  int n ;
 
  if(!text || !*text)
    { messout("First type in a restriction site") ;
      return FALSE ;
    }
 
  if(stackExists(s))
    stackDestroy(s) ;
  s = stackCreate(80) ; 
  s1  = stackCreate(80) ;
  s2  = stackCreate(80) ;
 
  stackTokeniseTextOn(s1, text, ";") ; /* tokeniser */
          /* Now replace known enzyme names by their sequence */
          /* and produce a clean message */
  while (cp  = stackNextText(s1))
    {
      if(lexword2key(cp,&key,_VRestriction)
	 && (obj = bsCreate(key)))
	{ catText(s2, name(key)) ;
	  catText(s2, " ;  ") ;
	  if (bsGetData(obj,_Site,_Text,&cname))
	    pushText(s, cname) ;
	  bsDestroy(obj) ;
	}
      else
	{
	  pushText(s, cp) ;
	  catText(s2, cp) ;
	  catText(s2, " ;  ") ;
	}
    }
  memset(text, 0, BOX_LENGTH) ;  
  strncpy(text, stackText(s2, 0), 39) ;
 
        /* Verify that we have sequences */
        /* go to lower case */
  stackCursor(s, 0) ;
  while( cp  = stackNextText(s))
    { 
      --cp ;
      while(*++cp)
	 *cp = freelower(*cp) ;
    }
       /* try to encode */
  stackCursor(s, 0) ;
  while (cp  = stackNextText(s)) 
    { 
      n = strlen(cp) ;
      dnaEncodeString(cp) ;
      if (n > strlen(cp))
	error = TRUE ;
    }

  if(error)
      { messout 
	  ("%s %s %s",
	   "Type names of a known restriction sites, or valid dna sequences,",
	   "separated by semi columns. For example:",
	   "BamHI ; aatgga ; mksw") ;
	helpOn("DNA_and_amino_acids_nomenclature") ;
	stackDestroy(s) ;
      }
  else
    stackCursor(s, 0) ;
  stackDestroy(s1) ;
  stackDestroy(s2) ;
  return s ;
}
 
/*************************************************************************/

static void dnacptRestriction(void)
{
  Array dna, protein ;  Array colors ; KEY  seqKey ;
  void *look ;
  Array sites ;
  extern void fMapFindOrigin(void *look, int *op, int*lp) ;
  Stack s ; int from = 0 , length , frame ;
  DNACPTGET("dnacptRestriction") ;
  
  if(!dnacpt->useKeySet)
    { if(!fMapActive(&dna,&protein,&colors,&seqKey,&frame, &look))
	{ messout("First select a dna window or the Use KeySet buttton") ;
	  return ;
	}
      dnaRepaint(colors,BLACK) ;
      fMapFindOrigin(look, &from, &length) ;
    }
  
  s = dnacptAnalyseRestrictionBox(dnacpt->restriction) ;
  if(!s)
    { dnacptDisplay() ;
      return ;
    }

  if (dnacpt->useKeySet)
    { KEY dnaKey = 0 , seq ; KEYSET keySet ; void *look ;
      int nn = 0 , dummy ;
      
      if (!keySetActive(&keySet, &look)) 
	{ messout("First select a keySet containing sequences") ;
	  return ;
	}

      while(lexNext(_VDNA,&dnaKey))
	if (lexReClass(dnaKey, &seq, _VSequence) &&
	    keySetFind(keySet, seq, &dummy) &&
	    (dna = dnaGet(dnaKey) ) )
	  { nn++ ;
	    sites =  dnacptMultipleMatch(dna,0, s,0, arrayMax(dna)) ;
	    if(arrayMax(sites))
	      dnacptShowMatches(dnaKey,sites) ;
	    arrayDestroy(sites) ;
	    arrayDestroy(dna) ;
	  }
      graphText(messprintf("I scanned %d sequences",nn),
		2,dnacpt->line++) ;
    }
  else
    { sites = dnacptMultipleMatch(dna,colors,s,from, length) ;
      dnacptShowMatches(seqKey,sites) ;
      arrayDestroy(sites) ;
    }
  stackDestroy(s) ;
  graphTextBounds (80, ++dnacpt->line) ;
  graphRedraw () ;
  if (!dnacpt->useKeySet)
    fMapDisplayDNA(look) ;
  dnacpt->useKeySet = FALSE ;
  graphBoxDraw(useKeySetButtonBox, BLACK, WHITE) ;
}

/*************************************************************************/

static char dnacptComplementBase (char c) 
{
  switch (c)
    { case A_: return T_ ;
      case T_: return A_ ;
      case G_: return C_ ;
      case C_: return G_ ;

      case R_: return Y_ ;
      case Y_: return R_ ;
      case W_: return W_ ;
      case M_: return K_ ;
      case K_: return M_ ;
      case S_: return S_ ;
	
      case H_: return D_ ;
      case D_: return H_ ;
      case B_: return V_ ;
      case V_: return B_ ;
	
      case N_: return N_ ;

      default: return 0 ;
      }
}

/**********************/

static BOOL dnacptReverseComplement(Array dna) 
{
  char c, *cp, *cq ;
  int i ;

  for (i = 0 ; i < arrayMax(dna) ; ++i)	/* dry run first */
    if (!dnacptComplementBase (arr(dna, i, char)))
      return FALSE ;

  for (i = 0 ; i < arrayMax(dna) ; ++i)	/* now complement bases */
    arr(dna,i,char) = dnacptComplementBase (arr(dna, i, char)) ;

  cp = arrp(dna,0,char) ;	/* now reverse */
  cq = cp + arrayMax(dna) - 1 ;
  while (cp < cq)
    { c = *cp ; 
      *cp++ = *cq ; 
      *cq-- = c  ;
    }
  
  return TRUE ;
}

/*************************************************************************/

static void dnacptStops(void)
{
  Array dna, protein ;  Array colors ; KEY seqKey ; int frame ;
  void *look ;
  DNACPTGET("dnacptRestriction") ;

  if(dnacpt->useKeySet)
    { messout("You cannot do that on all the DNA at once") ;
      return ;
    }
  else
    { if(fMapActive(&dna,&protein,&colors,&seqKey,&frame,&look))
	dnaRepaint(colors,BLACK) ;
    else
      { messout("First select a dna window") ;
	return ;
      }
    }
  memset(dnacpt->restriction,0,40) ;
  dnacptDisplay() ;

  graphText(messprintf("Looking for stop codons UAG UAA UGA"),
	    2, dnacpt->line++) ;
  dnacptStopSites(dna, colors, frame) ;
    
  graphTextBounds (80, ++dnacpt->line) ;
  graphRedraw () ;

  fMapDisplayDNA(look) ;
}

/*************************************************************************/

static void dnacptTranslate(void)
{
  Array dna, protein ;  Array colors ; KEY seqKey ; int frame ;
  void *look ;
  DNACPTGET("dnacptTranslate") ;

  if(dnacpt->useKeySet)
    { messout("You cannot do that on all the DNA at once") ;
      return ;
    }
  else
    { if(fMapActive(&dna,&protein,&colors,&seqKey,&frame,&look))
	dnaRepaint(colors,BLACK) ;
    else
      { messout("First select a dna window") ;
	return ;
      }
    }
  memset(dnacpt->restriction,0,40) ;
  dnacptDisplay() ;
 
  graphText(messprintf("Translating to proteins"),
	    2, dnacpt->line++) ;
  graphRedraw () ;

  dnacptDoTranslate(dna, protein, frame) ;
  dnacptStops() ; /* will display and color the stops */
}

void dnacptTranslate2(Array dna, Array protein, Array colors, int frame)
{  DNACPTGET("dnacptTranslate2") ;

  dnaRepaint(colors,BLACK) ;
  dnacptDoTranslate(dna, protein, frame) ;
  dnacptStops() ; /* will display and color the stops */
}

/*************************************************************************/

static void dnacptintronsExons(void)
{
  Array dna, protein ;  Array colors ; KEY seqKey ;
  void *look ;
  Array cDna, p1;
  int frame ; BOOL positiveStrand ;
  DNACPTGET("dnacptintronsExons") ;

  if(dnacpt->useKeySet)
    { messout("You cannot do that on all the DNA at once") ;
      return ;
    }
  else
    { if(fMapActive(&dna,&protein,&colors,&seqKey,&frame,&look))
	{ 
	  if( !(cDna = fMapGetMessage(look, &positiveStrand))) 
	    return ;
	  p1  = arrayCreate(arrayMax(cDna)/3 + 3,char) ;
  /* max + 3 because the amino acid seq may be longer in different frames
     and this buffers against coloring slips at the end */
	  dnacptDoTranslate(cDna, p1, positiveStrand ? 0 : -3) ;
	  fMapIntronsExons (look, p1) ;
	  arrayDestroy(p1) ;
	  arrayDestroy(cDna) ;
	}
    else
      { messout ("First select a Sequence") ;
	return ;
      }
    }
}

/*************************************************************************/
/*************************************************************************/

static void dnacptFastaDump(void)
{
  Array dna, protein ;  Array colors ; KEY  seqKey ;
  void *look ;
  extern void fMapFindOrigin(void *look, int *op, int*lp) ;
  extern BOOL dumpFastA (KEY key, KEY title,  Array a, int from, int length, FILE* fil) ;
  int from = 0 , length , frame ;
  DNACPTGET("dnacptFastaDump") ;
  
  if(!dnacpt->useKeySet)
    { if(!fMapActive(&dna,&protein,&colors,&seqKey,&frame, &look))
	{ messout("First select a dna window or the Use KeySet buttton") ;
	  return ;
	}
      fMapFindOrigin(look, &from, &length) ;
    }
  
  if (dnacpt->useKeySet)
    { KEYSET keySet ; void *look ; 
      extern void  dumpKeySetFastA (KEYSET ks) ;
            
      if (!keySetActive(&keySet, &look)) 
	{ messout("First select a keySet containing sequences") ;
	  return ;
	}

      dumpKeySetFastA (keySet) ;
    }
  else
    { 
      extern FILE* dnaFileOpen(void) ;
      FILE *fil = dnaFileOpen() ;
      OBJ obj ;
      KEY titleKey = 0 ;
      
      if (!fil)
	return ;
      
      if (obj = bsCreate(seqKey))
	{ 
	  bsGetKey (obj, _Title, &titleKey) ;
	  bsDestroy (obj) ;
	}
      dumpFastA (seqKey, titleKey, dna, from, length, fil) ;
      fclose (fil) ;
      
      messout("I wrote 1 sequence") ;
    }

  dnacpt->useKeySet = FALSE ;
  graphBoxDraw(useKeySetButtonBox, BLACK, WHITE) ;
}

/*************************************************************************/
/**************************************************************************/

static void dnaAG_TC(void)
{
  register int i , j ;
  int this, old = 0 , frame ;
  int n ;
  Array plot ;
  Array dna, protein ;  Array colors ; KEY key ;
  char *cp , keep ;
  void *look ;
  static char fileName[24],dirName[80] ;
  static  FILE *f = 0 ;
  DNACPTGET("dnaAG_TC") ;

  if(!fMapActive(&dna,&protein,&colors,&key,&frame,&look))
    { messout("First select a dna window") ;
      return ;
    }
  plot = arrayCreate(50,int) ;
  dnacptDisplay() ;
  dnaRepaint(colors,BLACK) ;
  cp = arrp(dna,0,char) ;
  n =arrayMax(dna) ;
  
  strncpy(fileName, name(key), 22) ;
  f= filqueryopen (dirName, fileName, "agct", "w") ;
  if(f)
    fprintf(f,"\n\n %s\n",name(key)) ;

  for(i = j = 0 ; i<n ; cp++, i++ )
    { if(*cp & R_)  /* R is A|G */
	this = 1 ;
      if(*cp & Y_)  /* Y is T|C */
	  this = 2 ;
      if(this == old)
	j++;
      else
	{ if(j>12)
	    { array(plot,j,int)++ ;
	      if(f)
		fprintf(f,"%d pos %d\n  ",j, i-j) ;
	      keep = *(cp+2);
	      *(cp+2) = 0 ;
	      if(f)
		fprintf(f," %s\n",
			dnaDecodeString(cp-j-(i>1 ? 2 : i)));
	      *(cp+2) = keep ;
	      while(j--)
		arr(colors,i-j-1,int) = RED ;
	    }
	  j = 1 ;
	}
      old = this ;
      this = 0 ;
    }

  if(f)
    fclose(f) ;

  /*  plotHisto("Sites found",plot) ;
      rplaced by arrayDestroy : do not do both */
  arrayDestroy(plot) ;

  fMapDisplayDNA(look) ;
}

/*************************************************************************/
/*************************************************************************/
/**********   Concensus splicing sequence ********************************/
   /* accumulates in sites5 (resp sites), a bit of dna around from (resp to) 
      complementing if from > to
      */
      
static void dnacptAddSplice(Array dna, Array  site5, Array site3, 
			    int from, int to, int lengthEx, int lengthIn)
{
  int i = from ; char *cp ; int *ip ; int n = lengthEx + lengthIn + 1 ;

  if (from < to)
    {
      for(i = from - lengthEx, cp = arrp(dna, 0, char) + i , ip = arrayp(site5, 0, int);
	  i <= from + lengthIn && i < arrayMax(dna); cp ++, i++, ip++ )
	{ 
	  if (i<0) continue ;
	  switch(*cp)
	    {
	    case A_ : (*ip)++ ; break ;
	    case C_ : (*(ip + n))++ ; break ;
	    case G_ : (*(ip + 2*n))++ ; break ;
	    case T_ : (*(ip + 3*n))++ ; break ;
	    default: (*(ip + 4*n))++ ; break ;
	    }
	}
      for(i = to - lengthIn, cp = arrp(dna, 0, char) + i , ip = arrp(site3, 0, int);
	  i <= to + lengthEx && i < arrayMax(dna); cp ++, i++, ip++ )
	{ if (i<0) continue ;
	  switch(*cp)
	    {
	    case A_ : (*ip)++ ; break ;
	    case C_ : (*(ip + n))++ ; break ;
	    case G_ : (*(ip + 2*n))++ ; break ;
	    case T_ : (*(ip + 3*n))++ ; break ;
	    default: (*(ip + 4*n))++ ; break ;
	    }
	}
    }
  else
    {
      for(i = from + lengthEx , cp = arrp(dna, 0, char) + i , ip = arrp(site5, 0, int);
	  i >= from - lengthIn  && i >= 0 ; cp--, i--, ip++ )
	{ 
	  if (i >= arrayMax(dna)) continue ;
	  switch(*cp)
	    {
	    case T_ : (*ip)++ ; break ;
	    case G_ : (*(ip + n))++ ; break ;
	    case C_ : (*(ip + 2*n))++ ; break ;
	    case A_ : (*(ip + 3*n))++ ; break ;
	    default: (*(ip + 4*n))++ ; break ;
	    }
	}
      for(i = to + lengthIn , cp = arrp(dna, 0, char) + i , ip = arrp(site3, 0, int);
	  i >= to - lengthEx  && i >= 0; cp--, i--, ip++ )
	{ if (i >= arrayMax(dna)) continue ;
	  switch(*cp)
	    {
	    case T_ : (*ip)++ ; break ;
	    case G_ : (*(ip + n))++ ; break ;
	    case C_ : (*(ip + 2*n))++ ; break ;
	    case A_ : (*(ip + 3*n))++ ; break ;
	    default: (*(ip + 4*n))++ ; break ;
	    }
	}
    }
	
}

/*************************************************************************/
    /* display the cumulted splice sites in the current gmapcpt window */
static void  dnacptShowSpliceConcensus(DNACPT dnacpt, char *title, 
				       Array  site,int  lengthEx, int lengthIn) 
{ int n = lengthIn + lengthEx + 1 , i , j ;
  graphText(title, 5, dnacpt->line += 3) ;
  for (j=0; j<5; j++)
    for(i=0; i< n; i++)
      graphText(messprintf("%3d", array(site, n*j + i, int)),
		5 + 5*i, dnacpt->line + 2 + j) ;
  graphText("A", 2, dnacpt->line + 2) ;
  graphText("C", 2, dnacpt->line + 3) ;
  graphText("G", 2, dnacpt->line + 4) ;
  graphText("T", 2, dnacpt->line + 5) ;
  graphText("X", 2, dnacpt->line + 6) ;

  if(*title == '5')
    { graphText("--- intron ---> ", 5 * lengthEx + 12, dnacpt->line + 1) ;
      graphLine(5*lengthEx + 9.05, dnacpt->line + 1.5, 
		5*lengthEx + 9.05, dnacpt->line + 7.5) ;
    }
  else
    { int p = 5 * lengthIn - 14 ;
      if (p<0) p = 1 ;
      graphText("--- intron --->", p , dnacpt->line + 1 ) ;
      graphLine(5*lengthIn + 4.05, dnacpt->line + 1.5, 
		5*lengthIn + 4.05, dnacpt->line + 7.5) ;
    }
  dnacpt->line += 8 ;
  graphRedraw() ;
}

/*************************************************************************/
    /* Gets the sites recursivelly 
     *  taking orientation into account
     */  
static void  dnacptGetSpliceSites(Array sites,KEY  seq, int from, int to)
{ int i ;
  OBJ Seq = bsCreate(seq) ;
  Array exons = arrayCreate(8, BSunit) ;
  Array subSeq = arrayCreate(9, BSunit) ;
  if (!Seq)
    return ;
  if (bsFindTag(Seq, _Has_CDS)  &&
      bsFlatten(Seq, 3, subSeq) )
    for (i=0; i < arrayMax(subSeq) ; i += 3)
      if (from < to)        /* -1 because no zero in biology */
	dnacptGetSpliceSites(sites, arr(subSeq,i,BSunit).k , 
			     from + arr(subSeq,i+1,BSunit).i - 1,
			     from + arr(subSeq,i+2,BSunit).i - 1 ) ;
      else
	dnacptGetSpliceSites(sites, arr(subSeq,i,BSunit).k ,
			     from -  arr(subSeq,i+1,BSunit).i + 1 ,
			     from -  arr(subSeq,i+2,BSunit).i + 1 ) ;
  if(bsFindTag(Seq, _Source_Exons) &&
     bsFlatten(Seq, 2, exons) )
    for (i = 1; i + 1  < arrayMax(exons) ; i += 2) /* introns are between exons */
      { int  n = arrayMax(sites) ;
	if (from < to)        /* -1 because no zero in biology */
	  {
	    array(sites, n, int) = from + arr(exons,i,BSunit).i - 1 ;
	    array(sites, n + 1, int) = from + arr(exons,i + 1 ,BSunit).i - 1 ;
	  }
	else
	  {
	    array(sites, n, int) = from - arr(exons,i,BSunit).i + 1 ;
	    array(sites, n + 1, int) = from - arr(exons,i + 1 ,BSunit).i + 1 ;
	  }
      }
  arrayDestroy(subSeq) ;
  arrayDestroy(exons) ;
  bsDestroy(Seq) ;
}

/*************************************************************************/
  /* Given tha active key set
   * look for dnasequnces whose descriptor is in keyset
   * in each, look recursively for the splice sites,
   *  accumulate them and display.
   * by starting from _VDNA, we are garanteed not to double count.
   */
static void dnacptSpliceConsensus(void)
{
  Array dna ; KEY seq = 0 , dnaKey = 0 ;
  KEYSET keySet , ks2 ; void *look ;
  Array sites ,  site5, site3 ;
  int lengthEx = 3 , lengthIn = 7 , i , dummy , nn = 0 , nintrons = 0 ;
  DNACPTGET("dnacptSpliceConcensus") ;
      
  if(!dnacpt->useKeySet)
    { messout("I just know how to do that from a keySet") ;
      return ;
    }
  if (!keySetActive(&keySet, &look)) 
    { messout("First select a keySet containing sequences") ;
      return ;
    }

  ks2 = keySetCreate() ;
  sites = arrayCreate(100, int) ;
  site5 =arrayCreate(5*(lengthIn  + lengthEx + 1), int) ; /* 5 is atgc + ambiguous */
  site3 =arrayCreate(5*(lengthIn + lengthEx), int) ;     /* 5 is atgc + ambiguous */
  array(site5,5*(lengthIn  + lengthEx + 1) - 1 , int) = 0 ;
  array(site3,5*(lengthIn  + lengthEx + 1) - 1 , int) = 0 ;
  while(lexNext(_VDNA,&dnaKey))  
    { if (lexReClass(dnaKey, &seq, _VSequence) &&
	  keySetFind(keySet, seq, &dummy) &&
	  (dna = dnaGet(dnaKey) ) )
	{ nn++ ;
	  keySetInsert(ks2,seq) ;
	  arrayMax(sites) = 0 ;
	  dnacptGetSpliceSites(sites, seq,  0, arrayMax(dna) - 1 ) ;
	  for (i=0 ; i<arrayMax(sites) ; i+= 2)
	    dnacptAddSplice(dna, site5, site3, 
			    arr(sites,i,int), arr(sites, i+1, int), lengthEx, lengthIn ) ;
	  nintrons += arrayMax(sites) / 2. ;
	  arrayDestroy(dna) ;
	}
    }

      
  if (!nn)
    { messout("First select a keySet containing sequences") ;
      return ;
    }
  graphText(messprintf("I scanned %d sequences, containing %d introns",
		       nn, nintrons), 
	    2,dnacpt->line += 2) ;
  dnacptShowSpliceConcensus(dnacpt, "5\'concensus",site5, lengthEx, lengthIn) ;
  dnacptShowSpliceConcensus(dnacpt, "3\'concensus",site3, lengthEx, lengthIn) ;
  arrayDestroy(site3) ;
  arrayDestroy(site5) ;
  arrayDestroy(sites) ;
  keySetDestroy(keySet) ;
  keySetShow(ks2, look) ;
}

/*************************************************************************/
/*************************************************************************/
/******************   Codon usage         ********************************/
   /* this is a static because it follows the usual UCGA display of the
      genetic code with codon in 0, 65 ; rather than our usual A_, T_, G_, C_
      internal convention, all ambiguos codons are treatad as 64.
      */
static char* codonFullName(int codon)
{
  char 
    *line[3] , name[4], *cp ;

  line[0] = "PhePheLeuLeuSerSerSerSerTyrTyr******CysCys***Trp" ;
  line[1] = "LeuLeuLeuLeuProProProProHisHisGlnGlnArgArgArgArg" ;
  line[2] = "IleIleIleMetThrThrThrThrAsnAsnLysLysSerSerArgArg" ;
  line[3] = "ValValValValAlaAlaAlaAlaAspAspGluGluGlyGlyGlyGly" ;

  cp = line[codon>>4] + 12 * ((codon & 12) >> 2) + 3 * (codon & 3) ;
  strncpy(name, cp, 3) ;
  name[3] = 0 ;
  return name ;
}

/**********/
  /* Gives the per thousandth of usage of synonymous codons */
static void dnacptRenormaliseUsage(Array usage)
{
  int i, j ;
  int equiv[64] , tot[64] ;
  char name[5] ;
 
  for(i = 0 ; i < 64; i++)
    equiv[i] = -1 ;

  for(i = 0 ; i < 64; i++)
    if(equiv[i] == -1)
      { tot[i] = 0 ;
	strcpy(name, codonFullName(i)) ;
	for (j = i; j <64 ; j++)
	if(!strcmp(name, codonFullName(j)))
	  { tot[i] += arr(usage,j, int) ;
	    equiv[j] = i ;
	  }
      }

  for(i = 0 ; i < 64; i++)
    if(tot[equiv[i]])
      arr(usage, i, int) = 
	(1000 * arr(usage, i, int)) / tot[equiv[i]] ;
}

/**********/

typedef struct LOOKusageStruct
{ int magic ;
  Graph graph ;
  Array usage ;
  int myUsage ;
} *LOOKusage  ;

static void dnacptShowUsageDestroy(void)
{ LOOKusage look = 0 ;

  if (graphAssFind(dnacptShowUsageDestroy,&look) &&
      look &&
      look->magic == MAGIC_CODON_USAGE )
    { if (look->myUsage == arrayExists(look->usage))
	arrayDestroy(look->usage) ; 
      look->magic = 0 ;
      messfree(look) ;
    }
}

/********/

static int aaUse(Array usage, char *cp, int total)
{
  int  subtotal = 0 , i ;
 
  for(i = 0 ; i < 64; i++)
    if (!strcmp(cp, codonFullName(i)))
      subtotal += arr(usage,i, int) ;

 return 
	subtotal ;
}

/********/

static void showAA(char *text, int n, int x , int  y, int total)
{
   graphText(messprintf("%s%5.1f%%", text,
			(100.0 * n)/total), x, y) ;
 }

/********/

static int dnacptShowSomePeptide(Array usage, int total,
				 char *cp, int x, int y)
{ int i, n = 0 , nn = 0 ; char c ;
  for (i = 0 ; *cp ; i++, cp+= 3)
    { c = *(cp + 3) ;
      *(cp + 3) = 0 ; 
      n = aaUse(usage, cp, total) ; nn += n ;
      showAA(cp, n, 8 + 11*i, y , total ) ; 
      *(cp + 3) = c ;
    }
  return nn ;
}

/********/

static int  dnacptShowPeptideUsage(Array usage, int y, int total) 
{ int  n , subTotal = 0 ; 

  graphText("Amino acids usage:", 2, y) ; y += 2 ;
  
  n = dnacptShowSomePeptide(usage, total, "LysArgHis", 8, y + 2 ) ;
  showAA("Basic", n, 4, y + 1 , total) ;
  subTotal += n ;
 
  n = dnacptShowSomePeptide(usage, total, "AspGlu", 8, y + 4 ) ;
  showAA("Acidic", n, 4, y + 3 , total) ;
  subTotal += n ;

  n = dnacptShowSomePeptide(usage, total, "AsnGlnCysMetSerThr", 8, y + 6 ) ;
  showAA("Neutral", n, 4, y + 5 , total) ;
  subTotal += n ;
  showAA("Hydrophilic", subTotal, 1, y  , total) ;
  subTotal = 0 ;

  n = dnacptShowSomePeptide(usage, total, "GlyAlaValProLeuIle", 8, y + 10 ) ;
  showAA("Aliphatic", n, 4, y + 9 , total) ;
  subTotal += n ;

  n = dnacptShowSomePeptide(usage, total, "PheTyrTrp", 8, y + 12 ) ;
  showAA("Aromatic", n, 4, y + 11, total) ;
  subTotal += n ;
  showAA("Hydrophobic", subTotal, 1, y + 8 , total) ;
  subTotal = 0 ;

  return 21 ;  /* Overall number of lines used. */
}

/********/

static void   dnacptShowCodonUsage(int nn, int ncds, Array usage)
{ int n = arrayMax(usage) , total = 0 , i, j, k , codon ;
  int y = 4 ;
  char name[] = {'U','C','A','G'} , buffer[40] ; 
  LOOKusage look = (LOOKusage) messalloc(sizeof(struct LOOKusageStruct)) ;

  displayCreate(DtCodons) ;
  graphRegister(DESTROY, dnacptShowUsageDestroy) ;
  graphAssociate(dnacptShowUsageDestroy,look) ;
  look->magic = MAGIC_CODON_USAGE ;
  look->usage = usage ;
  look->myUsage = arrayExists(usage) ;
  look->graph = graphActive() ;

  graphText(messprintf("I scanned %d sequences, containing %d coding sequences",
		       nn, ncds), 
	    2, 2) ;
  while(n--)
    total += arr(usage, n, int) ;
    
  y += dnacptShowPeptideUsage(usage, y, total) ;

  graphText("Codon usage.", 2, y - 4) ;
  /*      graphText("Percentage of codon occurence.", 2, y - 4) ; */

  graphText(messprintf("Total %d codons, %d stops", total, 
		       arr(usage,10,int) + arr(usage,11,int) + arr(usage,14,int))
	    , 40, y - 4) ;
  i = arr(usage,64, int) ;
  if (i)
    graphText(messprintf("%d ambiguous codons.", i), 10, y - 3) ;
  i = arr(usage,65, int) ;
  if (i)
    graphText(messprintf("%d uncomplete codons.", i), 40, y - 3) ;

  for (i = 0 ; i < 5; i++)
    graphLine(5, y + 8 * i, 73, y + 8*i) ;
  for (i = 0 ; i < 5; i++)
    graphLine(5 + 17 *i , y , 5 + 17 * i, y + 32 ) ;
  
    for(i=0; i<4; i++)
    graphText(messprintf("%c", name[i]), 13 + 17 * i, y  -1.3) ;
  for(i=0; i<4; i++)
    graphText(messprintf("%c", name[i]), 3, y + 4 + 8 * i) ;



  dnacptRenormaliseUsage(usage) ;
  for(i = 0; i<4 ; i++)
    for ( j = 0 ; j < 4 ; j++)
      for ( k = 0 ; k < 4 ; k++)
	{ codon = 16*i + 4*j + k ;
	  buffer[0] = name[i] ;
	  buffer[1] = name[j] ;
	  buffer[2] = name[k] ;
	  buffer[3] = buffer[4] = ' ' ;
	  buffer[5] = 0 ;
	  strcat(buffer, codonFullName(codon) ) ; /* 5, 6, 7 */
	  buffer[8] = 0 ;
	  strcat(buffer, messprintf("%5.1f%%", arr(usage,codon,int)/ 10.0)) ;
	  graphText(buffer, 7 + 17*j, y + 2 + 8*i + k + (k/2) ) ;
	}

  graphRedraw() ;
}

/*************************************************************************/
   /* accumulates in usage the codon used in dna messenger */      
static void dnacptAddCds2 (Array usage,  Array dna)
{
  int i = arrayMax(dna), codon = 0 , frame = 0 ;
  char *cp = arrp(dna, 0, char) ;
  
  while(i--)
    { 
      if (codon != 64)
	switch(*cp++)
	  {
	  case T_: break ;
	  case C_: codon += 1 ; break ;
	  case A_: codon += 2 ; break ;
	  case G_: codon += 3 ; break ;
	  default: codon = 64 ; break ;
	  }

      frame++ ;
      frame %= 3 ;
      if (frame)
	{ if (codon != 64) codon <<= 2 ; }
      else 
	{ array(usage, codon, int)++ ;
/*   Stops should not occur 
	  if(codon ==10 || codon == 11 || codon == 14 )
	     invokeDebugger() ;
*/
	  codon = 0 ;
	}
    }    
  if (frame)
    array(usage, 65, int)++ ;
}

/****************************************************************************/
   /* construct the plus and back messengers  dna1 and dna2 */

static int dnacptAddCds (Array dna, Array cds, Array usage)
{ int ncds = 0, from, to, i, j ;
  static Array gene = 0, exon = 0 ; 
  int n = 0, ne = 0 ;
  char *cp ;
  KEY seq = 0 ;
  int kExon ;

  gene = arrayReCreate(gene, 1000, char) ;
  exon = arrayReCreate(exon, 300, char) ;

  for (i=0 ; i < arrayMax(cds) ; i+= 2)
    {
      from = arr(cds,i,int) ; to = arr(cds, i+1, int) ;

      if (from == -1)   /* begin of seq */
	{ seq = to ;
	  kExon = 1;
	  n = 0 ;
	  arrayMax(gene) = 0 ;
	  continue ;
	}
      
      if (from == -2)   /* end of seq */
	{ ++ncds ; 
	  dnacptAddCds2 (usage, gene) ;
#ifdef CODE_FILE
	  dnaDecodeArray (gene) ;
	  array(gene, arrayMax(gene), char) = 0 ;
	  fprintf (codeFile, "%s %s\n", name(to), arrp(gene,0,char)) ;
#endif	     
	  continue ;
	}

      cp = arrp(dna, from, char) ; /* copy dna */
      if (from <= to)
	for (j = from ; j <= to ; j++, cp++)
	  { if (ne || !(n%3))	/* keep in frame */
	      array(exon,ne++,char) = *cp ;
	    array(gene,n++,char) = *cp ;
	  }
      else
	for (j = from ; j >= to ; j--, cp--)
	  { if (ne || !(n%3))
	      array(exon,ne++,char) = dnacptComplementBase (*cp) ;
	    array(gene,n++,char) = dnacptComplementBase (*cp) ;
	  }

#ifdef CODE_FILE
      dnaDecodeArray (exon) ;
      array(exon, arrayMax(exon), char) = 0 ;
      fprintf (exonFile, "%s_%d %s\n", name(seq), kExon, arrp(exon,0,char)) ;
#endif	     
      arrayMax(exon) = 0 ;
      ne = 0 ;
      ++kExon ;
    }

  return ncds ;
}

/*************************************************************************/
    /* Gets the coding sequences recursivelly 
     *  taking orientation into account
     */  
static BOOL  dnacptGetCds(Array cds, KEY  seq, int from, int to)
{ int i, min, max, n ;
  BOOL result = FALSE ;
  OBJ Seq = bsCreate(seq) ;
  Array exons = arrayCreate(8, BSunit) ;
  Array subSeq = arrayCreate(9, BSunit) ;
  if (!Seq)
    return FALSE ;
  if (bsFindTag(Seq, _CDS))  /* seq is by itself coding sequences */
    { 
      min = 1 ; max = 0 ;	/* max 0 means go to the end of the spliced sequence */
      if (bsGetData(Seq, _bsRight, _Int, &min))
	bsGetData(Seq, _bsRight, _Int, &max) ;
      n = arrayMax(cds) ;
      array(cds, n++  , int) = -1 ;
      array(cds, n++ , int) = seq ; /* begin of seq */
      if (!bsFindTag(Seq, _Source_Exons) || !bsFlatten(Seq, 2, exons))
	{ array(exons, 0, BSunit).i = 1 ;  /* Failure: Make a fake entry */
	  array(exons, 1, BSunit).i = (from < to) ? to - from + 1 : from - to + 1 ;
	}
      for (i = 0; i < arrayMax(exons) ; i += 2, n += 2 ) /*  exons themselves */
	{ result = TRUE ; 
	  if (from < to)        /* -1 because no zero in biology */
	    {
	      array(cds, n, int) = from + arr(exons,i,BSunit).i - 1 ;
	      array(cds, n + 1, int) = from + arr(exons,i + 1 ,BSunit).i - 1 ;
	    }
	  else
	    {
	      array(cds, n, int) = from - arr(exons,i,BSunit).i + 1 ;
	      array(cds, n + 1, int) = from - arr(exons,i + 1 ,BSunit).i + 1 ;
	    }
	}
      array(cds, n  , int) = -2 ;
      array(cds, n + 1 , int) = seq ; /* end of seq */
    }
  else  /* recurse though the sub sequences */
    {
      if (bsFindTag(Seq, _Contains)  &&    /* At present the model is messy, 
					      _Contains   Has_xxx ?Seq int int
					      We should move to _Contains ?Seq int int 
					      This poses a compatibility problem in release 1.n
					      */
	  bsFlatten(Seq, 4, subSeq) )
	for (i=0; i < arrayMax(subSeq) ; i += 4)
	  if (from < to)        /* -1 because no zero in biology */
	    dnacptGetCds(cds, arr(subSeq,i+1,BSunit).k , 
			 from + arr(subSeq,i+2,BSunit).i - 1,
			 from + arr(subSeq,i+3,BSunit).i - 1 ) ;
	  else
	    dnacptGetCds(cds, arr(subSeq,i+1,BSunit).k ,
			 from -  arr(subSeq,i+2,BSunit).i + 1 ,
			 from -  arr(subSeq,i+3,BSunit).i + 1 ) ;
    }
  arrayDestroy(subSeq) ;
  arrayDestroy(exons) ;
  bsDestroy(Seq) ;
  return result ;
}

/*************************************************************************/
  /* Given the active key set
   * look for dnasequnces whose descriptor is in keyset
   * in each, look recursively for the coding sequences,
   *  accumulate the codon usage and display.
   * by starting from _VDNA, we are garanteed not to double count.
   */

static void dnacptCodonUsage(void)
{
  Array dna ; KEY seq = 0 , dnaKey = 0 ;
  KEYSET keySet , ks2 ; void *look ;
  Array usage , cds ;
  int   dummy , n, nn = 0 , ncds = 0 ;
  DNACPTGET("dnacptCodonUsage") ;
      
  if(!dnacpt->useKeySet)
    { messout("I just know how to do that from a keySet") ;
      return ;
    }
  if (!keySetActive(&keySet, &look)) 
    { messout("First select a keySet containing sequences") ;
      return ;
    }

#ifdef CODE_FILE
  codeFile = fopen ("coding.seq","w") ;
  exonFile = fopen ("exons.seq","w") ;
#endif

  ks2 = keySetCreate() ;
  cds = arrayCreate(50,int) ;
  usage = arrayCreate(64, int) ;
  while(lexNext(_VDNA,&dnaKey))
    { if (lexReClass(dnaKey, &seq, _VSequence) &&
	  keySetFind(keySet, seq, &dummy) &&
	  (dna = dnaGet(dnaKey) ) )
	{ nn++ ;
	  arrayMax(cds) = 0 ;
	  dnacptGetCds (cds, seq,  0, arrayMax(dna) - 1 ) ;
	  n = dnacptAddCds(dna, cds, usage) ;
	  if (n)
	    { ncds += n ;
	      keySetInsert(ks2,seq) ;
	    }
	  arrayDestroy(dna) ;
	}
    }

#ifdef CODE_FILE
  fclose (codeFile) ;
  fclose (exonFile) ;
#endif
      
  if (!nn)
    { messout("First select a keySet containing sequences") ;
      return ;
    }

  dnacptShowCodonUsage(nn, ncds, usage) ;

  /*   arrayDestroy(usage) ; destroyed in show.. */
  arrayDestroy(cds) ;
  keySetDestroy(keySet) ;
  keySetShow(ks2, look) ;
}

/*****************************************/
/*****************************************/

static void  dnacptChangeFrame(int newFrame)
{ int frame;
  Array dna, protein ;  Array colors ; KEY key ;
  void *look ;
  DNACPTGET("dnacptChangeFrame") ;
  
  dnacpt->frame = newFrame ;
  dnacptDisplay() ;    
  if (!fMapActive(&dna,&protein,&colors,&key,&frame,&look))
    { messout("First select an fMap window") ;
      return ;
    }
  if (frame != newFrame)
    { fMapSetFrame (newFrame) ;
      if (fMapSetStatus(-1) & STATUS_PROTEIN)
	dnacptTranslate() ;
      else
	dnacptStops() ;
      fMapDisplayDNA(look) ;
    }
}

/*********/

static void dnacptReadingFrame1 (void)
{ dnacptChangeFrame(0) ;
}
static void dnacptReadingFrame2 (void)
{ dnacptChangeFrame(1) ;
}
static void dnacptReadingFrame3 (void)
{ dnacptChangeFrame(2) ;
}
static void dnacptReadingFrame4 (void)
{ dnacptChangeFrame(-3) ;
}
static void dnacptReadingFrame5 (void)
{ dnacptChangeFrame(-1) ;
}
static void dnacptReadingFrame6 (void)
{ dnacptChangeFrame(-2) ;
}

/*****************************************/
/************* action routines ***********/
/*****************************************/

static void dnacptUseKeySet (void)
{    
  DNACPTGET("dnacptUseKeySet") ;
  
  if(dnacpt->useKeySet)
    { dnacpt->useKeySet = FALSE ;
      graphBoxDraw(useKeySetButtonBox, BLACK, WHITE) ;
    }
  else
    { dnacpt->useKeySet = TRUE ;
      graphBoxDraw(useKeySetButtonBox, BLACK, RED) ;
    }
  
}

/*****************************************/

static void dnacptDestroy (void)
{    
  DNACPTGET("dnacptDestroy") ;
  
  dnacpt->magic = 0 ;
  messfree (dnacpt) ;
  
  dnacptGraph = 0 ;
}

/*******************************************/

extern int ksetClassComplete(char *text, int len, int class) ;
static void restrictionCompletion(char *cp, int len)
{
  ksetClassComplete(cp, len, _VRestriction) ;
}

/**********************************************************/

static void dnacptPick (int k, double x)
{    
  DNACPTGET("dnacptPick") ;
  
  if (!k)
    return ;

  else  if (k == dnacpt->restrictionBox)
/*
    if ( k== dnacpt->curr )
      dnacptRestriction() ;
    else
*/
      {
	if (dnacpt->curr)
	  graphBoxDraw (dnacpt->curr, BLACK, WHITE) ;
	dnacpt->curr = k ;
	graphCompletionEntry (restrictionCompletion, dnacpt->restriction,0,(float) x,0,0) ;
      }
  graphActivate(dnacpt->graph) ;
}

/**********************************************************/
extern void gelDisplay (void) ;

static MENUOPT dnacptMenu[] =
  {
   graphDestroy, "Quit",
   help, "Help",
   graphPrint,"Print",
   dnacptFingerPrint, "Finger print",
   dnacptStops, "Stop codons",
   dnacptRestriction, "Restrict",
   dnacptFastaDump, "Dump Sequence",
   dnacptintronsExons, "Translate Gene",
   dnaAG_TC,"poly R/poly Y",
   dnacptSpliceConsensus, "Splice concensus",
   dnacptCodonUsage, "Codon usage",
   dnacptTotalLength,"Total",
   gelDisplay, "Show gels",
   dnacptDisplay, "Clear",
   0, 0 
   } ;

/*************************************************************/

static void dnacptDisplay(void)
{ int box ;
  DNACPTGET("dnacptDisplay") ;
 
  graphActivate(dnacpt->graph) ;
  graphClear() ;
  graphButtons(dnacptMenu,.5,8.5, 35.) ;
  
  graphText("Reading frame", 3.,1.3) ;
 
  graphButton("1",dnacptReadingFrame1,18.,.8) ;
  graphButton("2",dnacptReadingFrame2,24.,.8) ;
  graphButton("3",dnacptReadingFrame3,30.,.8) ;
  graphButton("-1",dnacptReadingFrame4,18.,2.3) ;
  graphButton("-2",dnacptReadingFrame5,24.,2.3) ;
  graphButton("-3",dnacptReadingFrame6,30.,2.3) ;
    
  /* hack to recolor the active frame button */
  box = graphBoxStart() ;
  graphBoxEnd() ;
  box = box - 6 + (dnacpt->frame >= 0 ? dnacpt->frame % 3:
		   (-dnacpt->frame) % 3 + 3 ) ;
  graphBoxDraw(box,BLACK,RED) ;

  useKeySetButtonBox = graphButton ("Use KeySet", dnacptUseKeySet, 2,3) ;
  if (dnacpt->useKeySet)
    graphBoxDraw(useKeySetButtonBox, BLACK, RED) ;
  if (dnacpt->complement)
    { box = graphBoxStart() ;
      graphText("Complementary sequence", 30.,4.) ;
      graphBoxEnd() ;
      graphBoxDraw(box,BLACK,RED) ;
    }

  graphText ("Restriction site: ", 0.5, 4.53) ;
  graphText ("i.e. hindIII; aaata; wska", 2.5, 5.50) ;
  dnacpt->restrictionBox =
    graphCompletionEntry (restrictionCompletion, dnacpt->restriction, 
			  BOX_LENGTH, 2.5, 7.0, dnacptRestriction) ;
  dnacpt->line = 20 ;
  graphTextBounds (80,17) ;
  graphRedraw() ;
}

/****************************************************************/
/****************************************************************/
/********************  public routines   ************************/

void dnaAnalyse(void)
{
  DNACPT dnacpt ;

  if(graphActivate(dnacptGraph))
    { 
      graphPop() ;
      return ;
    }

  dnacptGraph = displayCreate (DtDnaTool) ;
  if (!dnacptGraph)
      return ;

  dnacpt=(DNACPT)messalloc(sizeof(struct DNACPTSTUFF));
  dnacpt->magic = MAGIC;
  dnacpt->graph = dnacptGraph ; /* provision for multi windows */

  graphRegister (DESTROY,dnacptDestroy) ;
  graphRegister (PICK,(GraphFunc)dnacptPick) ;

  graphMenu(dnacptMenu) ;  
  graphAssociate(dnaAnalyse, dnacpt);

  dnacpt->restriction = (char*)  messalloc (40) ;
  strcpy(dnacpt->restriction,"") ;

  dnacptDisplay() ;
}

/*************************************************************/
/*************************************************************/
