/*
 *      ISIS release V1.1, Dec. 1988
 *      Export restrictions apply
 */
/*
 *      Restarts ISIS at a site
 */

char isis_rcsid[] = "$Revision: 1.34 $$Date: 89/01/31 09:54:55 $$Source: /usr/fsys/bullwinkle/b/isis/distrib/util/RCS/isis.c,v $";
#include "isis.h"

# include <sys/ioctl.h>
# include <sys/socket.h>
#if(HPUX)
# include <time.h>
#else
# include <sys/time.h>
#endif
# include <signal.h>
# include <ctype.h>
# include <netdb.h>
# include <errno.h>
# include <stdio.h>
# include <fcntl.h>
# include <signal.h>
#if(SUN4)
# include "fork.h"
#endif

/*
 *****************************************************************
 *  This version of isis.c has been recoded from earlier ones.   *
 *  It does not use hardware broadcasts to say hello at startup. *
 *****************************************************************
 */

/* Number of slots in the cid[] and cnames[] child tables below */
#define NCHILD          10
/* Query rounds that may find no operational site before main() settles on a total restart */
#define NRETRY          2

/* Not referenced by the code visible in this file; presumably the signal meant to drive inhib() -- TODO confirm */
#define SIGINHIBIT      SIGEMT

/* Version string placed in every query/reply message and checked by vcompare() */
#define VERSION         "V1.1"

/* RESTART_PORT: UDP port used for restart queries; CLIENT_PORT: port handed to isis_init() and to child programs */
static  RESTART_PORT, CLIENT_PORT;
/* select() timeout (3 sec) used by main() while waiting for replies to a query round */
struct  timeval broad_time ={ 3, 0};
/* Not referenced by the code visible in this file */
struct  timeval recov_time ={ 30, 0};
/* Zero select() timeout: main() uses it to check for a reply without blocking */
struct  timeval poll ={ 0, 0};
/*
 * query_sock:  UDP socket on which queries and replies travel
 * restartmode: -1 until decided, then FD_TOTAL or FD_PARTIAL
 * isis_is_up:  set by fd_restart() once the site is up, cleared by isis_failed()
 * my_index:    index of this process in cno[]/pno[]/bno[]/sid[]
 * zap_flag:    set by -Z; main() then sends I_KILL instead of I_QUERY and exits
 */
static  query_sock, restartmode = -1, isis_is_up, my_index, zap_flag;
/* Coordinator site-id: from -c, or learned from replies in bc_query() */
static  site_id coord;
/* rname: restart file name (-R); sname: sites file name (-S); sarg: the -S argument as typed, passed on to the first child */
static  char *rname = "isis.rc", *sname = "sites", *sarg;
/* cnames is not referenced by the code visible in this file; his_name holds the host name from the last message read by bc_query() */
static  char cnames[NCHILD][20], his_name[64];
/* Process ids returned by vfork() for the started programs, and how many are recorded */
static  cid[NCHILD], nchild;
/*
 * One entry per sites-file line that names this host: cno/pno/bno hold the
 * second/first/third port of the entry and sid its site number.  sp is the
 * fill pointer for sid, ninstances the number of entries recorded.
 * recov_stage is sent in place of a view id while this site is not yet up.
 */
static  cno[10], pno[10], bno[10], sid[10], *sp = sid, ninstances, recov_stage;
/* Condition signalled (t_sig) by isis_failed() */
static  condition got_query;
/* sin: local address bound to query_sock; dst: destination of the query being sent */
static  saddr sin, dst;
/* inhibit: toggled by inhib(); auto_restart: remaining automatic restart attempts (-A sets 5); maxviewid: largest view id seen in replies */
static  inhibit, auto_restart, maxviewid;
/* Forward declaration: defined near the end of this file */
static  message *isis_rcvmsg();

/* Message types carried as the first field of a query-socket message */
#define I_QUERY         1       /* Hello? */
#define I_KILL          2       /* Kill yourself */
#define I_REPLY         3

/*
 *      Flip the low bit of the file-level `inhibit' flag.
 */
inhib()
  {
        inhibit = inhibit ^ 1;
  }

/* Restart file: opened by main(), which reads its first line; fd_restart() reads the remaining lines and closes it */
FILE    *rfile;

/*
 *      Entry point: restart ISIS at this site.
 *
 *      1. Parse the command line:
 *           -Rfile      restart file (default isis.rc)
 *           -Sfile      sites file (default sites)
 *           -Z          send I_KILL to every site instead of restarting
 *           -A          allow 5 automatic restart attempts
 *           -H#         run only the instance with this site number
 *           -t          force a total restart
 *           -c#/#       force a partial restart with coordinator site/incarn
 *           number      restart port
 *      2. Scan the sites file once to record every '+' entry naming this
 *         host, fork one process per extra instance (unless -H was given),
 *         then scan it again to load site_names[] and this site's ports.
 *      3. Bind the UDP query socket.
 *      4. Unless the mode was forced, query the other sites and choose
 *         between a total and a partial restart from the replies.
 *      5. Start the program named on line 1 of the restart file, connect
 *         to it with isis_init() and call isis_mainloop(fd_restart).
 *
 *      Returns 0 when the restart loop ends, -1 if the local host name
 *      cannot be resolved.  Most errors end in panic() or exit().
 */
main(argc, argv)
  char **argv;
  {
        int fd_restart(), bc_query();
        extern isis_socket;
        FILE *sfile;
        register i, c;
        int first_time = 1, nsites = 0;
        char *usage = "Usage: isis [-Rrestartfile] [-Ssfile] [-H...] [-c#/#] [-t] [-csite/incarn] [port-number]";
        my_process_id = ISIS;
        /* Inside the loop argc counts the arguments left, including the current one */
        while(--argc)
        {
            char *arg = *++argv;
            c = **argv;
            if(c == '-')
                c = *++*argv;
            switch(c)
            {
              default:
                panic(usage);
                break;
              case 'R':
                /* The name is either glued to the flag or is the next argument */
                if(*++*argv == 0)
                {
                    if(argc <= 1)
                        panic(usage);
                    /* Consume the extra argument so the loop does not run off argv */
                    ++argv;
                    --argc;
                }
                rname = *argv;
                break;
              case 'Z':
                ++zap_flag;
                break;
              case 'S':
                if(*++*argv == 0)
                {
                    if(argc <= 1)
                        panic(usage);
                    ++argv;
                    --argc;
                }
                sname = *argv;
                /* Remember the argument as typed so it can be handed to the first child */
                sarg = arg;
                break;
              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                RESTART_PORT = atoi(*argv);
                break;
              case 'A':
                auto_restart = 5;
                break;
              case 'H':
                /* *argv still points at the 'H'; the number starts one character later */
                my_site_no = atoi(*argv + 1);
                break;
              case 't':
                restartmode = FD_TOTAL;
                break;
              case 'c':
                restartmode = FD_PARTIAL;
                begin
                {
                    /* -c<site>/<incarn>: site number goes in the high part, incarnation in the low 8 bits */
                    register char *s = *argv;
                    coord = atoi(++s);
                    s = *argv;
                    while(*s && *s++ != '/')
                        continue;
                    coord = (coord<<8) | atoi(s);
                }
                break;
            }
        }
        gethostname(my_host, 64);
        /* Pre-scan the site-id file */
        begin
        {
            char str[64];
            int port, cport, rport, sno;
            int len = strlen(my_host);

            if((sfile = fopen(sname, "r")) == NULL)
                panic("sitefile %s: Cannot read", sname);
            forever
            {
                /* First character of each line is the up/down marker ('+' means listed as up) */
                register isup = fgetc(sfile), c;
                static new_format = 0;
                /* %63s keeps an over-long host name from overrunning str[64] */
                if(fscanf(sfile, "%d:%d,%d,%d %63s", &sno, &port, &cport, &rport, str) != 5)
                    break;
                /* Any letter in the rest of the line marks the new (scoped) format */
                while((c = fgetc(sfile)) > 0 && c != '\n')
                    if(isalpha(c))
                        ++new_format;
                if(isup && !new_format)
                {
                    ++new_format;
                    print("Warning: old format sites file (no scope info).  ISIS may run inefficiently\n");
                }
                if(!port || !cport || !rport)
                {
                    print("** Sites file contains <0> for a port number.\n");
                    print("** This is no longer supported\n");
                    panic("ISIS startup failed");
                }
                if(sno < 0 || sno >= MAX_SITES)
                {
                    print("Site number %d out of bounds, ignored!\n", sno);
                    /* Really ignore it: sno indexes site_names[] below */
                    continue;
                }
                if(isup == '+')
                {
                    if(sno >= nsites)
                        nsites = sno+1;
                    bcopy(str, site_names[sno], 64);
                    /* Compare only the first strlen(my_host) characters of the listed name */
                    str[len] = 0;
                    if(strcmp(my_host, str) == 0)
                    {
                        register n;
                        if(ninstances >= (int)(sizeof(cno)/sizeof(cno[0])))
                            panic("%s: too many instances listed in %s\n", my_host, sname);
                        cno[ninstances] = cport;
                        pno[ninstances] = port;
                        bno[ninstances] = rport;
                        for(n = 0; n < ninstances; n++)
                        {
                            if(cno[ninstances] == cno[n] || pno[ninstances] == pno[n] || bno[ninstances] == bno[n])
                                panic("Two instances at same site use same port no.\n");
                        }
                        ++ninstances;
                        *sp++ = sno;
                    }
                }
            }
            fclose(sfile);
        }
        if(coord > 0 && *site_names[SITE_NO(coord)] == 0)
            panic("fd_restart: impossible coordinator site-id %x", coord);
        if(ninstances == 0)
            panic("%s: not listed in %s\n", my_host, sname);
        if(my_site_no == 0)
        {
            /* Fork off necessary sub-instances */
            for(i = ninstances-1; i; i--)
                if(fork() == 0)
                    break;
            /* Children (i != 0) wait before going on; the parent runs instance 0 at once */
            if(i)
                sleep(20);
            my_index = i;
            my_site_no = sid[i];
        }
        else
        {
            for(i = ninstances-1; i >= 0; i--)
                if(sid[i] == my_site_no)
                    break;
            if(i < 0)
                panic("-H%d: site %d not listed in sites table\n", my_site_no, my_site_no);
            /* bno[my_index] is the port bound below when several instances exist */
            my_index = i;
        }
        /* Second scan: load every site name and pick up the ports of this site */
        begin
        {
            char str[64];
            int port, cport, rport, sid;
            if((sfile = fopen(sname, "r")) == NULL)
                panic("sitefile %s: Cannot read", sname);
            for(i = 0; i < MAX_SITES; i++)
            {
                register isup = fgetc(sfile), c;
                if(fscanf(sfile, "%d:%d,%d,%d %63s", &sid, &port, &cport, &rport, str) != 5)
                    break;
                while((c = fgetc(sfile)) > 0 && c != '\n')
                    continue;
                /* Same bounds rule as the first scan: never index site_names[] out of range */
                if(sid < 0 || sid >= MAX_SITES)
                    continue;
                strcpy(site_names[sid], str);
                if(sid == my_site_no)
                {
                    if(rport)
                        RESTART_PORT = rport;
                    CLIENT_PORT = cport;
                    my_site_no = sid;
                }
            }
            fclose(sfile);
        }

        if(RESTART_PORT == 0)
        {
            register struct servent *sp = getservbyname("isis", "bcast");
            if(sp == (struct servent*)0)
                panic("isis.*: service not listed in /etc/services on this host");
            /* s_port is in network byte order; RESTART_PORT is kept in host order and passed to htons() later */
            RESTART_PORT = ntohs(sp->s_port);
        }

        begin
        {
            /* Create socket to listen on */
            register struct hostent *hep;
            if((hep = gethostbyname(my_host)) == 0)
            {
                print("%s: cannot look up address of this host\n", my_host);
                return(-1);
            }
            sin.sin_family = AF_INET;
            bcopy(hep->h_addr, &sin.sin_addr, hep->h_length);
        }

        query_sock = socket(AF_INET, SOCK_DGRAM, 0);
        if(zap_flag)
            sin.sin_port = 0;
        else if(ninstances == 1)
            sin.sin_port = htons(RESTART_PORT);
        else
            sin.sin_port = htons(bno[my_index]);
        if(bind(query_sock, (struct sockaddr*)&sin, sizeof(sin)) == -1)
        {
            print("Can't allocate UDP port %d!  (Is ISIS already running?)\n", ntohs(sin.sin_port));
            exit(0);
        }
#       ifdef UNIX_DOM
        {
            char pname[64];
            sprintf(pname, "/tmp/Is%d", CLIENT_PORT);
            unlink(pname);
        }
#       endif
  retry:
        while(first_time || auto_restart-- > 0)
        {
            int inflag = 0, tcount = 0;

            first_time = 0;
            /* Mode forced by -t/-c, or already chosen on an earlier pass: skip the query phase */
            if(restartmode != -1)
                goto skip;
            restartmode = FD_TOTAL;
            if(zap_flag == 0)
                print("Site %d (%s): isis is restarting...\n", my_site_no, site_names[my_site_no]);
            else
                print("Site %d (%s): isis is zapping everyone else...\n", my_site_no, site_names[my_site_no]);

            recov_stage = -2;
        again:
            maxviewid = -2;
            /* recov_stage climbs -2 -> -1 -> 0 on successive rounds and then stays at 0 */
            if(recov_stage++ == 0) recov_stage = 0;
            begin
            {
                register s;
                register message *mp = msg_newmsg();
                if(zap_flag == 0)
                {
                    print("Is anyone there?\n");
                    msg_put(mp, "%d,%d,%s,%d,%h,%s", I_QUERY, my_site_no, site_names[my_site_no], recov_stage, -1, VERSION);
                }
                else
                    msg_put(mp, "%d,%d,%s,%d,%h,%s", I_KILL, my_site_no, site_names[my_site_no], recov_stage, -1, VERSION);
                /* Send to each site in turn; stop as soon as a reply carries a positive view id */
                for(s = 1; s < nsites && maxviewid <= 0; s++)
                {
                    register struct hostent *hep;
                    if(s == my_site_no && zap_flag == 0)
                        continue;
                    if((hep = gethostbyname(site_names[s])) == 0)
                        continue;
                    dst.sin_family = AF_INET;
                    bcopy(hep->h_addr, &dst.sin_addr, hep->h_length);
                    dst.sin_port = htons(RESTART_PORT);
                    isis_sendmsg(query_sock, &dst, sizeof(dst), mp);
                    /* Zero-timeout select: handle a reply if one is already waiting */
                    inflag = 1<<query_sock;
                    if(select(32, &inflag, (int*)0, (int*)0, &poll) <= 0)
                        inflag = 0;
                    if(inflag)
                        bc_query();
                }
                if(zap_flag)
                    exit(0);
            }
            /* Keep reading replies until the socket stays quiet for broad_time */
            do
            {
                inflag = 1<<query_sock;
                if(select(32, &inflag, (int*)0, (int*)0, &broad_time) <= 0)
                    inflag = 0;
                if(inflag)
                    bc_query();
            }
            while(inflag);
            switch(maxviewid)
            {
              case -2:
                  /* Nobody answered */
                  if(++tcount < NRETRY)
                  {
                      print("... found no operational sites, checking again just in case\n");
                      goto again;
                  }
                  else
                      print("site %d (%s) doing a total restart\n", my_site_no, site_names[my_site_no]);
                  break;

              case -1:
              case 0:
                  /* Another site answered with a recovery stage rather than a view id */
                  print("A total restart is underway at site %s... waiting!\n", his_name);
                  sleep(20);
                  do
                  {
                      inflag = 1<<query_sock;
                      if(select(32, &inflag, (int*)0, (int*)0, &broad_time) <= 0)
                          inflag = 0;
                      if(inflag)
                          bc_drain();
                  }
                  while(inflag);
                  goto again;

              default:
                  restartmode = FD_PARTIAL;
                  print("site %d (%s) doing a partial restart, coord is %d/%d\n", my_site_no,
                      site_names[my_site_no], SITE_NO(coord), SITE_INCARN(coord));
                  break;
            }

            /* Now start PROTOCOLS, REXEC, RMGR, etc */
        skip:
            if((rfile = fopen(rname, "r")) == NULL)
            {
                if((rfile = fopen("restartfile", "r")) == NULL)
                    panic("restartfile %s: Cannot read", rname);
                print("Warning: should rename `restartfile' as `%s'\n", rname);
            }
            begin
            {
                /*
                 * Split line 1 of the restart file into words.  args[0] is
                 * the program to execute and args[1..] the argv it is given.
                 */
                char cpname[20], line[200], *args[20];
                register char *sp = line, **ap = args;
                *ap++ = sp;
                c = NEXTC(rfile);
                if(c == '\n' || c <= 0)
                    panic("isis.rc formatting error on line 1");
                forever
                {
                    putchar(c);
                    switch(c)
                    {
                      case ' ':
                      case '\t':
                        if(sp != line && sp[-1])
                        {
                            /* Keep room for the final NUL, and for sarg, cpname and the null pointer */
                            if(sp >= &line[sizeof(line)-1])
                                panic("isis.rc: line 1 is too long");
                            if(ap >= &args[sizeof(args)/sizeof(args[0])-3])
                                panic("isis.rc: too many arguments on line 1");
                            *sp++ = 0;
                            *ap++ = sp;
                        }
                        break;
                      default:
                        if(sp >= &line[sizeof(line)-1])
                            panic("isis.rc: line 1 is too long");
                        *sp++ = c;
                        break;
                    }
                    if((c = NEXTC(rfile)) <= 0 || c == '\n')
                        break;
                }
                isis_socket = 0;
                if(sarg)
                {
                    *ap++ = sarg;
                    print(" %s", sarg);
                }
                if(ninstances > 1)
                {
                    /* Tell the child which of the instances on this host it belongs to */
                    sprintf(cpname, "-H%d", my_site_no);
                    *ap++ = cpname;
                    print(" %s", cpname);
                }
                putchar(c);
                *sp = 0;
                if(ap < &args[2])
                    panic("isis.rc formatting error on line 1");
                *ap = 0;
                if((cid[nchild] = vfork()) == 0)
                {
                    if(isis_socket)
                        close(isis_socket);
                    close(fileno(rfile));
                    if(query_sock)
                        close(query_sock);
                    execvp(args[0], &args[1]);
                    print("%s: cannot execute!\n", args[0]);
                    kill(0, SIGTERM);
                    exit(0);
                }
               ++nchild;
            }
            if(cid[0] <= 0)
                panic("error parsing restartfile");
            sleep(5);
            if(isis_init(CLIENT_PORT) == -1)
            {
                print("... slow protos startup, please be patient\n");
                sleep(15);
                if(isis_init(CLIENT_PORT) == -1)
                {
                    print("isis: unable to connect to <protos> at this site (check %d.log or for core image)\n", my_site_no);
                    kill(0, SIGTERM);
                    if(!auto_restart)
                        exit(0);
                    /* NOTE(review): the message says 5 minutes but the sleep is 10 seconds -- confirm which is intended */
                    print("Sleeping 5 minutes before retrying auto-restart...\n");
                    sleep(10);
                    print("ISIS auto-restart...\n");
                    /* The next pass reopens the restart file; do not leak this stream */
                    fclose(rfile);
                    goto retry;
                }
            }
            isis_task(fd_restart, "fd_restart");
            isis_task(bc_query, "bc_query");
            isis_input(query_sock, bc_query, "bc_query");
            isis_mainloop(fd_restart);
        }
        return(0);
  }

/*
 *      Startup task handed to isis_mainloop() by main().
 *
 *      Starts one program for every remaining line of the restart file
 *      (main() has already consumed line 1), then issues CL_FDRESTART with
 *      the chosen restart mode and coordinator, stores the returned site
 *      view in isis_sv and prints it.  Panics if this site was named as
 *      its own coordinator, if no full site view comes back, or if the
 *      returned view id is 0.
 */
fd_restart()
  {
        register message *mp;
        if(restartmode == FD_PARTIAL && SITE_NO(coord) == my_site_no)
            panic("isis restart too soon after failure (wait a minute and then retry)!");
        isis_start_done();
        sv_init();
        begin
        {
            register c;
            nchild = 0;
            while((c = NEXTC(rfile)) > 0)
            {
                /* args[0] is the program to execute, args[1..] the argv it is given */
                char cpname[20], line[200], *args[20];
                register char *sp = line, **ap = args;

                /* Skip empty lines */
                if(c == '\n')
                    continue;
                *ap++ = sp;
                forever
                {
                    putchar(c);
                    switch(c)
                    {
                      case ' ':
                      case '\t':
                        if(sp != line && sp[-1])
                        {
                            /* Keep room for the final NUL, and for cpname and the null pointer */
                            if(sp >= &line[sizeof(line)-1])
                                panic("isis.rc: line too long");
                            if(ap >= &args[sizeof(args)/sizeof(args[0])-2])
                                panic("isis.rc: too many arguments on one line");
                            *sp++ = 0;
                            *ap++ = sp;
                        }
                        break;
                      default:
                        if(sp >= &line[sizeof(line)-1])
                            panic("isis.rc: line too long");
                        *sp++ = c;
                        break;
                    }
                    if((c = NEXTC(rfile)) <= 0 || c == '\n')
                        break;
                }
                if(CLIENT_PORT)
                {
                    /* Pass port number to override default, if specified */
                    sprintf(cpname, "%d", CLIENT_PORT);
                    *ap++ = cpname;
                    print(" %s", cpname);
                }
                putchar(c);
                *sp = 0;
                if(ap == args)
                    continue;
                /* A line with a single word: use the program name as argv[0] too */
                if(ap == &args[1])
                    *ap++ = args[0];
                *ap = 0;
                /* Table full: reuse the last slot rather than run past cid[] */
                if(nchild == NCHILD)
                    --nchild;
                if((cid[nchild] = vfork()) == 0)
                {
                    if(isis_socket)
                        close(isis_socket);
                    close(fileno(rfile));
                    if(query_sock)
                        close(query_sock);
                    execvp(args[0], &args[1]);
                    /* Only reached when the exec failed */
                    my_process_id = getpid();
                    panic("%s: cannot execute!\n", args[0]);
                }
                nchild++;
            }
            fclose(rfile);
        }
        mp = msg_genmsg(CL_RESTARTMODE, (char*)&restartmode, FTYPE_LONG, sizeof(int),
                CL_COORD, (char*)&coord, FTYPE_SITEID, sizeof(site_id), 0);
        if(isis(CL_FDRESTART, mp, (char*)&isis_sv, sizeof(sview)) != sizeof(sview))
            panic("fd_restart: no result returned");
        isis_svmutex = isis_sv;
        my_site_incarn = isis_sv.sv_incarn[my_site_no];
        my_address = ADDRESS(my_site_no, my_site_incarn, my_process_id, 0);
        msg_delete(mp);
        if(isis_sv.sv_viewid == 0)
            panic("Site restart too soon after failure, please wait a few minutes and try again");
        isis_is_up = 1;
        print("Site %d/%d is up!\n", my_site_no, my_site_incarn);
        print("site view has viewid %d/%d\n", isis_sv.sv_viewid&0xFF, isis_sv.sv_viewid>>8);
        begin
        {
            /* sv_slist is walked up to its first zero entry */
            register site_id *s;
            for(s = isis_sv.sv_slist; *s; s++)
                print("    %-30s[site_no %d  site_incarn %d]\n", site_names[SITE_NO(*s)],
                        SITE_NO(*s), SITE_INCARN(*s));
        }
  }

/*
 *      Read one message from the query socket and discard it.
 *      A failed receive is reported and otherwise ignored.
 */
bc_drain()
  {
        saddr from;
        int fromlen = sizeof(from);
        register message *msg = isis_rcvmsg(query_sock, &from, &fromlen);

        if(msg != 0)
        {
            msg_delete(msg);
            return;
        }
        print("isis_rcvmsg failed in bc_drain\n");
  }

bc_query()
  {
        register message *mp, *rmsg;
        sview *site_getview();
        register sview *sv;
        char version[10];
        saddr who;
        int type, sender, hisviewid;
        int len = sizeof(who);
        site_id hiscoord;
        if((mp = isis_rcvmsg(query_sock, &who, &len)) == 0)
        {
            print("isis_rcvmsg failed in bc_query\n");
            return;
        }
        if(msg_get(mp, "%d,%d,%s,%d,%h,%s", &type, &sender, his_name, &hisviewid, &hiscoord, version) != 6)
            strcpy(version, "V1.0");
        vcompare(VERSION, version);
        msg_delete(mp);
        if(type == I_KILL)
            term("Zapped by site %s\n", his_name);
        if(sender == my_site_no)
            return;
        if(type == I_REPLY && isis_is_up)
                return;
        if(strcmp(site_names[sender], his_name))
        {
            if(type == I_REPLY)
                panic("someone is running isis at site %s using a different site-table!", his_name);
            print("Warning: someone is trying to run isis at site %s using a different site-table!\n", his_name);
            return;
        }
        if(hisviewid > maxviewid && hiscoord != 0)
        {
            if(hisviewid > 0 || hisviewid > recov_stage)
            {
                maxviewid = hisviewid;
                coord = hiscoord;
            }
            else if(hisviewid == recov_stage && strcmp(his_name, my_host) > 0)
            {
                maxviewid = hisviewid;
                coord = hiscoord;
            }
            if(coord > 0 && *site_names[SITE_NO(coord)] == 0)
                panic("Impossible coordinator site-id %d", SITE_NO(coord));
            if(coord == my_site_no)
                panic("Told to use myself as coordinator (wait a few minutes and then try again)");
        }
        if(type == I_REPLY)
            return;
        rmsg = msg_gen("%d,%d,%s", I_REPLY, my_site_no, site_names[my_site_no]);
        if(!isis_is_up)
            msg_put(rmsg, "%d,%h,%s", recov_stage, -1, VERSION);
        else
        {
            sv = site_getview();
            msg_put(rmsg, "%d,%h,%s", sv->sv_viewid, sv->sv_slist[0], VERSION);
        }
        isis_sendmsg(query_sock, &who, len, rmsg);
        msg_delete(rmsg);
  }


/*
 *      Clean up after a failure.
 *
 *      Removes the /tmp port names (UNIX_DOM builds only).  If the site
 *      was up, counts the crash (the 4th one panics) and sends SIGKILL to
 *      every recorded child.  Then marks the site down, signals
 *      got_query, reaps all children, and either exits (no auto-restart)
 *      or pauses and returns 0.
 */
isis_failed()
  {
        register k;
        /* Get rid of all the old port names, if any */
#       ifdef UNIX_DOM
        {
            char pname[64];
            sprintf(pname, "/tmp/Is%d", CLIENT_PORT);
            unlink(pname);
            sprintf(pname, "/tmp/Cl%d", my_process_id);
            unlink(pname);
        }
#       endif
        if(isis_is_up)
        {
            static crashes = 0;
            ++crashes;
            if(crashes == 4)
                panic("Too many crash/restart cycles");
            print("ISIS has crashed at site %s...", site_names[my_site_no]);
            k = 0;
            while(k < nchild)
            {
                kill(cid[k], SIGKILL);
                k++;
            }
        }
        isis_is_up = 0;
        t_sig(&got_query, 1);
        /* Reap children until wait() reports there are none left */
        while(wait(0) != -1)
            continue;
        if(auto_restart == 0)
            exit(0);
        /* NOTE(review): the message says 5 minutes, the pause is 10 seconds */
        print("Sleeping 5 minutes before retrying auto-restart...\n");
        sleep(10);
        print("ISIS auto-restart...\n");
        return(0);
  }

/*
 *      Print a final message, send SIGTERM to the whole process group
 *      and exit with status 0.
 *        msg   print() format expecting one string argument
 *        site  the string substituted into msg
 */
term(msg, site)
  char *msg;
  char *site;
  {
        print(msg, site);
        /* pid 0 addresses every process in the caller's process group */
        kill(0, SIGTERM);
        exit(0);
  }

/* One character of push-back for NEXTC(); 0 means nothing is pending */
static  int peekc;

/*
 *      Return the next character of `fi' with every run of blanks and
 *      tabs collapsed to a single ' '.  A run that ends at a newline is
 *      dropped and the newline returned instead.  The character that
 *      ended a run is held in peekc and returned by the following call.
 *      End of file comes back as whatever fgetc() returned.
 */
int
NEXTC(fi)
  register FILE *fi;
  {
        register int ch;
        register int skipped = 0;

        /* Hand back the character saved by the previous call, if any */
        if(peekc != 0)
        {
            ch = peekc;
            peekc = 0;
            return(ch);
        }
        for(;;)
        {
            ch = fgetc(fi);
            if(ch != ' ' && ch != '\t')
                break;
            ++skipped;
        }
        if(skipped == 0 || ch == '\n')
            return(ch);
        peekc = ch;
        return(' ');
  }

/*
 *      Algorithm for writing a message to a socket.
 *
 *      Gathers the iovec pieces of `mp' into one buffer of msg_getlen(mp)
 *      bytes and sends it as a single datagram.
 *        sock  socket to send on
 *        dst   destination address, dlen its size in bytes
 *        mp    message to send (not deleted here)
 *      Returns 0 on success, -1 if the buffer cannot be allocated or
 *      sendto() fails.
 */
isis_sendmsg(sock, dst, dlen, mp)
  saddr *dst;
  int dlen;
  register message *mp;
  {
        register nb, iovlen;
        register struct iovec *iovp;
        register char *mbuf, *mptr;
        char *malloc();
        nb = msg_getlen(mp);
        if((mbuf = malloc(nb)) == 0)
        {
            print("isis_sendmsg: cannot allocate %d bytes\n", nb);
            return(-1);
        }
        mptr = mbuf;
        iovlen = msg_getiovlen(mp);
        iovp = msg_getiovec(mp);
        while(iovlen)
        {
            bcopy(iovp->iov_base, mptr, iovp->iov_len);
            mptr += iovp->iov_len;
            --iovlen; ++iovp;
        }
        if(sendto(sock, mbuf, nb, 0, (struct sockaddr*)dst, dlen) == -1)
        {
            perror("sendto");
            free(mbuf);
            return(-1);
        }
        free(mbuf);
        return(0);
  }

/*
 *      Read one datagram from `sock' and rebuild a message from it.
 *        who   receives the sender's address
 *        wlen  in: size of *who; out: size of the address stored
 *      The first word of the datagram, in network byte order, gives the
 *      size of the block to allocate.  Returns the message, or 0 if the
 *      receive fails (interrupted calls are retried), the datagram is
 *      malformed, or the message cannot be rebuilt.
 */
static message *
isis_rcvmsg(sock, who, wlen)
  saddr *who;
  int *wlen;
  {
        register nb, length;
        block_desc *blk_desc;
        long mbuf[1024];

        while((nb = recvfrom(sock, (char*)mbuf, sizeof(mbuf), 0, (struct sockaddr*)who, wlen)) == -1)
        {
            if(errno == EINTR)
                continue;
            perror("recvfrom");
            return(0);
        }
        /* Too short to hold the length word: mbuf[0] would be partly uninitialized */
        if(nb < (int)sizeof(mbuf[0]))
            return(0);
        length = ntohl(mbuf[0]);
        /* The block is sized from the length word but nb bytes are copied into it */
        if(length < nb)
            return(0);
        if((blk_desc = msg_blockalloc(length, 0)) == 0)
            return(0);
        bcopy(mbuf, blk_desc->addr, nb);
        return(msg_reconstruct(blk_desc->addr, blk_desc));
  }

/*
 *      Panic unless the two version strings are identical.
 *        myversion   version string of this program
 *        hisversion  version string taken from a received message
 */
vcompare(myversion, hisversion)
  char *myversion;
  char *hisversion;
  {
        if(strcmp(myversion, hisversion) != 0)
            panic("Someone is running an incompatible version of ISIS! (my version %s, his %s)\n",
                myversion, hisversion);
  }
