-- (C) Copyright International Business Machines Corporation 23 January 
-- 1990.  All Rights Reserved. 
--  
-- See the file USERAGREEMENT distributed with this software for full 
-- terms and conditions of use. 
-- File: cginternal.d
-- Author: Andy Lowry
-- SCCS Info: @(#)cginternal.d	1.55 3/13/90

-- Defintions for structures used internally by the code generator

cgInternal: using (common, stdenv, interpform, annotate, inferredtype,
  LIStuff, coercions)

definitions
  
  -- cgData is a record containing most of the "global" information
  -- required by the various codegen modules.  It is used as a
  -- convenience so as to avoid long parameter lists.  In addition to
  -- global information about the program being codegen'ed, outports
  -- for the many codegen procedures are also available.
  cgData: record (
    Prog: cgProgData,		-- the absprog and its annotations
    options: annotations,	-- options affecting what codegen does
    Aux: cgAuxData,		-- auxiliary tables produced by codegen
    Fns: cgFuncs,		-- outports to various functions
    std: stdenv,		-- standard hermes capas
    Proc: cgProcData,		-- process-specific info
    Tplt: cgTemplates,		-- instruction templates for quick
				-- copying vs slow builds
    scratch: cgScratchpad	-- ephemeral stuff we need passed around
  );
  cgProgData: record (
    absprog: predefined!program, -- the program to be translated
    annotations: annotations	-- assorted info about the program
  );
  cgAuxData: record (		-- auxiliary data maintained by codegen
    opMap: opMap		-- for table-driven translation
  );
  cgFuncs: record (		-- capabilities to various codegen procs
    optimize: optimizeFn,	-- optimize a collection of basic blocks
    cgProcess: cgProcessFn,	-- generate code for a process
    cgProcInit: cgProcInitFn,	-- initialize per-process codegen structures
    BBAssemble: BBAssembleFn,	-- string basic blocks together
    cgClause: cgClauseFn,	-- generate code for a single clause
    cgStmt: cgStmtFn,		-- generate code for a single statement
    -- Statement translators for operator classes
    cgDirectStmt: cgStmtFn,
    cgTypedStmt: cgStmtFn,
    -- Utility functions
    LIStuff: LIStuffFn,		-- stuffs LI prog into hidden absprog field
    clauseUsesObj: clauseUsesObjFn,-- checks if an obj is used in a clause
    lkupType: lkupTypeFn,	-- looks up root object type in given
				-- scope set or inferred defs
    breakSelector: breakSelectorFn,-- breaks a selector up into a
				-- direct lookup and additional tests
    cgSelector: cgSelectorFn,	-- generates skeleton for table op w/selector
    newTableInfo: newTableInfoFn,-- chooses new table datareps
    atoi: atoiFn,		-- charstring -> integer
--  ator: atorFn,		-- charstring -> real
    objAddrInit: objAddrInitFn,	-- start up an object mapper for a process
    varCaseID: varCaseIDFn,	-- variant component objectname -> case ID
    compOffsets: compOffsetsFn,	-- make offsets from component
				-- ID's for LI addressing
    typeDef: typeDefFn		-- typename -> type definition
  );
  cgProcData: record (		-- process-specific auxiliary info
    id: processid,		-- the id of the process we're translating
    coercions: processcoercions,-- coercions for this process
    linkedPids: procIdList,	-- pids of processes statically linked
				-- to this one
    BBData: BBData,		-- the basic-block representation
    curBB: basicBlock,		-- to insert new instructions
				-- (this BB not yet in BBData.BBs)
    objAddr: objAddrFn,		-- object mapper
    rootAddr: rootAddrFn,	-- root object mapper
    objType: objTypeFn,		-- gets typename for an object
    tmpAddr: tmpAddrFn,		-- allocates data vector slot for compiler tmp
    setAddr: setAddrfn,		-- set/unset temporary object alias
    nRegUsed: nRegUsedFn	-- get # of allocated data vector slots
  );
  cgTemplates: record (
    boolF: interpform!operation,-- load boolean#'false'
    boolT: interpform!operation,-- load boolean#'true'
    noop: interpform!operation,	-- do nothing
    endblock: interpform!operation,-- close off a block
    endprocess: interpform!operation,-- terminate a process
    discard: interpform!operation-- discard an object
  );
  cgScratchpad: record (
    selInfo: selectorInfo	-- information about selectors currently
				-- being codegened
  );
  
  -- An opMap describes how Hermes opcodes map to LI opcodes.  A
  -- DIRECT map entry is used for a Hermes opcode that is ALWAYS
  -- mapped to the same LI opcode.  A TYPED map entry is used for a
  -- Hermes opcode where the mapping depends only on the operand type.
  -- An ADHOC map entry refers to a Hermes opcode for which a
  -- specialized procedure is needed.  Non-ADHOC opcodes can be
  -- translated automatically using additional information in the map.
  opMap: table of opMapEntry {full} keys (hOp);
  opMapEntry: record (
    hOp: predefined!operator,	-- the Hermes opcode
    mapInfo: opMapEntryInfo	-- how to map this opcode
  );
  opMapEntryInfo: variant of opMapEntryType (
    'DIRECT' -> direct: interpform!opcode {init},
    'TYPED' -> typed: typedOpMap {init},
    'ADHOC' -> adhoc: charstring {init}-- name of translator module
  );
  opMapEntryType: enumeration (
    'DIRECT', 'TYPED', 'ADHOC'
  );
  typedOpMap: table of typedOpMapEntry {full} keys (type);
  typedOpMapEntry: record (
    type: primitive_types,
    liOp: interpform!opcode
  );
  
  -- List of process id's, used to build the linkedProgs table for a
  -- process's LI prog
  procIdList: ordered table of predefined!processid {init};
  
  -- A basic block (BB) is a sequence of LI instructions with no
  -- branches, and no branch targets other than the first instruction.
  -- During execution, a BB can be entered only through the top, and
  -- can exit only through the bottom (or as a result of exceptions or
  -- user exits).  There may be multiple exits from a single BB.  When
  -- the LI program is finally assembled by stringing together BB's,
  -- branch and select instructions are inserted at the ends of the
  -- BB's as necessary to realize the BB's exit structure.
  
  -- Block statements with their handlers are encoded by tying off the
  -- current BB with an "exit" structure that lists the various
  -- handlers and their targets.  This should not be considered (e.g.
  -- by dataflow-style optimizers) to carry normal "exit" semantics
  -- for two reasons: (1) the exiting BB will never actually branch
  -- immediately to one of the handler targets; and (2) runtime
  -- branches to the handlers could come from the middle of a basic
  -- block in the Hermes block statement body.  Be aware of this!!!
  basicBlock: record (
    id: BBid,			-- unique ID for each BB
    code: interpform!code,	-- the instructions in the BB
    exit: BBExit		-- exit structure
  );
  basicBlocks: table of basicBlock {full} keys (id);
  
  BBid: nominal;
  BBidList: ordered table of BBid {init};

  -- A BB exit specification consists of one or more target BB id's
  -- and a means of selecting among them.  In most cases, the exit
  -- structure is closely linked with the final instruction in the BB,
  -- with any instruction addresses needed for the qualifiers set to
  -- zero.  Optimization code must be very careful not to disturb
  -- these instructions!
  --
  -- There are six forms of exit:
  --   jump: A single exit target is given.  The branch is
  --     always taken.  There is no associated instruction in the BB.
  --   ifelse: The final instruction in the BB is a noop with one
  --     operand.  The noop will be turned into either a branch_false
  --     or a branch_true, so as to cause a branch based on the test
  --     value addressed by the operand.  The exit structure contains
  --     two BBid targets: one for an 'if' branch, one for an 'else'.
  --   test: This is for instructions that may or may not jump,
  --     depending on some operation they perform.  For example, loop
  --     tests fall in this category.  The final instruction in the BB
  --     is the test instruction itself.  The exit structure contains
  --     two BBid targets: one for the jump case, and one for the
  --     nojump case.
  --   select: The final BB instruction is a 'select' instruction with
  --     the guards properly encoded.  The exit structure contains a
  --     list of BBid targets, and an otherwise target.
  --   handlers: A list of handler names (including exceptions, exits,
  --     and 'others') and target BB's.  As mentioned above, these are
  --     not actually 'exit' targets for the basic block, but are used
  --     to generate a 'block' instruction during BB assembly.  An
  --     'entry' target is also included, representing the normal
  --     block entry.  There is no associated instruction in the BB.
  --   none: There is no exit from this BB (last instruction is
  --     endprocess or raise_exception or some such).  There is no
  --     associated instruction in the BB.
  BBExitType: enumeration (
    'jump', 'ifelse', 'test', 'select', 'handlers', 'none', 'unknown');
  BBExit: variant of BBExitType (
    'jump' -> jump: BBid {init},
    'ifelse' -> ifelse: BBIfelseExit {full},
    'test' -> test: BBTestExit {full},
    'select' -> selection: BBSelectExit {full},
    'handlers' -> handlers: BBHandlersExit {full},
    'none' -> none: empty {},
    'unknown' -> unknown: empty {}
  );
  
  BBIfelseExit: record (
    ifTarget: BBid,
    elseTarget: BBid
  );
  
  BBTestExit: record (
    jump: BBid,
    nojump: BBid
  );
  
  BBSelectExit: record (
    targets: BBidList,
    other: BBid
  );
  
  BBHandlersExit: record (
    names: handlerNameList,
    targets: BBidList,
    entry: BBid
  );
  handlerNameList: ordered table of predefined!handlerName {full};

  -- A BBData record encapsulates a full BB representation of
  -- a Hermes process, including the basic blocks themselves, an
  -- ordering on the BB's, and the size of the register bank needed to
  -- accomodate all the operands appearing in the BB code blocks.  The
  -- BB identified by 'entry' should be the first BB executed when the
  -- process is started.
  BBData: record (
    BBs: basicBlocks,
    entry: BBid,
    BBOrder: BBidList,
    nReg: integer
  );
  
  
  -- Information about selectors currently being codegened
  selectorInfo: table of selectorInfoEntry {full} keys (elt);
  selectorInfoEntry: record (
    elt: rootname,		-- the element variable
    tblAddr: interpform!operand,-- the source table address
    lkup: lookupType		-- the lookup method used for the selector
  );
  
  ----------------------------------------------------------------
  -- Following are the interface definitions for processes for which
  -- capabilities are passed around in cgData.Fns (most of them make
  -- use of cgData, so we can't put them in their own definitions
  -- modules without getting an import cycle).
  ----------------------------------------------------------------

  -- optimize is a function that transforms a basic block
  -- representation of a program so as to improve its runtime
  -- performance.
  optimize: callmessage (
    BBData: BBData
  )
  exit {full}
  minimum {full};
  optimizeQ: inport of optimize {full};
  optimizeFn: outport of optimizeQ;
  
  -- cgProcess is a function to generate LI code for a single absprog
  -- process.  "Single" does not mean that imbedded program literals
  -- cannot be codegened when they are encountered (indeed, this is
  -- the case, in the current implementation).  But the results of
  -- such "recursive" codegen's will have no effect on the code
  -- generated for the parent process.
  cgProcess: callmessage (
    processid: processid,	-- process to be codegen'ed
    cgData: cgData,		-- absprog & annotations, and other stuff
    LIprog: interpform!prog	-- the generated code
  )
  constant(processid)
  exit {init(processid), init(cgData), full(LIprog),
    full(cgData.Prog), init(cgData.options), full(cgData.Aux),
    full(cgData.Fns), full(cgData.Tplt), full(cgData.std), 
    full(cgData.scratch)}
  minimum {init(processid), init(cgData), 
    full(cgData.Prog), full(cgData.options), full(cgData.Aux), 
    full(cgData.Fns), full(cgData.Tplt), full(cgData.std),
    full(cgData.scratch)};
  cgProcessQ: inport of cgProcess {
    init(processid), init(cgData),
    full(cgData.Prog), init(cgData.options), full(cgData.Aux), 
    full(cgData.Fns), full(cgData.Tplt), full(cgData.std), 
    full(cgData.scratch)};
  cgProcessFn: outport of cgProcessQ;
  
  -- cgProcInit allocates and initializes a cgProcData record to plug
  -- in as the per-process auxiliary info record in cgData
  cgProcInit: callmessage (
    id: processid,
    cgData: cgData,
    shutdown: signalPort
  )
  constant(id)
  exit {full}
  minimum {init(id), init(cgData),
    full(cgData.Prog), full(cgData.options), full(cgData.Aux),
    full(cgData.Fns), full(cgData.Tplt), full(cgData.std),
    full(cgData.scratch)};
  cgProcInitQ: inport of cgProcInit {
    init(id), init(cgData),
    full(cgData.Prog), full(cgData.options), full(cgData.Aux),
    full(cgData.Fns), full(cgData.Tplt), full(cgData.std),
    full(cgData.scratch)};
  cgProcInitFn: outport of cgProcInitQ;

  -- BBAssemble strings together the code from a collection of basic
  -- blocks, obeying the associated ordering and inserting branch and
  -- select instructions as necessary to obey the exit structures.  A
  -- single, flat code body is returned.  The basic block data is
  -- destroyed in the process.
  BBAssemble: callmessage (
    BBData: BBData,		-- the BB representation of the program
    code: interpform!code	-- corresponding flat code
  )
  exit {full}
  minimum {full(BBData)};
  BBAssembleQ: inport of BBAssemble {full(BBData)};
  BBAssembleFn: outport of BBAssembleQ;

  -- cgClause translates the given clause from the absprog into the
  -- current basic block
  cgClause: callmessage (
    clauseid: clauseid,
    cgData: cgData
  )
  constant(clauseid)
  exit {full}
  minimum {full};
  cgClauseQ: inport of cgClause {full};
  cgClauseFn: outport of cgClauseQ;

  -- cgStmt translates a single statement, creating and updating basic
  -- blocks as needed.
  cgStmt: callmessage (
    stmt: predefined!statement, -- the stmt to be translated
    cgData: cgData
  )
  constant(stmt)
  exit {full}
  minimum {full};
  cgStmtQ: inport of cgStmt {full};
  cgStmtFn: outport of cgStmtQ;
  
  -- objAddr converts an absprog operand (an objectname) and returns an
  -- offset list to be used as the corresponding LI instruction
  -- operand.  objAddrInit starts up an objAddr process for a
  -- particular absprog process
  objAddr: callmessage (
    objname: predefined!objectname,
    offsets: interpform!operand
  )
  constant (objname)
  exit {full};
  objAddrQ: inport of objAddr {full(objname)};
  objAddrFn: outport of objAddrQ;

  objAddrInit: callmessage (
    progData: cgProgData,
    processid: processid,
    compOffsets: compOffsetsFn,
    lkupType: lkupTypeFn,
    objAddr: objAddrFn,
    rootAddr: rootAddrFn,
    objType: objTypeFn,
    tmpAddr: tmpAddrFn,
    setAddr: setAddrFn,
    nRegUsed: nRegUsedFn,
    shutdown: signalPort
  )
  constant (progData, processid, compOffsets, lkupType)
  exit {full};
  objAddrInitQ: inport of objAddrInit 
      {full(progData),init(processid),init(compOffsets),init(lkupType)};
  objAddrInitFn: outport of objAddrInitQ;
  
  -- rootAddr is like objAddr, but it expects only a rootid, not a
  -- fully-qualified objectname.
  rootAddr: callmessage (
    root: predefined!rootid,
    scope: predefined!scopeid,	-- the declaration scope of the root obj
    offsets: interpform!operand
  )
  constant (root, scope)
  exit {full};
  rootAddrQ: inport of rootAddr {init(root), init(scope)};
  rootAddrFn: outport of rootAddrQ;
  
  -- objType returns the typename associated with the given absprog
  -- objectname
  objType: callmessage (
    objname: predefined!objectname,
    typename: predefined!typename
  )
  constant (objname)
  exit {full};
  objTypeQ: inport of objType {full(objname)};
  objTypeFn: outport of objTypeQ;
  
  -- tmpAddr allocates a new slot in the process data vector and
  -- returns an offset list addressing that slot.  Use to get an
  -- address for an unnamed temporary introduced by codegen
  tmpAddr: callmessage (
    offsets: interpform!operand
  )
  exit {full};
  tmpAddrQ: inport of tmpAddr {};
  tmpAddrFn: outport of tmpAddrQ;
  
  -- setAddr explicitly sets the LI address that will be returned by
  -- objAddr for the given objectname.  No containing object is
  -- affected, but contained objects are.  If the object is a root
  -- object, rootAddr will also start returning the given address.
  setAddr: callmessage (
    obj: objectname,		-- the object being mapped
    addr: interpform!operand	-- the LI address to be used
  )
  constant (obj, addr)
  exit {full};
  setAddrQ: inport of setAddr {full};
  setAddrFn: outport of setAddrQ;
    
  -- varCaseID takes an objectname, which must correspond to a
  -- component of a variant, and returns the corresponding variant
  -- case ID number
  varCaseID: callmessage (
    objname: objectname,
    cgData: cgData,
    caseID: integer
  )
  constant(objname, cgData)
  exit {full};
  varCaseIDQ: inport of varCaseID {full(objname), full(cgData)};
  varCaseIDFn: outport of varCaseIDQ;

  -- nRegUsed returns the number of registers used so far by objAddr
  -- in assigning addresses to objects
  nRegUsed: callmessage (
    nRegUsed: integer
  )
  exit {full};
  nRegUsedQ: inport of nRegUsed {};
  nRegUsedFn: outport of nRegUsedQ;
  
  -- breakSelector breaks up a selector into two parts: a direct
  -- lookup and a followup test.  The direct lookup specifies how to
  -- use a key, index, or position to find the required table element, if
  -- possible.  The followup test must be applied to the result of the
  -- direct lookup to verify that an eligible table element has indeed
  -- been located.
  breakSelector: callmessage (
    selector: predefined!selector,-- the selector to be analyzed
    tblType: typename,		-- the table's type
    cgData: cgData,
    lookup: lookupInfo,		-- how to do a keyed/indexed lkup if possible
    tests: statements,		-- additional tests to determine if
				-- lkup succeeded
    newresult: objectname	-- bool var holding result of
				-- additional tests, if any
  )
  constant (selector, tblType, cgData)
  exit {full};
  breakSelectorQ: inport of breakSelector 
      {init(selector), full(tblType), full(cgData)};
  breakSelectorFn: outport of breakSelectorQ;
  lookupInfo: variant of lookupType (
    'scan' -> scan: empty {},	-- no direct lookup possible
    'index' -> index: lookupData {full},-- indexed lookup 
    'key' -> key: lookupData {full},-- keyed lookup
    'position' -> posn: lookupValue {full}-- element posn in ordered tbl
  );
  lookupType: enumeration ('scan', 'index', 'key', 'position');
  lookupData: record (
    repno: integer,		-- the table representation number by
				-- which to do the lookup
    values: lookupValueList
  );
  lookupValueList: ordered table of lookupValue {full};
  lookupValue: record (
    computation: statements,	-- statements to compute the value
    result: objectname		-- where the value ends up
  );
  
  -- cgSelector acts as a coroutine to build a skeleton for certain
  -- table operations that make use of selectors.  The initial call
  -- builds a preamble and then returns so that the caller can
  -- generate the appropriate body code for the lookup loop.  Then the
  -- caller calls a continuation capability returned by the original
  -- call, which generates code to tie off the loop.
cgSelector: callmessage (
    selector: predefined!selector,-- the selector controlling the op
    cgData: cgData,
    tblType: typename,		-- the table's type
    loopType: selectorLoopType,	-- determines behavior of the loop
    lookupInfo: lookupInfo,	-- the type of lookup being used
    tblAddr: interpform!operand,-- the table being operated on
    continuation: cgSelectorContinueFn
  )
  constant (selector, tblType, loopType, tblAddr)
  exit {full}
  minimum {full(selector), full(cgData), full(tblType), 
    init(loopType), init(tblAddr)};
  cgSelectorQ: inport of cgSelector {
    full(selector), full(cgData), full(tblType), 
    init(loopType), init(tblAddr) };
  cgSelectorFn: outport of cgSelectorQ;
  
  selectorLoopType: enumeration (-- how the lookup loop should behave
    'allOrNone',		-- loop over all matching elements,
				-- but don't raise 'NotFound' if none
    'oneOrNone',		-- stop after the first match, and
				-- don't raise 'NotFound' if none
    'exactlyOne'		-- stop after first match, raise
				-- 'NotFound' if none
  );

  cgSelectorContinue: callmessage (
    cgData: cgData
  )
  exit {full}
  minimum {full};
  cgSelectorContinueQ: inport of cgSelectorContinue {full};
  cgSelectorContinueFn: outport of cgSelectorContinueQ;
  
  -- clauseUsesObj determines whether the given clause makes use of
  -- the given object (or one of its components, or recursiveily...).
  -- The test may not be exact, due to performance constraints, so
  -- that a 'true' might be returned when the object is not actually
  -- used.  Callers should therefore assure that actions taken for a
  -- 'true' result will be correct (though perhaps not optimal) even
  -- when the correct result is 'false'.
  clauseUsesObj: callmessage (
    clauseid: clauseid,
    obj: objectname,
    cgData: cgData,
    result: predefined!boolean
  )
  constant (clauseid, obj, cgData)
  exit {full};
  clauseUsesObjQ: inport of clauseUsesObj 
      {init(clauseid), full(obj), full(cgData)};
  clauseUsesObjFn: outport of clauseUsesObjQ;
  
  -- lkupType takes a root object name and finds its type (as a typename)
  -- from a given collection of scopes or, if that fails, from a given
  -- table of inferred definitions.
  lkupType: callmessage (
    obj: rootname,
    scopes: predefined!scopes,
    infDefs: inferredDefinitions,
    type: typename
  )
  constant (obj, scopes, infDefs)
  exit {full};
  lkupTypeQ: inport of lkupType {full(obj), init(scopes), init(infDefs)};
  lkupTypeFn: outport of lkupTypeQ;
  
  -- newTableInfo selects one or more data representations for a given
  -- table type.
  newTableInfo: callmessage (
    info: predefined!table_info,-- table description from type definition
    typeDef: typeDefFn,		-- utility to extract type defn from typename
    compOffsets: compOffsetsFn,	-- util to turn component list into offsets
    datarep: interpform!new_table_info-- selected data rep(s)
  )
  constant (info, typeDef, compOffsets)
  exit {full};
  newTableInfoQ: inport of newTableInfo 
      {full(info),init(typeDef),init(compOffsets)};
  newTableInfoFn: outport of newTableInfoQ;

  -- typeDef returns the full type definition corresponding to the
  -- given type name
  typeDef: callmessage (
    name: predefined!typename,
    def: predefined!type_definition
  )
  constant (name)
  exit {full};
  typeDefQ: inport of typeDef {full(name)};
  typeDefFn: outport of typeDefQ;
  
  -- compOffsets takes a list of predefined!component objects and
  -- returns a list of matching offsets for LI operand addressing
  compOffsets: callmessage (
    type: typename,		-- the type to which 1st component belongs
    components: predefined!component_list,
    offsets: interpform!operand
  )
  constant (components)
  exit {full}
  minimum {full(type), init(components)};
  compOffsetsQ: inport of compOffsets {full(type),init(components)};
  compOffsetsFn: outport of compOffsetsQ;
  
  -- atoi converts a decimal string to an integer, e.g. "103" => 10
  atoi: callmessage (
    s: charstring,
    i: integer
  )
  constant(s)
  exit {full};
  atoiQ: inport of atoi {init(s)};
  atoiFn: outport of atoiQ;
  
  -- ator converts a decimal string to a real, e.g. "10.38e5" => 1038000.0
  ator: callmessage (
    s: charstring,
    r: real
  )
  constant(s)
  exit {full};
  atorQ: inport of ator {init(s)};
  atorFn: outport of atorQ;
  
  -- A few miscellaneous types needed by some of the modules
  clauseidList: ordered table of clauseid {init};

end definitions
