[scn_base.h] Basic Token Separation

contents



#include "symbols.h"
#include "scn_io.h"



The module [scn_base] implements the scan stream interface based on a given STYX-conformant scanner definition. ( see [scn_gen] )

The scan stream is reentrant and allows multiple scan operations at a time.


Macros


Token definition flags ( see Scn_dfnToken )

/* Token definition flags: every value below is a distinct power of two (1 .. 128). */
#define SCN_FLG_IgnoreToken  1    /* token has to be ignored          */
#define SCN_FLG_IgnoreCase   2    /* case insensitive token           */
#define SCN_FLG_EofToken     4    /* eof token in embedded language   */
#define SCN_FLG_SwitchToken  8    /* switch to/from embedded language */
#define SCN_FLG_CfgToken     16   /* embedded language token          */
#define SCN_FLG_IndentToken  32   /* (de)indent token                 */
#define SCN_FLG_PatternToken 64   /* (start or end) pattern token     */
#define SCN_FLG_GroupStack   128  /* push/pop token group             */


Token preprocessing result values

/* Return codes of a token preprocessing function (Scn_PreMacFun).       */
/* Each code applies only together with a valid cMacVal.                 */
#define SCN_FLG_RescanNone     0  /* replace cTokVal with cMacVal ( single byte or utf-8 )     */
#define SCN_FLG_RescanString   1  /* rescan string cMacVal                                     */
#define SCN_FLG_RescanBString  2  /* rescan binary string cMacVal                              */
#define SCN_FLG_RescanFile     3  /* rescan file cMacVal                                       */
#define SCN_FLG_RescanBFile    4  /* rescan binary file cMacVal                                */
#define SCN_FLG_RescanExtFile  5  /* rescan file cMacVal, main stream read function if possible        */
#define SCN_FLG_RescanExtBFile 6  /* rescan binary file cMacVal, main stream read function if possible */
#define SCN_FLG_RescanChar     7  /* rescan character cMacVal ( single byte or ucs4 )          */
#define SCN_FLG_RescanChrItr   8  /* rescan character iterator cMacVal ( frees the iterator )  */
#define SCN_FLG_RescanBChrItr  9  /* rescan binary char iterator cMacVal ( frees the iterator ) */



Stream of tokens


Scn_Stream Stream onto a scanner

Scn_Stream_Itr Character iterator onto a scan stream

   Type of token preprocessing function
   RC = SCN_FLG_RescanChrItr   & valid cMacVal --> rescan character iterator cMacVal
                                                   ( frees character iterator )
   RC = SCN_FLG_RescanBChrItr  & valid cMacVal --> rescan binary char iterator cMacVal
                                                   ( frees character iterator )
   RC = SCN_FLG_RescanString   & valid cMacVal --> rescan string cMacVal
   RC = SCN_FLG_RescanFile     & valid cMacVal --> rescan file cMacVal
   RC = SCN_FLG_RescanBString  & valid cMacVal --> rescan binary string cMacVal
   RC = SCN_FLG_RescanBFile    & valid cMacVal --> rescan binary file cMacVal
   RC = SCN_FLG_RescanExtFile  & valid cMacVal --> rescan file cMacVal,
                                                   with main stream read function if possible
   RC = SCN_FLG_RescanExtBFile & valid cMacVal --> rescan binary file cMacVal,
                                                   with main stream read function if possible
   RC = SCN_FLG_RescanChar     & valid cMacVal --> rescan character cMacVal
                                                   ( single byte or ucs4 )
   RC = SCN_FLG_RescanNone     & valid cMacVal --> replace cTokVal with cMacVal
                                                   ( single byte or utf-8 )
   ELSE                                        --> ignore cTokVal


/*
 * Token preprocessing callback.
 * Receives the scan stream, the token name and the token value.
 * The int result is one of the SCN_FLG_Rescan* codes; it is interpreted
 * together with the value passed back through 'cMacVal'.
 */
typedef int (*Scn_PreMacFun)
            (
              Scn_Stream pStream, c_string cTokNam,
              c_string cTokVal, symbol* cMacVal
            );


Type of the embedded language token constructor
/*
 * Embedded language token constructor callback ( registered with Stream_eterm_set ).
 * 'CurPTCfg' is the parse term configuration handed to Stream_eterm_set.
 * NOTE(review): 'language' and 'StartSymbol' presumably name the embedded
 * language and its start symbol -- confirm against the implementation.
 */
typedef Any_T (*Scn_eTerm)
              (
                Abs_T CurPTCfg, c_string language, c_string StartSymbol
              );


Type of the embedded language accept token recognizer
/* Embedded language accept token recognizer callback; yields a c_bool for the  */
/* parse term configuration 'CurPTCfg' ( registered with Stream_eterm_set ).    */
typedef c_bool (*Scn_eAccept)(Abs_T CurPTCfg);



Accessing scanner definition

c_string Scn_id(Scn_T scn)
name of the scanner (group) 'scn'; allocates memory

short Scn_check_Token(Scn_T scn, c_string s)
whether string 's' is a token in scanner 'scn';
result = token number ( > 0 ) or 0
assertion: Scn_groups(scn) = 0

short Scn_check_WCToken(Scn_T scn, wc_string s)
whether wide string 's' is a token in scanner 'scn';
result = token number ( > 0 ) or 0
assertion: Scn_groups(scn) = 0

int Scn_tokens(Scn_T scn)
number of tokens defined in scanner ( group ) 'scn'
c_string Scn_tokid(Scn_T scn, int i)
name of the i-th token defined in scanner 'scn'; allocates memory
assertion: Scn_groups(scn) = 0

c_byte Scn_tokFlags(Scn_T scn, int i)
flags of the i-th token defined in scanner 'scn'
assertion: Scn_groups(scn) = 0

int Scn_dycks(Scn_T scn)
number of dyck tokens defined in scanner ( group ) 'scn'
Scn_T Scn_dyck(Scn_T scn, int i)
i-th dyck scanner defined in scanner group 'scn'
assertion: Scn_dycks(scn) > 0

int Scn_dycktoken(Scn_T scn, int i)
i-th dyck token defined in scanner group 'scn'
assertion: Scn_dycks(scn) > 0

int Scn_groups(Scn_T scn)
number of groups defined in scanner ( group ) 'scn'
Scn_T Scn_group(Scn_T scn, int i)
i-th scanner defined in scanner group 'scn'
assertion: Scn_groups(scn) > 0

int Scn_SwitchGroup(Scn_T scn, int i)
switch group of the i-th token defined in scanner group 'scn' or -1
assertion: Scn_groups(scn) = 0

long Scn_check_GroupToken(Scn_T scn, c_string s)
whether string 's' is a token in scanner group 'scn';
result: high = group index , low = token number ( > 0 ) or 0
assertion: Scn_groups(scn) > 0

long Scn_check_GroupWCToken(Scn_T scn, wc_string s)
whether wide string 's' is a token in scanner group 'scn';
result: high = group index , low = token number ( > 0 ) or 0
assertion: Scn_groups(scn) > 0



Character iterator definition

Scn_Stream_Itr Stream_Itr_new
               (
                 int      f_getc (StdCPtr file),
                 void     f_close(StdCPtr file),
                 c_string   f_wc2mb(wc_string wc),
                 LONG_INT f_seek (StdCPtr file, long offset, int origin),
                 StdCPtr  file,
                 c_string   fileid
               )
creates and initializes a character iterator
onto the open character source 'file'
'f_getc' : next character
'f_close': closes character source
'f_wc2mb': converts wide to multibyte character
'f_seek' : positions character source
'fileid' : character source identifier

void Stream_Itr_free(Scn_Stream_Itr itr)
frees character iterator 'itr'


Scan stream definition

Scn_Stream Stream_bgn(Scn_T scn, Scn_Stream_Itr itr)
creates and initializes a scan stream
onto scanner 'scn' and character iterator 'itr'

void Stream_close(Scn_Stream t)
closes character source i.e. file of scan stream 't'
void Stream_free(Scn_Stream t)
frees scan stream 't'
void Stream_reset(Scn_Stream t)
re-initializes scan process in current scan stream 't'
void Stream_premac_set(Scn_Stream t, Scn_PreMacFun cPreMac)
adds 'cPreMac' to scan stream 't'
for token preprocessing & macro expansion

void Stream_eterm_set
     (
       Scn_Stream t, Abs_T CurPTCfg, Scn_eTerm eTerm, Scn_eAccept eAccept
     )
adds 'eTerm', 'eAccept' and 'CurPTCfg' to scan stream 't'
for embedded language token construction
( deactivates other token concatenation )

void Stream_add_ctxval(Scn_Stream t, Any_T id, Any_T val)
adds the context pair 'id' |--> 'val' to scan stream 't';
updates existing entries

void Stream_concat_other(Scn_Stream t)
activates other token concatenation in scan stream 't'
void Stream_skip_character(Scn_Stream t, int cC)
skips character 'cC' ( but tracks position )
void Stream_enable_lookahead(Scn_Stream t)
activates n-character lookahead in scan stream 't'
void Stream_binmode_set(Scn_Stream t)
activates binary mode in scan stream 't'
( deactivates other token concatenation )

Scn_Stream Stream_line
           (
             Scn_T scn, StdCPtr line, int (*cGet)(StdCPtr line), c_string id
           )
/* Convenience form of Stream_line: scan stream onto scanner 'scn' that reads */
/* stdin with fgetc and uses "<stdin>" as the character stream identifier.    */
/* NOTE(review): fgetc is cast to int (*)(StdCPtr); this assumes StdCPtr is a */
/* generic pointer type call-compatible with FILE* -- confirm.                */
#define STREAM_STDIN(scn) \
	Stream_line(scn,stdin,(int (*)(StdCPtr f))fgetc,"<stdin>")
creates a scan stream
onto scanner 'scn' and character stream 'line'
'cGet': next character
'id' : character stream identifier

Scn_Stream Stream_file(Scn_T scn, c_string EnvVar, c_string FileName, c_string Ext)
creates a scan stream
onto scanner 'scn' and file [$'EnvVar'/]'FileName''Ext'

Scn_Stream Stream_extfile
           (
             Scn_T scn, c_string EnvVar, c_string FileName, c_string Ext,
             int (*cGet)(FILE* f)
           )
creates a scan stream
onto scanner 'scn' and file [$'EnvVar'/]'FileName''Ext'
with external next character function 'cGet'

Scn_Stream Stream_bstring(Scn_T scn, c_bstring bstr)
creates a scan stream onto scanner 'scn' and binary string 'bstr'
void Stream_bstring_set(Scn_Stream stream, c_bstring bstr)
re-initializes scan stream 'stream' with binary string 'bstr'
Scn_Stream Stream_string(Scn_T scn, c_string Text)
creates a scan stream onto scanner 'scn' and string 'Text'
void Stream_string_set(Scn_Stream stream, c_string Text)
re-initializes scan stream 'stream' with string 'Text'
void Stream_string_insert(Scn_Stream stream, c_string Text)
inserts string 'Text' at current position in scan stream 'stream'


Defining token numbers

void Stream_defEofId(Scn_Stream t, short tid)
defines 'tid' as EOF token number in scan stream 't'
void Stream_defErrId(Scn_Stream t, short tid)
defines 'tid' as error token number in scan stream 't'
void Stream_defTokId(Scn_Stream t, c_string Name, short tid)
defines 'tid' as number for token 'Name' in scan stream 't'
c_bool Stream_hasTokId(Scn_Stream t, c_string Name)
whether 'Name' is a token in scan stream 't'
c_bool Stream_defDynKeyId(Scn_Stream t, symbol keysym, short tid)
defines 'tid' as number for dynamic keyword 'keysym' in scan stream 't'
void Stream_defKeyId(Scn_Stream t, c_string Text, short tid)
defines 'tid' as number for keyword 'Text' in scan stream 't'
void Stream_defWCKeyId(Scn_Stream t, wc_string Text, short tid)
defines 'tid' as number for wide keyword 'Text' in scan stream 't'


Scan stream operations

void Stream_next(Scn_Stream t)
separates the next token in scan stream 't';
That's the workhorse operation.



Accessing current token

symbol Stream_csym(Scn_Stream t)
symbol of the current token in scan stream 't'
short Stream_ctid(Scn_Stream t)
token number of the current token in scan stream 't'
short Stream_ctid_nokey(Scn_Stream t)
token number of the current token in scan stream 't'
(non-indent token --> ignore keyword)

c_string Stream_ctnam(Scn_Stream t)
name of the current token in scan stream 't' or NULL (EOF)
c_bool Stream_cica(Scn_Stream t)
whether the current token in scan stream 't'
has the attribute 'ignore case'

c_string Stream_cbuf(Scn_Stream t)
string of the current token in scan stream 't' or NULL
( not valid for an embedded language token )

wc_string Stream_wcbuf(Scn_Stream t)
wide string of the current token in scan stream 't' or NULL
( not valid for an embedded language token )

long Stream_clen(Scn_Stream t)
length of the current token in scan stream 't'
( not valid for an embedded language token or the token that follows it )

symbol Stream_cfil(Scn_Stream t)
source identifier of the current token in scan stream 't'
long  Stream_clin(Scn_Stream t)
first line number of the current token in scan stream 't'
long  Stream_ccol(Scn_Stream t)
first column number of the current token in scan stream 't'


Accessing scan stream definition

Scn_T Stream_scn(Scn_Stream t)
scanner definition of scan stream 't'
Scn_PreMacFun Stream_premac(Scn_Stream t)
preprocessor of scan stream 't'
Scn_eTerm Stream_etermFun(Scn_Stream t)
embedded language token constructor of scan stream 't'
Scn_eAccept Stream_etermEof(Scn_Stream t)
embedded language accept token recognizer of scan stream 't'
Abs_T Stream_etermCfg(Scn_Stream t)
current parse term configuration of scan stream 't'
c_bool Stream_binmode(Scn_Stream t)
whether current scan stream 't' is in binary mode
c_bool Stream_lookahead(Scn_Stream t)
whether current scan stream 't' has n-character lookahead enabled
c_bool Stream_unicode(Scn_Stream t)
whether current scan stream 't' is unicode-based
StdCPtr Stream_cstream(Scn_Stream t)
character stream of current scan stream 't'
Any_T Stream_get_ctxval(Scn_Stream t, Any_T id)
value of context variable 'id' in scan stream 't' or NULL