/***  symbol.c  ***/

#include <stdio.h>

#include "lalgol.h"
#include "symbol.h"
#include "data.h"
#include "hash.h"
#include "io.h"

/* Forward declarations of the file-local routines defined further down. */
static bool peep_optor(int);		/* helper of operator() for a '=' inside a run */
static void operator(void);		/* splits a run of operator characters into symbols */
static int  bold(void);			/* reads a quote-delimited bold word */
static void identifier(void);		/* reads an identifier and emits its ITAG symbol */
static void string_or_character(void);	/* reads a "..." denotation */
static inta  integral_value(int);	/* converts the digits held in TEMP */
static void number(void);		/* entry point for every numeric denotation */
static void real_number(int);		/* fraction/exponent part, emits DREAL */
static void bits_number(int);		/* radix denotation, emits DBITS */
static int  next_token(void);		/* scans up to the next bracket-like token */
static bool open_symbol(void);		/* handles one opening token and its block */
static void block(int,int,int,int);	/* processes the inside of one bracket pair */


/* bool nomad_or_colon(char), is_letter(char), is_letgit(char),
	is_string_char(char), is_digit(char), is_base_digit(base,char);
 * int sort_of(char);
 * char next_char() -> ch, next_visible_char() -> ch; void put_symbol(s);
 * int place_integer(i), place_string(length), place_real(double); */

/*----------------------------------------------------------------------*/
/*  constructs like 'op'===('int'a)'int':'skip'  */

/*
 * SAFE is a small bit set consulted by peep_optor():
 *   01  peep_optor() only acts while this bit is set
 *   02  left behind by unset_SAFE() when 02 or 04 was set before
 *   04  set (together with 01) by set_SAFE()
 * Every macro is one fully parenthesised expression, so it can be used
 * inside a larger expression without its conditional absorbing what follows.
 */
static int SAFE;

/* true while bit 01 is clear */
#define is_local_SAFE() 	(!(1&SAFE))
/* drop all bits */
#define clear_SAFE()		(SAFE=0)
/* set bits 04 and 01 */
#define set_SAFE()		(SAFE=05)
/* collapse to 02 if 02 or 04 was set, otherwise to 0 */
#define unset_SAFE()		(SAFE=((SAFE&06)?02:0))
/* set bit 01 again, but only if 02 or 04 is still set */
#define set_local_SAFE()	(SAFE |= ((SAFE&06)?01:0))

/*----------------------------------------------------------------------*/
/*  operators, assignation and identity relations */

/* Capacity of OPTOR: operator() reports "too long operator sequence" once
 * this many symbols have been split out of a single run. */
#define OPTOR_max	30
/* Symbols found by operator(); it fills the array while walking the run
 * backwards and then emits the entries from the last one to the first. */
static int OPTOR[OPTOR_max];

/*
 * Helper of operator(), called when TEMP[i] is '=' and i >= 1.
 * Looks at TEMP[0..i-1], the characters in front of that '=', and returns
 * TRUE when they have one of the shapes tested below; operator() then
 * treats the '=' as a one-character operator of its own.
 * Always returns FALSE when i > 4, when is_local_SAFE() holds or when the
 * run starts with ':'.  On TRUE it performs unset_SAFE(), so the next call
 * within the same run is refused.
 */
static bool peep_optor(int i)
{
	int last=i-1;		/* index of the character just before TEMP[i] */

	if(i>4 || is_local_SAFE() || TEMP[0]==':') return(FALSE);
	switch(TEMP[last]){
	    case ':':	/* must end in "=:" with a non-colon in front of it */
		if(last<2 || TEMP[last-1]!='=' || TEMP[last-2]==':')
		    return(FALSE);
		break;
	    case '=':	/* three or more characters: must end in ":=" with a
			   non-colon in front of it */
		if(last>1 && (TEMP[last-1]!=':' || TEMP[last-2]==':'))
		    return(FALSE);
		break;
	    default:	/* any other ending: at most two characters */
		if(last>1) return(FALSE);
		break;
	}
	unset_SAFE();
	return(TRUE);
}

/*
 * Scan a run of operator characters and emit one symbol per operator.
 *
 * The run (starting with the current ch and continuing while
 * nomad_or_colon() accepts next_char()) is collected in TEMP; the first
 * character that does not belong to it ends up in TEMP[i].  The run is then
 * walked from its end towards its start: each step picks the operator
 * TEMP[i..i+j], copies it NUL-terminated into STNG and stores the result of
 * insert(p,IIND) in OPTOR.  Finally OPTOR is emitted from its last entry to
 * its first one.
 *
 * Reports "too long operator sequence" and returns without emitting
 * anything when TEMP or OPTOR would overflow.
 */
static void operator(void)
{
	int i,j,k,op_count,p;

	i=op_count=0; TEMP[0]=ch;
	while(nomad_or_colon(TEMP[++i]=next_char())){
	    if(i>=TEMP_max){
		error("too long operator sequence");
		return;
	    }
	}
	while(i){
	    if(op_count==OPTOR_max){
		error("too long operator sequence");
		return;
	    }
	    /* j = number of extra characters in front of TEMP[i] that belong
	       to the same operator */
	    switch(TEMP[--i]){
		case ':':  j = (i<2||TEMP[i-1]!='=') ? 0 : i<3 ? 2 : 3;
			   break;
		case '=':  j = (i<1||peep_optor(i)) ? 0 :
			       (i<2||TEMP[i-1]!=':') ? 1 : i<3 ? 2 : 3;
			   break;
		default:   j = i<1 ? 0 : 1; break;
	    }
	    i -= j;            /* the operator is in TEMP[i:i+j] */
	    p=STNG_pointer;
	    /* j+1 characters plus the terminating NUL are stored at
	       p..p+j+1; keep the NUL below STNG_max, as bold() and
	       identifier() do (assumes STNG_max is the first index that
	       must not be written -- TODO confirm against data.h) */
	    if(p+j+1>=STNG_max)
		fatal_error("character table is full");
	    for(k=0;k<=j;k++) STNG[STNG_pointer++]=TEMP[i+k];
	    STNG[STNG_pointer++]=0;
	    OPTOR[op_count++]=insert(p,IIND);
	}
	while(op_count>0) put_symbol(OPTOR[--op_count]);
}

/*----------------------------------------------------------------------*/
/* bold symbols */

/*
 * Read a bold word; next_token() calls this when ch is a quote.
 *
 * A quote followed by the letters accepted by is_letter() is stored
 * NUL-terminated in STNG, starting at the current STNG_pointer.
 * Returns insert(p,IIND) for the stored word, or SKIP after reporting
 * "misplaced bold delimiter" when no letter follows the quote.
 * A missing closing quote is reported but the word is still returned.
 */
static int bold(void)
{
	int p;

	p=STNG_pointer;
	/* check before the first store as well: the loop below only guards
	   the stores that follow it */
	if(p>=STNG_max)
	    fatal_error("character table is full");
	STNG[STNG_pointer]='\'';
	while((++STNG_pointer)<STNG_max &&
	      is_letter(STNG[STNG_pointer]=next_char()))
	    ;
	if(STNG_pointer>=STNG_max)
	    fatal_error("character table is full");
	STNG[STNG_pointer++]=0;		/* overwrites the first non-letter */
	if(p+2==STNG_pointer){		/* only the quote was stored */
	    error("misplaced bold delimiter");
	    if(ch=='\'') next_char();
	    return(SKIP);
	}
	if(ch=='\''){
	    next_visible_char();
	} else {
	    error("missing bold delimiter");
	}
	return(insert(p,IIND));
}

/*----------------------------------------------------------------------*/
/* identifiers */

/*
 * Read an identifier and emit its symbol.
 *
 * The current ch and every following next_visible_char() accepted by
 * is_letgit() are stored NUL-terminated in STNG, starting at the current
 * STNG_pointer; the symbol emitted is insert(p,ITAG).
 */
static void identifier(void)
{
	int p;

	p=STNG_pointer;
	/* check before the first store as well: the loop below only guards
	   the stores that follow it */
	if(p>=STNG_max)
	    fatal_error("character table is full");
	STNG[STNG_pointer]=ch;
	while(++STNG_pointer<STNG_max &&
	      is_letgit(STNG[STNG_pointer]=next_visible_char()))
	    ;
	if(STNG_pointer>=STNG_max)
	    fatal_error("character table is full");
	STNG[STNG_pointer++]=0;		/* overwrites the first rejected character */
	put_symbol(insert(p,ITAG));
}

/*----------------------------------------------------------------------*/
/*  string and character denotation  */

static void string_or_character()
{int i=0;
start:	while(is_string_char(TEMP[i<TEMP_max?i++:i]=next_char()));
	if(i>=TEMP_max){
	    error("too long string"); if(ch=='"')next_char(); return;
	}
	if(ch!='"'){
	    error("missing string delimiter"); return;
	}	/* check for double quote */
	if(next_char()=='"') {goto start;}
	switch(i){ /* one more than the length of the string */
	    case 1:  put_symbol(DSTRING+place_integer(0)); return;
	    case 2:  put_symbol(DCHAR+place_integer(TEMP[0])); return;
	    default: put_symbol(DSTRING+place_string(i-1)); return;
	}
}

/*----------------------------------------------------------------------*/
/*  integral, real and bits denotation  */

/*
 * Convert the decimal digits in TEMP[0..i-2] to an inta.
 *
 * i is the number of characters stored in TEMP, counting the character
 * that ended the digits; that last character is replaced by a NUL.
 * Returns 0 when sscanf() converts nothing.
 * NOTE(review): "%d" / "%ld" presume inta is int when INT32 is set and
 * long otherwise -- confirm against lalgol.h.
 */
static inta integral_value(int i)
{
	inta j=0;

	TEMP[i-1]=0;
#if INT32
	if(sscanf(TEMP,"%d",&j)!=1) j=0;
#else   /* TURBO_C */
	if(sscanf(TEMP,"%ld",&j)!=1) j=0;
#endif
	return(j);
}

/*
 * Finish reading a real denotation and emit it as DREAL.
 *
 * On entry TEMP[0..i-1] holds what number() has read so far and TEMP[i-1]
 * is the current ch ('.', 'e' or 'E').  The fraction digits and the
 * exponent are appended, the character that ended the number is replaced
 * by a NUL and the text is converted with sscanf().
 * Reports "too long number" when TEMP is full and "wrong real denotation"
 * when the text cannot be converted (for instance a '.' without digits);
 * no symbol is emitted in either case.
 */
static void real_number(int i)
{
	double r=0.0;
	int converted;

	if(ch=='.'){
	    while(is_digit(TEMP[i<TEMP_max?i++:i]=next_visible_char()))
		;
	}
	if(ch=='e' || ch=='E'){ /* read the exponent */
	    /* the character right after the 'e' is stored unchecked */
	    TEMP[i<TEMP_max?i++:i]=next_visible_char();
	    while(is_digit(TEMP[i<TEMP_max?i++:i]=next_visible_char()))
		;
	}
	if(i>=TEMP_max){error("too long number"); return;}
	TEMP[i-1]=0;
#if INT32 && 0	/* not for linux */
	converted=sscanf(TEMP,"%F",&r);
#else   /* TURBO_C */
	converted=sscanf(TEMP,"%lf",&r);
#endif
	if(converted!=1){error("wrong real denotation"); return;}
	put_symbol(DREAL+place_real(r));
}

#define get_value(x)	(x<='9'?x-'0':x+10-(x<='F'?'A':'a'))

static void bits_number(i) int i;
{int base=0; inta value=0;
	switch(integral_value((int)i)){
	    case 16: ++base;
	    case 8:  ++base;
	    case 4:  ++base;
	    case 2:  ++base;
	}
	if(!base){
	    error("wrong bits denotation"); next_visible_char(); return;
	}
	while(is_base_digit(base,next_visible_char()))
	    value = (value<<base) | get_value(ch);
        put_symbol(DBITS+place_integral(value));
}

#define integral_number(i)  put_symbol(DINT+place_integral(integral_value(i)))

static void number()
{int i=1;
	if((TEMP[0]=ch)=='.'){real_number(i); return;}
	while(is_digit(TEMP[i<TEMP_max?i++:i]=next_visible_char()));
	if(i>=TEMP_max){error("too long number"); return;}
	switch(ch){ /* what is the symbol following the number */
	    case '.': case 'e': case 'E': real_number(i); return;
	    case 'r': case 'R':		  bits_number(i); return;
	    default:			  integral_number(i); return;
	}
}

/*----------------------------------------------------------------------*/
/*  next token  */

static int token;

static int next_token()
{int result;
start:	switch(sort_of(ch)){
	case layout_t:	next_char(); goto start;
	case letter_t:	identifier(); unset_SAFE(); goto start;
	case digit_point_t:
			number(); unset_SAFE(); goto start;
	case nomad_monad_colon_t:
			operator(); unset_SAFE(); goto start;
	case wrong_t:	error("unrecognizable character"); next_char();
			goto start;
	default:	switch(ch){ /* some single character */
	case '#':	skip_short_comment(); goto start;
	case '"':	string_or_character(); unset_SAFE(); goto start;
	case '\'':	result=bold();
			if(is_open_close(result))return(result);
			if(is_comment(result)){
			    skip_comment(result); goto start;
			}
			if(is_OP_or_PRIO(result)) set_SAFE();
			put_symbol(result); goto start;
	case '(':	next_char(); return(OPEN_4);
	case ')':	next_char(); return(CLOSE_4);
	case '[':	next_char(); return(OPEN_5);
	case ']':	next_char(); return(CLOSE_5);
	case ';':	put_symbol(GOON); next_visible_char(); clear_SAFE();
			    goto start;
	case ',':	put_symbol(COMMA); next_visible_char();
			    set_local_SAFE(); goto start;
	case '@':	put_symbol(AT); next_visible_char(); goto start;
	case '|':	if(next_char()==':'){
			    next_char(); return(AGAIN_4);
			}
			return(VERT_4);
	default:	return(EOF);
	}}
}

/*----------------------------------------------------------------------*/
/*  the program itself  */

/*
 * Fetch the next token into `token`.  If it opens a construct, emit OPEN
 * (preceded by DO for DO_1), process the construct with block() and return
 * TRUE; otherwise return FALSE and leave the token for the caller.
 */
static bool open_symbol(void)
{
	int closer;				/* token that ends the construct */
	int first_vert=0, second_vert=0;	/* 0: no such separator */
	int again_tok=0;

	token=next_token();
	switch(token){
	case OPEN_1:
	    closer=CLOSE_1;
	    break;
	case OPEN_2:
	    closer=CLOSE_2;
	    first_vert=VERT_21; second_vert=VERT_22; again_tok=AGAIN_2;
	    break;
	case OPEN_3:
	    closer=CLOSE_3;
	    first_vert=VERT_31; second_vert=VERT_32; again_tok=AGAIN_3;
	    break;
	case OPEN_4:
	    closer=CLOSE_4;
	    first_vert=VERT_4; second_vert=VERT_4; again_tok=AGAIN_4;
	    break;
	case OPEN_5:
	    closer=CLOSE_5;
	    break;
	case DO_1:
	    put_symbol(DO);
	    closer=OD_1;
	    break;
	default:
	    return(FALSE);
	}
	put_symbol(OPEN);
	block(closer,first_vert,second_vert,again_tok);
	return(TRUE);
}

static void block(close,vert1,vert2,again) int close,vert1,vert2,again;
{int old_SAFE, then_part;
	old_SAFE=SAFE; clear_SAFE(); then_part=0;
back:	while(open_symbol());
	if(token==close){
	    SAFE=old_SAFE; put_symbol(CLOSE); return;
	}
	if(then_part==0 && token==vert1){
	    put_symbol(VERT); then_part++; goto back;
	}
	if(then_part==1 && token==again){
	    put_symbol(AGAIN); then_part--; goto back;
	}
	if(then_part==1 && token==vert2){
	    put_symbol(VERT); then_part++; goto back;
	}
	if(token==EOF){
	    fatal_error("end of file was hit unexpectedly");
	}
	error("parenthesis mismatch"); goto back;
}

/*
 * Scan a whole program: the source, which has to start with an open
 * symbol, is wrapped in an extra OPEN ... CLOSE pair and followed by the
 * symbols GOON STOP_TAG COLON SKIP.
 */
void program()
{
	put_symbol(OPEN);
	clear_SAFE();

	if(!open_symbol())
	    fatal_error("the program must start with an open symbol");

	/* trailer appended after the user's outermost construct */
	put_symbol(GOON);
	put_symbol(STOP_TAG);
	put_symbol(COLON);
	put_symbol(SKIP);
	put_symbol(CLOSE);
}

