(* lexer.lex
   The lexical analyzer: lexer.ml is generated automatically
   from lexer.mll.
   
   The only modification commonly needed here is adding new keywords to the 
   list of reserved words at the top.  
*)

structure Tokens = Tokens

type pos = int
type svalue = Tokens.svalue
type ('a,'b) token = ('a,'b) Tokens.token
type lexresult= (svalue,pos) token

fun error (startpos,endpos) msg = 
    TextIO.output (TextIO.stdOut,
		   String.concat["Lexer Error (", Int.toString startpos, ",", Int.toString endpos, ") ",msg,"\n"])

fun inc (ri as ref i) = (ri := i+1)
fun dec (ri as ref i) = (ri := i-1)

val lineno   = ref 1
fun newline () = (inc lineno)

val commentDepth = ref 0

val stringStart = ref 0
val charlist : string list ref = ref nil
fun resetString pos = (stringStart := pos; charlist := nil)
fun addString (charlist,s:string) = charlist := s :: (!charlist)
fun addChar (charlist, c:char) = addString(charlist, String.str c)
fun makeString () = (concat(rev(!charlist)) before charlist := nil)

fun eof () =
    let val pos = Int.max(!stringStart+2, 0)
     in if !commentDepth > 0 then
	  error (0,0) "unclosed comment"
        else if !charlist <> [] then
          error (0,0) "unclosed string"
	else ();
	Tokens.EOF(0,0)
    end	

%%
%s COMMENT STRING;
%header (functor SimpleBoolLexFun(structure Tokens: SimpleBool_TOKENS));
uc=[A-Z];
lc=[a-z];
id1=[A-Za-z_];
id2=[A-Za-z0-9_'];
alpha=[A-Za-z];
digit=[0-9];
num=[0-9]+;
eol=("\013\010"|"\010"|"\013");
ws=[\ \t];
%%
<INITIAL>{eol}    => (newline(); lex());
<INITIAL>{ws}+    => (lex());

<INITIAL>"if"     => (Tokens.IF(yypos,yypos+2));
<INITIAL>"then"   => (Tokens.THEN(yypos,yypos+4));
<INITIAL>"else"   => (Tokens.ELSE(yypos,yypos+4));
<INITIAL>"true"   => (Tokens.TRUE(yypos,yypos+4));
<INITIAL>"false"  => (Tokens.FALSE(yypos,yypos+5));
<INITIAL>"Bool"   => (Tokens.BOOL(yypos,yypos+6));
<INITIAL>"lambda" => (Tokens.LAMBDA(yypos,yypos+6));
<INITIAL>"bind"   => (Tokens.BIND(yypos,yypos+4));


<INITIAL>"_" => (Tokens.USCORE(yypos,yypos+1));
<INITIAL>"'" => (Tokens.APOSTROPHE(yypos,yypos+1));
<INITIAL>"#" => (Tokens.HASH(yypos,yypos+1));
<INITIAL>"$" => (Tokens.TRIANGLE(yypos,yypos+1));
<INITIAL>"*" => (Tokens.STAR(yypos,yypos+1));
<INITIAL>"|" => (Tokens.VBAR(yypos,yypos+1));
<INITIAL>"." => (Tokens.DOT(yypos,yypos+1));
<INITIAL>";" => (Tokens.SEMI(yypos,yypos+1));
<INITIAL>"," => (Tokens.COMMA(yypos,yypos+1));
<INITIAL>"/" => (Tokens.SLASH(yypos,yypos+1));
<INITIAL>"=" => (Tokens.EQ(yypos,yypos+1));
<INITIAL>"==" => (Tokens.EQEQ(yypos,yypos+2));
<INITIAL>"[" => (Tokens.LSQUARE(yypos,yypos+1));
<INITIAL>"<" => (Tokens.LT(yypos,yypos+1));
<INITIAL>"{" => (Tokens.LCURLY(yypos,yypos+1));
<INITIAL>"(" => (Tokens.LPAREN(yypos,yypos+1));
<INITIAL>"{|" => (Tokens.LCURLYBAR(yypos,yypos+2));
<INITIAL>"[|" => (Tokens.LSQUAREBAR(yypos,yypos+2));
<INITIAL>"}" => (Tokens.RCURLY(yypos,yypos+1));
<INITIAL>")" => (Tokens.RPAREN(yypos,yypos+1));
<INITIAL>"]" => (Tokens.RSQUARE(yypos,yypos+1));
<INITIAL>">" => (Tokens.GT(yypos,yypos+1));
<INITIAL>"|}" => (Tokens.BARRCURLY(yypos,yypos+2));
<INITIAL>"|>" => (Tokens.BARGT(yypos,yypos+2));
<INITIAL>"|]" => (Tokens.BARRSQUARE(yypos,yypos+2));

<INITIAL>":=" => (Tokens.COLONEQ(yypos,yypos+2));
<INITIAL>"->" => (Tokens.ARROW(yypos,yypos+2));
<INITIAL>"<-" => (Tokens.LEFTARROW(yypos,yypos+2));
<INITIAL>"=>" => (Tokens.DARROW(yypos,yypos+2));
<INITIAL>"<=" => (Tokens.LEFTDARROW(yypos,yypos+2));
<INITIAL>"==>" => (Tokens.DDARROW(yypos,yypos+3));

<INITIAL>"/*"   => (YYBEGIN COMMENT; commentDepth := 1; lex());
<INITIAL>"*/"   => (error (yypos,yypos+2) "Unmatched end of comment"; lex());

<INITIAL>{num}"."{num} => (Tokens.FLOATV(valOf(Real.fromString yytext),yypos,yypos+size yytext));
<INITIAL>{num} => (Tokens.INTV(valOf(Int.fromString(yytext)),yypos,yypos+size yytext));

<INITIAL>{uc}{id2}* => (Tokens.UCID(yytext,yypos,yypos+size yytext));
<INITIAL>{lc}{id2}* => (Tokens.LCID(yytext,yypos,yypos+size yytext));

<INITIAL>[~%\\+&:@`-]+ => (if yytext = ":" then Tokens.COLON(yypos,yypos+1)
	          else if yytext = "::" then Tokens.COLONCOLON(yypos,yypos+2)
		  else Tokens.LCID(yytext,yypos,yypos+size yytext));

<INITIAL>[!?^] => (Tokens.LCID(yytext,yypos,yypos+1));

<INITIAL>\"   => (resetString(yypos); YYBEGIN STRING; lex());
<INITIAL>.    => (error (yypos,yypos+1) "Illegal character"; lex());

<COMMENT>"/*"	=> (inc commentDepth; lex());
<COMMENT>{eol}	=> (newline(); lex());
<COMMENT>"*/"   => (dec commentDepth; if !commentDepth=0 then YYBEGIN INITIAL else (); lex());
<COMMENT>.	=> (lex());

<STRING>\"      => (YYBEGIN INITIAL; Tokens.STRINGV(makeString(),!stringStart,yypos+1));
<STRING>{eol}   => (error (!stringStart, yypos) "unclosed string"; Tokens.EOF(0,0));
<STRING>\\a	=> (addString(charlist, "\007"); lex());
<STRING>\\b	=> (addString(charlist, "\008"); lex());
<STRING>\\f	=> (addString(charlist, "\012"); lex());
<STRING>\\n	=> (addString(charlist, "\010"); lex());
<STRING>\\r	=> (addString(charlist, "\013"); lex());
<STRING>\\t	=> (addString(charlist, "\009"); lex());
<STRING>\\v	=> (addString(charlist, "\011"); lex());
<STRING>\\\\	=> (addString(charlist, "\\"); lex());
<STRING>\\\"    => (addString(charlist, "\""); lex());
<STRING>\\\^.	=>
	(error (yypos,yypos+2) "illegal control escape; must be one of \
	  \@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
	 lex());
<STRING>\\[0-9]{3}	=>
 (let val x = Char.ord(String.sub(yytext,1))*100
	     +Char.ord(String.sub(yytext,2))*10
	     +Char.ord(String.sub(yytext,3))
	     -((Char.ord #"0")*111)
  in (if x>255
      then error (yypos,yypos+4) "illegal ascii escape"
      else addChar(charlist, Char.chr x);
      lex())
  end);
<STRING>\\	=> (error (yypos,yypos+1) "illegal string escape"; lex());
<STRING>[\000-\031]  => (error (yypos,yypos+1) "illegal non-printing character in string";
                         lex());
<STRING>.  => (addString(charlist,yytext); lex());

