(***********************************************************************) (* *) (* Applied Type System *) (* *) (* Hongwei Xi *) (* *) (***********************************************************************) (* ** ATS/Anairiats - Unleashing the Potential of Types! ** ** Copyright (C) 2002-2008 Hongwei Xi, Boston University ** ** All rights reserved ** ** ATS is free software; you can redistribute it and/or modify it under ** the terms of the GNU GENERAL PUBLIC LICENSE (GPL) as published by the ** Free Software Foundation; either version 3, or (at your option) any ** later version. ** ** ATS is distributed in the hope that it will be useful, but WITHOUT ANY ** WARRANTY; without even the implied warranty of MERCHANTABILITY or ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ** for more details. ** ** You should have received a copy of the GNU General Public License ** along with ATS; see the file COPYING. If not, please write to the ** Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA ** 02110-1301, USA. *) (* ****** ****** *) // // Author: Hongwei Xi (hwxi AT cs DOT bu DOT edu) // July 2007 // (* ****** ****** *) (* The lexical analyzer for ATS/Anairiats *) %{ // preamble %{^ #include "ats_grammar_yats.h" %} // end of ... (* ****** ****** *) staload "libats_lex_lexing.sats" (* ****** ****** *) staload CS = "ats_charlst.sats" staload Err = "ats_error.sats" staload Fil = "ats_filename.sats" staload Loc = "ats_location.sats" staload POSMARK = "ats_posmark.sats" staload Syn = "ats_syntax.sats" (* ****** ****** *) staload "ats_lexer.sats" (* ****** ****** *) overload prerr with $Loc.prerr_location (* ****** ****** *) // // HX: it is called when [ats_lexer_lats.dats] is loaded dynamically // val () = ats_lexer_lats_initialize () where { extern fun ats_lexer_lats_initialize (): void = "ats_lexer_lats_initialize" } // end of [val] (* ****** ****** *) dataviewtype poslst = (* list of positions *) | POSLSTnil | POSLSTcons of (position_t, poslst) // end of [poslst] fun poslst_free (ps: poslst): void = case+ ps of | ~POSLSTcons (p, ps) => poslst_free ps | ~POSLSTnil () => () // end of [poslst_free] extern fun keyword_search (name: string): token_t = "atsopt_keyword_search" // implemented in C in [ats_keyword.dats] // end of [keyword_search] // fn MAIN_lexing_error (): token_t = lexing_error () // extern fun CHAR (fstpos: position_t): token_t fn CHAR_lexing_error (fstpos: position_t): token_t = lexing_error () fn CHAR0 (): token_t = CHAR (lexing_fstpos_get ()) (* ****** ****** *) extern fun COMMENT (p: position_t, ps: poslst): void fn COMMENT_lexing_error (p: position_t, ps: poslst): void = let val () = poslst_free (ps) in lexing_error () end // end of [COMMENT_lexing_error] fn COMMENT0 (): void = let val fstpos = lexing_fstpos_get () val fstoff = position_toff fstpos val () = $POSMARK.posmark_insert_comment_beg fstoff in COMMENT (fstpos, POSLSTnil ()) end // end of [COMMENT0] (* ****** ****** *) extern fun COMMENT_CLIKE (p: position_t): void fn COMMENT_CLIKE_lexing_error (p: position_t): void = lexing_error () // end of [COMMENT_CLIKE_lexing_error] fn COMMENT0_CLIKE (): void = let val fstpos = lexing_fstpos_get () val fstoff = position_toff fstpos val () = $POSMARK.posmark_insert_comment_beg fstoff in COMMENT_CLIKE (fstpos) end // end of [COMMENT0_CLIKE] (* ****** ****** *) extern fun COMMENT_LINE (): void fn COMMENT_LINE_lexing_error (): void = lexing_error () extern fun COMMENT_REST (): void fn COMMENT_REST_lexing_error (): void = lexing_error () (* ****** ****** *) extern fun STRING {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): token_t fn STRING_lexing_error {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): token_t = ($CS.charlst_free cs; lexing_error ()) // end of [STRING_lexing_error] fn STRING0 (): token_t = STRING (lexing_fstpos_get (), $CS.CHARLSTnil (), 0) // end of [STRING0] (* ****** ****** *) extern fun EXTCODE {n:nat} (fstpos: position_t, i: int, cs: $CS.charlst_vt n, n: int n): token_t fn EXTCODE_lexing_error {n:nat} (fstpos: position_t, i: int, cs: $CS.charlst_vt n, n: int n): token_t = ($CS.charlst_free cs; lexing_error ()) // end of [EXTCODE_lexing_error] fn EXTCODE0 (i: int): token_t = EXTCODE (lexing_fstpos_get (), i, $CS.CHARLSTnil (), 0) // end of [EXTCODE0] (* ****** ****** *) implement ISNONE = $extval (token_t, "-1") implement ISSTATIC = $extval (token_t, "ISSTATIC") implement ISDYNAMIC = $extval (token_t, "ISDYNAMIC") %{$ ats_bool_type eq_token_token (ats_int_type tok1, ats_int_type tok2) { return (tok1 == tok2 ? ats_true_bool : ats_false_bool) ; } // end of [ats_bool_type eq_token_token] %} // end of ... // macdef TOKEN_eof = $extval (token_t, "TOKEN_eof") // macdef LITERAL_char = $extval (token_t, "LITERAL_char") macdef LITERAL_float = $extval (token_t, "LITERAL_float") macdef LITERAL_floatsp = $extval (token_t, "LITERAL_floatsp") macdef LITERAL_int = $extval (token_t, "LITERAL_int") macdef LITERAL_intsp = $extval (token_t, "LITERAL_intsp") macdef LITERAL_string = $extval (token_t, "LITERAL_string") macdef LITERAL_extcode = $extval (token_t, "LITERAL_extcode") macdef IDENTIFIER_alp = $extval (token_t, "IDENTIFIER_alp") macdef IDENTIFIER_sym = $extval (token_t, "IDENTIFIER_sym") macdef IDENTIFIER_arr = $extval (token_t, "IDENTIFIER_arr") macdef IDENTIFIER_tmp = $extval (token_t, "IDENTIFIER_tmp") macdef IDENTIFIER_dlr = $extval (token_t, "IDENTIFIER_dlr") macdef IDENTIFIER_srp = $extval (token_t, "IDENTIFIER_srp") macdef IDENTIFIER_ext = $extval (token_t, "IDENTIFIER_ext") // macdef ABSPROP = $extval (token_t, "ABSPROP") macdef ABSTYPE = $extval (token_t, "ABSTYPE") macdef ABST0YPE = $extval (token_t, "ABST0YPE") macdef ABSVIEW = $extval (token_t, "ABSVIEW") macdef ABSVIEWTYPE = $extval (token_t, "ABSVIEWTYPE") macdef ABSVIEWT0YPE = $extval (token_t, "ABSVIEWT0YPE") macdef AND = $extval (token_t, "AND") macdef AS = $extval (token_t, "AS") macdef ASSUME = $extval (token_t, "ASSUME") macdef ATLAM = $extval (token_t, "ATLAM") macdef ATLLAM = $extval (token_t, "ATLLAM") macdef ATFIX = $extval (token_t, "ATFIX") macdef BEGIN = $extval (token_t, "BEGIN") macdef BREAK = $extval (token_t, "BREAK") macdef CASE = $extval (token_t, "CASE") macdef CASEMINUS = $extval (token_t, "CASEMINUS") macdef CASEPLUS = $extval (token_t, "CASEPLUS") macdef CLASS = $extval (token_t, "CLASS") macdef CONTINUE = $extval (token_t, "CONTINUE") macdef DATASORT = $extval (token_t, "DATASORT") macdef DATAPARASORT = $extval (token_t, "DATAPARASORT") macdef DATAPROP = $extval (token_t, "DATAPROP") macdef DATATYPE = $extval (token_t, "DATATYPE") macdef DATAVIEW = $extval (token_t, "DATAVIEW") macdef DATAVIEWTYPE = $extval (token_t, "DATAVIEWTYPE") macdef DYNLOAD = $extval (token_t, "DYNLOAD") macdef ELSE = $extval (token_t, "ELSE") macdef END = $extval (token_t, "END") macdef EXCEPTION = $extval (token_t, "EXCEPTION") macdef EXTERN = $extval (token_t, "EXTERN") macdef FN = $extval (token_t, "FN") macdef FNSTAR = $extval (token_t, "FNSTAR") macdef FOR = $extval (token_t, "FOR") macdef FORSTAR = $extval (token_t, "FORSTAR") macdef FUN = $extval (token_t, "FUN") macdef FIX = $extval (token_t, "FIX") macdef IF = $extval (token_t, "IF") macdef IMPLEMENT = $extval (token_t, "IMPLEMENT") macdef IN = $extval (token_t, "IN") macdef INFIX = $extval (token_t, "INFIX") macdef INFIXL = $extval (token_t, "INFIXL") macdef INFIXR = $extval (token_t, "INFIXR") macdef LAM = $extval (token_t, "LAM") macdef LET = $extval (token_t, "LET") macdef LLAM = $extval (token_t, "LLAM") macdef LOCAL = $extval (token_t, "LOCAL") macdef MACDEF = $extval (token_t, "MACDEF") macdef MACRODEF = $extval (token_t, "MACRODEF") (* macdef METHOD = $extval (token_t, "METHOD") macdef METHODSTAR = $extval (token_t, "METHODSTAR") macdef MODCLS = $extval (token_t, "MODCLS") *) macdef MODPROP = $extval (token_t, "MODPROP") macdef MODTYPE = $extval (token_t, "MODTYPE") macdef MODULE = $extval (token_t, "MODULE") macdef NONFIX = $extval (token_t, "NONFIX") macdef OVERLOAD = $extval (token_t, "OVERLOAD") macdef POSTFIX = $extval (token_t, "POSTFIX") macdef PRAXI = $extval (token_t, "PRAXI") macdef PRFIX = $extval (token_t, "PRFIX") macdef PRFN = $extval (token_t, "PRFN") macdef PRFUN = $extval (token_t, "PRFUN") macdef PROPMINUS = $extval (token_t, "PROPMINUS") macdef PROPPLUS = $extval (token_t, "PROPPLUS") macdef PRVAL = $extval (token_t, "PRVAL") (* macdef OBJECT = $extval (token_t, "OBJECT") macdef OBJCLS = $extval (token_t, "OBJCLS") *) macdef OF = $extval (token_t, "OF") macdef OP = $extval (token_t, "OP") macdef PROPDEF = $extval (token_t, "PROPDEF") macdef R0EAD = $extval (token_t, "R0EAD") macdef REC = $extval (token_t, "REC") macdef STAIF = $extval (token_t, "STAIF") macdef SORTDEF = $extval (token_t, "SORTDEF") macdef STA = $extval (token_t, "STA") macdef STADEF = $extval (token_t, "STADEF") macdef STALOAD = $extval (token_t, "STALOAD") macdef STAVAR = $extval (token_t, "STAVAR") (* macdef STRUCT = $extval (token_t, "STRUCT") *) macdef SYMELIM = $extval (token_t, "SYMELIM") macdef SYMINTR = $extval (token_t, "SYMINTR") macdef THEN = $extval (token_t, "THEN") macdef TRY = $extval (token_t, "TRY") macdef TYPEDEF = $extval (token_t, "TYPEDEF") macdef TYPEMINUS = $extval (token_t, "TYPEMINUS") macdef TYPEPLUS = $extval (token_t, "TYPEPLUS") macdef T0YPE = $extval (token_t, "T0YPE") macdef T0YPEMINUS = $extval (token_t, "T0YPEMINUS") macdef T0YPEPLUS = $extval (token_t, "T0YPEPLUS") (* macdef UNION = $extval (token_t, "UNION") *) macdef VAL = $extval (token_t, "VAL") macdef VALMINUS = $extval (token_t, "VALMINUS") macdef VALPLUS = $extval (token_t, "VALPLUS") macdef VAR = $extval (token_t, "VAR") macdef VIEWDEF = $extval (token_t, "VIEWDEF") macdef VIEWMINUS = $extval (token_t, "VIEWMINUS") macdef VIEWPLUS = $extval (token_t, "VIEWPLUS") macdef VIEWTYPEDEF = $extval (token_t, "VIEWTYPEDEF") macdef VIEWTYPEMINUS = $extval (token_t, "VIEWTYPEMINUS") macdef VIEWTYPEPLUS = $extval (token_t, "VIEWTYPEPLUS") macdef VIEWT0YPE = $extval (token_t, "VIEWT0YPE") macdef VIEWT0YPEMINUS = $extval (token_t, "VIEWT0YPEMINUS") macdef VIEWT0YPEPLUS = $extval (token_t, "VIEWT0YPEPLUS") macdef WHEN = $extval (token_t, "WHEN") macdef WHERE = $extval (token_t, "WHERE") macdef WHILE = $extval (token_t, "WHILE") macdef WHILESTAR = $extval (token_t, "WHILESTAR") macdef WITH = $extval (token_t, "WITH") macdef WITHPROP = $extval (token_t, "WITHPROP") macdef WITHTYPE = $extval (token_t, "WITHTYPE") macdef WITHVIEW = $extval (token_t, "WITHVIEW") macdef WITHVIEWTYPE = $extval (token_t, "WITHVIEWTYPE") // $-keywords macdef DLRDECRYPT = $extval (token_t, "DLRDECRYPT") macdef DLRENCRYPT = $extval (token_t, "DLRENCRYPT") macdef DLRDELAY = $extval (token_t, "DLRDELAY") macdef DLREXEC = $extval (token_t, "DLREXEC") macdef DLREXIT = $extval (token_t, "DLREXIT") macdef DLREXTERN = $extval (token_t, "DLREXTERN") macdef DLRFOLD = $extval (token_t, "DLRFOLD") macdef DLRRAISE = $extval (token_t, "DLRRAISE") macdef DLRUNFOLD = $extval (token_t, "DLRUNFOLD") // #-keywords macdef SRPASSERT = $extval (token_t, "SRPASSERT") macdef SRPDEFINE = $extval (token_t, "SRPDEFINE") macdef SRPIF = $extval (token_t, "SRPIF") macdef SRPELSE = $extval (token_t, "SRPELSE") macdef SRPELIF = $extval (token_t, "SRPELIF") macdef SRPENDIF = $extval (token_t, "SRPENDIF") macdef SRPERROR = $extval (token_t, "SRPERROR") macdef SRPINCLUDE = $extval (token_t, "SRPINCLUDE") macdef SRPTHEN = $extval (token_t, "SRPTHEN") macdef SRPWARNING = $extval (token_t, "SRPWARNING") // keywords-@ macdef FOLDAT = $extval (token_t, "FOLDAT") macdef FREEAT = $extval (token_t, "FREEAT") macdef VIEWAT = $extval (token_t, "VIEWAT") // macdef LPAREN = $extval (token_t, "LPAREN") macdef RPAREN = $extval (token_t, "RPAREN") macdef LBRACKET = $extval (token_t, "LBRACKET") macdef RBRACKET = $extval (token_t, "RBRACKET") macdef LBRACE = $extval (token_t, "LBRACE") macdef RBRACE = $extval (token_t, "RBRACE") macdef QUOTELPAREN = $extval (token_t, "QUOTELPAREN") macdef QUOTELBRACKET = $extval (token_t, "QUOTELBRACKET") macdef QUOTELBRACE = $extval (token_t, "QUOTELBRACE") macdef ATLPAREN = $extval (token_t, "ATLPAREN") macdef ATLBRACKET = $extval (token_t, "ATLBRACKET") macdef ATLBRACE = $extval (token_t, "ATLBRACE") macdef HASHLPAREN = $extval (token_t, "HASHLPAREN") macdef HASHLBRACKET = $extval (token_t, "HASHLBRACKET") macdef HASHLBRACE = $extval (token_t, "HASHLBRACE") // macdef AMPERSAND = $extval (token_t, "AMPERSAND") macdef BACKQUOTE = $extval (token_t, "BACKQUOTE") macdef BACKSLASH = $extval (token_t, "BACKSLASH") macdef BANG = $extval (token_t, "BANG") macdef BAR = $extval (token_t, "BAR") macdef COMMA = $extval (token_t, "COMMA") macdef COLON = $extval (token_t, "COLON") macdef SEMICOLON = $extval (token_t, "SEMICOLON") macdef DOT = $extval (token_t, "DOT") macdef EQ = $extval (token_t, "EQ") macdef LT = $extval (token_t, "LT") macdef GT = $extval (token_t, "GT") macdef HASH = $extval (token_t, "HASH") macdef TILDA = $extval (token_t, "TILDA") macdef DOTDOT = $extval (token_t, "DOTDOT") macdef DOTDOTDOT = $extval (token_t, "DOTDOTDOT") macdef EQLT = $extval (token_t, "EQLT") macdef EQGT = $extval (token_t, "EQGT") macdef EQLTGT = $extval (token_t, "EQLTGT") macdef EQSLASHGT = $extval (token_t, "EQSLASHGT") macdef EQGTGT = $extval (token_t, "EQGTGT") macdef EQSLASHGTGT = $extval (token_t, "EQSLASHGTGT") macdef GTLT = $extval (token_t, "GTLT") macdef DOTLT = $extval (token_t, "DOTLT") macdef GTDOT = $extval (token_t, "GTDOT") macdef DOTLTGTDOT = $extval (token_t, "DOTLTGTDOT") macdef MINUSLT = $extval (token_t, "MINUSLT") macdef MINUSGT = $extval (token_t, "MINUSGT") macdef MINUSLTGT = $extval (token_t, "MINUSLTGT") macdef COLONLT = $extval (token_t, "COLONLT") macdef COLONLTGT = $extval (token_t, "COLONLTGT") macdef BACKQUOTELPAREN = $extval (token_t, "BACKQUOTELPAREN") macdef COMMALPAREN = $extval (token_t, "COMMALPAREN") macdef PERCENTLPAREN = $extval (token_t, "PERCENTLPAREN") macdef BACKQUOTELBRACKET = $extval (token_t, "BACKQUOTELBRACKET") macdef COMMALBRACKET = $extval (token_t, "COMMALBRACKET") macdef BACKQUOTELBRACE = $extval (token_t, "BACKQUOTELBRACE") macdef COMMALBRACE = $extval (token_t, "COMMALBRACE") (* ****** ****** *) // implemented in [ats_grammar.yats] extern fun yylval_char_set (_: $Syn.c0har): void = "yylval_char_set" extern fun yylval_extcode_set (_: $Syn.e0xtcode): void = "yylval_extcode_set" extern fun yylval_float_set (_: $Syn.f0loat): void = "yylval_float_set" extern fun yylval_floatsp_set (_: $Syn.f0loatsp): void = "yylval_floatsp_set" extern fun yylval_ide_set (_: $Syn.i0de): void = "yylval_ide_set" extern fun yylval_int_set (_: $Syn.i0nt): void = "yylval_int_set" extern fun yylval_intsp_set (_: $Syn.i0ntsp): void = "yylval_intsp_set" extern fun yylval_string_set (_: $Syn.s0tring): void = "yylval_string_set" extern fun yylval_token_set (_: $Syn.t0kn): void = "yylval_string_set" (* ****** ****** *) fn process_token (): void = let val fstpos = lexing_fstpos_get () val fstoff = position_toff fstpos val lstpos = lexing_lstpos_get () val lstoff = position_toff lstpos val loc = begin $Loc.location_make ($Fil.the_filename_get (), fstpos, lstpos) end // end of [val] in yylval_token_set ($Syn.t0kn_make loc); end // end of [process_token] fn process_keyword (): void = let val fstpos = lexing_fstpos_get () val fstoff = position_toff fstpos val lstpos = lexing_lstpos_get () val lstoff = position_toff lstpos val loc = begin $Loc.location_make ($Fil.the_filename_get (), fstpos, lstpos) end // end of [val] (* val () = begin print "process_keyword:\n"; print "fstpos = "; print fstpos; print_newline (); print "lstpos = "; print lstpos; print_newline (); end // end of [val] *) in $POSMARK.posmark_insert_keyword_beg fstoff; $POSMARK.posmark_insert_keyword_end lstoff; yylval_token_set ($Syn.t0kn_make loc); end // end of [process_keyword] (* ****** ****** *) fn process_comment_open ( p: position_t, ps: poslst ) : void = // let val fstpos = lexing_fstpos_get () in COMMENT (fstpos, POSLSTcons (p, ps)) end // fn process_comment_close (p0: position_t, ps: poslst): void = begin case+ ps of | ~POSLSTcons (p, ps) => COMMENT (p, ps) | ~POSLSTnil () => let val lstpos = lexing_lstpos_get () val lstoff = position_toff lstpos in $POSMARK.posmark_insert_comment_end lstoff end // end of [POSLSTnil] // end of [process_comment_close] // end // end of [process_comment_close] (* ****** ****** *) fn process_comment_clike_open (p1: position_t): void = let val p2 = lexing_fstpos_get () in prerr_string "The comment starting at ["; prerr_position p2; prerr_string "] cannot be embedded in another C-like comment"; prerr_string ", which initiates from ["; prerr_position p1; prerr_string "]."; prerr_newline (); $raise LexingErrorException end // end of [process_comment_clike_open] fn process_comment_clike_close (fstpos: position_t): void = let val lstpos = lexing_lstpos_get () val lstoff = position_toff lstpos in $POSMARK.posmark_insert_comment_end lstoff end // end of [process_comment_clike_close] (* ****** ****** *) fn process_comment_line_open (): void = let val fstpos = lexing_fstpos_get () val fstoff = position_toff fstpos in $POSMARK.posmark_insert_comment_beg fstoff end // end of [process_comment_line_open] fn process_comment_line_close (): void = let val lstpos = lexing_lstpos_get () val lstoff = position_toff lstpos in $POSMARK.posmark_insert_comment_end lstoff end // end of [process_comment_line_close] (* ****** ****** *) fn process_comment_rest_open (): void = let val fstpos = lexing_fstpos_get () val fstoff = position_toff fstpos in $POSMARK.posmark_insert_comment_beg fstoff end // end of [process_comment_rest_open] fn process_comment_rest_close (): void = let val lstpos = lexing_lstpos_get () val lstoff = position_toff lstpos in $POSMARK.posmark_insert_comment_end lstoff end // end of [process_comment_rest_close] (* ****** ****** *) fn location_get (): $Loc.location_t = $Loc.location_make ($Fil.the_filename_get (), lexing_fstpos_get (), lexing_lstpos_get ()) // end of [location_get] fn location_get_pos (fstpos: position_t): $Loc.location_t = $Loc.location_make ($Fil.the_filename_get (), fstpos, lexing_lstpos_get ()) // end of [location_get_pos] fn tokenize_identifier_alp (): token_t = let val str = lexeme_string () (* val () = begin print "tokenize_identifier_alp: str = "; print str; print_newline () end // end of [val] *) val tok = keyword_search str in if token_is_valid tok then let val () = process_keyword () in tok end else let // not a keyword val loc = location_get () in yylval_ide_set ($Syn.i0de_make (loc, str)); IDENTIFIER_alp end // end of [if] end // end of [tokenize_identifier_alp] fn tokenize_identifier_sym (): token_t = let val str = lexeme_string () (* val () = begin print "tokenize_identifier_sym: str = "; print str; print_newline () end // end of [val] *) val tok = keyword_search str in if token_is_valid tok then let val () = process_keyword () in tok end else let // not a keyword val loc = location_get () in yylval_ide_set ($Syn.i0de_make (loc, str)); IDENTIFIER_sym end // end of [if] end // end of [tokenize_identifier_sym] (* ****** ****** *) fn prefix_identifier1 (s0: string): string = let val s0 = string1_of_string s0; val n0 = string_length s0 in if n0 > 0 then let val sbp = string_make_substring (s0, 0, n0-1) in string1_of_strbuf sbp end else s0 end // end of [prefix_identifier1] (* ****** ****** *) fn tokenize_identifier_arr (): token_t = let // array identifier val str = prefix_identifier1 (lexeme_string ()) val loc = location_get () in yylval_ide_set ($Syn.i0de_make (loc, str)); IDENTIFIER_arr end // end of [tokenize_identifier_arr] fn tokenize_identifier_tmp (): token_t = let // template identifier val str = prefix_identifier1 (lexeme_string ()) val loc = location_get () in yylval_ide_set ($Syn.i0de_make (loc, str)); IDENTIFIER_tmp end // end of [tokenize_identifier_tmp] fn tokenize_identifier_ext (): token_t = let // template identifier val str = prefix_identifier1 (lexeme_string ()) val loc = location_get () in yylval_ide_set ($Syn.i0de_make (loc, str)); IDENTIFIER_ext end // end of [tokenize_identifier_ext] (* ****** ****** *) fn tokenize_identifier_dlr (): token_t = let // $-identifier val str = lexeme_string () val tok = keyword_search str in if token_is_valid tok then (process_keyword (); tok) else let val loc = location_get () in yylval_ide_set ($Syn.i0de_make (loc, str)); IDENTIFIER_dlr end (* end of [if] *) end // end of [tokenize_identifier_dlr] fn tokenize_identifier_srp (): token_t = let // #-identifier val str = lexeme_string () val tok = keyword_search str in if token_is_valid tok then (process_keyword (); tok) else let val loc = location_get () in yylval_ide_set ($Syn.i0de_make (loc, str)); IDENTIFIER_srp end (* end of [if] *) end // end of [tokenize_identifier_srp] (* ****** ****** *) fn process_char (fstpos: position_t): void = let val chr = lexeme_get 0; val loc = location_get_pos (fstpos) in yylval_char_set ($Syn.c0har_make (loc, chr)) end // end of [process_char] fn char_for_escaped (c: char): char = begin case+ c of | 'a' => '\007' (* alert *) | 'b' => '\010' (* backspace *) | 'f' => '\014' (* line feed *) | 't' => '\011' (* horizontal tab *) | 'n' => '\012' (* newline *) | 'r' => '\015' (* carriage return *) | 'v' => '\013' (* vertical tab *) | _ => c end // end of [char_for_escaped] fn process_char_escaped (fstpos: position_t): void = let val chr = char_for_escaped (lexeme_get 1) val loc = location_get_pos (fstpos) in yylval_char_set ($Syn.c0har_make (loc, chr)) end // end of [process_char_escaped] fn char_for_oct_code_1 (i: int): char = char_of_int (lexeme_get i - '0') fn process_char_oct_1 (fstpos: position_t): void = let val chr = char_for_oct_code_1 (1) val loc = location_get_pos (fstpos) in yylval_char_set ($Syn.c0har_make (loc, chr)) end // end of [process_char_oct_1] fn char_for_oct_code_2 (i: int): char = let val (pf_lxbf | ptr_lxbf) = lexing_lexbuf_get () val d0 = lexeme_get_lexbuf (!ptr_lxbf, i) - '0' val d1 = lexeme_get_lexbuf (!ptr_lxbf, i+1) - '0' val () = lexing_lexbuf_set (pf_lxbf | ptr_lxbf) in char_of_int ((d0 << 3) + d1) end // end of [char_for_oct_code_2] fn process_char_oct_2 (fstpos: position_t): void = let val chr = char_for_oct_code_2 (1) val loc = location_get_pos (fstpos) in yylval_char_set ($Syn.c0har_make (loc, chr)) end // end of [process_char_oct_2] fn char_for_oct_code_3 (i: int): char = let val (pf_lxbf | ptr_lxbf) = lexing_lexbuf_get () val d0 = lexeme_get_lexbuf (!ptr_lxbf, i) - '0' val d1 = lexeme_get_lexbuf (!ptr_lxbf, i+1) - '0' val d2 = lexeme_get_lexbuf (!ptr_lxbf, i+2) - '0' val () = lexing_lexbuf_set (pf_lxbf | ptr_lxbf) in char_of_int ((d0 << 6) + (d1 << 3) + d2) end // end of [char_for_oct_code_3] fn process_char_oct_3 (fstpos: position_t): void = let val chr = char_for_oct_code_3 (1) val loc = location_get_pos (fstpos) in yylval_char_set ($Syn.c0har_make (loc, chr)) end // end of [char_for_oct_code_3] (* ****** ****** *) fn int_of_xdigit (c: char): int = if char_isdigit c then c - '0' else begin if char_isupper c then 10 + (c - 'A') else 10 + (c - 'a') end (* end of [if] *) (* end of [int_of_xdigit] *) fn char_for_hex_code_1 (i: int): char = let val d0 = int_of_xdigit (lexeme_get i) in char_of_int d0 end // end of [char_for_hex_code_1] fn process_char_hex_1 (fstpos: position_t): void = let val chr = char_for_hex_code_1 (2) val loc = location_get_pos (fstpos) in yylval_char_set ($Syn.c0har_make (loc, chr)) end // end of [process_char_hex_1] fn char_for_hex_code_2 (i: int): char = let val (pf_lxbf | ptr_lxbf) = lexing_lexbuf_get () val d0 = int_of_xdigit (lexeme_get_lexbuf (!ptr_lxbf, i)) val d1 = int_of_xdigit (lexeme_get_lexbuf (!ptr_lxbf, i+1)) val () = lexing_lexbuf_set (pf_lxbf | ptr_lxbf) in char_of_int ((d0 << 4) + d1) end // end of [char_for_hex_code_2] fn process_char_hex_2 (fstpos: position_t): void = let val chr = char_for_hex_code_2 (2) val loc = location_get_pos (fstpos) in yylval_char_set ($Syn.c0har_make (loc, chr)) end // end of [process_char_hex_2] (* ****** ****** *) fn process_literal_float (): void = let val loc = location_get (); val str = lexeme_string () in yylval_float_set ($Syn.f0loat_make (loc, str)) end // end of [process_literal_float] fn process_literal_floatsp (): void = let val str = lexeme_string (); val loc = location_get () in yylval_floatsp_set ($Syn.f0loatsp_make (loc, str)) end // end of [process_literal_floatsp] (* ****** ****** *) %{$ ats_bool_type ats_lexer_literal_int_check // HX: checking octal numbers (ats_ptr_type s0, ats_ptr_type err) { char c, *s = s0 ; // c = *s ; if (c != '0') return ats_true_bool ; ++s ; c = *s ; while (1) { if (isdigit (c)) { if (c >= '8') { *((char*)err) = c; return ats_false_bool ; } } else { return ats_true_bool ; } // end of [if] ++s ; c = *s ; } // end of [while] // return ats_true_bool ; } /* end of [ats_lexer_literal_int_check] */ %} // end of [...] extern fun process_literal_int_check // HX: checking octal numbers (_: string, err: &char): bool = "ats_lexer_literal_int_check" // end of [process_literal_int_check] fn process_literal_int (): void = let val str = lexeme_string () (* val () = printf ("process_literal_int: str = %s\n", @(str)) *) val loc = location_get () var err: char = '\000'; val () = if process_literal_int_check (str, err) then () else begin prerr loc; prerr ": the digit ["; prerr err; prerr "] is illegal in the octal constant ["; prerr str; prerr "]."; prerr_newline (); $Err.abort {void} () end // end of [if] in yylval_int_set ($Syn.i0nt_make (loc, str)) end // end of [process_literal_int] extern fun process_literal_intsp_check // HX: checking octal numbers (_: string, err: &char): bool = "ats_lexer_literal_int_check" // end of [process_literal_intsp_check] fn process_literal_intsp (): void = let val str = lexeme_string (); val loc = location_get () var err: char = '\000'; val () = if process_literal_intsp_check (str, err) then () else begin prerr loc; prerr ": the digit ["; prerr err; prerr "] is illegal in the octal constant ["; prerr str; prerr "]."; prerr_newline (); $Err.abort {void} () end // end of [if] in yylval_intsp_set ($Syn.i0ntsp_make (loc, str)) end // end of [process_literal_intsp] (* ****** ****** *) fn STRING_char {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): token_t = let val c = lexeme_get 0 in STRING (fstpos, $CS.CHARLSTcons (c, cs), n+1) end fn STRING_char_escaped {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): token_t = let val c = char_for_escaped (lexeme_get 1) in STRING (fstpos, $CS.CHARLSTcons (c, cs), n+1) end fn STRING_char_oct_1 {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): token_t = let val c = char_for_oct_code_1 (1) in STRING (fstpos, $CS.CHARLSTcons (c, cs), n+1) end fn STRING_char_oct_2 {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): token_t = let val c = char_for_oct_code_2 (1) in STRING (fstpos, $CS.CHARLSTcons (c, cs), n+1) end fn STRING_char_oct_3 {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): token_t = let val c = char_for_oct_code_3 (1) in STRING (fstpos, $CS.CHARLSTcons (c, cs), n+1) end fn STRING_char_hex_1 {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): token_t = let val c = char_for_hex_code_1 (2) in STRING (fstpos, $CS.CHARLSTcons (c, cs), n+1) end fn STRING_char_hex_2 {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): token_t = let val c = char_for_hex_code_2 (2) in STRING (fstpos, $CS.CHARLSTcons (c, cs), n+1) end fn process_literal_string {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): void = let val str = $CS.string_make_charlst_rev_int (cs, n) val loc = location_get_pos (fstpos) in yylval_string_set ($Syn.s0tring_make (loc, str, n)) end // end of [process_literal_string] (* ****** ****** *) fn EXTCODE_char {n:nat} (fstpos: position_t, i: int, cs: $CS.charlst_vt n, n: int n): token_t = let val c = lexeme_get 0 in EXTCODE (fstpos, i, $CS.CHARLSTcons (c, cs), n+1) end fn process_literal_extcode {n:nat} (fstpos: position_t, i: int, cs: $CS.charlst_vt n, n: int n): void = let val str = $CS.string_make_charlst_rev_int (cs, n) val loc = location_get_pos (fstpos) in yylval_extcode_set ($Syn.e0xtcode_make (loc, i, str)) end (* ****** ****** *) (* // declared in [lexing.sats] exception LexingErrorException *) fn process_illegal_token {a:viewt@ype} (): a = begin $Fil.atsopt_filename_prerr (); prerr_string ": LEXING ERROR"; prerr_string ": illegal character ["; prerr_char (lexeme_get 0); prerr_string "] at position ["; lexing_curpos_prerr (); prerr_string "]."; prerr_newline (); $raise LexingErrorException end // end of [process_illegal_token] fn process_illegal_char {a:viewt@ype} (fstpos: position_t): a = begin $Fil.atsopt_filename_prerr (); prerr_string ": LEXING ERROR"; prerr_string ": illegal character at ["; prerr_position fstpos; prerr_string "] is unclosed!\n"; $raise LexingErrorException end // end of [process_illegal_char] // fn process_unclosed_comment {a:viewt@ype} (p: position_t, ps: poslst): a = let val () = poslst_free ps in prerr_string "The comment starting at ["; prerr_position p; prerr_string "] is unclosed!\n"; $raise LexingErrorException end // end of [process_unclosed_comment] fn process_unclosed_comment_clike {a:viewt@ype} (p: position_t): a = begin prerr_string "The comment starting at ["; prerr_position p; prerr_string "] is unclosed!\n"; $raise LexingErrorException end // end of [process_unclosed_comment_clike] // fn process_unclosed_string {a:viewt@ype} {n:nat} (fstpos: position_t, cs: $CS.charlst_vt n, n: int n): a = begin $CS.charlst_free (cs); prerr_string "The string starting at ["; prerr_position fstpos; prerr_string "] is unclosed!\n"; $raise LexingErrorException end // end of [process_unclosed_string] fn process_unclosed_extcode {a:viewt@ype} {n:nat} (fstpos: position_t, i: int, cs: $CS.charlst_vt n, n: int n) : a = begin $CS.charlst_free (cs); prerr_string "The code starting at ["; prerr_position fstpos; prerr_string "] is unclosed!\n"; $raise LexingErrorException end // end of [process_unclosed_extcode] (* ****** ****** *) (* end of preamble *) %} (* ****** ****** *) // definition of constant regular expressions xX = ['x' 'X'] digit_dec = [ '0'-'9' ] digit_oct = [ '0'-'7' ] digit_hex = [ '0'-'9' 'a'-'f' 'A'-'F'] digit = $digit_dec symbolic1 = [ '%' '&' '+' '-' '.' '/' ':' '=' '@' '~' '`' '^' '|' '*' '!' '$' '#' '?' ] symbolic2 = [ '%' '&' '+' '-' '.' '/' ':' '=' '@' '~' '`' '^' '|' '*' '<' '>' ] identfst = [ 'A'-'Z' 'a'-'z' '_' ] identrst = [ '0'-'9' 'A'-'Z' 'a'-'z' '_' '\'' ] identifier_alp = $identfst $identrst* identifier_sym = ($symbolic1+ | $symbolic2+) char_escaped = [ 'n' 't' 'v' 'b' 'r' 'f' 'a' '\\' '?' '\'' '"' '(' '[' '{' ] newline = '\n' (* \f: \014; \t: 011; \r: \015 *) blanks = [ '\f' '\r' '\n' '\t' ' ' ] + uint_dec = $digit_dec+ (* including octal representation *) uint_hex = $digit_hex+ uint = $uint_dec | '0' $xX $uint_hex IS = [ 'l' 'L' 'u' 'U' ] literal_int = ['~']? $uint literal_intsp = $literal_int ($IS)+ exp = ['e' 'E'] ['+' '-']? $digit+ ufloat0 = $digit+ $exp ufloat1 = $digit+ '.' $digit* ($exp)? ufloat2 = $digit+ '.' $digit+ ($exp)? // [$digit+] should not be replaced with [$digit*]! FS = [ 'd' 'D' 'f' 'F' 'l' 'L' ] ufloat = $ufloat0 | $ufloat1 | $ufloat2 literal_float = ['~']? $ufloat literal_floatsp = $literal_float $FS %% MAIN () = | $blanks { MAIN () } | "case-" { process_keyword (); CASEMINUS } | "case+" { process_keyword (); CASEPLUS } | "fn*" { process_keyword (); FNSTAR } | "for*" { process_keyword (); FORSTAR } (* | "method*" { process_keyword (); METHODSTAR } *) | "prop-" { process_token (); PROPMINUS } | "prop+" { process_token (); PROPPLUS } | "type-" { process_token (); TYPEMINUS } | "type+" { process_token (); TYPEPLUS } | "val-" { process_keyword (); VALMINUS } | "val+" { process_keyword (); VALPLUS } | "view-" { process_token (); VIEWMINUS } | "view+" { process_token (); VIEWPLUS } | "viewtype-" { process_token (); VIEWTYPEMINUS } | "viewtype+" { process_token (); VIEWTYPEPLUS } | "while*" { process_keyword (); WHILESTAR } | "fold@" { process_token (); FOLDAT } | "free@" { process_token (); FREEAT } | "view@" { process_token (); VIEWAT } | "@lam" { process_token (); ATLAM } | "@llam" { process_token (); ATLLAM } | "@fix" { process_token (); ATFIX } | "r@ead" { process_token (); R0EAD } | "t@ype" { process_token (); T0YPE } | "t@ype+" { process_token (); T0YPEPLUS } | "t@ype-" { process_token (); T0YPEMINUS } | "viewt@ype" { process_token (); VIEWT0YPE } | "viewt@ype+" { process_token (); VIEWT0YPEPLUS } | "viewt@ype-" { process_token (); VIEWT0YPEMINUS } | "abst@ype" { process_keyword (); ABST0YPE } | "absviewt@ype" { process_keyword (); ABSVIEWT0YPE } | "////" { process_comment_rest_open (); COMMENT_REST (); TOKEN_eof } | "//" { process_comment_line_open (); COMMENT_LINE (); MAIN () } | "/*" { COMMENT0_CLIKE (); MAIN () } | "(*" { COMMENT0 (); MAIN () } | $identifier_alp { tokenize_identifier_alp () } | $identifier_alp '\[' { tokenize_identifier_arr () } | $identifier_alp '\<' { tokenize_identifier_tmp () } | $identifier_alp '!' { tokenize_identifier_ext () } | '$' $identifier_alp { tokenize_identifier_dlr () } | '#' $identifier_alp { tokenize_identifier_srp () } | $identifier_sym { tokenize_identifier_sym () } | $literal_int { process_literal_int (); LITERAL_int } | $literal_intsp { process_literal_intsp (); LITERAL_intsp } | $literal_float { process_literal_float (); LITERAL_float } | $literal_floatsp { process_literal_floatsp (); LITERAL_floatsp } | '(' { process_keyword (); LPAREN } | ')' { process_keyword (); RPAREN } | "[" { process_keyword (); LBRACKET } | "]" { process_keyword (); RBRACKET } | "{" { process_keyword (); LBRACE } | "}" { process_keyword (); RBRACE } | "'(" { process_keyword (); QUOTELPAREN } | "'[" { process_keyword (); QUOTELBRACKET } | "'{" { process_keyword (); QUOTELBRACE } | "@(" { process_keyword (); ATLPAREN } | "@[" { process_keyword (); ATLBRACKET } | "@{" { process_keyword (); ATLBRACE } | "#{" { process_keyword (); HASHLPAREN } | "#[" { process_keyword (); HASHLBRACKET } | "#{" { process_keyword (); HASHLBRACE } | "," { process_keyword (); COMMA } | ";" { process_keyword (); SEMICOLON } | "\\" { process_keyword (); BACKSLASH } | "`(" { process_keyword (); BACKQUOTELPAREN } (* macro syntax *) | ",(" { process_keyword (); COMMALPAREN } (* macro syntax *) | "%(" { process_keyword (); PERCENTLPAREN } (* macro syntax *) (* | "`[" { process_keyword (); BACKQUOTELBRACKET } (* meta-programming syntax *) | ",[" { process_keyword (); COMMALBRACKET } (* meta-programming syntax *) | "`{" { process_keyword (); BACKQUOTELBRACE } (* distributed meta-programming syntax *) | ",{" { process_keyword (); COMMALBRACE } (* distributed meta-programming syntax *) *) | '\"' { STRING0 () } | '\'' { CHAR0 () } | "%{^" { EXTCODE0 (0) } | "%{" { EXTCODE0 (1) } | "%{$" { EXTCODE0 (2) } | "%{#" { EXTCODE0 (~1) } | $EOF { TOKEN_eof } | [^] { process_illegal_token () } // end of [MAIN] COMMENT (p, ps) = | "(*" { process_comment_open (p, ps) } | "*)" { process_comment_close (p, ps) } | [^ '\(' '*' ]* { COMMENT (p, ps) } | '\(' { COMMENT (p, ps) } | '*' { COMMENT (p, ps) } | $EOF { process_unclosed_comment (p, ps) } // [COMMENT] COMMENT_CLIKE (p) = | "/*" { process_comment_clike_open (p) } | "*/" { process_comment_clike_close (p) } | [^ '/' '*' ]* { COMMENT_CLIKE (p) } | '/' { COMMENT_CLIKE (p) } | '*' { COMMENT_CLIKE (p) } | $EOF { process_unclosed_comment_clike (p) } // [COMMENT_CLIKE] COMMENT_LINE () = | [^ '\n']* $newline { process_comment_line_close () } | [^ '\n']* $EOF { process_comment_line_close () } // [COMMENT_LINE] COMMENT_REST () = | [^ '\n']* $newline { COMMENT_REST () } | [^ '\n']* $EOF { process_comment_rest_close () } // [COMMENT_REST] CHAR (pos) = // [pos]: starting position of a character | [^ '\\'] '\'' { process_char (pos); LITERAL_char } | '\\' $char_escaped '\'' { process_char_escaped (pos); LITERAL_char } | '\\' $digit_oct $digit_oct $digit_oct '\'' { process_char_oct_3 (pos); LITERAL_char } | '\\' $digit_oct $digit_oct '\'' { process_char_oct_2 (pos); LITERAL_char } | '\\' $digit_oct '\'' { process_char_oct_1 (pos); LITERAL_char } | '\\' $xX $digit_hex $digit_hex '\'' { process_char_hex_2 (pos); LITERAL_char } | '\\' $xX $digit_hex '\'' { process_char_hex_1 (pos); LITERAL_char } | [^] { process_illegal_char (pos) } // [CHAR] STRING (pos, cs, n) = // [pos]: the starting position | '\"' { process_literal_string (pos, cs, n); LITERAL_string } | '\\' $newline { STRING (pos, cs, n) } | '\\' $char_escaped { STRING_char_escaped (pos, cs, n) } | '\\' $digit_oct $digit_oct $digit_oct { STRING_char_oct_3 (pos, cs, n) } | '\\' $digit_oct $digit_oct { STRING_char_oct_2 (pos, cs, n) } | '\\' $digit_oct { STRING_char_oct_1 (pos, cs, n) } | '\\' $xX $digit_hex $digit_hex { STRING_char_hex_2 (pos, cs, n) } | '\\' $xX $digit_hex { STRING_char_hex_1 (pos, cs, n) } | $EOF { process_unclosed_string (pos, cs, n) } | [^] { STRING_char (pos, cs, n) } // end of [STRING] EXTCODE (pos, i, cs, n) = // [pos]: the starting position | "%}" { process_literal_extcode (pos, i, cs, n); LITERAL_extcode } | $EOF { process_unclosed_extcode (pos, i, cs, n) } | [^] { EXTCODE_char (pos, i, cs, n) } // end of [EXTCODE] %% %{ ats_void_type ats_lexer_lats_initialize () { // currently empty return ; } // ats_bool_type token_is_valid (ats_int_type tok) { return (tok >= 0 ? ats_true_bool : ats_false_bool) ; } %} // end of [%{] (* ****** ****** *) (* end of [ats_lexer.lats] *)