use std::char;
use std::error::Error;
use std::fmt;
use std::io::prelude::*;
use std::iter::Peekable;
use std::str::Chars;

/// Errors the lexer can report. They are delivered in-band, wrapped in
/// [`Token::LexErr`], rather than through a `Result`.
#[derive(Debug)]
pub enum LexError {
    UnexpectedChar,
    MalformedEscapeSequence,
    MalformedNumber,
}

impl LexError {
    /// Human-readable text shared by `Error::description` and `Display`.
    fn message(&self) -> &'static str {
        match *self {
            LexError::UnexpectedChar => "Unexpected character in input",
            LexError::MalformedEscapeSequence => "Unexpected values in escape sequence",
            LexError::MalformedNumber => "Unexpected characters in number",
        }
    }
}

impl Error for LexError {
    fn description(&self) -> &str {
        self.message()
    }
}

impl fmt::Display for LexError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.message())
    }
}

/// Errors produced while turning a token stream into statements and
/// function definitions.
#[derive(Debug)]
pub enum ParseError {
    BadInput,
    InputPastEndOfFile,
    UnknownOperator,
    MissingRParen,
    MissingLCurly,
    MissingRCurly,
    MalformedCallExpr,
    VarExpectsIdentifier,
    ExpectedMethodInvocation,
    FnMissingName,
    FnMissingParams,
}

impl ParseError {
    /// Human-readable text shared by `Error::description` and `Display`.
    fn message(&self) -> &'static str {
        match *self {
            ParseError::BadInput => "Unparseable characters in the input stream",
            ParseError::InputPastEndOfFile => "Input past end of file",
            ParseError::UnknownOperator => "Unknown operator",
            ParseError::MissingRParen => "Expected ')'",
            ParseError::MissingLCurly => "Expected '{'",
            ParseError::MissingRCurly => "Expected '}'",
            ParseError::MalformedCallExpr => "Call contains bad expression",
            ParseError::VarExpectsIdentifier => "'var' expects the name of a variable",
            ParseError::ExpectedMethodInvocation => "Expected method call after '.'",
            ParseError::FnMissingName => "Function declaration is missing name",
            ParseError::FnMissingParams => "Function declaration is missing parameters",
        }
    }
}

impl Error for ParseError {
    fn description(&self) -> &str {
        self.message()
    }
}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.message())
    }
}

/// A parsed `fn name(params) { body }` definition.
#[derive(Debug, Clone)]
pub struct FnDef {
    pub name: String,
    pub params: Vec<String>,
    /// Always a `Stmt::Block`, because it is produced by the block parser.
    pub body: Box<Stmt>,
}

/// Statement nodes of the syntax tree.
#[derive(Debug, Clone)]
pub enum Stmt {
    /// `if guard { body }`
    If(Box<Expr>, Box<Stmt>),
    /// `if guard { body } else { else_body }`
    IfElse(Box<Expr>, Box<Stmt>, Box<Stmt>),
    /// `while guard { body }`
    While(Box<Expr>, Box<Stmt>),
    /// `var name` with an optional `= initializer`
    Var(String, Option<Box<Expr>>),
    /// `{ stmt; stmt; ... }`
    Block(Box<Vec<Stmt>>),
    Expr(Box<Expr>),
    Break,
    Return,
    ReturnWithVal(Box<Expr>),
}

/// Expression nodes of the syntax tree. Binary operators are represented as
/// `FnCall` nodes named after the operator (for example `"+"` or `"=="`).
#[derive(Debug, Clone)]
pub enum Expr {
    IntConst(i32),
    Identifier(String),
    StringConst(String),
    /// Function name and arguments.
    FnCall(String, Box<Vec<Expr>>),
    /// Receiver name, method name and arguments.
    MethodCall(String, String, Box<Vec<Expr>>),
    /// Assignment target and assigned value.
    Assignment(Box<Expr>, Box<Expr>),
    True,
    False,
}

/// Lexical tokens produced by [`TokenIterator`].
#[derive(Debug)]
pub enum Token {
    IntConst(i32),
    Identifier(String),
    StringConst(String),
    LCurly,
    RCurly,
    LParen,
    RParen,
    LSquare,
    RSquare,
    Plus,
    Minus,
    Multiply,
    Divide,
    Semicolon,
    Colon,
    Comma,
    Period,
    Equals,
    True,
    False,
    Var,
    If,
    Else,
    While,
    LessThan,
    GreaterThan,
    Bang,
    LessThanEqual,
    GreaterThanEqual,
    EqualTo,
    NotEqualTo,
    Pipe,
    Or,
    Ampersand,
    And,
    Fn,
    Break,
    Return,
    /// A lexing failure, reported in place of the token that could not be read.
    LexErr(LexError),
}

/// Iterator that splits source text into [`Token`]s. Create one with [`lex`].
pub struct TokenIterator<'a> {
    char_stream: Peekable<Chars<'a>>,
}

impl<'a> TokenIterator<'a> {
    /// Reads exactly `digits` hexadecimal digits and converts the value to a
    /// `char`. Returns `None` if the input ends early, a character is not a
    /// hex digit, or the value is not a valid Unicode scalar value.
    fn read_hex_escape(&mut self, digits: usize) -> Option<char> {
        let mut value: u32 = 0;
        for _ in 0..digits {
            let digit = self.char_stream.next()?.to_digit(16)?;
            // At most 8 digits are ever requested, so this cannot overflow u32.
            value = value * 16 + digit;
        }
        char::from_u32(value)
    }

    /// Lexes a decimal integer whose first digit, `first`, is already consumed.
    /// A value that does not fit in `i32` yields `LexError::MalformedNumber`.
    fn lex_number(&mut self, first: char) -> Token {
        let mut digits = String::new();
        digits.push(first);
        while let Some(&c) = self.char_stream.peek() {
            match c {
                '0'..='9' => {
                    digits.push(c);
                    self.char_stream.next();
                }
                _ => break,
            }
        }
        match digits.parse::<i32>() {
            Ok(value) => Token::IntConst(value),
            Err(_) => Token::LexErr(LexError::MalformedNumber),
        }
    }

    /// Lexes a keyword or identifier whose first character, `first`, is
    /// already consumed.
    fn lex_word(&mut self, first: char) -> Token {
        let mut word = String::new();
        word.push(first);
        while let Some(&c) = self.char_stream.peek() {
            match c {
                '0'..='9' | 'A'..='Z' | 'a'..='z' | '_' => {
                    word.push(c);
                    self.char_stream.next();
                }
                _ => break,
            }
        }
        let keyword = match word.as_str() {
            "true" => Some(Token::True),
            "false" => Some(Token::False),
            "var" => Some(Token::Var),
            "if" => Some(Token::If),
            "else" => Some(Token::Else),
            "while" => Some(Token::While),
            "break" => Some(Token::Break),
            "return" => Some(Token::Return),
            "fn" => Some(Token::Fn),
            _ => None,
        };
        keyword.unwrap_or_else(|| Token::Identifier(word))
    }

    /// Lexes a string literal whose opening `"` is already consumed.
    ///
    /// Recognised escapes are `\\`, `\t`, `\n`, `\r`, `\xHH`, `\uHHHH` and
    /// `\UHHHHHHHH`; any other escape yields
    /// `LexError::MalformedEscapeSequence`. If the input ends before a closing
    /// quote, the text read so far is returned as the string.
    fn lex_string(&mut self) -> Token {
        let mut text = String::new();
        let mut escape = false;
        while let Some(c) = self.char_stream.next() {
            if escape {
                escape = false;
                let decoded = match c {
                    '\\' => Some('\\'),
                    't' => Some('\t'),
                    'n' => Some('\n'),
                    'r' => Some('\r'),
                    'x' => self.read_hex_escape(2),
                    'u' => self.read_hex_escape(4),
                    'U' => self.read_hex_escape(8),
                    _ => None,
                };
                match decoded {
                    Some(ch) => text.push(ch),
                    None => return Token::LexErr(LexError::MalformedEscapeSequence),
                }
            } else {
                match c {
                    '"' => break,
                    '\\' => escape = true,
                    _ => text.push(c),
                }
            }
        }
        Token::StringConst(text)
    }

    /// Returns `matched` (consuming one character) when the next character is
    /// `follow`, otherwise returns `otherwise` and consumes nothing.
    fn either(&mut self, follow: char, matched: Token, otherwise: Token) -> Token {
        if self.char_stream.peek() == Some(&follow) {
            self.char_stream.next();
            matched
        } else {
            otherwise
        }
    }
}

impl<'a> Iterator for TokenIterator<'a> {
    type Item = Token;

    /// Produces the next token, skipping spaces, tabs, `\n` and `\r`.
    /// Returns `None` once the input is exhausted.
    fn next(&mut self) -> Option<Token> {
        while let Some(c) = self.char_stream.next() {
            let token = match c {
                '0'..='9' => self.lex_number(c),
                'A'..='Z' | 'a'..='z' | '_' => self.lex_word(c),
                '"' => self.lex_string(),
                '{' => Token::LCurly,
                '}' => Token::RCurly,
                '(' => Token::LParen,
                ')' => Token::RParen,
                '[' => Token::LSquare,
                ']' => Token::RSquare,
                '+' => Token::Plus,
                '-' => Token::Minus,
                '*' => Token::Multiply,
                '/' => Token::Divide,
                ';' => Token::Semicolon,
                ':' => Token::Colon,
                ',' => Token::Comma,
                '.' => Token::Period,
                '=' => self.either('=', Token::EqualTo, Token::Equals),
                '<' => self.either('=', Token::LessThanEqual, Token::LessThan),
                '>' => self.either('=', Token::GreaterThanEqual, Token::GreaterThan),
                '!' => self.either('=', Token::NotEqualTo, Token::Bang),
                '|' => self.either('|', Token::Or, Token::Pipe),
                '&' => self.either('&', Token::And, Token::Ampersand),
                // Whitespace separates tokens but produces none itself.
                ' ' | '\t' | '\n' | '\r' => continue,
                _ => Token::LexErr(LexError::UnexpectedChar),
            };
            return Some(token);
        }
        None
    }
}

/// Creates a token iterator over `input`.
///
/// Takes `&str`; callers holding a `&String` can pass it unchanged thanks to
/// deref coercion.
pub fn lex<'a>(input: &'a str) -> TokenIterator<'a> {
    TokenIterator {
        char_stream: input.chars().peekable(),
    }
}

/// Binding strength of a binary operator token; a larger value binds tighter.
/// Returns `-1` for tokens that are not binary operators.
fn get_precedence(token: &Token) -> i32 {
    match *token {
        Token::Equals => 10,
        Token::Or => 11,
        Token::And => 12,
        Token::LessThan
        | Token::LessThanEqual
        | Token::GreaterThan
        | Token::GreaterThanEqual
        | Token::EqualTo
        | Token::NotEqualTo => 15,
        Token::Plus | Token::Minus => 20,
        // Division shares the precedence of multiplication; without an entry
        // here `/` would never be accepted as a binary operator.
        Token::Multiply | Token::Divide => 40,
        _ => -1,
    }
}

/// Name of the function a binary operator token is lowered to, or `None` if
/// the token is not such an operator.
fn operator_fn_name(token: &Token) -> Option<&'static str> {
    match *token {
        Token::Plus => Some("+"),
        Token::Minus => Some("-"),
        Token::Multiply => Some("*"),
        Token::Divide => Some("/"),
        Token::EqualTo => Some("=="),
        Token::NotEqualTo => Some("!="),
        Token::LessThan => Some("<"),
        Token::LessThanEqual => Some("<="),
        Token::GreaterThan => Some(">"),
        Token::GreaterThanEqual => Some(">="),
        Token::Or => Some("||"),
        Token::And => Some("&&"),
        _ => None,
    }
}

/// Parses the remainder of a parenthesised expression; the opening `(` has
/// already been consumed by the caller.
fn parse_paren_expr<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Expr, ParseError> {
    let expr = parse_expr(input)?;
    match input.next() {
        Some(Token::RParen) => Ok(expr),
        _ => Err(ParseError::MissingRParen),
    }
}

/// Parses what follows an identifier `id`: nothing (a plain identifier), a
/// call `id(args)`, or a method call `id.method(args)`.
///
/// A `.` that is not followed by `name(` yields
/// `ParseError::ExpectedMethodInvocation`; a bad argument list yields
/// `ParseError::MalformedCallExpr`.
fn parse_ident_expr<'a>(
    id: String,
    input: &mut Peekable<TokenIterator<'a>>,
) -> Result<Expr, ParseError> {
    // Optional `.method` suffix.
    let method = match input.peek() {
        Some(&Token::Period) => {
            input.next();
            match input.next() {
                Some(Token::Identifier(name)) => Some(name),
                _ => return Err(ParseError::ExpectedMethodInvocation),
            }
        }
        _ => None,
    };

    match input.peek() {
        Some(&Token::LParen) => {
            input.next();
        }
        _ => {
            return match method {
                // `a.b` with no call: report it instead of silently dropping `.b`.
                Some(_) => Err(ParseError::ExpectedMethodInvocation),
                None => Ok(Expr::Identifier(id)),
            };
        }
    }

    let mut args = Vec::new();
    if let Some(&Token::RParen) = input.peek() {
        // Empty argument list.
        input.next();
    } else {
        loop {
            match parse_expr(input) {
                Ok(arg) => args.push(arg),
                Err(_) => return Err(ParseError::MalformedCallExpr),
            }
            match input.peek() {
                Some(&Token::RParen) => {
                    input.next();
                    break;
                }
                Some(&Token::Comma) => {
                    input.next();
                }
                _ => return Err(ParseError::MalformedCallExpr),
            }
        }
    }

    Ok(match method {
        Some(method) => Expr::MethodCall(id, method, Box::new(args)),
        None => Expr::FnCall(id, Box::new(args)),
    })
}

/// Parses a primary expression: a literal, an identifier-led expression, or a
/// parenthesised expression.
///
/// Any other token is printed to stdout and reported as
/// `ParseError::BadInput`; running out of tokens is
/// `ParseError::InputPastEndOfFile`.
fn parse_primary<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Expr, ParseError> {
    let token = input.next().ok_or(ParseError::InputPastEndOfFile)?;
    match token {
        Token::IntConst(value) => Ok(Expr::IntConst(value)),
        Token::StringConst(text) => Ok(Expr::StringConst(text)),
        Token::Identifier(name) => parse_ident_expr(name, input),
        Token::LParen => parse_paren_expr(input),
        Token::True => Ok(Expr::True),
        Token::False => Ok(Expr::False),
        Token::LexErr(le) => {
            println!("Error: {}", le);
            Err(ParseError::BadInput)
        }
        other => {
            println!("Can't parse: {:?}", other);
            Err(ParseError::BadInput)
        }
    }
}

/// Precedence-climbing loop: extends `lhs` with every following binary
/// operator whose precedence is at least `prec`.
fn parse_binop<'a>(
    input: &mut Peekable<TokenIterator<'a>>,
    prec: i32,
    lhs: Expr,
) -> Result<Expr, ParseError> {
    let mut lhs_curr = lhs;

    loop {
        let curr_prec = input.peek().map_or(-1, get_precedence);
        if curr_prec < prec {
            return Ok(lhs_curr);
        }

        let op_token = match input.next() {
            Some(token) => token,
            None => return Ok(lhs_curr),
        };

        let mut rhs = parse_primary(input)?;

        // A tighter-binding operator on the right claims `rhs` first.
        let next_prec = input.peek().map_or(-1, get_precedence);
        if curr_prec < next_prec {
            rhs = parse_binop(input, curr_prec + 1, rhs)?;
        }

        lhs_curr = match op_token {
            Token::Equals => Expr::Assignment(Box::new(lhs_curr), Box::new(rhs)),
            other => match operator_fn_name(&other) {
                Some(name) => Expr::FnCall(name.to_string(), Box::new(vec![lhs_curr, rhs])),
                None => return Err(ParseError::UnknownOperator),
            },
        };
    }
}

/// Parses a full expression: a primary followed by any binary operators.
fn parse_expr<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Expr, ParseError> {
    let lhs = parse_primary(input)?;
    parse_binop(input, 0, lhs)
}

/// Parses `if guard { ... }` with an optional `else { ... }`. The caller has
/// peeked the `if` token; it is consumed here.
fn parse_if<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
    input.next();

    let guard = parse_expr(input)?;
    let body = parse_block(input)?;

    match input.peek() {
        Some(&Token::Else) => {
            input.next();
            let else_body = parse_block(input)?;
            Ok(Stmt::IfElse(
                Box::new(guard),
                Box::new(body),
                Box::new(else_body),
            ))
        }
        _ => Ok(Stmt::If(Box::new(guard), Box::new(body))),
    }
}

/// Parses `while guard { ... }`. The caller has peeked the `while` token; it
/// is consumed here.
fn parse_while<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
    input.next();

    let guard = parse_expr(input)?;
    let body = parse_block(input)?;

    Ok(Stmt::While(Box::new(guard), Box::new(body)))
}

/// Parses `var name` with an optional `= initializer`. The caller has peeked
/// the `var` token; it is consumed here.
fn parse_var<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
    input.next();

    let name = match input.next() {
        Some(Token::Identifier(name)) => name,
        _ => return Err(ParseError::VarExpectsIdentifier),
    };

    let initializer = match input.peek() {
        Some(&Token::Equals) => {
            input.next();
            Some(Box::new(parse_expr(input)?))
        }
        _ => None,
    };

    Ok(Stmt::Var(name, initializer))
}

/// Parses `{ stmt; stmt; ... }` into a `Stmt::Block`. The semicolon after a
/// statement is optional.
fn parse_block<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
    match input.peek() {
        Some(&Token::LCurly) => {
            input.next();
        }
        _ => return Err(ParseError::MissingLCurly),
    }

    let mut stmts = Vec::new();

    loop {
        match input.peek() {
            Some(&Token::RCurly) => {
                input.next();
                return Ok(Stmt::Block(Box::new(stmts)));
            }
            None => return Err(ParseError::MissingRCurly),
            Some(_) => (),
        }

        stmts.push(parse_stmt(input)?);

        if let Some(&Token::Semicolon) = input.peek() {
            input.next();
        }
    }
}

/// Parses an expression used as a statement.
fn parse_expr_stmt<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
    let expr = parse_expr(input)?;
    Ok(Stmt::Expr(Box::new(expr)))
}

/// Parses one statement, dispatching on the next token. A trailing semicolon
/// is left for the caller to consume.
fn parse_stmt<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
    match input.peek() {
        Some(&Token::If) => parse_if(input),
        Some(&Token::While) => parse_while(input),
        Some(&Token::Break) => {
            input.next();
            Ok(Stmt::Break)
        }
        Some(&Token::Return) => {
            input.next();
            match input.peek() {
                Some(&Token::Semicolon) => Ok(Stmt::Return),
                _ => {
                    let ret = parse_expr(input)?;
                    Ok(Stmt::ReturnWithVal(Box::new(ret)))
                }
            }
        }
        Some(&Token::LCurly) => parse_block(input),
        Some(&Token::Var) => parse_var(input),
        _ => parse_expr_stmt(input),
    }
}

/// Parses `fn name(params) { body }`. The caller has peeked the `fn` token;
/// it is consumed here.
fn parse_fn<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<FnDef, ParseError> {
    input.next();

    let name = match input.next() {
        Some(Token::Identifier(name)) => name,
        _ => return Err(ParseError::FnMissingName),
    };

    match input.peek() {
        Some(&Token::LParen) => {
            input.next();
        }
        _ => return Err(ParseError::FnMissingParams),
    }

    let mut params = Vec::new();

    // Collect identifiers up to the closing `)`; commas are simply skipped.
    loop {
        match input.next() {
            Some(Token::RParen) => break,
            Some(Token::Comma) => (),
            Some(Token::Identifier(param)) => params.push(param),
            _ => return Err(ParseError::MalformedCallExpr),
        }
    }

    let body = parse_block(input)?;

    Ok(FnDef {
        name: name,
        params: params,
        body: Box::new(body),
    })
}

/// Parses statements and function definitions until the input is exhausted.
/// The semicolon after each item is optional.
fn parse_top_level<'a>(
    input: &mut Peekable<TokenIterator<'a>>,
) -> Result<(Vec<Stmt>, Vec<FnDef>), ParseError> {
    let mut stmts = Vec::new();
    let mut fndefs = Vec::new();

    loop {
        match input.peek() {
            None => break,
            Some(&Token::Fn) => fndefs.push(parse_fn(input)?),
            Some(_) => stmts.push(parse_stmt(input)?),
        }

        if let Some(&Token::Semicolon) = input.peek() {
            input.next();
        }
    }

    Ok((stmts, fndefs))
}

/// Parses a whole token stream, e.g. `parse(&mut lex(src).peekable())`.
///
/// Returns the top-level statements and the function definitions, each in
/// source order, or the first `ParseError` encountered.
pub fn parse<'a>(
    input: &mut Peekable<TokenIterator<'a>>,
) -> Result<(Vec<Stmt>, Vec<FnDef>), ParseError> {
    parse_top_level(input)
}