rhai/src/parser.rs
2016-03-03 09:31:42 -05:00

650 lines
23 KiB
Rust

use std::io::prelude::*;
use std::error::Error;
use std::fmt;
use std::iter::Peekable;
use std::str::Chars;
use std::char;
#[derive(Debug)]
pub enum LexError {
UnexpectedChar,
MalformedEscapeSequence,
MalformedNumber
}
impl Error for LexError {
fn description(&self) -> &str {
match *self {
LexError::UnexpectedChar => "Unexpected character in input",
LexError::MalformedEscapeSequence => "Unexpected values in escape sequence",
LexError::MalformedNumber => "Unexpected characters in number"
}
}
fn cause(&self) -> Option<&Error> {
None
}
}
impl fmt::Display for LexError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.description())
}
}
#[derive(Debug)]
pub enum ParseError {
BadInput,
InputPastEndOfFile,
UnknownOperator,
MissingRParen,
MissingLCurly,
MissingRCurly,
MalformedCallExpr,
VarExpectsIdentifier,
ExpectedMethodInvocation,
FnMissingName,
FnMissingParams
}
impl Error for ParseError {
fn description(&self) -> &str {
match *self {
ParseError::BadInput => "Unparseable characters in the input stream",
ParseError::InputPastEndOfFile => "Input past end of file",
ParseError::UnknownOperator => "Unknown operator",
ParseError::MissingRParen => "Expected ')'",
ParseError::MissingLCurly => "Expected '{'",
ParseError::MissingRCurly => "Expected '}'",
ParseError::MalformedCallExpr => "Call contains bad expression",
ParseError::VarExpectsIdentifier => "'var' expects the name of a variable",
ParseError::ExpectedMethodInvocation => "Expected method call after '.'",
ParseError::FnMissingName => "Function declaration is missing name",
ParseError::FnMissingParams => "Function declaration is missing parameters"
}
}
fn cause(&self) -> Option<&Error> {
None
}
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.description())
}
}
#[derive(Debug, Clone)]
pub struct FnDef {
pub name: String,
pub params: Vec<String>,
pub body: Box<Stmt>
}
#[derive(Debug, Clone)]
pub enum Stmt { If(Box<Expr>, Box<Stmt>), IfElse(Box<Expr>, Box<Stmt>, Box<Stmt>), While(Box<Expr>, Box<Stmt>),
Var(String, Option<Box<Expr>>), Block(Box<Vec<Stmt>>), Expr(Box<Expr>), Break, Return, ReturnWithVal(Box<Expr>) }
#[derive(Debug, Clone)]
pub enum Expr { IntConst(i32), Identifier(String), StringConst(String), FnCall(String, Box<Vec<Expr>>),
MethodCall(String, String, Box<Vec<Expr>>), Assignment(Box<Expr>, Box<Expr>), True, False }
#[derive(Debug)]
pub enum Token { IntConst(i32), Identifier(String), StringConst(String), LCurly, RCurly, LParen, RParen, LSquare, RSquare,
Plus, Minus, Multiply, Divide, Semicolon, Colon, Comma, Period, Equals, True, False, Var, If, Else, While,
LessThan, GreaterThan, Bang, LessThanEqual, GreaterThanEqual, EqualTo, NotEqualTo, Pipe, Or, Ampersand, And, Fn,
Break, Return, LexErr(LexError) }
pub struct TokenIterator<'a> {
char_stream: Peekable<Chars<'a>>
}
impl<'a> Iterator for TokenIterator<'a> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
while let Some(c) = self.char_stream.next() {
match c {
'0'...'9' => {
let mut result = Vec::new();
result.push(c);
while let Some(&nxt) = self.char_stream.peek() {
match nxt {
'0'...'9' => { result.push(nxt); self.char_stream.next(); },
_ => break
}
}
let out : String = result.iter().cloned().collect();
if let Ok(val) = out.parse::<i32>() {
return Some(Token::IntConst(val));
}
return Some(Token::LexErr(LexError::MalformedNumber));
},
'A'...'Z' | 'a'...'z' | '_' => {
let mut result = Vec::new();
result.push(c);
while let Some(&nxt) = self.char_stream.peek() {
match nxt {
'0'...'9' | 'A'...'Z' | 'a'...'z' | '_' => {
result.push(nxt); self.char_stream.next(); },
_ => break
}
}
let out : String = result.iter().cloned().collect();
if out == "true" {
return Some(Token::True);
}
else if out == "false" {
return Some(Token::False);
}
else if out == "var" {
return Some(Token::Var);
}
else if out == "if" {
return Some(Token::If);
}
else if out == "else" {
return Some(Token::Else);
}
else if out == "while" {
return Some(Token::While);
}
else if out == "break" {
return Some(Token::Break);
}
else if out == "return" {
return Some(Token::Return);
}
else if out == "fn" {
return Some(Token::Fn);
}
else {
return Some(Token::Identifier(out));
}
},
'"' => {
let mut result = Vec::new();
let mut escape = false;
while let Some(nxt) = self.char_stream.next() {
match nxt {
'"' if !escape => break,
'\\' if !escape => escape = true,
'\\' if escape => {escape = false; result.push('\\'); },
't' if escape => {escape = false; result.push('\t'); },
'n' if escape => {escape = false; result.push('\n'); },
'r' if escape => {escape = false; result.push('\r'); },
'x' if escape => {
escape = false;
let mut out_val: u32 = 0;
for _ in 0..2 {
if let Some(c) = self.char_stream.next() {
if let Some(d1) = c.to_digit(16) {
out_val *= 16;
out_val += d1;
}
else {
return Some(Token::LexErr(LexError::MalformedEscapeSequence));
}
}
else {
return Some(Token::LexErr(LexError::MalformedEscapeSequence));
}
}
if let Some(r) = char::from_u32(out_val) {
result.push(r);
}
else {
return Some(Token::LexErr(LexError::MalformedEscapeSequence));
}
}
'u' if escape => {
escape = false;
let mut out_val: u32 = 0;
for _ in 0..4 {
if let Some(c) = self.char_stream.next() {
if let Some(d1) = c.to_digit(16) {
out_val *= 16;
out_val += d1;
}
else {
return Some(Token::LexErr(LexError::MalformedEscapeSequence));
}
}
else {
return Some(Token::LexErr(LexError::MalformedEscapeSequence));
}
}
if let Some(r) = char::from_u32(out_val) {
result.push(r);
}
else {
return Some(Token::LexErr(LexError::MalformedEscapeSequence));
}
}
'U' if escape => {
escape = false;
let mut out_val: u32 = 0;
for _ in 0..8 {
if let Some(c) = self.char_stream.next() {
if let Some(d1) = c.to_digit(16) {
out_val *= 16;
out_val += d1;
}
else {
return Some(Token::LexErr(LexError::MalformedEscapeSequence));
}
}
else {
return Some(Token::LexErr(LexError::MalformedEscapeSequence));
}
}
if let Some(r) = char::from_u32(out_val) {
result.push(r);
}
else {
return Some(Token::LexErr(LexError::MalformedEscapeSequence));
}
}
_ if escape => return Some(Token::LexErr(LexError::MalformedEscapeSequence)),
_ => { escape = false; result.push(nxt); },
}
}
let out : String = result.iter().cloned().collect();
return Some(Token::StringConst(out))
}
'{' => { return Some(Token::LCurly); },
'}' => { return Some(Token::RCurly); },
'(' => { return Some(Token::LParen); },
')' => { return Some(Token::RParen); },
'[' => { return Some(Token::LSquare); },
']' => { return Some(Token::RSquare); },
'+' => { return Some(Token::Plus); },
'-' => { return Some(Token::Minus); },
'*' => { return Some(Token::Multiply); },
'/' => { return Some(Token::Divide); },
';' => { return Some(Token::Semicolon); },
':' => { return Some(Token::Colon); },
',' => { return Some(Token::Comma); },
'.' => { return Some(Token::Period); },
'=' => {
match self.char_stream.peek() {
Some(&'=') => {self.char_stream.next(); return Some(Token::EqualTo); },
_ => { return Some(Token::Equals); }
}
},
'<' => {
match self.char_stream.peek() {
Some(&'=') => {self.char_stream.next(); return Some(Token::LessThanEqual); },
_ => { return Some(Token::LessThan); }
}
}
'>' => {
match self.char_stream.peek() {
Some(&'=') => {self.char_stream.next(); return Some(Token::GreaterThanEqual); },
_ => { return Some(Token::GreaterThan); }
}
},
'!' => {
match self.char_stream.peek() {
Some(&'=') => {self.char_stream.next(); return Some(Token::NotEqualTo); },
_ => { return Some(Token::Bang); }
}
},
'|' => {
match self.char_stream.peek() {
Some(&'|') => {self.char_stream.next(); return Some(Token::Or); },
_ => { return Some(Token::Pipe); }
}
},
'&' => {
match self.char_stream.peek() {
Some(&'&') => {self.char_stream.next(); return Some(Token::And); },
_ => { return Some(Token::Ampersand); }
}
},
' ' | '\n' | '\r' => (),
_ => return Some(Token::LexErr(LexError::UnexpectedChar))
}
}
None
}
}
pub fn lex<'a>(input: &'a String) -> TokenIterator<'a> {
TokenIterator { char_stream: input.chars().peekable() }
}
fn get_precedence(token: &Token) -> i32 {
match *token {
Token::Equals => 10,
Token::Or => 11,
Token::And => 12,
Token::LessThan => 15,
Token::LessThanEqual => 15,
Token::GreaterThan => 15,
Token::GreaterThanEqual => 15,
Token::EqualTo => 15,
Token::NotEqualTo => 15,
Token::Plus => 20,
Token::Minus => 20,
Token::Multiply => 40,
_ => -1
}
}
fn parse_paren_expr<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Expr, ParseError> {
let expr = try!(parse_expr(input));
match input.next() {
Some(Token::RParen) => Ok(expr),
_ => Err(ParseError::MissingRParen)
}
}
fn parse_ident_expr<'a>(id: String, input: &mut Peekable<TokenIterator<'a>>) -> Result<Expr, ParseError> {
let id2 = match input.peek() {
Some(&Token::Period) => {
input.next();
match input.next() {
Some(Token::Identifier(ref s)) => {
s.clone()
}
_ => return Err(ParseError::ExpectedMethodInvocation)
}
},
_ => String::new()
};
match input.peek() {
Some(&Token::LParen) => {input.next();},
_ => return Ok(Expr::Identifier(id))
}
let mut args = Vec::new();
match input.peek() {
Some(&Token::RParen) => {
input.next();
if id2 == "" {
return Ok(Expr::FnCall(id, Box::new(args)))
}
else {
return Ok(Expr::MethodCall(id, id2, Box::new(args)))
}
},
_ => ()
}
loop {
if let Ok(arg) = parse_expr(input) {
args.push(arg);
}
else {
return Err(ParseError::MalformedCallExpr);
}
match input.peek() {
Some(&Token::RParen) => {
input.next();
if id2 == "" {
return Ok(Expr::FnCall(id, Box::new(args)))
}
else {
return Ok(Expr::MethodCall(id, id2, Box::new(args)))
}
},
Some(&Token::Comma) => (),
_ => return Err(ParseError::MalformedCallExpr)
}
input.next();
}
}
fn parse_primary<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Expr, ParseError> {
if let Some(token) = input.next() {
match token {
Token::IntConst(ref x) => {Ok(Expr::IntConst(x.clone()))},
Token::StringConst(ref s) => {Ok(Expr::StringConst(s.clone()))},
Token::Identifier(ref s) => {parse_ident_expr(s.clone(), input)},
Token::LParen => {parse_paren_expr(input)},
Token::True => {Ok(Expr::True)},
Token::False => {Ok(Expr::False)},
Token::LexErr(le) => {println!("Error: {}", le); Err(ParseError::BadInput)}
_ => {println!("Can't parse: {:?}", token); Err(ParseError::BadInput)}
}
}
else {
Err(ParseError::InputPastEndOfFile)
}
}
fn parse_binop<'a>(input: &mut Peekable<TokenIterator<'a>>, prec: i32, lhs: Expr) -> Result<Expr, ParseError> {
let mut lhs_curr = lhs;
loop {
let mut curr_prec = -1;
if let Some(curr_op) = input.peek() {
curr_prec = get_precedence(curr_op);
}
if curr_prec < prec {
return Ok(lhs_curr);
}
if let Some(op_token) = input.next() {
let mut rhs = try!(parse_primary(input));
let mut next_prec = -1;
if let Some(next_op) = input.peek() {
next_prec = get_precedence(next_op);
}
if curr_prec < next_prec {
rhs = try!(parse_binop(input, curr_prec+1, rhs));
}
lhs_curr = match op_token {
Token::Plus => Expr::FnCall("+".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::Minus => Expr::FnCall("-".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::Multiply => Expr::FnCall("*".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::Divide => Expr::FnCall("/".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::Equals => Expr::Assignment(Box::new(lhs_curr), Box::new(rhs)),
Token::EqualTo => Expr::FnCall("==".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::NotEqualTo => Expr::FnCall("!=".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::LessThan => Expr::FnCall("<".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::LessThanEqual => Expr::FnCall("<=".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::GreaterThan => Expr::FnCall(">".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::GreaterThanEqual => Expr::FnCall(">=".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::Or => Expr::FnCall("||".to_string(), Box::new(vec![lhs_curr, rhs])),
Token::And => Expr::FnCall("&&".to_string(), Box::new(vec![lhs_curr, rhs])),
_ => return Err(ParseError::UnknownOperator)
};
}
}
}
fn parse_expr<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Expr, ParseError> {
let lhs = try!(parse_primary(input));
parse_binop(input, 0, lhs)
}
fn parse_if<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
input.next();
let guard = try!(parse_expr(input));
let body = try!(parse_block(input));
match input.peek() {
Some(& Token::Else) => {
input.next();
let else_body = try!(parse_block(input));
Ok(Stmt::IfElse(Box::new(guard), Box::new(body), Box::new(else_body)))
}
_ => {
Ok(Stmt::If(Box::new(guard), Box::new(body)))
}
}
}
fn parse_while<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
input.next();
let guard = try!(parse_expr(input));
let body = try!(parse_block(input));
Ok(Stmt::While(Box::new(guard), Box::new(body)))
}
fn parse_var<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
input.next();
let name = match input.next() {
Some(Token::Identifier(ref s)) => s.clone(),
_ => return Err(ParseError::VarExpectsIdentifier)
};
match input.peek() {
Some(&Token::Equals) => {
input.next();
let initializer = try!(parse_expr(input));
Ok(Stmt::Var(name, Some(Box::new(initializer))))
}
_ => Ok(Stmt::Var(name, None))
}
}
fn parse_block<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
match input.peek() {
Some(& Token::LCurly) => (),
_ => return Err(ParseError::MissingLCurly)
}
input.next();
let mut stmts = Vec::new();
let skip_body = match input.peek() {
Some(& Token::RCurly) => true,
_ => false
};
if !skip_body {
while let Some(_) = input.peek() {
stmts.push(try!(parse_stmt(input)));
match input.peek() {
Some(& Token::Semicolon) => {input.next();},
_ => ()
}
match input.peek() {
Some(& Token::RCurly) => break,
_ => ()
}
}
}
match input.peek() {
Some(& Token::RCurly) => {input.next(); Ok(Stmt::Block(Box::new(stmts)))},
_ => Err(ParseError::MissingRCurly)
}
}
fn parse_expr_stmt<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
let expr = try!(parse_expr(input));
Ok(Stmt::Expr(Box::new(expr)))
}
fn parse_stmt<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<Stmt, ParseError> {
match input.peek() {
Some(& Token::If) => parse_if(input),
Some(& Token::While) => parse_while(input),
Some(& Token::Break) => {input.next(); Ok(Stmt::Break)},
Some(& Token::Return) => {
input.next();
match input.peek() {
Some(& Token::Semicolon) => Ok(Stmt::Return),
_ => {let ret = try!(parse_expr(input)); Ok(Stmt::ReturnWithVal(Box::new(ret))) }
}
}
Some(& Token::LCurly) => parse_block(input),
Some(& Token::Var) => parse_var(input),
_ => parse_expr_stmt(input)
}
}
fn parse_fn<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<FnDef, ParseError> {
input.next();
let name = match input.next() {
Some(Token::Identifier(ref s)) => s.clone(),
_ => return Err(ParseError::FnMissingName)
};
match input.peek() {
Some(&Token::LParen) => {input.next();},
_ => return Err(ParseError::FnMissingParams)
}
let mut params = Vec::new();
let skip_params = match input.peek() {
Some(&Token::RParen) => { input.next(); true }
_ => false
};
if !skip_params {
loop {
match input.next() {
Some(Token::RParen) => { break },
Some(Token::Comma) => (),
Some(Token::Identifier(ref s)) => { params.push(s.clone()); },
_ => return Err(ParseError::MalformedCallExpr)
}
}
}
let body = try!(parse_block(input));
Ok(FnDef{name: name, params: params, body: Box::new(body)})
}
fn parse_top_level<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<(Vec<Stmt>, Vec<FnDef>), ParseError> {
let mut stmts = Vec::new();
let mut fndefs = Vec::new();
while let Some(_) = input.peek() {
match input.peek() {
Some(& Token::Fn) => fndefs.push(try!(parse_fn(input))),
_ => stmts.push(try!(parse_stmt(input)))
}
match input.peek() {
Some(& Token::Semicolon) => {input.next();},
_ => ()
}
}
Ok((stmts, fndefs))
}
pub fn parse<'a>(input: &mut Peekable<TokenIterator<'a>>) -> Result<(Vec<Stmt>, Vec<FnDef>), ParseError> {
parse_top_level(input)
}