Limit script size to avoid overflowing positions.

Stephen Chung 2020-03-03 13:30:46 +08:00
parent 6969b13ebc
commit fa13588f69
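Positions are now tracked as u16 counters that refuse to grow past their limits: advance(), rewind() and new_line() return a Result and report a LexError::InputError instead of wrapping around. A minimal standalone sketch of the same guard pattern (hypothetical names, not this crate's API):

// Sketch only: a bounded position counter that errors out instead of overflowing.
// `Pos`, `advance` and the String error are illustrative, not part of Rhai.
const MAX_POS: u16 = u16::MAX;

#[derive(Debug, Clone, Copy)]
struct Pos {
    line: u16,
    col: u16,
}

impl Pos {
    fn advance(&mut self) -> Result<u16, String> {
        if self.col >= MAX_POS {
            // Refuse to go past the limit; the caller turns this into a lex error.
            Err(format!("line {} exceeds maximum length ({})", self.line, MAX_POS))
        } else {
            self.col += 1;
            Ok(self.col)
        }
    }
}

fn main() {
    let mut p = Pos { line: 1, col: MAX_POS - 1 };
    assert_eq!(p.advance(), Ok(MAX_POS));
    assert!(p.advance().is_err()); // a further advance would overflow, so it errors
}

In the diff below, the equivalent error is mapped to Token::LexErr at the TokenIterator level, so lexing an over-long script stops with a reported position rather than a panic.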


@@ -5,14 +5,14 @@ use std::fmt;
 use std::iter::Peekable;
 use std::str::Chars;
 
-#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
+#[derive(Debug, Eq, PartialEq, Hash, Clone)]
 pub enum LexError {
     UnexpectedChar(char),
     UnterminatedString,
     MalformedEscapeSequence,
     MalformedNumber,
     MalformedChar,
-    Nothing,
+    InputError(String),
 }
 
 type LERR = LexError;
@@ -25,7 +25,7 @@ impl Error for LexError {
             LERR::MalformedEscapeSequence => "Unexpected values in escape sequence",
             LERR::MalformedNumber => "Unexpected characters in number",
             LERR::MalformedChar => "Char constant not a single character",
-            LERR::Nothing => "This error is for internal use only",
+            LERR::InputError(_) => "Input error",
         }
     }
 }
@@ -34,6 +34,7 @@ impl fmt::Display for LexError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             LERR::UnexpectedChar(c) => write!(f, "Unexpected '{}'", c),
+            LERR::InputError(s) => write!(f, "{}", s),
             _ => write!(f, "{}", self.description()),
         }
     }
@@ -57,27 +58,67 @@ pub enum ParseErrorType {
 type PERR = ParseErrorType;
 
-#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
+const MAX_LINES: u16 = 65535;
+const MAX_POS: u16 = 65535;
+
+#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
 pub struct Position {
-    pub line: usize,
-    pub pos: usize,
+    line: u16,
+    pos: u16,
 }
 
 impl Position {
-    pub fn advance(&mut self) {
-        self.pos += 1;
+    pub fn new() -> Self {
+        Self { line: 1, pos: 0 }
     }
 
-    pub fn rewind(&mut self) {
-        // Beware, should not rewind at zero position
-        self.pos -= 1;
+    pub fn line(&self) -> u16 {
+        self.line
     }
 
-    pub fn new_line(&mut self) {
-        self.line += 1;
-        self.pos = 0;
+    pub fn position(&self) -> u16 {
+        self.pos
+    }
+
+    pub(crate) fn advance(&mut self) -> Result<u16, LexError> {
+        if self.pos >= MAX_POS {
+            Err(LERR::InputError(format!(
+                "cannot advance beyond maximum line length ({})",
+                MAX_POS
+            )))
+        } else {
+            self.pos += 1;
+            Ok(self.pos)
+        }
+    }
+
+    pub(crate) fn rewind(&mut self) -> Result<u16, LexError> {
+        if self.pos == 0 {
+            Err(LERR::InputError("cannot rewind at position 0".into()))
+        } else {
+            self.pos -= 1;
+            Ok(self.pos)
+        }
+    }
+
+    pub(crate) fn new_line(&mut self) -> Result<u16, LexError> {
+        if self.line >= MAX_LINES {
+            Err(LERR::InputError(format!(
+                "reached maximum number of lines ({})",
+                MAX_LINES
+            )))
+        } else {
+            self.line += 1;
+            self.pos = 0;
+            Ok(self.pos)
+        }
     }
 
     pub fn eof() -> Self {
         Self { line: 0, pos: 0 }
     }
 
     pub fn is_eof(&self) -> bool {
         self.line == 0
     }
@@ -85,9 +126,20 @@ impl Position {
 impl std::fmt::Display for Position {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
-        match self.line {
-            0 => write!(f, "EOF"),
-            _ => write!(f, "line {}, position {}", self.line, self.pos),
+        if self.is_eof() {
+            write!(f, "EOF")
+        } else {
+            write!(f, "line {}, position {}", self.line, self.pos)
+        }
+    }
+}
+
+impl std::fmt::Debug for Position {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
+        if self.is_eof() {
+            write!(f, "(EOF)")
+        } else {
+            write!(f, "({}:{})", self.line, self.pos)
         }
     }
 }
@@ -99,11 +151,11 @@ impl ParseError {
     pub fn error_type(&self) -> &PERR {
         &self.0
     }
-    pub fn line(&self) -> usize {
-        self.1.line
+    pub fn line(&self) -> u16 {
+        self.1.line()
     }
-    pub fn position(&self) -> usize {
-        self.1.pos
+    pub fn position(&self) -> u16 {
+        self.1.position()
     }
     pub fn is_eof(&self) -> bool {
         self.1.is_eof()
@@ -383,14 +435,21 @@ pub struct TokenIterator<'a> {
 }
 
 impl<'a> TokenIterator<'a> {
-    fn advance(&mut self) {
-        self.pos.advance();
+    fn advance(&mut self) -> Result<u16, (LexError, Position)> {
+        self.pos.advance().map_err(|err| (err, self.pos))
     }
-    fn rewind(&mut self) {
-        self.pos.rewind();
+    fn rewind(&mut self) -> Result<u16, (LexError, Position)> {
+        self.pos.rewind().map_err(|err| (err, self.pos))
     }
-    fn new_line(&mut self) {
-        self.pos.new_line();
+    fn new_line(&mut self) -> Result<u16, (LexError, Position)> {
+        self.pos.new_line().map_err(|err| (err, self.pos))
+    }
+    fn advance_token(&mut self) -> Result<u16, (Token, Position)> {
+        self.advance().map_err(|err| (Token::LexErr(err.0), err.1))
+    }
+    fn new_line_token(&mut self) -> Result<u16, (Token, Position)> {
+        self.new_line().map_err(|err| (Token::LexErr(err.0), err.1))
     }
 
     pub fn parse_string_const(
@@ -407,7 +466,7 @@ impl<'a> TokenIterator<'a> {
             return Err((LERR::UnterminatedString, Position::eof()));
         }
 
-        self.advance();
+        self.advance()?;
 
         match next_char.unwrap() {
             '\\' if !escape => escape = true,
@@ -441,7 +500,7 @@ impl<'a> TokenIterator<'a> {
                     } else {
                         return Err((LERR::MalformedEscapeSequence, self.pos));
                     }
-                    self.advance();
+                    self.advance()?;
                 }
 
                 if let Some(r) = char::from_u32(out_val) {
@@ -464,7 +523,7 @@ impl<'a> TokenIterator<'a> {
                     } else {
                         return Err((LERR::MalformedEscapeSequence, self.pos));
                    }
-                    self.advance();
+                    self.advance()?;
                 }
 
                 if let Some(r) = char::from_u32(out_val) {
@@ -487,7 +546,7 @@ impl<'a> TokenIterator<'a> {
                     } else {
                         return Err((LERR::MalformedEscapeSequence, self.pos));
                     }
-                    self.advance();
+                    self.advance()?;
                 }
 
                 if let Some(r) = char::from_u32(out_val) {
@@ -500,7 +559,7 @@ impl<'a> TokenIterator<'a> {
             x if enclosing_char == x && !escape => break,
             _ if escape => return Err((LERR::MalformedEscapeSequence, self.pos)),
             '\n' => {
-                self.rewind();
+                self.rewind()?;
                 return Err((LERR::UnterminatedString, self.pos));
             }
             x => {
@@ -516,49 +575,65 @@ impl<'a> TokenIterator<'a> {
     fn inner_next(&mut self) -> Option<(Token, Position)> {
         while let Some(c) = self.char_stream.next() {
-            self.advance();
+            if let Err(err) = self.advance_token() {
+                return Some(err);
+            }
 
             let pos = self.pos;
 
             match c {
-                '\n' => self.new_line(),
+                '\n' => {
+                    if let Err(err) = self.new_line_token() {
+                        return Some(err);
+                    }
+                }
                 '0'..='9' => {
                     let mut result = Vec::new();
                     let mut radix_base: Option<u32> = None;
                     result.push(c);
 
-                    while let Some(&nxt) = self.char_stream.peek() {
-                        match nxt {
+                    while let Some(&next_char) = self.char_stream.peek() {
+                        match next_char {
                             '0'..='9' => {
-                                result.push(nxt);
+                                result.push(next_char);
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                             }
                             '.' => {
-                                result.push(nxt);
+                                result.push(next_char);
                                 self.char_stream.next();
-                                self.advance();
-                                while let Some(&nxt_float) = self.char_stream.peek() {
-                                    match nxt_float {
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
+                                while let Some(&next_char_in_float) = self.char_stream.peek() {
+                                    match next_char_in_float {
                                         '0'..='9' => {
-                                            result.push(nxt_float);
+                                            result.push(next_char_in_float);
                                             self.char_stream.next();
-                                            self.advance();
+                                            if let Err(err) = self.advance_token() {
+                                                return Some(err);
+                                            }
                                         }
                                         _ => break,
                                     }
                                 }
                             }
                             'x' | 'X' => {
-                                result.push(nxt);
+                                result.push(next_char);
                                 self.char_stream.next();
-                                self.advance();
-                                while let Some(&nxt_hex) = self.char_stream.peek() {
-                                    match nxt_hex {
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
+                                while let Some(&next_char_in_hex) = self.char_stream.peek() {
+                                    match next_char_in_hex {
                                         '0'..='9' | 'a'..='f' | 'A'..='F' => {
-                                            result.push(nxt_hex);
+                                            result.push(next_char_in_hex);
                                             self.char_stream.next();
-                                            self.advance();
+                                            if let Err(err) = self.advance_token() {
+                                                return Some(err);
+                                            }
                                         }
                                         _ => break,
                                     }
@@ -566,15 +641,19 @@ impl<'a> TokenIterator<'a> {
                                 radix_base = Some(16);
                             }
                             'o' | 'O' => {
-                                result.push(nxt);
+                                result.push(next_char);
                                 self.char_stream.next();
-                                self.advance();
-                                while let Some(&nxt_oct) = self.char_stream.peek() {
-                                    match nxt_oct {
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
+                                while let Some(&next_char_in_oct) = self.char_stream.peek() {
+                                    match next_char_in_oct {
                                         '0'..='8' => {
-                                            result.push(nxt_oct);
+                                            result.push(next_char_in_oct);
                                             self.char_stream.next();
-                                            self.advance();
+                                            if let Err(err) = self.advance_token() {
+                                                return Some(err);
+                                            }
                                         }
                                         _ => break,
                                     }
@@ -582,15 +661,19 @@ impl<'a> TokenIterator<'a> {
                                 radix_base = Some(8);
                             }
                             'b' | 'B' => {
-                                result.push(nxt);
+                                result.push(next_char);
                                 self.char_stream.next();
-                                self.advance();
-                                while let Some(&nxt_bin) = self.char_stream.peek() {
-                                    match nxt_bin {
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
+                                while let Some(&next_char_in_binary) = self.char_stream.peek() {
+                                    match next_char_in_binary {
                                         '0' | '1' | '_' => {
-                                            result.push(nxt_bin);
+                                            result.push(next_char_in_binary);
                                             self.char_stream.next();
-                                            self.advance();
+                                            if let Err(err) = self.advance_token() {
+                                                return Some(err);
+                                            }
                                         }
                                         _ => break,
                                     }
@@ -608,6 +691,7 @@ impl<'a> TokenIterator<'a> {
                                 .skip(2)
                                 .filter(|c| c != &'_')
                                 .collect();
+
                             if let Ok(val) = i64::from_str_radix(&out, radix) {
                                 return Some((Token::IntegerConstant(val), pos));
                             }
@@ -630,12 +714,14 @@ impl<'a> TokenIterator<'a> {
                     let mut result = Vec::new();
                     result.push(c);
 
-                    while let Some(&nxt) = self.char_stream.peek() {
-                        match nxt {
-                            x if x.is_alphanumeric() || x == '_' => {
+                    while let Some(&next_char) = self.char_stream.peek() {
+                        match next_char {
+                            x if x.is_ascii_alphanumeric() || x == '_' => {
                                 result.push(x);
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                             }
                             _ => break,
                         }
@@ -673,11 +759,11 @@ impl<'a> TokenIterator<'a> {
                     let mut chars = result.chars();
 
                     return Some((
-                        if let Some(out) = chars.next() {
+                        if let Some(first_char) = chars.next() {
                             if chars.count() != 0 {
                                 Token::LexErr(LERR::MalformedChar)
                             } else {
-                                Token::CharConstant(out)
+                                Token::CharConstant(first_char)
                             }
                         } else {
                             Token::LexErr(LERR::MalformedChar)
@@ -698,7 +784,9 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::PlusAssign
                             }
                             _ if self.last.is_next_unary() => Token::UnaryPlus,
@@ -712,7 +800,9 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::MinusAssign
                             }
                             _ if self.last.is_next_unary() => Token::UnaryMinus,
@@ -726,7 +816,9 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::MultiplyAssign
                             }
                             _ => Token::Multiply,
@@ -737,37 +829,58 @@ impl<'a> TokenIterator<'a> {
                 '/' => match self.char_stream.peek() {
                     Some(&'/') => {
                         self.char_stream.next();
-                        self.advance();
+                        if let Err(err) = self.advance_token() {
+                            return Some(err);
+                        }
                         while let Some(c) = self.char_stream.next() {
-                            if c == '\n' {
-                                self.new_line();
-                                break;
-                            } else {
-                                self.advance();
+                            match c {
+                                '\n' => {
+                                    if let Err(err) = self.new_line_token() {
+                                        return Some(err);
+                                    }
+                                    break;
+                                }
+                                _ => {
+                                    if let Err(err) = self.advance_token() {
+                                        return Some(err);
+                                    }
+                                }
                             }
                         }
                     }
                     Some(&'*') => {
                         let mut level = 1;
                         self.char_stream.next();
-                        self.advance();
+                        if let Err(err) = self.advance_token() {
+                            return Some(err);
+                        }
 
                         while let Some(c) = self.char_stream.next() {
-                            self.advance();
+                            if let Err(err) = self.advance_token() {
+                                return Some(err);
+                            }
 
                             match c {
                                 '/' => {
                                     if let Some('*') = self.char_stream.next() {
                                         level += 1;
                                     }
-                                    self.advance();
+                                    if let Err(err) = self.advance_token() {
+                                        return Some(err);
+                                    }
                                 }
                                 '*' => {
                                     if let Some('/') = self.char_stream.next() {
                                         level -= 1;
                                     }
-                                    self.advance();
+                                    if let Err(err) = self.advance_token() {
+                                        return Some(err);
+                                    }
+                                }
+                                '\n' => {
+                                    if let Err(err) = self.new_line_token() {
+                                        return Some(err);
+                                    }
                                 }
-                                '\n' => self.new_line(),
                                 _ => (),
                             }
@@ -778,7 +891,9 @@ impl<'a> TokenIterator<'a> {
                     }
                     Some(&'=') => {
                         self.char_stream.next();
-                        self.advance();
+                        if let Err(err) = self.advance_token() {
+                            return Some(err);
+                        }
                         return Some((Token::DivideAssign, pos));
                     }
                     _ => return Some((Token::Divide, pos)),
@@ -790,7 +905,9 @@ impl<'a> TokenIterator<'a> {
                 '=' => match self.char_stream.peek() {
                     Some(&'=') => {
                         self.char_stream.next();
-                        self.advance();
+                        if let Err(err) = self.advance_token() {
+                            return Some(err);
+                        }
                         return Some((Token::EqualsTo, pos));
                     }
                     _ => return Some((Token::Equals, pos)),
@@ -798,21 +915,29 @@ impl<'a> TokenIterator<'a> {
                 '<' => match self.char_stream.peek() {
                     Some(&'=') => {
                         self.char_stream.next();
-                        self.advance();
+                        if let Err(err) = self.advance_token() {
+                            return Some(err);
+                        }
                         return Some((Token::LessThanEqualsTo, pos));
                     }
                     Some(&'<') => {
                         self.char_stream.next();
-                        self.advance();
+                        if let Err(err) = self.advance_token() {
+                            return Some(err);
+                        }
                         return match self.char_stream.peek() {
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Some((Token::LeftShiftAssign, pos))
                             }
                             _ => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Some((Token::LeftShift, pos))
                             }
                         };
@@ -824,21 +949,29 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::GreaterThanEqualsTo
                             }
                             Some(&'>') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 match self.char_stream.peek() {
                                     Some(&'=') => {
                                         self.char_stream.next();
-                                        self.advance();
+                                        if let Err(err) = self.advance_token() {
+                                            return Some(err);
+                                        }
                                         Token::RightShiftAssign
                                     }
                                     _ => {
                                         self.char_stream.next();
-                                        self.advance();
+                                        if let Err(err) = self.advance_token() {
+                                            return Some(err);
+                                        }
                                         Token::RightShift
                                     }
                                 }
@@ -853,7 +986,9 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::NotEqualsTo
                             }
                             _ => Token::Bang,
@@ -866,12 +1001,16 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'|') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::Or
                             }
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::OrAssign
                             }
                             _ => Token::Pipe,
@@ -884,12 +1023,16 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'&') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::And
                             }
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::AndAssign
                             }
                             _ => Token::Ampersand,
@@ -902,7 +1045,9 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::XOrAssign
                             }
                             _ => Token::XOr,
@@ -915,7 +1060,9 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::ModuloAssign
                             }
                             _ => Token::Modulo,
@@ -928,7 +1075,9 @@ impl<'a> TokenIterator<'a> {
                         match self.char_stream.peek() {
                             Some(&'=') => {
                                 self.char_stream.next();
-                                self.advance();
+                                if let Err(err) = self.advance_token() {
+                                    return Some(err);
+                                }
                                 Token::PowerOfAssign
                             }
                             _ => Token::PowerOf,
@@ -959,7 +1108,7 @@ impl<'a> Iterator for TokenIterator<'a> {
 pub fn lex(input: &str) -> TokenIterator<'_> {
     TokenIterator {
-        last: Token::LexErr(LERR::Nothing),
+        last: Token::LexErr(LERR::InputError("".into())),
         pos: Position { line: 1, pos: 0 },
         char_stream: input.chars().peekable(),
     }