Fix number parsing.

This commit is contained in:
Stephen Chung 2020-03-03 21:39:25 +08:00
parent 9f80bf03c4
commit 71a3c79915
4 changed files with 133 additions and 69 deletions

View File

@ -683,12 +683,29 @@ a.x = 500;
a.update(); a.update();
``` ```
## Numbers
```rust
let x = 123; // i64
let x = 123.4; // f64
let x = 123_456_789; // '_' separators may appear anywhere after the first digit and are ignored
let x = 0x12abcd; // i64 in hex
let x = 0o777; // i64 in oct
let x = 0b1010_1111; // i64 in binary
```
Conversion functions:
* `to_int` - converts an `f32` or `f64` to `i64`
* `to_float` - converts an integer type to `f64`
## Strings and Chars ## Strings and Chars
```rust ```rust
let name = "Bob"; let name = "Bob";
let middle_initial = 'C'; let middle_initial = 'C';
let last = 'Davis'; let last = "Davis";
let full_name = name + " " + middle_initial + ". " + last; let full_name = name + " " + middle_initial + ". " + last;
full_name == "Bob C. Davis"; full_name == "Bob C. Davis";
@ -706,9 +723,13 @@ let c = "foo"[0]; // a syntax error for now - cannot index into literals
let c = ts.s[0]; // a syntax error for now - cannot index into properties let c = ts.s[0]; // a syntax error for now - cannot index into properties
let c = record[0]; // this works let c = record[0]; // this works
// Escape sequences in strings
record += " \u2764\n"; // escape sequence of '❤' in Unicode
record == "Bob C. Davis: age 42 ❤\n"; // '\n' = new-line
// Unlike Rust, Rhai strings can be modified // Unlike Rust, Rhai strings can be modified
record[4] = 'Z'; record[4] = '\x58'; // 0x58 = 'X'
record == "Bob Z. Davis: age 42"; record == "Bob X. Davis: age 42 ❤\n";
``` ```
The following standard functions operate on strings: The following standard functions operate on strings:
@ -727,6 +748,7 @@ full_name.len() == 14;
full_name.trim(); full_name.trim();
full_name.len() == 12; full_name.len() == 12;
full_name == "Bob C. Davis";
full_name.pad(15, '$'); full_name.pad(15, '$');
full_name.len() == 15; full_name.len() == 15;

View File

@ -34,9 +34,24 @@
// needs to be here, because order matters for macros // needs to be here, because order matters for macros
macro_rules! debug_println { macro_rules! debug_println {
() => (#[cfg(feature = "debug_msgs")] {print!("\n")}); () => (
($fmt:expr) => (#[cfg(feature = "debug_msgs")] {print!(concat!($fmt, "\n"))}); #[cfg(feature = "debug_msgs")]
($fmt:expr, $($arg:tt)*) => (#[cfg(feature = "debug_msgs")] {print!(concat!($fmt, "\n"), $($arg)*)}); {
print!("\n");
}
);
($fmt:expr) => (
#[cfg(feature = "debug_msgs")]
{
print!(concat!($fmt, "\n"));
}
);
($fmt:expr, $($arg:tt)*) => (
#[cfg(feature = "debug_msgs")]
{
print!(concat!($fmt, "\n"), $($arg)*);
}
);
} }
mod any; mod any;
@ -50,6 +65,6 @@ mod scope;
pub use any::Dynamic; pub use any::Dynamic;
pub use engine::{Array, Engine, EvalAltResult}; pub use engine::{Array, Engine, EvalAltResult};
pub use scope::Scope;
pub use fn_register::{RegisterDynamicFn, RegisterFn}; pub use fn_register::{RegisterDynamicFn, RegisterFn};
pub use parser::{ParseError, ParseErrorType, AST}; pub use parser::{ParseError, ParseErrorType, AST};
pub use scope::Scope;

View File

@ -9,9 +9,9 @@ use std::str::Chars;
pub enum LexError { pub enum LexError {
UnexpectedChar(char), UnexpectedChar(char),
UnterminatedString, UnterminatedString,
MalformedEscapeSequence, MalformedEscapeSequence(String),
MalformedNumber, MalformedNumber(String),
MalformedChar, MalformedChar(String),
InputError(String), InputError(String),
} }
@ -22,9 +22,9 @@ impl Error for LexError {
match *self { match *self {
LERR::UnexpectedChar(_) => "Unexpected character", LERR::UnexpectedChar(_) => "Unexpected character",
LERR::UnterminatedString => "Open string is not terminated", LERR::UnterminatedString => "Open string is not terminated",
LERR::MalformedEscapeSequence => "Unexpected values in escape sequence", LERR::MalformedEscapeSequence(_) => "Unexpected values in escape sequence",
LERR::MalformedNumber => "Unexpected characters in number", LERR::MalformedNumber(_) => "Unexpected characters in number",
LERR::MalformedChar => "Char constant not a single character", LERR::MalformedChar(_) => "Char constant not a single character",
LERR::InputError(_) => "Input error", LERR::InputError(_) => "Input error",
} }
} }
@ -34,6 +34,9 @@ impl fmt::Display for LexError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
LERR::UnexpectedChar(c) => write!(f, "Unexpected '{}'", c), LERR::UnexpectedChar(c) => write!(f, "Unexpected '{}'", c),
LERR::MalformedEscapeSequence(s) => write!(f, "Invalid escape sequence: '{}'", s),
LERR::MalformedNumber(s) => write!(f, "Invalid number: '{}'", s),
LERR::MalformedChar(s) => write!(f, "Invalid character: '{}'", s),
LERR::InputError(s) => write!(f, "{}", s), LERR::InputError(s) => write!(f, "{}", s),
_ => write!(f, "{}", self.description()), _ => write!(f, "{}", self.description()),
} }
@ -535,7 +538,7 @@ impl<'a> TokenIterator<'a> {
enclosing_char: char, enclosing_char: char,
) -> Result<String, (LexError, Position)> { ) -> Result<String, (LexError, Position)> {
let mut result = Vec::new(); let mut result = Vec::new();
let mut escape = false; let mut escape = String::with_capacity(12);
loop { loop {
let next_char = self.char_stream.next(); let next_char = self.char_stream.next();
@ -547,107 +550,123 @@ impl<'a> TokenIterator<'a> {
self.advance()?; self.advance()?;
match next_char.unwrap() { match next_char.unwrap() {
'\\' if !escape => escape = true, '\\' if escape.is_empty() => {
'\\' if escape => { escape.push('\\');
escape = false; }
'\\' if !escape.is_empty() => {
escape.clear();
result.push('\\'); result.push('\\');
} }
't' if escape => { 't' if !escape.is_empty() => {
escape = false; escape.clear();
result.push('\t'); result.push('\t');
} }
'n' if escape => { 'n' if !escape.is_empty() => {
escape = false; escape.clear();
result.push('\n'); result.push('\n');
} }
'r' if escape => { 'r' if !escape.is_empty() => {
escape = false; escape.clear();
result.push('\r'); result.push('\r');
} }
'x' if escape => { 'x' if !escape.is_empty() => {
escape = false; let mut seq = escape.clone();
seq.push('x');
escape.clear();
let mut out_val: u32 = 0; let mut out_val: u32 = 0;
for _ in 0..2 { for _ in 0..2 {
if let Some(c) = self.char_stream.next() { if let Some(c) = self.char_stream.next() {
seq.push(c);
self.advance()?;
if let Some(d1) = c.to_digit(16) { if let Some(d1) = c.to_digit(16) {
out_val *= 16; out_val *= 16;
out_val += d1; out_val += d1;
} else { } else {
return Err((LERR::MalformedEscapeSequence, self.pos)); return Err((LERR::MalformedEscapeSequence(seq), self.pos));
} }
} else { } else {
return Err((LERR::MalformedEscapeSequence, self.pos)); return Err((LERR::MalformedEscapeSequence(seq), self.pos));
} }
self.advance()?;
} }
if let Some(r) = char::from_u32(out_val) { if let Some(r) = char::from_u32(out_val) {
result.push(r); result.push(r);
} else { } else {
return Err((LERR::MalformedEscapeSequence, self.pos)); return Err((LERR::MalformedEscapeSequence(seq), self.pos));
} }
} }
'u' if escape => { 'u' if !escape.is_empty() => {
escape = false; let mut seq = escape.clone();
seq.push('u');
escape.clear();
let mut out_val: u32 = 0; let mut out_val: u32 = 0;
for _ in 0..4 { for _ in 0..4 {
if let Some(c) = self.char_stream.next() { if let Some(c) = self.char_stream.next() {
seq.push(c);
self.advance()?;
if let Some(d1) = c.to_digit(16) { if let Some(d1) = c.to_digit(16) {
out_val *= 16; out_val *= 16;
out_val += d1; out_val += d1;
} else { } else {
return Err((LERR::MalformedEscapeSequence, self.pos)); return Err((LERR::MalformedEscapeSequence(seq), self.pos));
} }
} else { } else {
return Err((LERR::MalformedEscapeSequence, self.pos)); return Err((LERR::MalformedEscapeSequence(seq), self.pos));
} }
self.advance()?;
} }
if let Some(r) = char::from_u32(out_val) { if let Some(r) = char::from_u32(out_val) {
result.push(r); result.push(r);
} else { } else {
return Err((LERR::MalformedEscapeSequence, self.pos)); return Err((LERR::MalformedEscapeSequence(seq), self.pos));
} }
} }
'U' if escape => { 'U' if !escape.is_empty() => {
escape = false; let mut seq = escape.clone();
seq.push('U');
escape.clear();
let mut out_val: u32 = 0; let mut out_val: u32 = 0;
for _ in 0..8 { for _ in 0..8 {
if let Some(c) = self.char_stream.next() { if let Some(c) = self.char_stream.next() {
seq.push(c);
self.advance()?;
if let Some(d1) = c.to_digit(16) { if let Some(d1) = c.to_digit(16) {
out_val *= 16; out_val *= 16;
out_val += d1; out_val += d1;
} else { } else {
return Err((LERR::MalformedEscapeSequence, self.pos)); return Err((LERR::MalformedEscapeSequence(seq), self.pos));
} }
} else { } else {
return Err((LERR::MalformedEscapeSequence, self.pos)); return Err((LERR::MalformedEscapeSequence(seq), self.pos));
} }
self.advance()?;
} }
if let Some(r) = char::from_u32(out_val) { if let Some(r) = char::from_u32(out_val) {
result.push(r); result.push(r);
} else { } else {
return Err((LERR::MalformedEscapeSequence, self.pos)); return Err((LERR::MalformedEscapeSequence(seq), self.pos));
} }
} }
x if enclosing_char == x && escape => result.push(x), x if enclosing_char == x && !escape.is_empty() => result.push(x),
x if enclosing_char == x && !escape => break, x if enclosing_char == x && escape.is_empty() => break,
_ if escape => return Err((LERR::MalformedEscapeSequence, self.pos)), _ if !escape.is_empty() => {
return Err((LERR::MalformedEscapeSequence(escape), self.pos))
}
'\n' => { '\n' => {
self.rewind()?; self.rewind()?;
return Err((LERR::UnterminatedString, self.pos)); return Err((LERR::UnterminatedString, self.pos));
} }
x => { x => {
escape = false; escape.clear();
result.push(x); result.push(x);
} }
} }
} }
let out: String = result.iter().cloned().collect(); let out: String = result.iter().collect();
Ok(out) Ok(out)
} }
@ -672,7 +691,7 @@ impl<'a> TokenIterator<'a> {
while let Some(&next_char) = self.char_stream.peek() { while let Some(&next_char) = self.char_stream.peek() {
match next_char { match next_char {
'0'..='9' => { '0'..='9' | '_' => {
result.push(next_char); result.push(next_char);
self.char_stream.next(); self.char_stream.next();
if let Err(err) = self.advance_token() { if let Err(err) = self.advance_token() {
@ -687,7 +706,7 @@ impl<'a> TokenIterator<'a> {
} }
while let Some(&next_char_in_float) = self.char_stream.peek() { while let Some(&next_char_in_float) = self.char_stream.peek() {
match next_char_in_float { match next_char_in_float {
'0'..='9' => { '0'..='9' | '_' => {
result.push(next_char_in_float); result.push(next_char_in_float);
self.char_stream.next(); self.char_stream.next();
if let Err(err) = self.advance_token() { if let Err(err) = self.advance_token() {
@ -698,7 +717,7 @@ impl<'a> TokenIterator<'a> {
} }
} }
} }
'x' | 'X' => { 'x' | 'X' if c == '0' => {
result.push(next_char); result.push(next_char);
self.char_stream.next(); self.char_stream.next();
if let Err(err) = self.advance_token() { if let Err(err) = self.advance_token() {
@ -706,7 +725,7 @@ impl<'a> TokenIterator<'a> {
} }
while let Some(&next_char_in_hex) = self.char_stream.peek() { while let Some(&next_char_in_hex) = self.char_stream.peek() {
match next_char_in_hex { match next_char_in_hex {
'0'..='9' | 'a'..='f' | 'A'..='F' => { '0'..='9' | 'a'..='f' | 'A'..='F' | '_' => {
result.push(next_char_in_hex); result.push(next_char_in_hex);
self.char_stream.next(); self.char_stream.next();
if let Err(err) = self.advance_token() { if let Err(err) = self.advance_token() {
@ -718,7 +737,7 @@ impl<'a> TokenIterator<'a> {
} }
radix_base = Some(16); radix_base = Some(16);
} }
'o' | 'O' => { 'o' | 'O' if c == '0' => {
result.push(next_char); result.push(next_char);
self.char_stream.next(); self.char_stream.next();
if let Err(err) = self.advance_token() { if let Err(err) = self.advance_token() {
@ -726,7 +745,7 @@ impl<'a> TokenIterator<'a> {
} }
while let Some(&next_char_in_oct) = self.char_stream.peek() { while let Some(&next_char_in_oct) = self.char_stream.peek() {
match next_char_in_oct { match next_char_in_oct {
'0'..='8' => { '0'..='8' | '_' => {
result.push(next_char_in_oct); result.push(next_char_in_oct);
self.char_stream.next(); self.char_stream.next();
if let Err(err) = self.advance_token() { if let Err(err) = self.advance_token() {
@ -738,7 +757,7 @@ impl<'a> TokenIterator<'a> {
} }
radix_base = Some(8); radix_base = Some(8);
} }
'b' | 'B' => { 'b' | 'B' if c == '0' => {
result.push(next_char); result.push(next_char);
self.char_stream.next(); self.char_stream.next();
if let Err(err) = self.advance_token() { if let Err(err) = self.advance_token() {
@ -763,19 +782,19 @@ impl<'a> TokenIterator<'a> {
} }
if let Some(radix) = radix_base { if let Some(radix) = radix_base {
let out: String = result let out: String = result.iter().skip(2).filter(|&&c| c != '_').collect();
.iter()
.cloned()
.skip(2)
.filter(|c| c != &'_')
.collect();
if let Ok(val) = i64::from_str_radix(&out, radix) { return Some((
return Some((Token::IntegerConstant(val), pos)); if let Ok(val) = i64::from_str_radix(&out, radix) {
} Token::IntegerConstant(val)
} else {
Token::LexErr(LERR::MalformedNumber(result.iter().collect()))
},
pos,
));
} }
let out: String = result.iter().cloned().collect(); let out: String = result.iter().filter(|&&c| c != '_').collect();
return Some(( return Some((
if let Ok(val) = out.parse::<i64>() { if let Ok(val) = out.parse::<i64>() {
@ -783,7 +802,7 @@ impl<'a> TokenIterator<'a> {
} else if let Ok(val) = out.parse::<f64>() { } else if let Ok(val) = out.parse::<f64>() {
Token::FloatConstant(val) Token::FloatConstant(val)
} else { } else {
Token::LexErr(LERR::MalformedNumber) Token::LexErr(LERR::MalformedNumber(result.iter().collect()))
}, },
pos, pos,
)); ));
@ -805,7 +824,7 @@ impl<'a> TokenIterator<'a> {
} }
} }
let out: String = result.iter().cloned().collect(); let out: String = result.iter().collect();
return Some(( return Some((
match out.as_str() { match out.as_str() {
@ -840,12 +859,12 @@ impl<'a> TokenIterator<'a> {
return Some(( return Some((
if let Some(first_char) = chars.next() { if let Some(first_char) = chars.next() {
if chars.count() != 0 { if chars.count() != 0 {
Token::LexErr(LERR::MalformedChar) Token::LexErr(LERR::MalformedChar(format!("'{}'", result)))
} else { } else {
Token::CharConstant(first_char) Token::CharConstant(first_char)
} }
} else { } else {
Token::LexErr(LERR::MalformedChar) Token::LexErr(LERR::MalformedChar(format!("'{}'", result)))
}, },
pos, pos,
)); ));

View File

@ -5,13 +5,21 @@ fn test_string() -> Result<(), EvalAltResult> {
let mut engine = Engine::new(); let mut engine = Engine::new();
assert_eq!( assert_eq!(
engine.eval::<String>("\"Test string: \\u2764\"")?, engine.eval::<String>(r#""Test string: \u2764""#)?,
"Test string: ❤".to_string() "Test string: ❤".to_string()
); );
assert_eq!( assert_eq!(
engine.eval::<String>("\"foo\" + \"bar\"")?, engine.eval::<String>(r#""Test string: \x58""#)?,
"Test string: X".to_string()
);
assert_eq!(
engine.eval::<String>(r#""foo" + "bar""#)?,
"foobar".to_string() "foobar".to_string()
); );
assert_eq!(
engine.eval::<String>(r#""foo" + 123.4556"#)?,
"foo123.4556".to_string()
);
Ok(()) Ok(())
} }