diff --git a/CHANGELOG.md b/CHANGELOG.md index 1222f2f5..3de6bcfc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Breaking changes New features ------------ +* String interpolation support is added via the `` `... ${`` ... ``} ...` `` syntax. * `FileModuleResolver` resolves relative paths under the parent path (i.e. the path holding the script that does the loading). This allows seamless cross-loading of scripts from a directory hierarchy instead of having all relative paths load from the current working directory. diff --git a/src/ast.rs b/src/ast.rs index 408db58e..b5dd9518 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -8,6 +8,7 @@ use crate::stdlib::{ collections::BTreeMap, fmt, hash::Hash, + iter::empty, num::NonZeroUsize, ops::{Add, AddAssign}, string::String, @@ -15,6 +16,7 @@ use crate::stdlib::{ vec::Vec, }; use crate::token::Token; +use crate::utils::calc_fn_hash; use crate::{ Dynamic, FnNamespace, FnPtr, Identifier, ImmutableString, Module, Position, Shared, StaticVec, INT, @@ -844,10 +846,11 @@ impl StmtBlock { impl fmt::Debug for StmtBlock { #[inline(always)] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.statements, f)?; if !self.pos.is_none() { - write!(f, "{} @ ", self.pos)?; + write!(f, " @ {:?}", self.pos)?; } - fmt::Debug::fmt(&self.statements, f) + Ok(()) } } @@ -1293,6 +1296,18 @@ pub struct OpAssignment { pub op: &'static str, } +impl OpAssignment { + pub fn new(op: &'static str) -> Self { + let op2 = &op[..op.len() - 1]; // extract operator without = + + Self { + hash_op_assign: calc_fn_hash(empty(), op, 2), + hash_op: calc_fn_hash(empty(), op2, 2), + op, + } + } +} + /// _(INTERNALS)_ An set of function call hashes. /// Exported under the `internals` feature only. /// @@ -1548,6 +1563,8 @@ pub enum Expr { StringConstant(ImmutableString, Position), /// [`FnPtr`] constant. FnPointer(ImmutableString, Position), + /// An interpolated [string][ImmutableString]. + InterpolatedString(Box>), /// [ expr, ... ] Array(Box>, Position), /// #{ name:expr, ... } @@ -1608,7 +1625,7 @@ impl Expr { Self::Array(x, _) if self.is_constant() => { let mut arr = Array::with_capacity(x.len()); arr.extend(x.iter().map(|v| v.get_constant_value().unwrap())); - Dynamic(Union::Array(Box::new(arr), AccessMode::ReadOnly)) + arr.into() } #[cfg(not(feature = "no_object"))] @@ -1617,7 +1634,7 @@ impl Expr { x.0.iter().for_each(|(k, v)| { *map.get_mut(k.name.as_str()).unwrap() = v.get_constant_value().unwrap() }); - Dynamic(Union::Map(Box::new(map), AccessMode::ReadOnly)) + map.into() } _ => return None, @@ -1643,6 +1660,7 @@ impl Expr { Self::IntegerConstant(_, pos) => *pos, Self::CharConstant(_, pos) => *pos, Self::StringConstant(_, pos) => *pos, + Self::InterpolatedString(x) => x.first().unwrap().position(), Self::FnPointer(_, pos) => *pos, Self::Array(_, pos) => *pos, Self::Map(_, pos) => *pos, @@ -1672,6 +1690,9 @@ impl Expr { Self::IntegerConstant(_, pos) => *pos = new_pos, Self::CharConstant(_, pos) => *pos = new_pos, Self::StringConstant(_, pos) => *pos = new_pos, + Self::InterpolatedString(x) => { + x.first_mut().unwrap().set_position(new_pos); + } Self::FnPointer(_, pos) => *pos = new_pos, Self::Array(_, pos) => *pos = new_pos, Self::Map(_, pos) => *pos = new_pos, @@ -1693,7 +1714,7 @@ impl Expr { #[inline] pub fn is_pure(&self) -> bool { match self { - Self::Array(x, _) => x.iter().all(Self::is_pure), + Self::InterpolatedString(x) | Self::Array(x, _) => x.iter().all(Self::is_pure), Self::Map(x, _) => x.0.iter().map(|(_, v)| v).all(Self::is_pure), @@ -1731,10 +1752,8 @@ impl Expr { | Self::FnPointer(_, _) | Self::Unit(_) => true, - // An array literal is constant if all items are constant - Self::Array(x, _) => x.iter().all(Self::is_constant), + Self::InterpolatedString(x) | Self::Array(x, _) => x.iter().all(Self::is_constant), - // An map literal is constant if all items are constant Self::Map(x, _) => x.0.iter().map(|(_, expr)| expr).all(Self::is_constant), _ => false, @@ -1763,6 +1782,7 @@ impl Expr { | Self::Unit(_) => false, Self::StringConstant(_, _) + | Self::InterpolatedString(_) | Self::FnCall(_, _) | Self::Stmt(_) | Self::Dot(_, _) @@ -1814,7 +1834,7 @@ impl Expr { } } } - Self::Array(x, _) => { + Self::InterpolatedString(x) | Self::Array(x, _) => { for e in x.as_ref() { if !e.walk(path, on_node) { return false; diff --git a/src/engine.rs b/src/engine.rs index 15ed6f36..ab081435 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -25,8 +25,8 @@ use crate::stdlib::{ use crate::syntax::CustomSyntax; use crate::utils::get_hasher; use crate::{ - Dynamic, EvalAltResult, FnPtr, Identifier, Module, Position, RhaiResult, Scope, Shared, - StaticVec, + Dynamic, EvalAltResult, FnPtr, Identifier, ImmutableString, Module, Position, RhaiResult, + Scope, Shared, StaticVec, }; #[cfg(not(feature = "no_index"))] @@ -200,6 +200,9 @@ pub const FN_ANONYMOUS: &str = "anon$"; /// Standard equality comparison operator. pub const OP_EQUALS: &str = "=="; +/// Standard concatenation operator. +pub const OP_CONCAT: &str = "+="; + /// Standard method function for containment testing. /// /// The `in` operator is implemented as a call to this method. @@ -410,7 +413,7 @@ impl<'a> Target<'a> { Self::Value(_) => panic!("cannot update a value"), #[cfg(not(feature = "no_index"))] Self::StringChar(s, index, _) => { - let s = &mut *s.write_lock::().unwrap(); + let s = &mut *s.write_lock::().unwrap(); // Replace the character at the specified index position let new_ch = new_val.as_char().map_err(|err| { @@ -591,7 +594,7 @@ pub struct Limits { /// Not available under `no_module`. #[cfg(not(feature = "no_module"))] pub max_modules: usize, - /// Maximum length of a [string][crate::ImmutableString]. + /// Maximum length of a [string][ImmutableString]. pub max_string_size: Option, /// Maximum length of an [array][Array]. /// @@ -714,6 +717,9 @@ pub struct Engine { /// A map mapping type names to pretty-print names. pub(crate) type_names: BTreeMap, + /// An empty [`ImmutableString`] for cloning purposes. + pub(crate) empty_string: ImmutableString, + /// A set of symbols to disable. pub(crate) disabled_symbols: BTreeSet, /// A map containing custom keywords and precedence to recognize. @@ -815,6 +821,7 @@ impl Engine { module_resolver: Box::new(crate::module::resolvers::DummyModuleResolver::new()), type_names: Default::default(), + empty_string: Default::default(), disabled_symbols: Default::default(), custom_keywords: Default::default(), custom_syntax: Default::default(), @@ -875,6 +882,7 @@ impl Engine { module_resolver: Box::new(crate::module::resolvers::DummyModuleResolver::new()), type_names: Default::default(), + empty_string: Default::default(), disabled_symbols: Default::default(), custom_keywords: Default::default(), custom_syntax: Default::default(), @@ -1587,8 +1595,8 @@ impl Engine { #[cfg(not(feature = "no_object"))] Dynamic(Union::Map(map, _)) => { // val_map[idx] - let index = &*idx.read_lock::().ok_or_else(|| { - self.make_type_mismatch_err::(idx.type_name(), idx_pos) + let index = &*idx.read_lock::().ok_or_else(|| { + self.make_type_mismatch_err::(idx.type_name(), idx_pos) })?; if _create && !map.contains_key(index.as_str()) { @@ -1698,6 +1706,34 @@ impl Engine { self.eval_dot_index_chain(scope, mods, state, lib, this_ptr, expr, level, None) } + // `... ${...} ...` + Expr::InterpolatedString(x) => { + let mut pos = expr.position(); + let mut result: Dynamic = self.empty_string.clone().into(); + + for expr in x.iter() { + let item = self.eval_expr(scope, mods, state, lib, this_ptr, expr, level)?; + self.eval_op_assignment( + mods, + state, + lib, + Some(OpAssignment::new(OP_CONCAT)), + pos, + (&mut result).into(), + item, + expr.position(), + )?; + pos = expr.position(); + } + + assert!( + result.is::(), + "interpolated string must be a string" + ); + + Ok(result) + } + #[cfg(not(feature = "no_index"))] Expr::Array(x, _) => { let mut arr = Array::with_capacity(x.len()); @@ -1707,7 +1743,7 @@ impl Engine { .flatten(), ); } - Ok(Dynamic(Union::Array(Box::new(arr), AccessMode::ReadWrite))) + Ok(arr.into()) } #[cfg(not(feature = "no_object"))] @@ -1718,7 +1754,7 @@ impl Engine { .eval_expr(scope, mods, state, lib, this_ptr, expr, level)? .flatten(); } - Ok(Dynamic(Union::Map(Box::new(map), AccessMode::ReadWrite))) + Ok(map.into()) } // Normal function call @@ -2445,7 +2481,7 @@ impl Engine { if let Some(path) = self .eval_expr(scope, mods, state, lib, this_ptr, &expr, level)? - .try_cast::() + .try_cast::() { use crate::ModuleResolver; @@ -2481,7 +2517,7 @@ impl Engine { Ok(Dynamic::UNIT) } else { - Err(self.make_type_mismatch_err::("", expr.position())) + Err(self.make_type_mismatch_err::("", expr.position())) } } diff --git a/src/engine_api.rs b/src/engine_api.rs index b1a68ea9..3b4cdee9 100644 --- a/src/engine_api.rs +++ b/src/engine_api.rs @@ -5,9 +5,11 @@ use crate::engine::{EvalContext, Imports, State}; use crate::fn_native::{FnCallArgs, SendSync}; use crate::fn_register::RegisterNativeFunction; use crate::optimize::OptimizationLevel; +use crate::parser::ParseState; use crate::stdlib::{ any::{type_name, TypeId}, boxed::Box, + num::NonZeroUsize, string::String, }; use crate::{ @@ -1156,8 +1158,22 @@ impl Engine { scripts: &[&str], optimization_level: OptimizationLevel, ) -> Result { - let stream = self.lex_raw(scripts, None); - self.parse(&mut stream.peekable(), scope, optimization_level) + let (stream, buffer) = self.lex_raw(scripts, None); + let mut state = ParseState::new( + self, + buffer, + #[cfg(not(feature = "unchecked"))] + NonZeroUsize::new(self.max_expr_depth()), + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_function"))] + NonZeroUsize::new(self.max_function_expr_depth()), + ); + self.parse( + &mut stream.peekable(), + &mut state, + scope, + optimization_level, + ) } /// Read the contents of a file into a string. #[cfg(not(feature = "no_std"))] @@ -1331,7 +1347,7 @@ impl Engine { .into()); }; - let stream = self.lex_raw( + let (stream, buffer) = self.lex_raw( &scripts, Some(if has_null { |token| match token { @@ -1344,8 +1360,22 @@ impl Engine { }), ); - let ast = - self.parse_global_expr(&mut stream.peekable(), &scope, OptimizationLevel::None)?; + let mut state = ParseState::new( + self, + buffer, + #[cfg(not(feature = "unchecked"))] + NonZeroUsize::new(self.max_expr_depth()), + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_function"))] + NonZeroUsize::new(self.max_function_expr_depth()), + ); + + let ast = self.parse_global_expr( + &mut stream.peekable(), + &mut state, + &scope, + OptimizationLevel::None, + )?; // Handle null - map to () if has_null { @@ -1424,10 +1454,19 @@ impl Engine { script: &str, ) -> Result { let scripts = [script]; - let stream = self.lex_raw(&scripts, None); + let (stream, buffer) = self.lex_raw(&scripts, None); let mut peekable = stream.peekable(); - self.parse_global_expr(&mut peekable, scope, self.optimization_level) + let mut state = ParseState::new( + self, + buffer, + #[cfg(not(feature = "unchecked"))] + NonZeroUsize::new(self.max_expr_depth()), + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_function"))] + NonZeroUsize::new(self.max_function_expr_depth()), + ); + self.parse_global_expr(&mut peekable, &mut state, scope, self.optimization_level) } /// Evaluate a script file. /// @@ -1585,10 +1624,24 @@ impl Engine { script: &str, ) -> Result> { let scripts = [script]; - let stream = self.lex_raw(&scripts, None); + let (stream, buffer) = self.lex_raw(&scripts, None); + let mut state = ParseState::new( + self, + buffer, + #[cfg(not(feature = "unchecked"))] + NonZeroUsize::new(self.max_expr_depth()), + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_function"))] + NonZeroUsize::new(self.max_function_expr_depth()), + ); // No need to optimize a lone expression - let ast = self.parse_global_expr(&mut stream.peekable(), scope, OptimizationLevel::None)?; + let ast = self.parse_global_expr( + &mut stream.peekable(), + &mut state, + scope, + OptimizationLevel::None, + )?; self.eval_ast_with_scope(scope, &ast) } @@ -1726,8 +1779,24 @@ impl Engine { script: &str, ) -> Result<(), Box> { let scripts = [script]; - let stream = self.lex_raw(&scripts, None); - let ast = self.parse(&mut stream.peekable(), scope, self.optimization_level)?; + let (stream, buffer) = self.lex_raw(&scripts, None); + let mut state = ParseState::new( + self, + buffer, + #[cfg(not(feature = "unchecked"))] + NonZeroUsize::new(self.max_expr_depth()), + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_function"))] + NonZeroUsize::new(self.max_function_expr_depth()), + ); + + let ast = self.parse( + &mut stream.peekable(), + &mut state, + scope, + self.optimization_level, + )?; + self.consume_ast_with_scope(scope, &ast) } /// Evaluate an AST, but throw away the result and only return error (if any). diff --git a/src/optimize.rs b/src/optimize.rs index ae0f5dce..9a86e1de 100644 --- a/src/optimize.rs +++ b/src/optimize.rs @@ -609,6 +609,16 @@ fn optimize_expr(expr: &mut Expr, state: &mut State) { match expr { // {} Expr::Stmt(x) if x.statements.is_empty() => { state.set_dirty(); *expr = Expr::Unit(x.pos) } + // { Stmt(Expr) } + Expr::Stmt(x) if x.statements.len() == 1 && x.statements[0].is_pure() && matches!(x.statements[0], Stmt::Expr(_)) => + { + state.set_dirty(); + if let Stmt::Expr(e) = mem::take(&mut x.statements[0]) { + *expr = e; + } else { + unreachable!(); + } + } // { stmt; ... } - do not count promotion as dirty because it gets turned back into an array Expr::Stmt(x) => x.statements = optimize_stmt_block(mem::take(&mut x.statements).into_vec(), state, true, true, false).into(), // lhs.rhs @@ -664,6 +674,59 @@ fn optimize_expr(expr: &mut Expr, state: &mut State) { // lhs[rhs] (lhs, rhs) => { optimize_expr(lhs, state); optimize_expr(rhs, state); } }, + // `` + Expr::InterpolatedString(x) if x.is_empty() => { + state.set_dirty(); + *expr = Expr::StringConstant(state.engine.empty_string.clone(), Position::NONE); + } + // `...` + Expr::InterpolatedString(x) if x.len() == 1 && matches!(x[0], Expr::StringConstant(_, _)) => { + state.set_dirty(); + *expr = mem::take(&mut x[0]); + } + // `... ${ ... } ...` + Expr::InterpolatedString(x) => { + x.iter_mut().for_each(|expr| optimize_expr(expr, state)); + + let mut n= 0; + + // Merge consecutive strings + while n < x.len()-1 { + match (mem::take(&mut x[n]), mem::take(&mut x[n+1])) { + (Expr::StringConstant(mut s1, pos), Expr::StringConstant(s2, _)) => { + s1 += s2; + x[n] = Expr::StringConstant(s1, pos); + x.remove(n+1); + state.set_dirty(); + } + (expr1, Expr::Unit(_)) => { + x[n] = expr1; + x.remove(n+1); + state.set_dirty(); + } + (Expr::Unit(_), expr2) => { + x[n+1] = expr2; + x.remove(n); + state.set_dirty(); + } + (expr1, Expr::StringConstant(s, _)) if s.is_empty() => { + x[n] = expr1; + x.remove(n+1); + state.set_dirty(); + } + (Expr::StringConstant(s, _), expr2) if s.is_empty()=> { + x[n+1] = expr2; + x.remove(n); + state.set_dirty(); + } + (expr1, expr2) => { + x[n] = expr1; + x[n+1] = expr2; + n += 1; + } + } + } + } // [ constant .. ] #[cfg(not(feature = "no_index"))] Expr::Array(_, _) if expr.is_constant() => { diff --git a/src/packages/string_more.rs b/src/packages/string_more.rs index 817a43bb..4fd3062b 100644 --- a/src/packages/string_more.rs +++ b/src/packages/string_more.rs @@ -23,9 +23,26 @@ mod string_functions { use crate::ImmutableString; #[rhai_fn(name = "+", name = "append")] - pub fn add_append(ctx: NativeCallContext, string: &str, mut item: Dynamic) -> ImmutableString { + pub fn add_append( + ctx: NativeCallContext, + string: ImmutableString, + mut item: Dynamic, + ) -> ImmutableString { let s = print_with_func(FUNC_TO_STRING, &ctx, &mut item); - format!("{}{}", string, s).into() + + if s.is_empty() { + string + } else { + format!("{}{}", string, s).into() + } + } + #[rhai_fn(name = "+=")] + pub fn append(ctx: NativeCallContext, string: &mut ImmutableString, mut item: Dynamic) { + let s = print_with_func(FUNC_TO_STRING, &ctx, &mut item); + + if !s.is_empty() { + string.make_mut().push_str(&s); + } } #[rhai_fn(name = "+", pure)] pub fn add_prepend( @@ -33,8 +50,22 @@ mod string_functions { item: &mut Dynamic, string: &str, ) -> ImmutableString { - let s = print_with_func(FUNC_TO_STRING, &ctx, item); - format!("{}{}", s, string).into() + let mut s = print_with_func(FUNC_TO_STRING, &ctx, item); + + if string.is_empty() { + s + } else { + s.make_mut().push_str(string); + s.into() + } + } + #[rhai_fn(name = "+=")] + pub fn prepend(ctx: NativeCallContext, item: &mut Dynamic, string: &str) { + let mut s = print_with_func(FUNC_TO_STRING, &ctx, item); + + if !string.is_empty() { + s.make_mut().push_str(string); + } } #[rhai_fn(name = "+")] @@ -137,13 +168,18 @@ mod string_functions { .unwrap_or(-1 as INT) } - pub fn sub_string(string: &str, start: INT, len: INT) -> ImmutableString { + pub fn sub_string( + ctx: NativeCallContext, + string: &str, + start: INT, + len: INT, + ) -> ImmutableString { let offset = if string.is_empty() || len <= 0 { - return "".to_string().into(); + return ctx.engine().empty_string.clone().into(); } else if start < 0 { 0 } else if start as usize >= string.chars().count() { - return "".to_string().into(); + return ctx.engine().empty_string.clone().into(); } else { start as usize }; @@ -165,9 +201,13 @@ mod string_functions { .into() } #[rhai_fn(name = "sub_string")] - pub fn sub_string_starting_from(string: &str, start: INT) -> ImmutableString { + pub fn sub_string_starting_from( + ctx: NativeCallContext, + string: &str, + start: INT, + ) -> ImmutableString { let len = string.len() as INT; - sub_string(string, start, len) + sub_string(ctx, string, start, len) } #[rhai_fn(name = "crop")] @@ -341,9 +381,9 @@ mod string_functions { string.chars().map(Into::::into).collect() } #[rhai_fn(name = "split")] - pub fn split_at(string: ImmutableString, start: INT) -> Array { + pub fn split_at(ctx: NativeCallContext, string: ImmutableString, start: INT) -> Array { if start <= 0 { - vec!["".into(), string.into()] + vec![ctx.engine().empty_string.clone().into(), string.into()] } else { let prefix: String = string.chars().take(start as usize).collect(); let prefix_len = prefix.len(); diff --git a/src/parser.rs b/src/parser.rs index 8fb73c08..7f3a4315 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -11,6 +11,7 @@ use crate::optimize::optimize_into_ast; use crate::optimize::OptimizationLevel; use crate::stdlib::{ boxed::Box, + cell::Cell, collections::BTreeMap, format, hash::{Hash, Hasher}, @@ -40,9 +41,11 @@ type FunctionsLib = BTreeMap>; /// A type that encapsulates the current state of the parser. #[derive(Debug)] -struct ParseState<'e> { +pub struct ParseState<'e> { /// Reference to the scripting [`Engine`]. engine: &'e Engine, + /// Input stream buffer containing the next character to read. + buffer: Shared>>, /// Interned strings. interned_strings: IdentifierBuilder, /// Encapsulates a local stack with variable names to simulate an actual runtime scope. @@ -75,6 +78,7 @@ impl<'e> ParseState<'e> { #[inline(always)] pub fn new( engine: &'e Engine, + buffer: Shared>>, #[cfg(not(feature = "unchecked"))] max_expr_depth: Option, #[cfg(not(feature = "unchecked"))] #[cfg(not(feature = "no_function"))] @@ -82,6 +86,7 @@ impl<'e> ParseState<'e> { ) -> Self { Self { engine, + buffer, #[cfg(not(feature = "unchecked"))] max_expr_depth, #[cfg(not(feature = "unchecked"))] @@ -458,7 +463,7 @@ fn parse_index_chain( .into_err(*pos)) } Expr::IntegerConstant(_, pos) => match lhs { - Expr::Array(_, _) | Expr::StringConstant(_, _) => (), + Expr::Array(_, _) | Expr::StringConstant(_, _) | Expr::InterpolatedString(_) => (), Expr::Map(_, _) => { return Err(PERR::MalformedIndexExpr( @@ -490,14 +495,14 @@ fn parse_index_chain( }, // lhs[string] - Expr::StringConstant(_, pos) => match lhs { + Expr::StringConstant(_, _) | Expr::InterpolatedString(_) => match lhs { Expr::Map(_, _) => (), - Expr::Array(_, _) | Expr::StringConstant(_, _) => { + Expr::Array(_, _) | Expr::StringConstant(_, _) | Expr::InterpolatedString(_) => { return Err(PERR::MalformedIndexExpr( "Array or string expects numeric index, not a string".into(), ) - .into_err(*pos)) + .into_err(idx_expr.position())) } #[cfg(not(feature = "no_float"))] @@ -979,6 +984,7 @@ fn parse_primary( Token::Pipe | Token::Or if settings.allow_anonymous_fn => { let mut new_state = ParseState::new( state.engine, + state.buffer.clone(), #[cfg(not(feature = "unchecked"))] state.max_function_expr_depth, #[cfg(not(feature = "unchecked"))] @@ -1010,6 +1016,50 @@ fn parse_primary( expr } + // Interpolated string + Token::InterpolatedString(_) => { + let mut segments: StaticVec = Default::default(); + + if let (Token::InterpolatedString(s), pos) = input.next().unwrap() { + segments.push(Expr::StringConstant(s.into(), pos)); + } else { + unreachable!(); + } + + loop { + let expr = match parse_block(input, state, lib, settings.level_up())? { + block @ Stmt::Block(_, _) => Expr::Stmt(Box::new(block.into())), + stmt => unreachable!("expecting Stmt::Block, but gets {:?}", stmt), + }; + segments.push(expr); + + // Make sure to parse the following as text + state.buffer.set(Some('`')); + + match input.next().unwrap() { + (Token::StringConstant(s), pos) => { + if !s.is_empty() { + segments.push(Expr::StringConstant(s.into(), pos)); + } + // End the interpolated string if it is terminated by a back-tick. + break; + } + (Token::InterpolatedString(s), pos) => { + if !s.is_empty() { + segments.push(Expr::StringConstant(s.into(), pos)); + } + } + (token, _) => unreachable!( + "expected a string within an interpolated string literal, but gets {:?}", + token + ), + } + } + + println!("Interpolated string: {:?}", segments); + Expr::InterpolatedString(Box::new(segments)) + } + // Array literal #[cfg(not(feature = "no_index"))] Token::LeftBracket => parse_array_literal(input, state, lib, settings.level_up())?, @@ -1020,8 +1070,8 @@ fn parse_primary( // Identifier Token::Identifier(_) => { - let s = match input.next().unwrap().0 { - Token::Identifier(s) => s, + let s = match input.next().unwrap() { + (Token::Identifier(s), _) => s, _ => unreachable!(), }; @@ -1067,8 +1117,8 @@ fn parse_primary( // Reserved keyword or symbol Token::Reserved(_) => { - let s = match input.next().unwrap().0 { - Token::Reserved(s) => s, + let s = match input.next().unwrap() { + (Token::Reserved(s), _) => s, _ => unreachable!(), }; @@ -1101,14 +1151,10 @@ fn parse_primary( } } - Token::LexError(_) => { - let err = match input.next().unwrap().0 { - Token::LexError(err) => err, - _ => unreachable!(), - }; - - return Err(err.into_err(settings.pos)); - } + Token::LexError(_) => match input.next().unwrap() { + (Token::LexError(err), _) => return Err(err.into_err(settings.pos)), + _ => unreachable!(), + }, _ => { return Err(LexError::UnexpectedInput(token.syntax().to_string()).into_err(settings.pos)) @@ -1374,13 +1420,7 @@ fn make_assignment_stmt<'a>( let op_info = if op.is_empty() { None } else { - let op2 = &op[..op.len() - 1]; // extract operator without = - - Some(OpAssignment { - hash_op_assign: calc_fn_hash(empty(), &op, 2), - hash_op: calc_fn_hash(empty(), op2, 2), - op, - }) + Some(OpAssignment::new(op)) }; match &lhs { @@ -1460,7 +1500,7 @@ fn parse_op_assignment_stmt( settings.pos = *token_pos; let op = match token { - Token::Equals => "".into(), + Token::Equals => "", Token::PlusAssign | Token::MinusAssign @@ -1797,9 +1837,10 @@ fn parse_custom_syntax( // Add enough empty variable names to the stack. // Empty variable names act as a barrier so earlier variables will not be matched. // Variable searches stop at the first empty variable name. + let empty = state.get_identifier(""); state.stack.resize( state.stack.len() + delta as usize, - ("".into(), AccessMode::ReadWrite), + (empty, AccessMode::ReadWrite), ); } delta if delta < 0 && state.stack.len() <= delta.abs() as usize => state.stack.clear(), @@ -2502,6 +2543,7 @@ fn parse_stmt( (Token::Fn, pos) => { let mut new_state = ParseState::new( state.engine, + state.buffer.clone(), #[cfg(not(feature = "unchecked"))] state.max_function_expr_depth, #[cfg(not(feature = "unchecked"))] @@ -2930,18 +2972,11 @@ impl Engine { pub(crate) fn parse_global_expr( &self, input: &mut TokenStream, + state: &mut ParseState, scope: &Scope, optimization_level: OptimizationLevel, ) -> Result { let mut functions = Default::default(); - let mut state = ParseState::new( - self, - #[cfg(not(feature = "unchecked"))] - NonZeroUsize::new(self.max_expr_depth()), - #[cfg(not(feature = "unchecked"))] - #[cfg(not(feature = "no_function"))] - NonZeroUsize::new(self.max_function_expr_depth()), - ); let settings = ParseSettings { allow_if_expr: false, @@ -2954,7 +2989,7 @@ impl Engine { level: 0, pos: Position::NONE, }; - let expr = parse_expr(input, &mut state, &mut functions, settings)?; + let expr = parse_expr(input, state, &mut functions, settings)?; assert!(functions.is_empty()); @@ -2978,17 +3013,10 @@ impl Engine { fn parse_global_level( &self, input: &mut TokenStream, + state: &mut ParseState, ) -> Result<(Vec, Vec>), ParseError> { let mut statements = Vec::with_capacity(16); let mut functions = BTreeMap::new(); - let mut state = ParseState::new( - self, - #[cfg(not(feature = "unchecked"))] - NonZeroUsize::new(self.max_expr_depth()), - #[cfg(not(feature = "unchecked"))] - #[cfg(not(feature = "no_function"))] - NonZeroUsize::new(self.max_function_expr_depth()), - ); while !input.peek().unwrap().0.is_eof() { let settings = ParseSettings { @@ -3003,7 +3031,7 @@ impl Engine { pos: Position::NONE, }; - let stmt = parse_stmt(input, &mut state, &mut functions, settings)?; + let stmt = parse_stmt(input, state, &mut functions, settings)?; if stmt.is_noop() { continue; @@ -3046,10 +3074,11 @@ impl Engine { pub(crate) fn parse( &self, input: &mut TokenStream, + state: &mut ParseState, scope: &Scope, optimization_level: OptimizationLevel, ) -> Result { - let (statements, lib) = self.parse_global_level(input)?; + let (statements, lib) = self.parse_global_level(input, state)?; Ok( // Optimize AST diff --git a/src/token.rs b/src/token.rs index 20990833..187d3726 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1,11 +1,14 @@ //! Main module defining the lexer and parser. +use std::iter::FusedIterator; + use crate::engine::{ Precedence, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_FN_PTR, KEYWORD_FN_PTR_CALL, KEYWORD_FN_PTR_CURRY, KEYWORD_IS_DEF_VAR, KEYWORD_PRINT, KEYWORD_THIS, KEYWORD_TYPE_OF, }; use crate::stdlib::{ borrow::Cow, + cell::Cell, char, fmt, format, iter::Peekable, num::NonZeroUsize, @@ -13,7 +16,7 @@ use crate::stdlib::{ str::{Chars, FromStr}, string::{String, ToString}, }; -use crate::{Engine, LexError, StaticVec, INT}; +use crate::{Engine, LexError, Shared, StaticVec, INT}; #[cfg(not(feature = "no_float"))] use crate::ast::FloatWrapper; @@ -209,6 +212,8 @@ pub enum Token { CharConstant(char), /// A string constant. StringConstant(String), + /// An interpolated string. + InterpolatedString(String), /// `{` LeftBrace, /// `}` @@ -485,6 +490,7 @@ impl Token { #[cfg(feature = "decimal")] DecimalConstant(d) => d.to_string().into(), StringConstant(_) => "string".into(), + InterpolatedString(_) => "string".into(), CharConstant(c) => c.to_string().into(), Identifier(s) => s.clone().into(), Reserved(s) => s.clone().into(), @@ -855,18 +861,30 @@ pub fn parse_string_literal( termination_char: char, continuation: bool, verbatim: bool, -) -> Result { + allow_interpolation: bool, +) -> Result<(String, bool), (LexError, Position)> { let mut result: smallvec::SmallVec<[char; 16]> = Default::default(); let mut escape: smallvec::SmallVec<[char; 12]> = Default::default(); let start = *pos; let mut skip_whitespace_until = 0; + let mut interpolated = false; loop { let next_char = stream.get_next().ok_or((LERR::UnterminatedString, start))?; pos.advance(); + // String interpolation? + if allow_interpolation + && next_char == '$' + && escape.is_empty() + && stream.peek_next().map(|ch| ch == '{').unwrap_or(false) + { + interpolated = true; + break; + } + if let Some(max) = state.max_string_size { if result.len() > max.get() { return Err((LexError::StringTooLong(max.get()), *pos)); @@ -1000,7 +1018,7 @@ pub fn parse_string_literal( } } - Ok(s) + Ok((s, interpolated)) } /// Consume the next character. @@ -1296,10 +1314,11 @@ fn get_next_token_inner( // " - string literal ('"', _) => { - return parse_string_literal(stream, state, pos, c, true, false).map_or_else( - |err| Some((Token::LexError(err.0), err.1)), - |out| Some((Token::StringConstant(out), start_pos)), - ); + return parse_string_literal(stream, state, pos, c, true, false, false) + .map_or_else( + |err| Some((Token::LexError(err.0), err.1)), + |(result, _)| Some((Token::StringConstant(result), start_pos)), + ); } // ` - string literal ('`', _) => { @@ -1320,9 +1339,15 @@ fn get_next_token_inner( _ => (), } - return parse_string_literal(stream, state, pos, c, false, true).map_or_else( + return parse_string_literal(stream, state, pos, c, false, true, true).map_or_else( |err| Some((Token::LexError(err.0), err.1)), - |out| Some((Token::StringConstant(out), start_pos)), + |(result, interpolated)| { + if interpolated { + Some((Token::InterpolatedString(result), start_pos)) + } else { + Some((Token::StringConstant(result), start_pos)) + } + }, ); } @@ -1335,9 +1360,9 @@ fn get_next_token_inner( } ('\'', _) => { return Some( - parse_string_literal(stream, state, pos, c, false, false).map_or_else( + parse_string_literal(stream, state, pos, c, false, false, false).map_or_else( |err| (Token::LexError(err.0), err.1), - |result| { + |(result, _)| { let mut chars = result.chars(); let first = chars.next().unwrap(); @@ -1765,6 +1790,10 @@ pub struct MultiInputsStream<'a> { impl InputStream for MultiInputsStream<'_> { #[inline(always)] fn unget(&mut self, ch: char) { + if self.buf.is_some() { + panic!("cannot unget two characters in a row"); + } + self.buf = Some(ch); } fn get_next(&mut self) -> Option { @@ -1813,6 +1842,8 @@ pub struct TokenIterator<'a> { state: TokenizeState, /// Current position. pos: Position, + /// Buffer containing the next character to read, if any. + buffer: Shared>>, /// Input character stream. stream: MultiInputsStream<'a>, /// A processor function that maps a token to another. @@ -1823,6 +1854,11 @@ impl<'a> Iterator for TokenIterator<'a> { type Item = (Token, Position); fn next(&mut self) -> Option { + if let Some(ch) = self.buffer.take() { + self.stream.unget(ch); + self.pos.rewind(); + } + let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) { // {EOF} None => return None, @@ -1901,12 +1937,17 @@ impl<'a> Iterator for TokenIterator<'a> { } } +impl FusedIterator for TokenIterator<'_> {} + impl Engine { /// _(INTERNALS)_ Tokenize an input text stream. /// Exported under the `internals` feature only. #[cfg(feature = "internals")] #[inline(always)] - pub fn lex<'a>(&'a self, input: impl IntoIterator) -> TokenIterator<'a> { + pub fn lex<'a>( + &'a self, + input: impl IntoIterator, + ) -> (TokenIterator<'a>, Shared>>) { self.lex_raw(input, None) } /// _(INTERNALS)_ Tokenize an input text stream with a mapping function. @@ -1917,7 +1958,7 @@ impl Engine { &'a self, input: impl IntoIterator, map: fn(Token) -> Token, - ) -> TokenIterator<'a> { + ) -> (TokenIterator<'a>, Shared>>) { self.lex_raw(input, Some(map)) } /// Tokenize an input text stream with an optional mapping function. @@ -1926,27 +1967,34 @@ impl Engine { &'a self, input: impl IntoIterator, map: Option Token>, - ) -> TokenIterator<'a> { - TokenIterator { - engine: self, - state: TokenizeState { - #[cfg(not(feature = "unchecked"))] - max_string_size: self.limits.max_string_size, - #[cfg(feature = "unchecked")] - max_string_size: None, - non_unary: false, - comment_level: 0, - end_with_none: false, - include_comments: false, - disable_doc_comments: self.disable_doc_comments, + ) -> (TokenIterator<'a>, Shared>>) { + let buffer: Shared>> = Cell::new(None).into(); + let buffer2 = buffer.clone(); + + ( + TokenIterator { + engine: self, + state: TokenizeState { + #[cfg(not(feature = "unchecked"))] + max_string_size: self.limits.max_string_size, + #[cfg(feature = "unchecked")] + max_string_size: None, + non_unary: false, + comment_level: 0, + end_with_none: false, + include_comments: false, + disable_doc_comments: self.disable_doc_comments, + }, + pos: Position::new(1, 0), + buffer, + stream: MultiInputsStream { + buf: None, + streams: input.into_iter().map(|s| s.chars().peekable()).collect(), + index: 0, + }, + map, }, - pos: Position::new(1, 0), - stream: MultiInputsStream { - buf: None, - streams: input.into_iter().map(|s| s.chars().peekable()).collect(), - index: 0, - }, - map, - } + buffer2, + ) } } diff --git a/tests/string.rs b/tests/string.rs index 92145239..4428d490 100644 --- a/tests/string.rs +++ b/tests/string.rs @@ -310,3 +310,62 @@ fn test_string_split() -> Result<(), Box> { Ok(()) } + +#[test] +fn test_string_interpolated() -> Result<(), Box> { + let engine = Engine::new(); + + assert_eq!( + engine.eval::( + r" + let x = 40; + `hello ${x+2} worlds!` + " + )?, + "hello 42 worlds!" + ); + + assert_eq!( + engine.eval::( + r" + const x = 42; + `hello ${x} worlds!` + " + )?, + "hello 42 worlds!" + ); + + assert_eq!(engine.eval::("`hello ${}world!`")?, "hello world!"); + + assert_eq!( + engine.eval::( + r" + const x = 42; + `${x} worlds!` + " + )?, + "42 worlds!" + ); + + assert_eq!( + engine.eval::( + r" + const x = 42; + `hello ${x}` + " + )?, + "hello 42" + ); + + assert_eq!( + engine.eval::( + r" + const x = 20; + `hello ${let y = x + 1; `${y * 2}`} worlds!` + " + )?, + "hello 42 worlds!" + ); + + Ok(()) +}