Add external control interface for tokenizer.

parent aacb7f0b24
commit bc9c1ab850
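
In short: the parser and the tokenizer previously communicated through a shared one-character push-back buffer (`Shared<Cell<Option<char>>>`). This commit replaces that buffer with an external control interface: `lex_raw` now hands back a `TokenizerControl` handle, a shared cell holding a `TokenizeControlBlock` of commands for the running tokenizer. The only command so far is the `is_within_text` flag, which string interpolation uses to switch the tokenizer back to text mode after an embedded `${ ... }` expression. As a side effect, `ParseState::new` shrinks to two arguments and reads its depth limits directly from the `Engine`. The hunks below touch the `Engine` compile/eval entry points, the parser's `ParseState`, and src/token.rs.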
@@ -9,7 +9,6 @@ use crate::parser::ParseState;
 use crate::stdlib::{
     any::{type_name, TypeId},
     boxed::Box,
-    num::NonZeroUsize,
     string::String,
 };
 use crate::{
@@ -1158,16 +1157,8 @@ impl Engine {
         scripts: &[&str],
         optimization_level: OptimizationLevel,
     ) -> Result<AST, ParseError> {
-        let (stream, buffer) = self.lex_raw(scripts, None);
-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let (stream, tokenizer_control) = self.lex_raw(scripts, None);
+        let mut state = ParseState::new(self, tokenizer_control);
         self.parse(
             &mut stream.peekable(),
             &mut state,
@@ -1347,7 +1338,7 @@ impl Engine {
            .into());
        };

-        let (stream, buffer) = self.lex_raw(
+        let (stream, tokenizer_control) = self.lex_raw(
            &scripts,
            Some(if has_null {
                |token| match token {
@@ -1360,15 +1351,7 @@ impl Engine {
            }),
        );

-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let mut state = ParseState::new(self, tokenizer_control);

        let ast = self.parse_global_expr(
            &mut stream.peekable(),
@@ -1454,18 +1437,10 @@ impl Engine {
         script: &str,
     ) -> Result<AST, ParseError> {
         let scripts = [script];
-        let (stream, buffer) = self.lex_raw(&scripts, None);
+        let (stream, tokenizer_control) = self.lex_raw(&scripts, None);

         let mut peekable = stream.peekable();
-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let mut state = ParseState::new(self, tokenizer_control);
         self.parse_global_expr(&mut peekable, &mut state, scope, self.optimization_level)
     }
     /// Evaluate a script file.
@@ -1624,16 +1599,8 @@ impl Engine {
         script: &str,
     ) -> Result<T, Box<EvalAltResult>> {
         let scripts = [script];
-        let (stream, buffer) = self.lex_raw(&scripts, None);
-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let (stream, tokenizer_control) = self.lex_raw(&scripts, None);
+        let mut state = ParseState::new(self, tokenizer_control);

         // No need to optimize a lone expression
         let ast = self.parse_global_expr(
@@ -1779,16 +1746,8 @@ impl Engine {
         script: &str,
     ) -> Result<(), Box<EvalAltResult>> {
         let scripts = [script];
-        let (stream, buffer) = self.lex_raw(&scripts, None);
-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let (stream, tokenizer_control) = self.lex_raw(&scripts, None);
+        let mut state = ParseState::new(self, tokenizer_control);

         let ast = self.parse(
             &mut stream.peekable(),
@@ -11,7 +11,6 @@ use crate::optimize::optimize_into_ast;
 use crate::optimize::OptimizationLevel;
 use crate::stdlib::{
     boxed::Box,
-    cell::Cell,
     collections::BTreeMap,
     format,
     hash::{Hash, Hasher},
@@ -22,7 +21,9 @@ use crate::stdlib::{
     vec::Vec,
 };
 use crate::syntax::{CustomSyntax, MARKER_BLOCK, MARKER_EXPR, MARKER_IDENT};
-use crate::token::{is_keyword_function, is_valid_identifier, Token, TokenStream};
+use crate::token::{
+    is_keyword_function, is_valid_identifier, Token, TokenStream, TokenizerControl,
+};
 use crate::utils::{get_hasher, IdentifierBuilder};
 use crate::{
     calc_fn_hash, Dynamic, Engine, Identifier, LexError, ParseError, ParseErrorType, Position,
@@ -45,7 +46,7 @@ pub struct ParseState<'e> {
     /// Reference to the scripting [`Engine`].
     engine: &'e Engine,
     /// Input stream buffer containing the next character to read.
-    buffer: Shared<Cell<Option<char>>>,
+    tokenizer_control: TokenizerControl,
     /// Interned strings.
     interned_strings: IdentifierBuilder,
     /// Encapsulates a local stack with variable names to simulate an actual runtime scope.
@@ -76,22 +77,15 @@ pub struct ParseState<'e> {
 impl<'e> ParseState<'e> {
     /// Create a new [`ParseState`].
     #[inline(always)]
-    pub fn new(
-        engine: &'e Engine,
-        buffer: Shared<Cell<Option<char>>>,
-        #[cfg(not(feature = "unchecked"))] max_expr_depth: Option<NonZeroUsize>,
-        #[cfg(not(feature = "unchecked"))]
-        #[cfg(not(feature = "no_function"))]
-        max_function_expr_depth: Option<NonZeroUsize>,
-    ) -> Self {
+    pub fn new(engine: &'e Engine, tokenizer_control: TokenizerControl) -> Self {
         Self {
             engine,
-            buffer,
+            tokenizer_control,
             #[cfg(not(feature = "unchecked"))]
-            max_expr_depth,
+            max_expr_depth: NonZeroUsize::new(engine.max_expr_depth()),
             #[cfg(not(feature = "unchecked"))]
             #[cfg(not(feature = "no_function"))]
-            max_function_expr_depth,
+            max_function_expr_depth: NonZeroUsize::new(engine.max_function_expr_depth()),
             #[cfg(not(feature = "no_closure"))]
             external_vars: Default::default(),
             #[cfg(not(feature = "no_closure"))]
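
With the constructor now reading `max_expr_depth` and `max_function_expr_depth` straight from the engine, every compile path in the engine API collapses to the same two-line shape, exactly as in the earlier hunks:

    let (stream, tokenizer_control) = self.lex_raw(&scripts, None);
    let mut state = ParseState::new(self, tokenizer_control);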
@@ -982,14 +976,8 @@ fn parse_primary(
         // | ...
         #[cfg(not(feature = "no_function"))]
         Token::Pipe | Token::Or if settings.allow_anonymous_fn => {
-            let mut new_state = ParseState::new(
-                state.engine,
-                state.buffer.clone(),
-                #[cfg(not(feature = "unchecked"))]
-                state.max_function_expr_depth,
-                #[cfg(not(feature = "unchecked"))]
-                state.max_function_expr_depth,
-            );
+            let mut new_state = ParseState::new(state.engine, state.tokenizer_control.clone());
+            new_state.max_expr_depth = new_state.max_function_expr_depth;

            let settings = ParseSettings {
                allow_if_expr: true,
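
This replaces the old trick of passing `state.max_function_expr_depth` into both depth slots of the constructor: the fresh `ParseState` starts from the engine's global limits, and the parser then narrows `max_expr_depth` with a plain field assignment, since inside a function body every expression is subject to the function-expression-depth limit.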
@@ -1034,7 +1022,9 @@ fn parse_primary(
                segments.push(expr);

                // Make sure to parse the following as text
-                state.buffer.set(Some('`'));
+                let mut control = state.tokenizer_control.get();
+                control.is_within_text = true;
+                state.tokenizer_control.set(control);

                match input.next().unwrap() {
                    (Token::StringConstant(s), pos) => {
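
This is the consumer side of the new interface. In an interpolated literal such as `ab${x}cd` (delimited by back-ticks), the tokenizer stops at `${`, the parser consumes the embedded expression, and this flag tells the tokenizer to resume reading the remainder as text rather than as code. The get-modify-set sequence is dictated by `Cell`: the control block is `Copy`, so it is copied out, edited, and written back whole, replacing the old hack of stuffing a literal back-tick character into the shared buffer.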
@@ -2540,14 +2530,9 @@ fn parse_stmt(

            match input.next().unwrap() {
                (Token::Fn, pos) => {
-                    let mut new_state = ParseState::new(
-                        state.engine,
-                        state.buffer.clone(),
-                        #[cfg(not(feature = "unchecked"))]
-                        state.max_function_expr_depth,
-                        #[cfg(not(feature = "unchecked"))]
-                        state.max_function_expr_depth,
-                    );
+                    let mut new_state =
+                        ParseState::new(state.engine, state.tokenizer_control.clone());
+                    new_state.max_expr_depth = new_state.max_function_expr_depth;

                    let settings = ParseSettings {
                        allow_if_expr: true,
src/token.rs (42 changes)
@@ -11,10 +11,11 @@ use crate::stdlib::{
     iter::{FusedIterator, Peekable},
     num::NonZeroUsize,
     ops::{Add, AddAssign},
+    rc::Rc,
     str::{Chars, FromStr},
     string::{String, ToString},
 };
-use crate::{Engine, LexError, Shared, StaticVec, INT};
+use crate::{Engine, LexError, StaticVec, INT};

 #[cfg(not(feature = "no_float"))]
 use crate::ast::FloatWrapper;
@@ -25,6 +26,17 @@ use rust_decimal::Decimal;
 #[cfg(not(feature = "no_function"))]
 use crate::engine::KEYWORD_IS_DEF_FN;

+/// A type containing commands to control the tokenizer.
+#[derive(Debug, Clone, Eq, PartialEq, Hash, Copy, Default)]
+pub struct TokenizeControlBlock {
+    /// Is the current tokenizer position within an interpolated text string?
+    /// This flag allows switching the tokenizer back to _text_ parsing after an interpolation stream.
+    pub is_within_text: bool,
+}
+
+/// A shared object that allows control of the tokenizer from outside.
+pub type TokenizerControl = Rc<Cell<TokenizeControlBlock>>;
+
 type LERR = LexError;

 /// Separator character for numbers.
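
Because `TokenizeControlBlock` is `Copy` and lives behind `Rc<Cell<...>>`, the handle is cheap to clone and every mutation is a get-modify-set round trip. A self-contained sketch of the handshake, using only the standard library (it mirrors the type above but is not the crate's actual code):

use std::cell::Cell;
use std::rc::Rc;

#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Default)]
struct TokenizeControlBlock {
    is_within_text: bool,
}

type TokenizerControl = Rc<Cell<TokenizeControlBlock>>;

fn main() {
    let control: TokenizerControl = Default::default();
    let parser_handle = control.clone(); // both sides share the same cell

    // Consumer (parser) side: issue a command.
    let mut block = parser_handle.get();
    block.is_within_text = true;
    parser_handle.set(block);

    // Producer (tokenizer) side: observe the command and reset it.
    let mut block = control.get();
    assert!(block.is_within_text);
    block.is_within_text = false;
    control.set(block);
}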
@@ -849,6 +861,9 @@ pub trait InputStream {
 /// _(INTERNALS)_ Parse a string literal ended by `termination_char`.
 /// Exported under the `internals` feature only.
 ///
+/// Returns the parsed string and a boolean indicating whether the string is
+/// terminated by an interpolation `${`.
+///
 /// # Volatile API
 ///
 /// This function is volatile and may change.
@@ -1840,8 +1855,8 @@ pub struct TokenIterator<'a> {
     state: TokenizeState,
     /// Current position.
     pos: Position,
-    /// Buffer containing the next character to read, if any.
-    buffer: Shared<Cell<Option<char>>>,
+    /// External interface for controlling the tokenizer.
+    tokenizer_control: TokenizerControl,
     /// Input character stream.
     stream: MultiInputsStream<'a>,
     /// A processor function that maps a token to another.
@@ -1852,9 +1867,16 @@ impl<'a> Iterator for TokenIterator<'a> {
     type Item = (Token, Position);

     fn next(&mut self) -> Option<Self::Item> {
-        if let Some(ch) = self.buffer.take() {
-            self.stream.unget(ch);
+        let mut control = self.tokenizer_control.get();
+
+        if control.is_within_text {
+            // Push a back-tick into the stream
+            self.stream.unget('`');
+            // Rewind the current position by one character
             self.pos.rewind();
+            // Reset it
+            control.is_within_text = false;
+            self.tokenizer_control.set(control);
         }

         let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
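
The `next` implementation above leans on two primitives: `unget`, which pushes one character back so it is read before the rest of the input, and `pos.rewind()`, which steps the reported position back so the re-inserted back-tick is not counted twice. A minimal sketch of the push-back idea (assumed names, not the crate's actual `MultiInputsStream`):

struct CharStream<'a> {
    pending: Option<char>, // one-character push-back slot
    inner: std::str::Chars<'a>,
}

impl<'a> CharStream<'a> {
    fn get_next(&mut self) -> Option<char> {
        // A pushed-back character takes priority over the real stream.
        self.pending.take().or_else(|| self.inner.next())
    }
    fn unget(&mut self, ch: char) {
        self.pending = Some(ch); // returned first by the next get_next() call
    }
}

fn main() {
    let mut s = CharStream { pending: None, inner: "bc".chars() };
    s.unget('a');
    assert_eq!(s.get_next(), Some('a'));
    assert_eq!(s.get_next(), Some('b'));
}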
@@ -1945,7 +1967,7 @@ impl Engine {
     pub fn lex<'a>(
         &'a self,
         input: impl IntoIterator<Item = &'a &'a str>,
-    ) -> (TokenIterator<'a>, Shared<Cell<Option<char>>>) {
+    ) -> (TokenIterator<'a>, TokenizerControl) {
         self.lex_raw(input, None)
     }
     /// _(INTERNALS)_ Tokenize an input text stream with a mapping function.
@@ -1956,7 +1978,7 @@ impl Engine {
         &'a self,
         input: impl IntoIterator<Item = &'a &'a str>,
         map: fn(Token) -> Token,
-    ) -> (TokenIterator<'a>, Shared<Cell<Option<char>>>) {
+    ) -> (TokenIterator<'a>, TokenizerControl) {
         self.lex_raw(input, Some(map))
     }
     /// Tokenize an input text stream with an optional mapping function.
@@ -1965,8 +1987,8 @@ impl Engine {
         &'a self,
         input: impl IntoIterator<Item = &'a &'a str>,
         map: Option<fn(Token) -> Token>,
-    ) -> (TokenIterator<'a>, Shared<Cell<Option<char>>>) {
-        let buffer: Shared<Cell<Option<char>>> = Cell::new(None).into();
+    ) -> (TokenIterator<'a>, TokenizerControl) {
+        let buffer: TokenizerControl = Default::default();
         let buffer2 = buffer.clone();

         (
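
A hedged usage sketch of the public wrapper: `lex` is exported only under the `internals` feature, and the printing below assumes the token and position types implement `Debug`/`Display` as in this crate, so treat it as illustrative rather than definitive:

use rhai::Engine;

fn main() {
    let engine = Engine::new();
    let scripts = ["let x = 40 + 2;"];

    // `lex` now also returns the TokenizerControl handle; it can be ignored
    // when the tokenizer does not need to be steered from outside.
    let (stream, _tokenizer_control) = engine.lex(scripts.iter());

    for (token, pos) in stream {
        println!("{:?} @ {}", token, pos);
    }
}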
@@ -1984,7 +2006,7 @@ impl Engine {
                    disable_doc_comments: self.disable_doc_comments,
                },
                pos: Position::new(1, 0),
-                buffer,
+                tokenizer_control: buffer,
                stream: MultiInputsStream {
                    buf: None,
                    streams: input.into_iter().map(|s| s.chars().peekable()).collect(),