Add Engine::compact_script.
This commit is contained in:
parent
d94f362b51
commit
9bf3a9d78f
@ -37,7 +37,7 @@ New features
|
|||||||
|
|
||||||
* A function pointer created via a closure definition now links to the particular anonymous function itself.
|
* A function pointer created via a closure definition now links to the particular anonymous function itself.
|
||||||
* This avoids a potentially expensive function lookup when the function pointer is called, speeding up closures.
|
* This avoids a potentially expensive function lookup when the function pointer is called, speeding up closures.
|
||||||
* An additional benefit is that function pointers can now be `export`ed from modules!
|
* Closures now also encapsulate their defining environment, so function pointers can now be freely `export`ed from modules!
|
||||||
|
|
||||||
### `!in`
|
### `!in`
|
||||||
|
|
||||||
@ -49,6 +49,12 @@ New features
|
|||||||
* The options are for future-proofing the API.
|
* The options are for future-proofing the API.
|
||||||
* In this version, it gains the ability to set the value of the _custom state_ (accessible via `NativeCallContext::tag`) for a function evaluation, overriding `Engine::set_default_tag`.
|
* In this version, it gains the ability to set the value of the _custom state_ (accessible via `NativeCallContext::tag`) for a function evaluation, overriding `Engine::set_default_tag`.
|
||||||
|
|
||||||
|
### Compact a script for compression
|
||||||
|
|
||||||
|
* `Engine::compact_script` is added, which takes a valid script (it still returns parsing errors) and returns a _compacted_ version of the script with all insignificant whitespace and all comments removed.
|
||||||
|
* A compact script compresses better than one with liberal whitespace and comments.
|
||||||
|
* Unlike some uglifiers or minifiers, `Engine::compact_script` does not optimize the script in any way, nor does it rename variables.
|
||||||
|
|
||||||
Enhancements
|
Enhancements
|
||||||
------------
|
------------
|
||||||
|
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
|
//! Module that provides formatting services to the [`Engine`].
|
||||||
use crate::packages::iter_basic::{BitRange, CharsStream, StepRange};
|
use crate::packages::iter_basic::{BitRange, CharsStream, StepRange};
|
||||||
|
use crate::parser::{ParseResult, ParseState};
|
||||||
use crate::{
|
use crate::{
|
||||||
Engine, ExclusiveRange, FnPtr, ImmutableString, InclusiveRange, Position, RhaiError, ERR,
|
Engine, ExclusiveRange, FnPtr, ImmutableString, InclusiveRange, OptimizationLevel, Position,
|
||||||
|
RhaiError, Scope, SmartString, StringsInterner, ERR,
|
||||||
};
|
};
|
||||||
use std::any::type_name;
|
use std::any::type_name;
|
||||||
#[cfg(feature = "no_std")]
|
#[cfg(feature = "no_std")]
|
||||||
@ -263,4 +266,33 @@ impl Engine {
|
|||||||
let t = self.map_type_name(type_name::<T>()).into();
|
let t = self.map_type_name(type_name::<T>()).into();
|
||||||
ERR::ErrorMismatchDataType(t, typ.into(), pos).into()
|
ERR::ErrorMismatchDataType(t, typ.into(), pos).into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Compact a script to eliminate insignificant whitespaces and comments.
|
||||||
|
///
|
||||||
|
/// This is useful to prepare a script for further compressing.
|
||||||
|
///
|
||||||
|
/// The output script is semantically identical to the input script, except smaller in size.
|
||||||
|
///
|
||||||
|
/// Unlike other uglifiers and minifiers, this method does not rename variables nor perform any
|
||||||
|
/// optimization on the input script.
|
||||||
|
#[inline]
|
||||||
|
pub fn compact_script(&self, script: impl AsRef<str>) -> ParseResult<String> {
|
||||||
|
let scripts = [script];
|
||||||
|
let (mut stream, tc) = self.lex_raw(&scripts, self.token_mapper.as_deref());
|
||||||
|
tc.borrow_mut().compressed = Some(String::new());
|
||||||
|
stream.state.last_token = Some(SmartString::new_const());
|
||||||
|
let scope = Scope::new();
|
||||||
|
let mut interner = StringsInterner::new();
|
||||||
|
let mut state = ParseState::new(&scope, &mut interner, tc);
|
||||||
|
let mut _ast = self.parse(
|
||||||
|
stream.peekable(),
|
||||||
|
&mut state,
|
||||||
|
#[cfg(not(feature = "no_optimize"))]
|
||||||
|
OptimizationLevel::None,
|
||||||
|
#[cfg(feature = "no_optimize")]
|
||||||
|
(),
|
||||||
|
)?;
|
||||||
|
let tc = state.tokenizer_control.borrow();
|
||||||
|
Ok(tc.compressed.as_ref().unwrap().into())
|
||||||
|
}
|
||||||
}
|
}
|
@ -23,7 +23,7 @@ pub mod limits_unchecked;
|
|||||||
|
|
||||||
pub mod events;
|
pub mod events;
|
||||||
|
|
||||||
pub mod type_names;
|
pub mod formatting;
|
||||||
|
|
||||||
pub mod custom_syntax;
|
pub mod custom_syntax;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
//! Module defining external-loaded modules for Rhai.
|
//! Module defining external-loaded modules for Rhai.
|
||||||
|
|
||||||
#[cfg(feature = "metadata")]
|
#[cfg(feature = "metadata")]
|
||||||
use crate::api::type_names::format_type;
|
use crate::api::formatting::format_type;
|
||||||
use crate::ast::FnAccess;
|
use crate::ast::FnAccess;
|
||||||
use crate::func::{
|
use crate::func::{
|
||||||
shared_take_or_clone, CallableFunction, FnCallArgs, IteratorFn, RegisterNativeFunction,
|
shared_take_or_clone, CallableFunction, FnCallArgs, IteratorFn, RegisterNativeFunction,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
//! Serialization of functions metadata.
|
//! Serialization of functions metadata.
|
||||||
#![cfg(feature = "metadata")]
|
#![cfg(feature = "metadata")]
|
||||||
|
|
||||||
use crate::api::type_names::format_type;
|
use crate::api::formatting::format_type;
|
||||||
use crate::module::{calc_native_fn_hash, FuncInfo, ModuleFlags};
|
use crate::module::{calc_native_fn_hash, FuncInfo, ModuleFlags};
|
||||||
use crate::{calc_fn_hash, Engine, FnAccess, SmartString, StaticVec, AST};
|
use crate::{calc_fn_hash, Engine, FnAccess, SmartString, StaticVec, AST};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
@ -26,6 +26,8 @@ pub struct TokenizerControlBlock {
|
|||||||
/// Global comments.
|
/// Global comments.
|
||||||
#[cfg(feature = "metadata")]
|
#[cfg(feature = "metadata")]
|
||||||
pub global_comments: String,
|
pub global_comments: String,
|
||||||
|
/// Whitespace-compressed version of the script (if any).
|
||||||
|
pub compressed: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TokenizerControlBlock {
|
impl TokenizerControlBlock {
|
||||||
@ -37,6 +39,7 @@ impl TokenizerControlBlock {
|
|||||||
is_within_text: false,
|
is_within_text: false,
|
||||||
#[cfg(feature = "metadata")]
|
#[cfg(feature = "metadata")]
|
||||||
global_comments: String::new(),
|
global_comments: String::new(),
|
||||||
|
compressed: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -879,6 +882,8 @@ pub struct TokenizeState {
|
|||||||
pub include_comments: bool,
|
pub include_comments: bool,
|
||||||
/// Is the current tokenizer position within the text stream of an interpolated string?
|
/// Is the current tokenizer position within the text stream of an interpolated string?
|
||||||
pub is_within_text_terminated_by: Option<char>,
|
pub is_within_text_terminated_by: Option<char>,
|
||||||
|
/// Last token
|
||||||
|
pub last_token: Option<SmartString>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// _(internals)_ Trait that encapsulates a peekable character input stream.
|
/// _(internals)_ Trait that encapsulates a peekable character input stream.
|
||||||
@ -956,6 +961,10 @@ pub fn parse_string_literal(
|
|||||||
let mut skip_whitespace_until = 0;
|
let mut skip_whitespace_until = 0;
|
||||||
|
|
||||||
state.is_within_text_terminated_by = Some(termination_char);
|
state.is_within_text_terminated_by = Some(termination_char);
|
||||||
|
state.last_token.as_mut().map(|last| {
|
||||||
|
last.clear();
|
||||||
|
last.push(termination_char);
|
||||||
|
});
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
assert!(
|
assert!(
|
||||||
@ -985,6 +994,8 @@ pub fn parse_string_literal(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
state.last_token.as_mut().map(|last| last.push(next_char));
|
||||||
|
|
||||||
// String interpolation?
|
// String interpolation?
|
||||||
if allow_interpolation
|
if allow_interpolation
|
||||||
&& next_char == '$'
|
&& next_char == '$'
|
||||||
@ -1004,6 +1015,10 @@ pub fn parse_string_literal(
|
|||||||
// Double wrapper
|
// Double wrapper
|
||||||
if stream.peek_next().map_or(false, |c| c == termination_char) {
|
if stream.peek_next().map_or(false, |c| c == termination_char) {
|
||||||
eat_next(stream, pos);
|
eat_next(stream, pos);
|
||||||
|
state
|
||||||
|
.last_token
|
||||||
|
.as_mut()
|
||||||
|
.map(|last| last.push(termination_char));
|
||||||
} else {
|
} else {
|
||||||
state.is_within_text_terminated_by = None;
|
state.is_within_text_terminated_by = None;
|
||||||
break;
|
break;
|
||||||
@ -1060,6 +1075,7 @@ pub fn parse_string_literal(
|
|||||||
.get_next()
|
.get_next()
|
||||||
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
|
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
|
||||||
|
|
||||||
|
state.last_token.as_mut().map(|last| last.push(c));
|
||||||
seq.push(c);
|
seq.push(c);
|
||||||
pos.advance();
|
pos.advance();
|
||||||
|
|
||||||
@ -1240,6 +1256,8 @@ fn get_next_token_inner(
|
|||||||
state: &mut TokenizeState,
|
state: &mut TokenizeState,
|
||||||
pos: &mut Position,
|
pos: &mut Position,
|
||||||
) -> Option<(Token, Position)> {
|
) -> Option<(Token, Position)> {
|
||||||
|
state.last_token.as_mut().map(|last| last.clear());
|
||||||
|
|
||||||
// Still inside a comment?
|
// Still inside a comment?
|
||||||
if state.comment_level > 0 {
|
if state.comment_level > 0 {
|
||||||
let start_pos = *pos;
|
let start_pos = *pos;
|
||||||
@ -1398,6 +1416,8 @@ fn get_next_token_inner(
|
|||||||
negated_pos
|
negated_pos
|
||||||
});
|
});
|
||||||
|
|
||||||
|
state.last_token.as_mut().map(|last| *last = result.clone());
|
||||||
|
|
||||||
// Parse number
|
// Parse number
|
||||||
let token = radix_base.map_or_else(
|
let token = radix_base.map_or_else(
|
||||||
|| {
|
|| {
|
||||||
@ -1452,14 +1472,14 @@ fn get_next_token_inner(
|
|||||||
#[cfg(not(feature = "unicode-xid-ident"))]
|
#[cfg(not(feature = "unicode-xid-ident"))]
|
||||||
('a'..='z' | '_' | 'A'..='Z', ..) => {
|
('a'..='z' | '_' | 'A'..='Z', ..) => {
|
||||||
return Some(
|
return Some(
|
||||||
parse_identifier_token(stream, pos, start_pos, c)
|
parse_identifier_token(stream, state, pos, start_pos, c)
|
||||||
.unwrap_or_else(|err| (Token::LexError(err.into()), start_pos)),
|
.unwrap_or_else(|err| (Token::LexError(err.into()), start_pos)),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
#[cfg(feature = "unicode-xid-ident")]
|
#[cfg(feature = "unicode-xid-ident")]
|
||||||
(ch, ..) if unicode_xid::UnicodeXID::is_xid_start(ch) || ch == '_' => {
|
(ch, ..) if unicode_xid::UnicodeXID::is_xid_start(ch) || ch == '_' => {
|
||||||
return Some(
|
return Some(
|
||||||
parse_identifier_token(stream, pos, start_pos, c)
|
parse_identifier_token(stream, state, pos, start_pos, c)
|
||||||
.unwrap_or_else(|err| (Token::LexError(err.into()), start_pos)),
|
.unwrap_or_else(|err| (Token::LexError(err.into()), start_pos)),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -1942,18 +1962,24 @@ fn get_next_token_inner(
|
|||||||
/// Get the next token, parsing it as an identifier.
|
/// Get the next token, parsing it as an identifier.
|
||||||
fn parse_identifier_token(
|
fn parse_identifier_token(
|
||||||
stream: &mut impl InputStream,
|
stream: &mut impl InputStream,
|
||||||
|
state: &mut TokenizeState,
|
||||||
pos: &mut Position,
|
pos: &mut Position,
|
||||||
start_pos: Position,
|
start_pos: Position,
|
||||||
first_char: char,
|
first_char: char,
|
||||||
) -> Result<(Token, Position), LexError> {
|
) -> Result<(Token, Position), LexError> {
|
||||||
let mut identifier = SmartString::new_const();
|
let mut identifier = SmartString::new_const();
|
||||||
identifier.push(first_char);
|
identifier.push(first_char);
|
||||||
|
state.last_token.as_mut().map(|last| {
|
||||||
|
last.clear();
|
||||||
|
last.push(first_char);
|
||||||
|
});
|
||||||
|
|
||||||
while let Some(next_char) = stream.peek_next() {
|
while let Some(next_char) = stream.peek_next() {
|
||||||
match next_char {
|
match next_char {
|
||||||
x if is_id_continue(x) => {
|
x if is_id_continue(x) => {
|
||||||
identifier.push(x);
|
|
||||||
eat_next(stream, pos);
|
eat_next(stream, pos);
|
||||||
|
identifier.push(x);
|
||||||
|
state.last_token.as_mut().map(|last| last.push(x));
|
||||||
}
|
}
|
||||||
_ => break,
|
_ => break,
|
||||||
}
|
}
|
||||||
@ -2129,7 +2155,7 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|||||||
type Item = (Token, Position);
|
type Item = (Token, Position);
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
{
|
let (within_interpolated, compress_script) = {
|
||||||
let control = &mut *self.state.tokenizer_control.borrow_mut();
|
let control = &mut *self.state.tokenizer_control.borrow_mut();
|
||||||
|
|
||||||
if control.is_within_text {
|
if control.is_within_text {
|
||||||
@ -2138,7 +2164,12 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|||||||
// Reset it
|
// Reset it
|
||||||
control.is_within_text = false;
|
control.is_within_text = false;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
(
|
||||||
|
self.state.is_within_text_terminated_by.is_some(),
|
||||||
|
control.compressed.is_some(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
|
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
|
||||||
// {EOF}
|
// {EOF}
|
||||||
@ -2230,6 +2261,49 @@ impl<'a> Iterator for TokenIterator<'a> {
|
|||||||
None => token,
|
None => token,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Collect the compressed script, if needed
|
||||||
|
if compress_script {
|
||||||
|
let control = &mut *self.state.tokenizer_control.borrow_mut();
|
||||||
|
|
||||||
|
if let Some(ref mut compressed) = control.compressed {
|
||||||
|
if !matches!(token, Token::EOF) {
|
||||||
|
use std::fmt::Write;
|
||||||
|
|
||||||
|
let last_token = self.state.last_token.as_ref().unwrap();
|
||||||
|
let mut buf = SmartString::new_const();
|
||||||
|
|
||||||
|
if last_token.is_empty() {
|
||||||
|
write!(buf, "{token}").unwrap();
|
||||||
|
} else if within_interpolated
|
||||||
|
&& matches!(
|
||||||
|
token,
|
||||||
|
Token::StringConstant(..) | Token::InterpolatedString(..)
|
||||||
|
)
|
||||||
|
{
|
||||||
|
compressed.push_str(&last_token[1..]);
|
||||||
|
} else {
|
||||||
|
buf = last_token.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
if !buf.is_empty() {
|
||||||
|
if !compressed.is_empty() {
|
||||||
|
let prev = compressed.chars().last().unwrap();
|
||||||
|
let cur = buf.chars().next().unwrap();
|
||||||
|
if (prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev))
|
||||||
|
&& (cur == '_'
|
||||||
|
|| is_id_first_alphabetic(cur)
|
||||||
|
|| is_id_continue(cur))
|
||||||
|
{
|
||||||
|
compressed.push(' ');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
compressed.push_str(&buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Some((token, pos))
|
Some((token, pos))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2281,6 +2355,7 @@ impl Engine {
|
|||||||
comment_level: 0,
|
comment_level: 0,
|
||||||
include_comments: false,
|
include_comments: false,
|
||||||
is_within_text_terminated_by: None,
|
is_within_text_terminated_by: None,
|
||||||
|
last_token: None,
|
||||||
},
|
},
|
||||||
pos: Position::new(1, 0),
|
pos: Position::new(1, 0),
|
||||||
stream: MultiInputsStream {
|
stream: MultiInputsStream {
|
||||||
|
Loading…
Reference in New Issue
Block a user