Add Engine::compact_script.
This commit is contained in:
parent
d94f362b51
commit
9bf3a9d78f
@ -37,7 +37,7 @@ Net features
|
||||
|
||||
* A function pointer created via a closure definition now links to the particular anonymous function itself.
|
||||
* This avoids a potentially expensive function lookup when the function pointer is called, speeding up closures.
|
||||
* An additional benefit is that function pointers can now be `export`ed from modules!
|
||||
* Closures now also encapsulate their defining environment, so function pointers can now be freely `export`ed from modules!
|
||||
|
||||
### `!in`
|
||||
|
||||
@ -49,6 +49,12 @@ Net features
|
||||
* The options are for future-proofing the API.
|
||||
* In this version, it gains the ability to set the value of the _custom state_ (accessible via `NativeCallContext::tag`) for a function evaluation, overriding `Engine::set_default_tag`.
|
||||
|
||||
### Compact a script for compression
|
||||
|
||||
* `Engine::compact_script` is added, which takes a valid script (it still returns parsing errors) and returns a _compacted_ version of the script with all insignificant whitespace and all comments removed.
|
||||
* A compact script compresses better than one with liberal whitespace and comments.
|
||||
* Unlike some uglifiers or minifiers, `Engine::compact_script` does not optimize the script in any way, nor does it rename variables.
|
||||
|
||||
Enhancements
|
||||
------------
|
||||
|
||||
|
@ -1,6 +1,9 @@
|
||||
//! Module that provide formatting services to the [`Engine`].
|
||||
use crate::packages::iter_basic::{BitRange, CharsStream, StepRange};
|
||||
use crate::parser::{ParseResult, ParseState};
|
||||
use crate::{
|
||||
Engine, ExclusiveRange, FnPtr, ImmutableString, InclusiveRange, Position, RhaiError, ERR,
|
||||
Engine, ExclusiveRange, FnPtr, ImmutableString, InclusiveRange, OptimizationLevel, Position,
|
||||
RhaiError, Scope, SmartString, StringsInterner, ERR,
|
||||
};
|
||||
use std::any::type_name;
|
||||
#[cfg(feature = "no_std")]
|
||||
@ -263,4 +266,33 @@ impl Engine {
|
||||
let t = self.map_type_name(type_name::<T>()).into();
|
||||
ERR::ErrorMismatchDataType(t, typ.into(), pos).into()
|
||||
}
|
||||
|
||||
/// Compact a script to eliminate insignificant whitespaces and comments.
|
||||
///
|
||||
/// This is useful to prepare a script for further compressing.
|
||||
///
|
||||
/// The output script is semantically identical to the input script, except smaller in size.
|
||||
///
|
||||
/// Unlike other uglifiers and minifiers, this method does not rename variables nor perform any
|
||||
/// optimization on the input script.
|
||||
#[inline]
|
||||
pub fn compact_script(&self, script: impl AsRef<str>) -> ParseResult<String> {
|
||||
let scripts = [script];
|
||||
let (mut stream, tc) = self.lex_raw(&scripts, self.token_mapper.as_deref());
|
||||
tc.borrow_mut().compressed = Some(String::new());
|
||||
stream.state.last_token = Some(SmartString::new_const());
|
||||
let scope = Scope::new();
|
||||
let mut interner = StringsInterner::new();
|
||||
let mut state = ParseState::new(&scope, &mut interner, tc);
|
||||
let mut _ast = self.parse(
|
||||
stream.peekable(),
|
||||
&mut state,
|
||||
#[cfg(not(feature = "no_optimize"))]
|
||||
OptimizationLevel::None,
|
||||
#[cfg(feature = "no_optimize")]
|
||||
(),
|
||||
)?;
|
||||
let tc = state.tokenizer_control.borrow();
|
||||
Ok(tc.compressed.as_ref().unwrap().into())
|
||||
}
|
||||
}
|
@ -23,7 +23,7 @@ pub mod limits_unchecked;
|
||||
|
||||
pub mod events;
|
||||
|
||||
pub mod type_names;
|
||||
pub mod formatting;
|
||||
|
||||
pub mod custom_syntax;
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
//! Module defining external-loaded modules for Rhai.
|
||||
|
||||
#[cfg(feature = "metadata")]
|
||||
use crate::api::type_names::format_type;
|
||||
use crate::api::formatting::format_type;
|
||||
use crate::ast::FnAccess;
|
||||
use crate::func::{
|
||||
shared_take_or_clone, CallableFunction, FnCallArgs, IteratorFn, RegisterNativeFunction,
|
||||
|
@ -1,7 +1,7 @@
|
||||
//! Serialization of functions metadata.
|
||||
#![cfg(feature = "metadata")]
|
||||
|
||||
use crate::api::type_names::format_type;
|
||||
use crate::api::formatting::format_type;
|
||||
use crate::module::{calc_native_fn_hash, FuncInfo, ModuleFlags};
|
||||
use crate::{calc_fn_hash, Engine, FnAccess, SmartString, StaticVec, AST};
|
||||
use serde::Serialize;
|
||||
|
@ -26,6 +26,8 @@ pub struct TokenizerControlBlock {
|
||||
/// Global comments.
|
||||
#[cfg(feature = "metadata")]
|
||||
pub global_comments: String,
|
||||
/// Whitespace-compressed version of the script (if any).
|
||||
pub compressed: Option<String>,
|
||||
}
|
||||
|
||||
impl TokenizerControlBlock {
|
||||
@ -37,6 +39,7 @@ impl TokenizerControlBlock {
|
||||
is_within_text: false,
|
||||
#[cfg(feature = "metadata")]
|
||||
global_comments: String::new(),
|
||||
compressed: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -879,6 +882,8 @@ pub struct TokenizeState {
|
||||
pub include_comments: bool,
|
||||
/// Is the current tokenizer position within the text stream of an interpolated string?
|
||||
pub is_within_text_terminated_by: Option<char>,
|
||||
/// Last token
|
||||
pub last_token: Option<SmartString>,
|
||||
}
|
||||
|
||||
/// _(internals)_ Trait that encapsulates a peekable character input stream.
|
||||
@ -956,6 +961,10 @@ pub fn parse_string_literal(
|
||||
let mut skip_whitespace_until = 0;
|
||||
|
||||
state.is_within_text_terminated_by = Some(termination_char);
|
||||
state.last_token.as_mut().map(|last| {
|
||||
last.clear();
|
||||
last.push(termination_char);
|
||||
});
|
||||
|
||||
loop {
|
||||
assert!(
|
||||
@ -985,6 +994,8 @@ pub fn parse_string_literal(
|
||||
}
|
||||
};
|
||||
|
||||
state.last_token.as_mut().map(|last| last.push(next_char));
|
||||
|
||||
// String interpolation?
|
||||
if allow_interpolation
|
||||
&& next_char == '$'
|
||||
@ -1004,6 +1015,10 @@ pub fn parse_string_literal(
|
||||
// Double wrapper
|
||||
if stream.peek_next().map_or(false, |c| c == termination_char) {
|
||||
eat_next(stream, pos);
|
||||
state
|
||||
.last_token
|
||||
.as_mut()
|
||||
.map(|last| last.push(termination_char));
|
||||
} else {
|
||||
state.is_within_text_terminated_by = None;
|
||||
break;
|
||||
@ -1060,6 +1075,7 @@ pub fn parse_string_literal(
|
||||
.get_next()
|
||||
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
|
||||
|
||||
state.last_token.as_mut().map(|last| last.push(c));
|
||||
seq.push(c);
|
||||
pos.advance();
|
||||
|
||||
@ -1240,6 +1256,8 @@ fn get_next_token_inner(
|
||||
state: &mut TokenizeState,
|
||||
pos: &mut Position,
|
||||
) -> Option<(Token, Position)> {
|
||||
state.last_token.as_mut().map(|last| last.clear());
|
||||
|
||||
// Still inside a comment?
|
||||
if state.comment_level > 0 {
|
||||
let start_pos = *pos;
|
||||
@ -1398,6 +1416,8 @@ fn get_next_token_inner(
|
||||
negated_pos
|
||||
});
|
||||
|
||||
state.last_token.as_mut().map(|last| *last = result.clone());
|
||||
|
||||
// Parse number
|
||||
let token = radix_base.map_or_else(
|
||||
|| {
|
||||
@ -1452,14 +1472,14 @@ fn get_next_token_inner(
|
||||
#[cfg(not(feature = "unicode-xid-ident"))]
|
||||
('a'..='z' | '_' | 'A'..='Z', ..) => {
|
||||
return Some(
|
||||
parse_identifier_token(stream, pos, start_pos, c)
|
||||
parse_identifier_token(stream, state, pos, start_pos, c)
|
||||
.unwrap_or_else(|err| (Token::LexError(err.into()), start_pos)),
|
||||
);
|
||||
}
|
||||
#[cfg(feature = "unicode-xid-ident")]
|
||||
(ch, ..) if unicode_xid::UnicodeXID::is_xid_start(ch) || ch == '_' => {
|
||||
return Some(
|
||||
parse_identifier_token(stream, pos, start_pos, c)
|
||||
parse_identifier_token(stream, state, pos, start_pos, c)
|
||||
.unwrap_or_else(|err| (Token::LexError(err.into()), start_pos)),
|
||||
);
|
||||
}
|
||||
@ -1942,18 +1962,24 @@ fn get_next_token_inner(
|
||||
/// Get the next token, parsing it as an identifier.
|
||||
fn parse_identifier_token(
|
||||
stream: &mut impl InputStream,
|
||||
state: &mut TokenizeState,
|
||||
pos: &mut Position,
|
||||
start_pos: Position,
|
||||
first_char: char,
|
||||
) -> Result<(Token, Position), LexError> {
|
||||
let mut identifier = SmartString::new_const();
|
||||
identifier.push(first_char);
|
||||
state.last_token.as_mut().map(|last| {
|
||||
last.clear();
|
||||
last.push(first_char);
|
||||
});
|
||||
|
||||
while let Some(next_char) = stream.peek_next() {
|
||||
match next_char {
|
||||
x if is_id_continue(x) => {
|
||||
identifier.push(x);
|
||||
eat_next(stream, pos);
|
||||
identifier.push(x);
|
||||
state.last_token.as_mut().map(|last| last.push(x));
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
@ -2129,7 +2155,7 @@ impl<'a> Iterator for TokenIterator<'a> {
|
||||
type Item = (Token, Position);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
{
|
||||
let (within_interpolated, compress_script) = {
|
||||
let control = &mut *self.state.tokenizer_control.borrow_mut();
|
||||
|
||||
if control.is_within_text {
|
||||
@ -2138,7 +2164,12 @@ impl<'a> Iterator for TokenIterator<'a> {
|
||||
// Reset it
|
||||
control.is_within_text = false;
|
||||
}
|
||||
}
|
||||
|
||||
(
|
||||
self.state.is_within_text_terminated_by.is_some(),
|
||||
control.compressed.is_some(),
|
||||
)
|
||||
};
|
||||
|
||||
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
|
||||
// {EOF}
|
||||
@ -2230,6 +2261,49 @@ impl<'a> Iterator for TokenIterator<'a> {
|
||||
None => token,
|
||||
};
|
||||
|
||||
// Collect the compressed script, if needed
|
||||
if compress_script {
|
||||
let control = &mut *self.state.tokenizer_control.borrow_mut();
|
||||
|
||||
if let Some(ref mut compressed) = control.compressed {
|
||||
if !matches!(token, Token::EOF) {
|
||||
use std::fmt::Write;
|
||||
|
||||
let last_token = self.state.last_token.as_ref().unwrap();
|
||||
let mut buf = SmartString::new_const();
|
||||
|
||||
if last_token.is_empty() {
|
||||
write!(buf, "{token}").unwrap();
|
||||
} else if within_interpolated
|
||||
&& matches!(
|
||||
token,
|
||||
Token::StringConstant(..) | Token::InterpolatedString(..)
|
||||
)
|
||||
{
|
||||
compressed.push_str(&last_token[1..]);
|
||||
} else {
|
||||
buf = last_token.clone();
|
||||
}
|
||||
|
||||
if !buf.is_empty() {
|
||||
if !compressed.is_empty() {
|
||||
let prev = compressed.chars().last().unwrap();
|
||||
let cur = buf.chars().next().unwrap();
|
||||
if (prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev))
|
||||
&& (cur == '_'
|
||||
|| is_id_first_alphabetic(cur)
|
||||
|| is_id_continue(cur))
|
||||
{
|
||||
compressed.push(' ');
|
||||
}
|
||||
}
|
||||
|
||||
compressed.push_str(&buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some((token, pos))
|
||||
}
|
||||
}
|
||||
@ -2281,6 +2355,7 @@ impl Engine {
|
||||
comment_level: 0,
|
||||
include_comments: false,
|
||||
is_within_text_terminated_by: None,
|
||||
last_token: None,
|
||||
},
|
||||
pos: Position::new(1, 0),
|
||||
stream: MultiInputsStream {
|
||||
|
Loading…
Reference in New Issue
Block a user