feat: add base parser

Signed-off-by: kjuulh <contact@kjuulh.io>

feat: with basic assignment

Signed-off-by: kjuulh <contact@kjuulh.io>

feat: remove target

Signed-off-by: kjuulh <contact@kjuulh.io>
This commit is contained in:
Kasper Juul Hermansen 2023-07-03 20:16:09 +02:00
commit 67e3f73ab4
Signed by: kjuulh
GPG Key ID: 57B6E1465221F912
18 changed files with 4113 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
build/
node_modules/
target/

59
Cargo.lock generated Normal file
View File

@ -0,0 +1,59 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "regex"
version = "1.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
[[package]]
name = "tree-sitter"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e747b1f9b7b931ed39a548c1fae149101497de3c1fc8d9e18c62c1a66c683d3d"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-rhai"
version = "0.0.1"
dependencies = [
"cc",
"tree-sitter",
]

22
Cargo.toml Normal file
View File

@ -0,0 +1,22 @@
[package]
name = "tree-sitter-rhai"
description = "rhai grammar for the tree-sitter parsing library"
version = "0.0.1"
authors = ["Kasper J. Hermansen <contact@kasperhermansen.com>"]
keywords = ["incremental", "parsing", "rhai"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/tree-sitter/tree-sitter-rhai"
edition = "2018"
license = "MIT"
build = "bindings/rust/build.rs"
include = ["bindings/rust/*", "grammar.js", "queries/*", "src/*"]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = "~0.20.10"
[build-dependencies]
cc = "1.0"

20
binding.gyp Normal file
View File

@ -0,0 +1,20 @@
{
"targets": [
{
"target_name": "tree_sitter_rhai_binding",
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
"src/scanner.c",
# If your language uses an external scanner, add it here.
],
"cflags_c": [
"-std=c99",
]
}
]
}

28
bindings/node/binding.cc Normal file
View File

@ -0,0 +1,28 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
using namespace v8;
extern "C" TSLanguage * tree_sitter_rhai();
namespace {

// Constructor callback backing the `Language` function template. Its body is
// empty: the template is only used to create the single instance built in Init.
NAN_METHOD(New) {}

// Module initializer registered through NODE_MODULE below.
// Builds an object from a `Language` template with one internal field, stores
// the pointer returned by tree_sitter_rhai() in that field, tags the object
// with `name = "rhai"`, and installs it as `module.exports`.
void Init(Local<Object> exports, Local<Object> module) {
  Local<FunctionTemplate> language_template = Nan::New<FunctionTemplate>(New);
  language_template->SetClassName(Nan::New("Language").ToLocalChecked());
  language_template->InstanceTemplate()->SetInternalFieldCount(1);

  Local<Function> language_ctor =
      Nan::GetFunction(language_template).ToLocalChecked();
  Local<Object> language_object =
      language_ctor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();

  // Internal field 0 carries the raw TSLanguage pointer.
  Nan::SetInternalFieldPointer(language_object, 0, tree_sitter_rhai());
  Nan::Set(language_object, Nan::New("name").ToLocalChecked(),
           Nan::New("rhai").ToLocalChecked());

  // Replace module.exports wholesale rather than adding properties to `exports`.
  Nan::Set(module, Nan::New("exports").ToLocalChecked(), language_object);
}

NODE_MODULE(tree_sitter_rhai_binding, Init)

}  // namespace

19
bindings/node/index.js Normal file
View File

@ -0,0 +1,19 @@
/**
 * Resolve the compiled native binding.
 *
 * The Release build is tried first and the Debug build second. Only a
 * MODULE_NOT_FOUND failure triggers the fallback; any other load error is
 * rethrown immediately. When neither build exists, the error from the Release
 * attempt is the one reported.
 */
function loadBinding() {
  try {
    return require("../../build/Release/tree_sitter_rhai_binding");
  } catch (releaseError) {
    if (releaseError.code !== 'MODULE_NOT_FOUND') {
      throw releaseError;
    }
    try {
      return require("../../build/Debug/tree_sitter_rhai_binding");
    } catch (debugError) {
      if (debugError.code !== 'MODULE_NOT_FOUND') {
        throw debugError;
      }
      throw releaseError;
    }
  }
}

module.exports = loadBinding();

// Attaching the static node type table is best effort: a missing or unreadable
// node-types.json is ignored.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}

40
bindings/rust/build.rs Normal file
View File

@ -0,0 +1,40 @@
/// Build script for the `tree-sitter-rhai` crate.
///
/// Compiles the generated parser (`src/parser.c`) together with the external
/// scanner (`src/scanner.c`) into a single static library named `parser` using
/// the `cc` crate, and asks Cargo to re-run the script when either file changes.
fn main() {
    let src_dir = std::path::Path::new("src");
    let parser_path = src_dir.join("parser.c");
    // grammar.js declares `externals`, so the generated parser refers to the
    // `tree_sitter_rhai_external_scanner_*` functions implemented in scanner.c.
    // Leaving this file out of the build yields undefined symbols at link time.
    let scanner_path = src_dir.join("scanner.c");

    let mut c_config = cc::Build::new();
    c_config
        .include(src_dir)
        // The generated C code triggers these warnings; silence them where the
        // compiler understands the flags.
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .flag_if_supported("-Wno-trigraphs")
        .file(&parser_path)
        .file(&scanner_path);
    c_config.compile("parser");

    println!("cargo:rerun-if-changed={}", parser_path.display());
    println!("cargo:rerun-if-changed={}", scanner_path.display());
}

52
bindings/rust/lib.rs Normal file
View File

@ -0,0 +1,52 @@
//! This crate provides rhai language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = "";
//! let mut parser = tree_sitter::Parser::new();
//! parser.set_language(tree_sitter_rhai::language()).expect("Error loading rhai grammar");
//! let tree = parser.parse(code, None).unwrap();
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter::Language;
// Foreign entry point of the C parser. `build.rs` compiles `src/parser.c` into
// the static library this crate links against.
// NOTE(review): the symbol is presumably defined by the generated
// `src/parser.c`; that file is not visible here, so confirm the signature.
extern "C" {
    /// Returns the language handle for the rhai grammar.
    fn tree_sitter_rhai() -> Language;
}
/// Get the tree-sitter [Language][] for this grammar.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language() -> Language {
    // SAFETY: `tree_sitter_rhai` takes no arguments, so the caller has no
    // preconditions to uphold. Soundness rests on the `extern "C"` declaration
    // matching the C symbol linked in by the build script.
    // NOTE(review): confirm against the generated parser.
    unsafe { tree_sitter_rhai() }
}
/// The content of the [`node-types.json`][] file for this grammar.
///
/// The JSON text is embedded at compile time with `include_str!`, so no file
/// access happens at run time.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
// Uncomment these to include any queries that this grammar contains
// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
    use super::language;
    use tree_sitter::Parser;

    /// Smoke test: a fresh parser must accept the rhai language handle.
    #[test]
    fn test_can_load_grammar() {
        let mut parser = Parser::new();
        let load_result = parser.set_language(language());
        load_result.expect("Error loading rhai language");
    }
}

52
corpus/assignment.txt Normal file
View File

@ -0,0 +1,52 @@
================================================================================
let assignment integer
================================================================================
let a = 42;
--------------------------------------------------------------------------------
(source_file
(let_declaration
(identifier)
(integer_literal)))
================================================================================
let assignment string
================================================================================
let some_value = "string";
--------------------------------------------------------------------------------
(source_file
(let_declaration
(identifier)
(string_literal)))
================================================================================
let assignment object
================================================================================
let some_value = some_other_value;
--------------------------------------------------------------------------------
(source_file
(let_declaration
(identifier)
(identifier)))
================================================================================
naked assignment identifier
================================================================================
some_value = some_other_value;
--------------------------------------------------------------------------------
(source_file
(expression_statement
(assignment_expression
left: (identifier)
right: (identifier))))

1
example-file.rhai Normal file
View File

@ -0,0 +1 @@
hello

110
grammar.js Normal file
View File

@ -0,0 +1,110 @@
// Named precedence levels for expression rules, passed to `prec`/`prec.left`.
// Tree-sitter resolves conflicts in favour of the numerically higher level.
// Only `PREC.assign` is referenced by the rules in this file so far; the other
// entries are not used yet.
const PREC = {
  range: 15,
  call: 14,
  field: 13,
  unary: 12,
  cast: 11,
  multiplicative: 10,
  additive: 9,
  shift: 8,
  bitand: 7,
  bitxor: 6,
  bitor: 5,
  comparative: 4,
  and: 3,
  or: 2,
  assign: 0,
  closure: -1,
};
// Tree-sitter grammar definition for rhai. Running `tree-sitter generate` on
// this file produces src/grammar.json, src/node-types.json and src/parser.c.
module.exports = grammar({
  name: "rhai",

  // Tokens produced by the hand-written scanner in src/scanner.c. The order
  // here must stay in sync with `enum TokenType` in that file.
  // `float_literal` and `block_comment` are declared but not referenced by any
  // rule below yet.
  externals: ($) => [
    $._string_content,
    $.raw_string_literal,
    $.float_literal,
    $.block_comment,
  ],

  // Whitespace may appear between any two tokens.
  extras: (_) => [/\s/],

  // Token used for keyword extraction.
  word: ($) => $.identifier,

  inline: ($) => [$._declaration_statement],

  // Hidden rules exposed as abstract node categories in node-types.json.
  supertypes: ($) => [
    $._expression,
    $._literal,
    $._literal_pattern,
    $._declaration_statement,
    $._pattern,
  ],

  rules: {
    // Root rule: a file is a sequence of zero or more statements.
    source_file: ($) => repeat($._statement),

    // statements
    _statement: ($) => choice($.expression_statement, $._declaration_statement),

    // An expression terminated by `;`.
    expression_statement: ($) => choice(seq($._expression, ";")),

    _declaration_statement: ($) => choice($.let_declaration),

    // `let <pattern>;` or `let <pattern> = <expression>;`
    let_declaration: ($) =>
      seq(
        "let",
        field("pattern", $._pattern),
        optional(seq("=", field("value", $._expression))),
        ";"
      ),

    // patterns
    _pattern: ($) => choice($._literal_pattern, $.identifier, "_"),

    // expressions
    _expression: ($) =>
      choice($.assignment_expression, $._literal, prec.left($.identifier)),

    // `<left> = <right>`; both sides accept any expression.
    assignment_expression: ($) =>
      prec.left(
        PREC.assign,
        seq(field("left", $._expression), "=", field("right", $._expression))
      ),

    // literals
    _literal: ($) =>
      choice($.string_literal, $.raw_string_literal, $.integer_literal),

    _literal_pattern: ($) =>
      choice($.string_literal, $.raw_string_literal, $.integer_literal),

    // A digit followed by any mix of digits and `_` separators.
    integer_literal: (_) => token(seq(choice(/[0-9][0-9_]*/))),

    // Opening quote (optionally prefixed with `b`), then escape sequences and
    // scanner-provided content, then a closing quote that must follow
    // immediately with no intervening extras.
    string_literal: ($) => seq(
      alias(/b?"/, '"'),
      repeat(choice($.escape_sequence, $._string_content)),
      token.immediate('"')
    ),

    // Backslash followed by: any single character other than `x`/`u`, a
    // four-hex-digit `u` escape, a braced `u{...}` escape, or a two-hex-digit
    // `x` escape.
    escape_sequence: (_) =>
      token.immediate(
        seq(
          "\\",
          choice(
            /[^xu]/,
            /u[0-9a-fA-F]{4}/,
            /u{[0-9a-fA-F]+}/,
            /x[0-9a-fA-F]{2}/
          )
        )
      ),

    // identifiers
    // Optional `r#` prefix, then `_` or an XID_Start character, then any
    // number of `_` / XID_Continue characters.
    identifier: ($) => /(r#)?[_\p{XID_Start}][_\p{XID_Continue}]*/,
  },
});

50
package-lock.json generated Normal file
View File

@ -0,0 +1,50 @@
{
"name": "tree-sitter-rhai",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "tree-sitter-rhai",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"nan": "^2.17.0"
},
"devDependencies": {
"prettier": "^2.8.8",
"tree-sitter-cli": "^0.20.8"
}
},
"node_modules/nan": {
"version": "2.17.0",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.17.0.tgz",
"integrity": "sha512-2ZTgtl0nJsO0KQCjEpxcIr5D+Yv90plTitZt9JBfQvVJDS5seMl3FOvsh3+9CoYWXf/1l5OaZzzF6nDm4cagaQ=="
},
"node_modules/prettier": {
"version": "2.8.8",
"resolved": "https://registry.npmjs.org/prettier/-/prettier-2.8.8.tgz",
"integrity": "sha512-tdN8qQGvNjw4CHbY+XXk0JgCXn9QiF21a55rBe5LJAU+kDyC4WQn4+awm2Xfk2lQMk5fKup9XgzTZtGkjBdP9Q==",
"dev": true,
"bin": {
"prettier": "bin-prettier.js"
},
"engines": {
"node": ">=10.13.0"
},
"funding": {
"url": "https://github.com/prettier/prettier?sponsor=1"
}
},
"node_modules/tree-sitter-cli": {
"version": "0.20.8",
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.20.8.tgz",
"integrity": "sha512-XjTcS3wdTy/2cc/ptMLc/WRyOLECRYcMTrSWyhZnj1oGSOWbHLTklgsgRICU3cPfb0vy+oZCC33M43u6R1HSCA==",
"dev": true,
"hasInstallScript": true,
"bin": {
"tree-sitter": "cli.js"
}
}
}
}

30
package.json Normal file
View File

@ -0,0 +1,30 @@
{
"name": "tree-sitter-rhai",
"version": "1.0.0",
"description": "",
"main": "bindings/node",
"scripts": {
"tree-sitter": "tree-sitter",
"generate": "tree-sitter generate",
"test": "tree-sitter test",
"format": "prettier -w grammar.js"
},
"author": "",
"license": "ISC",
"dependencies": {
"nan": "^2.17.0"
},
"devDependencies": {
"tree-sitter-cli": "^0.20.8",
"prettier": "^2.8.8"
},
"tree-sitter": [
    {
      "scope": "source.rhai",
      "injection-regex": "rhai",
      "file-type": [
        "rhai"
      ]
    }
]
}

328
src/grammar.json Normal file
View File

@ -0,0 +1,328 @@
{
"name": "rhai",
"word": "identifier",
"rules": {
"source_file": {
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_statement"
}
},
"_statement": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "expression_statement"
},
{
"type": "SYMBOL",
"name": "_declaration_statement"
}
]
},
"expression_statement": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": ";"
}
]
}
]
},
"_declaration_statement": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "let_declaration"
}
]
},
"let_declaration": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "let"
},
{
"type": "FIELD",
"name": "pattern",
"content": {
"type": "SYMBOL",
"name": "_pattern"
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "="
},
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "_expression"
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "STRING",
"value": ";"
}
]
},
"_pattern": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_literal_pattern"
},
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "STRING",
"value": "_"
}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "assignment_expression"
},
{
"type": "SYMBOL",
"name": "_literal"
},
{
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
},
"assignment_expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "left",
"content": {
"type": "SYMBOL",
"name": "_expression"
}
},
{
"type": "STRING",
"value": "="
},
{
"type": "FIELD",
"name": "right",
"content": {
"type": "SYMBOL",
"name": "_expression"
}
}
]
}
},
"_literal": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "string_literal"
},
{
"type": "SYMBOL",
"name": "raw_string_literal"
},
{
"type": "SYMBOL",
"name": "integer_literal"
}
]
},
"_literal_pattern": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "string_literal"
},
{
"type": "SYMBOL",
"name": "raw_string_literal"
},
{
"type": "SYMBOL",
"name": "integer_literal"
}
]
},
"integer_literal": {
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[0-9][0-9_]*"
}
]
}
]
}
},
"string_literal": {
"type": "SEQ",
"members": [
{
"type": "ALIAS",
"content": {
"type": "PATTERN",
"value": "b?\""
},
"named": false,
"value": "\""
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "escape_sequence"
},
{
"type": "SYMBOL",
"name": "_string_content"
}
]
}
},
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "STRING",
"value": "\""
}
}
]
},
"escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^xu]"
},
{
"type": "PATTERN",
"value": "u[0-9a-fA-F]{4}"
},
{
"type": "PATTERN",
"value": "u{[0-9a-fA-F]+}"
},
{
"type": "PATTERN",
"value": "x[0-9a-fA-F]{2}"
}
]
}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "(r#)?[_\\p{XID_Start}][_\\p{XID_Continue}]*"
}
},
"extras": [
{
"type": "PATTERN",
"value": "\\s"
}
],
"conflicts": [],
"precedences": [],
"externals": [
{
"type": "SYMBOL",
"name": "_string_content"
},
{
"type": "SYMBOL",
"name": "raw_string_literal"
},
{
"type": "SYMBOL",
"name": "float_literal"
},
{
"type": "SYMBOL",
"name": "block_comment"
}
],
"inline": [
"_declaration_statement"
],
"supertypes": [
"_expression",
"_literal",
"_literal_pattern",
"_declaration_statement",
"_pattern"
]
}

221
src/node-types.json Normal file
View File

@ -0,0 +1,221 @@
[
{
"type": "_declaration_statement",
"named": true,
"subtypes": [
{
"type": "let_declaration",
"named": true
}
]
},
{
"type": "_expression",
"named": true,
"subtypes": [
{
"type": "_literal",
"named": true
},
{
"type": "assignment_expression",
"named": true
},
{
"type": "identifier",
"named": true
}
]
},
{
"type": "_literal",
"named": true,
"subtypes": [
{
"type": "integer_literal",
"named": true
},
{
"type": "raw_string_literal",
"named": true
},
{
"type": "string_literal",
"named": true
}
]
},
{
"type": "_literal_pattern",
"named": true,
"subtypes": [
{
"type": "integer_literal",
"named": true
},
{
"type": "raw_string_literal",
"named": true
},
{
"type": "string_literal",
"named": true
}
]
},
{
"type": "_pattern",
"named": true,
"subtypes": [
{
"type": "_",
"named": false
},
{
"type": "_literal_pattern",
"named": true
},
{
"type": "identifier",
"named": true
}
]
},
{
"type": "assignment_expression",
"named": true,
"fields": {
"left": {
"multiple": false,
"required": true,
"types": [
{
"type": "_expression",
"named": true
}
]
},
"right": {
"multiple": false,
"required": true,
"types": [
{
"type": "_expression",
"named": true
}
]
}
}
},
{
"type": "expression_statement",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "_expression",
"named": true
}
]
}
},
{
"type": "let_declaration",
"named": true,
"fields": {
"pattern": {
"multiple": false,
"required": true,
"types": [
{
"type": "_pattern",
"named": true
}
]
},
"value": {
"multiple": false,
"required": false,
"types": [
{
"type": "_expression",
"named": true
}
]
}
}
},
{
"type": "source_file",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "_declaration_statement",
"named": true
},
{
"type": "expression_statement",
"named": true
}
]
}
},
{
"type": "string_literal",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true
}
]
}
},
{
"type": "\"",
"named": false
},
{
"type": ";",
"named": false
},
{
"type": "=",
"named": false
},
{
"type": "_",
"named": false
},
{
"type": "escape_sequence",
"named": true
},
{
"type": "identifier",
"named": true
},
{
"type": "integer_literal",
"named": true
},
{
"type": "let",
"named": false
},
{
"type": "raw_string_literal",
"named": true
}
]

2663
src/parser.c Normal file

File diff suppressed because it is too large Load Diff

191
src/scanner.c Normal file
View File

@ -0,0 +1,191 @@
#include <tree_sitter/parser.h>
#include <wctype.h>
// External token kinds recognised by this scanner. The values index into the
// `valid_symbols` array passed to the scan function and are written to
// `lexer->result_symbol`. The order must match the `externals` list in
// grammar.js (_string_content, raw_string_literal, float_literal,
// block_comment).
enum TokenType {
  STRING_CONTENT,
  RAW_STRING_LITERAL,
  FLOAT_LITERAL,
  BLOCK_COMMENT,
};
// Scanner lifecycle hooks. The scan function never reads its payload, so the
// scanner is stateless: nothing is allocated, freed, or (de)serialized.

// Creates the scanner payload; returns NULL because there is no state.
// Declared with a `(void)` parameter list so the definition is a real
// prototype matching the `void *(*create)(void)` slot in tree_sitter/parser.h.
void *tree_sitter_rhai_external_scanner_create(void) { return NULL; }

// Releases the payload; nothing to free.
void tree_sitter_rhai_external_scanner_destroy(void *p) {}

// No-op. The external_scanner table in tree_sitter/parser.h has no `reset`
// slot, so this is kept only as an exported symbol.
void tree_sitter_rhai_external_scanner_reset(void *p) {}

// Writes no bytes to `buffer` and reports a serialized length of 0.
unsigned tree_sitter_rhai_external_scanner_serialize(void *p, char *buffer) {
  return 0;
}

// Restores state from `b` (length `n`); there is no state to restore.
void tree_sitter_rhai_external_scanner_deserialize(void *p, const char *b,
                                                   unsigned n) {}
// Moves the lexer to the next character with the skip flag set to false, i.e.
// the consumed character is not treated as skipped input.
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
// Tells whether `c` may appear inside the digit run of a numeric literal:
// either the `_` separator or a decimal digit as classified by iswdigit.
static bool is_num_char(int32_t c) {
  if (c == '_') {
    return true;
  }
  return iswdigit(c) != 0;
}
// Scan callback: tries to recognise one external token at the current position.
//
// payload        unused; the scanner keeps no state.
// lexer          cursor over the input; `result_symbol` receives the token kind.
// valid_symbols  indexed by `enum TokenType`; true for each token the parser
//                can accept at this point.
//
// Returns true when a token was recognised (with `lexer->result_symbol` set),
// false otherwise. Every branch is gated on `valid_symbols` so the scanner
// never hands back a token the parser did not ask for.
bool tree_sitter_rhai_external_scanner_scan(void *payload, TSLexer *lexer,
                                            const bool *valid_symbols) {
  // String content: everything up to the next quote or backslash.
  // NOTE(review): the `!valid_symbols[FLOAT_LITERAL]` test presumably excludes
  // error recovery, where every symbol is flagged valid; confirm.
  if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) {
    bool has_content = false;
    for (;;) {
      if (lexer->lookahead == '\"' || lexer->lookahead == '\\') {
        break;
      } else if (lexer->lookahead == 0) {
        // Input ended before the string was closed.
        return false;
      }
      has_content = true;
      advance(lexer);
    }
    lexer->result_symbol = STRING_CONTENT;
    // An empty run is not a token.
    return has_content;
  }

  // Skip leading whitespace (skip flag true) before the remaining token kinds.
  while (iswspace(lexer->lookahead))
    lexer->advance(lexer, true);

  // Raw string literal: optional `b`, then `r`, N `#`, `"`, body, `"`, N `#`.
  if (valid_symbols[RAW_STRING_LITERAL] &&
      (lexer->lookahead == 'r' || lexer->lookahead == 'b')) {
    lexer->result_symbol = RAW_STRING_LITERAL;
    if (lexer->lookahead == 'b')
      advance(lexer);
    if (lexer->lookahead != 'r')
      return false;
    advance(lexer);

    unsigned opening_hash_count = 0;
    while (lexer->lookahead == '#') {
      advance(lexer);
      opening_hash_count++;
    }

    if (lexer->lookahead != '"')
      return false;
    advance(lexer);

    for (;;) {
      if (lexer->lookahead == 0) {
        return false;
      } else if (lexer->lookahead == '"') {
        advance(lexer);
        // The literal ends only when the quote is followed by as many `#` as
        // opened it; otherwise the quote is part of the body.
        unsigned hash_count = 0;
        while (lexer->lookahead == '#' && hash_count < opening_hash_count) {
          advance(lexer);
          hash_count++;
        }
        if (hash_count == opening_hash_count) {
          return true;
        }
      } else {
        advance(lexer);
      }
    }
  }

  // Float literal: digits, then a fraction and/or an exponent, then an
  // optional type suffix.
  if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) {
    lexer->result_symbol = FLOAT_LITERAL;

    advance(lexer);
    while (is_num_char(lexer->lookahead)) {
      advance(lexer);
    }

    bool has_fraction = false, has_exponent = false;

    if (lexer->lookahead == '.') {
      has_fraction = true;
      advance(lexer);
      if (iswalpha(lexer->lookahead)) {
        // The dot is followed by a letter: 1.max(2) => not a float but an
        // integer
        return false;
      }

      // Two dots in a row: not a fraction.
      if (lexer->lookahead == '.') {
        return false;
      }
      while (is_num_char(lexer->lookahead)) {
        advance(lexer);
      }
    }

    // Token end so far: the integer part plus any fraction.
    lexer->mark_end(lexer);

    if (lexer->lookahead == 'e' || lexer->lookahead == 'E') {
      has_exponent = true;
      advance(lexer);
      if (lexer->lookahead == '+' || lexer->lookahead == '-') {
        advance(lexer);
      }
      if (!is_num_char(lexer->lookahead)) {
        // No exponent digits: the token ends at the previously marked position.
        return true;
      }
      advance(lexer);
      while (is_num_char(lexer->lookahead)) {
        advance(lexer);
      }

      lexer->mark_end(lexer);
    }

    // Plain digits with neither fraction nor exponent are not a float.
    if (!has_exponent && !has_fraction)
      return false;

    // Optional suffix: `u`, `i` or `f` followed by at least one digit.
    if (lexer->lookahead != 'u' && lexer->lookahead != 'i' &&
        lexer->lookahead != 'f') {
      return true;
    }
    advance(lexer);
    if (!iswdigit(lexer->lookahead)) {
      return true;
    }

    while (iswdigit(lexer->lookahead)) {
      advance(lexer);
    }

    lexer->mark_end(lexer);
    return true;
  }

  // Block comment `/* ... */`, with nesting. Only attempted when the parser
  // can accept one here.
  if (valid_symbols[BLOCK_COMMENT] && lexer->lookahead == '/') {
    advance(lexer);
    if (lexer->lookahead != '*')
      return false;
    advance(lexer);

    bool after_star = false;
    unsigned nesting_depth = 1;
    for (;;) {
      switch (lexer->lookahead) {
      case '\0':
        // Unterminated comment.
        return false;
      case '*':
        advance(lexer);
        after_star = true;
        break;
      case '/':
        if (after_star) {
          // `*/` closes one nesting level.
          advance(lexer);
          after_star = false;
          nesting_depth--;
          if (nesting_depth == 0) {
            lexer->result_symbol = BLOCK_COMMENT;
            return true;
          }
        } else {
          // `/*` opens a nested comment.
          advance(lexer);
          after_star = false;
          if (lexer->lookahead == '*') {
            nesting_depth++;
            advance(lexer);
          }
        }
        break;
      default:
        advance(lexer);
        after_star = false;
        break;
      }
    }
  }

  return false;
}

224
src/tree_sitter/parser.h Normal file
View File

@ -0,0 +1,224 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One field mapping: a field id paired with a child position and an
// `inherited` flag.
typedef struct {
  TSFieldId field_id;
  uint8_t child_index;
  bool inherited;
} TSFieldMapEntry;

// An index/length pair.
// NOTE(review): presumably a window into TSLanguage.field_map_entries; confirm.
typedef struct {
  uint16_t index;
  uint16_t length;
} TSFieldMapSlice;

// Per-symbol flags; TSLanguage.symbol_metadata points at an array of these.
typedef struct {
  bool visible;
  bool named;
  bool supertype;
} TSSymbolMetadata;
// Lexer interface passed to the generated lex functions (`lex_fn`,
// `keyword_lex_fn`) and to an external scanner's `scan` callback.
typedef struct TSLexer TSLexer;

struct TSLexer {
  // Current character; START_LEXER copies it into its local `lookahead`.
  int32_t lookahead;
  // Symbol of the recognised token; written by ACCEPT_TOKEN.
  TSSymbol result_symbol;
  // Moves to the next character. The bool is the `skip` flag that
  // START_LEXER passes through (set by SKIP, cleared otherwise).
  void (*advance)(TSLexer *, bool);
  // Invoked by ACCEPT_TOKEN after the symbol is stored.
  // NOTE(review): presumably records the current position as the token end.
  void (*mark_end)(TSLexer *);
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  bool (*eof)(const TSLexer *);
};
// Discriminant stored in the `type` member of a TSParseAction. The SHIFT*,
// REDUCE, RECOVER and ACCEPT_INPUT macros in this header each set one value.
typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;

// One parse action. Both struct variants start with a `uint8_t type`, the same
// leading member as the bare `type` alternative.
typedef union {
  struct {
    uint8_t type;
    TSStateId state;
    bool extra;
    bool repetition;
  } shift;
  struct {
    uint8_t type;
    uint8_t child_count;
    TSSymbol symbol;
    int16_t dynamic_precedence;
    uint16_t production_id;
  } reduce;
  uint8_t type;
} TSParseAction;

// Pair of lex-state ids: one for the built-in lexer, one for the external
// scanner.
typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;

// Element type of TSLanguage.parse_actions: either an action or an `entry`
// record holding a count and a `reusable` flag.
typedef union {
  TSParseAction action;
  struct {
    uint8_t count;
    bool reusable;
  } entry;
} TSParseActionEntry;
// Static description of a generated language: table sizes, parse tables,
// symbol and field metadata, lex functions, and external-scanner hooks.
struct TSLanguage {
  uint32_t version;
  // Counts sizing the tables below.
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
  uint32_t external_token_count;
  uint32_t state_count;
  uint32_t large_state_count;
  uint32_t production_id_count;
  uint32_t field_count;
  uint16_t max_alias_sequence_length;
  // Parse tables and the actions they refer to.
  const uint16_t *parse_table;
  const uint16_t *small_parse_table;
  const uint32_t *small_parse_table_map;
  const TSParseActionEntry *parse_actions;
  // Symbol and field names and metadata.
  const char * const *symbol_names;
  const char * const *field_names;
  const TSFieldMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  // Lexing: per-state lex modes and the generated lex functions.
  const TSLexMode *lex_modes;
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
  // Hooks for a hand-written external scanner. The signatures correspond to
  // the tree_sitter_<name>_external_scanner_{create,destroy,scan,serialize,
  // deserialize} functions a grammar's scanner source defines.
  struct {
    const bool *states;
    const TSSymbol *symbol_map;
    void *(*create)(void);
    void (*destroy)(void *);
    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
    unsigned (*serialize)(void *, char *);
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
};
/*
 *  Lexer Macros
 *
 *  Building blocks for a generated lex function. They expect a `lexer`
 *  pointer and a `state` variable to be in scope at the expansion site.
 */

// Prologue: declares the locals `result`, `skip`, `eof` and `lookahead`, then
// jumps to `start`. The `next_state` label advances the lexer using the
// current `skip` flag and falls through to `start`, which clears `skip` and
// reloads `lookahead` from the lexer.
#define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
  next_state:                   \
  lexer->advance(lexer, skip);  \
  start:                        \
  skip = false;                 \
  lookahead = lexer->lookahead;

// Consumes the current character and continues in `state_value`.
#define ADVANCE(state_value) \
  {                          \
    state = state_value;     \
    goto next_state;         \
  }

// Same as ADVANCE, but sets `skip` so the advance call receives skip = true.
#define SKIP(state_value) \
  {                       \
    skip = true;          \
    state = state_value;  \
    goto next_state;      \
  }

// Records a successful match: sets `result`, stores the symbol and calls
// mark_end. Expands to three separate statements (no enclosing braces), so it
// must not be used as the body of an unbraced `if`.
#define ACCEPT_TOKEN(symbol_value)     \
  result = true;                       \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);

// Leaves the lex function, returning whether any token was accepted.
#define END_STATE() return result;
/*
 *  Parse Table Macros
 *
 *  Shorthand for the initializers of a generated parse table. The
 *  doubly-braced forms expand to designated initializers for one action entry.
 */

// Expands to `id - LARGE_STATE_COUNT` without parentheses; LARGE_STATE_COUNT
// must be defined at the expansion site.
#define SMALL_STATE(id) id - LARGE_STATE_COUNT

// Identity macros; they only label the role of a value in the tables.
#define STATE(id) id

#define ACTIONS(id) id

// Shift action moving to `state_value`.
#define SHIFT(state_value)            \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = state_value            \
    }                                 \
  }}

// Shift action with the `repetition` flag set.
#define SHIFT_REPEAT(state_value)     \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = state_value,           \
      .repetition = true              \
    }                                 \
  }}

// Shift action with the `extra` flag set and no target state.
#define SHIFT_EXTRA()                 \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .extra = true                   \
    }                                 \
  }}

// Reduce action producing `symbol_val` from `child_count_val` children. Any
// further designated initializers (e.g. for dynamic_precedence or
// production_id) are passed through the variadic arguments.
#define REDUCE(symbol_val, child_count_val, ...) \
  {{                                             \
    .reduce = {                                  \
      .type = TSParseActionTypeReduce,           \
      .symbol = symbol_val,                      \
      .child_count = child_count_val,            \
      __VA_ARGS__                                \
    },                                           \
  }}

// Action carrying only the Recover type.
#define RECOVER()                    \
  {{                                 \
    .type = TSParseActionTypeRecover \
  }}

// Action carrying only the Accept type.
#define ACCEPT_INPUT()              \
  {{                                \
    .type = TSParseActionTypeAccept \
  }}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_