summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHenri Chataing <henrichataing@google.com>2022-02-18 13:15:08 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2022-02-18 13:15:08 +0000
commit1155f6d8cb775a736a93961942c6b3902aae2e04 (patch)
treed3be57fc40f87178760a5a5e5d76a6d238555a70
parentf0dbfff9ce5727cf4e2a361a5f4dc91759156eaa (diff)
parentd240aa6ca72eaaed37ccd8ae74d2e8ecfa0b721a (diff)
downloadpest_meta-1155f6d8cb775a736a93961942c6b3902aae2e04.tar.gz
Import pest_meta crate. am: d240aa6ca7
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/pest_meta/+/1976546 Change-Id: I4b5015aaad96708f68998c50223e78f3a62e47bc
-rw-r--r--Android.bp18
-rw-r--r--Cargo.toml42
-rw-r--r--Cargo.toml.orig26
l---------LICENSE1
-rw-r--r--LICENSE-APACHE201
-rw-r--r--LICENSE-MIT23
-rw-r--r--METADATA20
-rw-r--r--MODULE_LICENSE_APACHE20
-rw-r--r--OWNERS1
-rw-r--r--_README.md169
-rw-r--r--cargo2android.json5
-rw-r--r--patches/0001-Remove-unused-extern-crate-maplit.patch25
-rw-r--r--patches/Android.bp.diff12
-rw-r--r--src/ast.rs315
-rw-r--r--src/grammar.pest98
-rw-r--r--src/grammar.rs2
-rw-r--r--src/lib.rs133
-rw-r--r--src/optimizer/concatenator.rs34
-rw-r--r--src/optimizer/factorizer.rs38
-rw-r--r--src/optimizer/mod.rs510
-rw-r--r--src/optimizer/restorer.rs157
-rw-r--r--src/optimizer/rotater.rs45
-rw-r--r--src/optimizer/skipper.rs57
-rw-r--r--src/optimizer/unroller.rs67
-rw-r--r--src/parser.rs1488
-rw-r--r--src/validator.rs851
26 files changed, 4338 insertions, 0 deletions
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..a145633
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,18 @@
+// This file is generated by cargo2android.py --config cargo2android.json.
+// Do not modify this file as changes will be overridden on upgrade.
+
+
+
+rust_library_host {
+ name: "libpest_meta",
+ // has rustc warnings
+ crate_name: "pest_meta",
+ cargo_env_compat: true,
+ cargo_pkg_version: "2.1.3",
+ srcs: ["src/lib.rs"],
+ edition: "2015",
+ rustlibs: [
+ "libpest",
+ ],
+ compile_multilib: "first",
+}
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..74f8377
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,42 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "pest_meta"
+version = "2.1.3"
+authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
+exclude = ["src/grammar.pest"]
+include = ["Cargo.toml", "src/**/*", "src/grammar.rs", "_README.md", "LICENSE-*"]
+description = "pest meta language parser and validator"
+homepage = "https://pest-parser.github.io/"
+documentation = "https://docs.rs/pest"
+readme = "_README.md"
+keywords = ["pest", "parser", "meta", "optimizer"]
+categories = ["parsing"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/pest-parser/pest"
+[dependencies.maplit]
+version = "1.0"
+
+[dependencies.pest]
+version = "2.1.0"
+[build-dependencies.sha-1]
+version = "0.8"
+default-features = false
+[badges.codecov]
+repository = "pest-parser/pest"
+
+[badges.maintenance]
+status = "actively-developed"
+
+[badges.travis-ci]
+repository = "pest-parser/pest"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..40c7570
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,26 @@
+[package]
+name = "pest_meta"
+description = "pest meta language parser and validator"
+version = "2.1.3"
+authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
+homepage = "https://pest-parser.github.io/"
+repository = "https://github.com/pest-parser/pest"
+documentation = "https://docs.rs/pest"
+keywords = ["pest", "parser", "meta", "optimizer"]
+categories = ["parsing"]
+license = "MIT/Apache-2.0"
+readme = "_README.md"
+exclude = ["src/grammar.pest"]
+include = ["Cargo.toml", "src/**/*", "src/grammar.rs", "_README.md", "LICENSE-*"]
+
+[dependencies]
+maplit = "1.0"
+pest = { path = "../pest", version = "2.1.0" }
+
+[badges]
+codecov = { repository = "pest-parser/pest" }
+maintenance = { status = "actively-developed" }
+travis-ci = { repository = "pest-parser/pest" }
+
+[build-dependencies]
+sha-1 = { version = "0.8", default-features = false }
diff --git a/LICENSE b/LICENSE
new file mode 120000
index 0000000..6b579aa
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1 @@
+LICENSE-APACHE \ No newline at end of file
diff --git a/LICENSE-APACHE b/LICENSE-APACHE
new file mode 100644
index 0000000..16fe87b
--- /dev/null
+++ b/LICENSE-APACHE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/LICENSE-MIT b/LICENSE-MIT
new file mode 100644
index 0000000..31aa793
--- /dev/null
+++ b/LICENSE-MIT
@@ -0,0 +1,23 @@
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..7a2a522
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,20 @@
+name: "pest_meta"
+description: "pest meta language parser and validator"
+third_party {
+ url {
+ type: HOMEPAGE
+ value: "https://crates.io/crates/pest_meta"
+ }
+ url {
+ type: ARCHIVE
+ value: "https://static.crates.io/crates/pest_meta/pest_meta-2.1.3.crate"
+ }
+ version: "2.1.3"
+ # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same.
+ license_type: NOTICE
+ last_upgrade_date {
+ year: 2022
+ month: 1
+ day: 27
+ }
+}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..45dc4dd
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1 @@
+include platform/prebuilts/rust:master:/OWNERS
diff --git a/_README.md b/_README.md
new file mode 100644
index 0000000..8e55f29
--- /dev/null
+++ b/_README.md
@@ -0,0 +1,169 @@
+<p align="center">
+ <img src="https://raw.github.com/pest-parser/pest/master/pest-logo.svg?sanitize=true" width="80%"/>
+</p>
+
+# pest. The Elegant Parser
+
+[![Join the chat at https://gitter.im/dragostis/pest](https://badges.gitter.im/dragostis/pest.svg)](https://gitter.im/dragostis/pest?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+[![Book](https://img.shields.io/badge/book-WIP-4d76ae.svg)](https://pest-parser.github.io/book)
+[![Docs](https://docs.rs/pest/badge.svg)](https://docs.rs/pest)
+
+[![Build Status](https://travis-ci.org/pest-parser/pest.svg?branch=master)](https://travis-ci.org/pest-parser/pest)
+[![codecov](https://codecov.io/gh/pest-parser/pest/branch/master/graph/badge.svg)](https://codecov.io/gh/pest-parser/pest)
+[![Fuzzit Status](https://app.fuzzit.dev/badge?org_id=pest-parser)](https://app.fuzzit.dev/orgs/pest-parser/dashboard)
+[![Crates.io](https://img.shields.io/crates/d/pest.svg)](https://crates.io/crates/pest)
+[![Crates.io](https://img.shields.io/crates/v/pest.svg)](https://crates.io/crates/pest)
+
+pest is a general purpose parser written in Rust with a focus on accessibility,
+correctness, and performance. It uses parsing expression grammars
+(or [PEG]) as input, which are similar in spirit to regular expressions, but
+which offer the enhanced expressivity needed to parse complex languages.
+
+[PEG]: https://en.wikipedia.org/wiki/Parsing_expression_grammar
+
+## Getting started
+
+The recommended way to start parsing with pest is to read the official [book].
+
+Other helpful resources:
+
+* API reference on [docs.rs]
+* play with grammars and share them on our [fiddle]
+* leave feedback, ask questions, or greet us on [Gitter]
+
+[book]: https://pest-parser.github.io/book
+[docs.rs]: https://docs.rs/pest
+[fiddle]: https://pest-parser.github.io/#editor
+[Gitter]: https://gitter.im/dragostis/pest
+
+## Example
+
+The following is an example of a grammar for a list of alpha-numeric identifiers
+where the first identifier does not start with a digit:
+
+```rust
+alpha = { 'a'..'z' | 'A'..'Z' }
+digit = { '0'..'9' }
+
+ident = { (alpha | digit)+ }
+
+ident_list = _{ !digit ~ ident ~ (" " ~ ident)+ }
+ // ^
+ // ident_list rule is silent which means it produces no tokens
+```
+
+Grammars are saved in separate .pest files which are never mixed with procedural
+code. This results in an always up-to-date formalization of a language that is
+easy to read and maintain.
+
+## Meaningful error reporting
+
+Based on the grammar definition, the parser also includes automatic error
+reporting. For the example above, the input `"123"` will result in:
+
+```
+thread 'main' panicked at ' --> 1:1
+ |
+1 | 123
+ | ^---
+ |
+ = unexpected digit', src/main.rs:12
+```
+while `"ab *"` will result in:
+```
+thread 'main' panicked at ' --> 1:1
+ |
+1 | ab *
+ | ^---
+ |
+ = expected ident', src/main.rs:12
+```
+
+## Pairs API
+
+The grammar can be used to derive a `Parser` implementation automatically.
+Parsing returns an iterator of nested token pairs:
+
+```rust
+extern crate pest;
+#[macro_use]
+extern crate pest_derive;
+
+use pest::Parser;
+
+#[derive(Parser)]
+#[grammar = "ident.pest"]
+struct IdentParser;
+
+fn main() {
+    let pairs = IdentParser::parse(Rule::ident_list, "a1 b2").unwrap_or_else(|e| panic!("{}", e));
+
+ // Because ident_list is silent, the iterator will contain idents
+ for pair in pairs {
+ // A pair is a combination of the rule which matched and a span of input
+ println!("Rule: {:?}", pair.as_rule());
+ println!("Span: {:?}", pair.as_span());
+ println!("Text: {}", pair.as_str());
+
+ // A pair can be converted to an iterator of the tokens which make it up:
+ for inner_pair in pair.into_inner() {
+ match inner_pair.as_rule() {
+ Rule::alpha => println!("Letter: {}", inner_pair.as_str()),
+ Rule::digit => println!("Digit: {}", inner_pair.as_str()),
+ _ => unreachable!()
+ };
+ }
+ }
+}
+```
+
+This produces the following output:
+```
+Rule: ident
+Span: Span { start: 0, end: 2 }
+Text: a1
+Letter: a
+Digit: 1
+Rule: ident
+Span: Span { start: 3, end: 5 }
+Text: b2
+Letter: b
+Digit: 2
+```
+
+## Other features
+
+* Precedence climbing
+* Input handling
+* Custom errors
+* Runs on stable Rust
+
+## Projects using pest
+
+* [pest_meta](https://github.com/pest-parser/pest/blob/master/meta/src/grammar.pest) (bootstrapped)
+* [AshPaper](https://github.com/shnewto/ashpaper)
+* [brain](https://github.com/brain-lang/brain)
+* [Chelone](https://github.com/Aaronepower/chelone)
+* [comrak](https://github.com/kivikakk/comrak)
+* [elastic-rs](https://github.com/cch123/elastic-rs)
+* [graphql-parser](https://github.com/Keats/graphql-parser)
+* [handlebars-rust](https://github.com/sunng87/handlebars-rust)
+* [hexdino](https://github.com/Luz/hexdino)
+* [Huia](https://gitlab.com/jimsy/huia/)
+* [jql](https://github.com/yamafaktory/jql)
+* [json5-rs](https://github.com/callum-oakley/json5-rs)
+* [mt940](https://github.com/svenstaro/mt940-rs)
+* [py_literal](https://github.com/jturner314/py_literal)
+* [rouler](https://github.com/jarcane/rouler)
+* [RuSh](https://github.com/lwandrebeck/RuSh)
+* [rs_pbrt](https://github.com/wahn/rs_pbrt)
+* [stache](https://github.com/dgraham/stache)
+* [tera](https://github.com/Keats/tera)
+* [ui_gen](https://github.com/emoon/ui_gen)
+* [ukhasnet-parser](https://github.com/adamgreig/ukhasnet-parser)
+* [ZoKrates](https://github.com/ZoKrates/ZoKrates)
+
+## Special thanks
+
+A special round of applause goes to prof. Marius Minea for his guidance and all
+pest contributors, some of which being none other than my friends.
diff --git a/cargo2android.json b/cargo2android.json
new file mode 100644
index 0000000..833343e
--- /dev/null
+++ b/cargo2android.json
@@ -0,0 +1,5 @@
+{
+ "patch": "patches/Android.bp.diff",
+ "run": true,
+ "host-first-multilib": true
+}
diff --git a/patches/0001-Remove-unused-extern-crate-maplit.patch b/patches/0001-Remove-unused-extern-crate-maplit.patch
new file mode 100644
index 0000000..abcf752
--- /dev/null
+++ b/patches/0001-Remove-unused-extern-crate-maplit.patch
@@ -0,0 +1,25 @@
+From cca209337abe86ad35dc705dd293b1ab264f5e46 Mon Sep 17 00:00:00 2001
+From: Henri Chataing <henrichataing@google.com>
+Date: Fri, 18 Feb 2022 11:49:59 +0100
+Subject: [PATCH] Remove unused extern crate maplit
+
+Change-Id: I4fdf3fb28528ba228d3c4869c7a4a17921f8293d
+---
+ src/lib.rs | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/src/lib.rs b/src/lib.rs
+index 1f9c5bc..28f33b0 100644
+--- a/src/lib.rs
++++ b/src/lib.rs
+@@ -9,7 +9,6 @@
+
+ #![allow(clippy::range_plus_one)]
+
+-extern crate maplit;
+ #[cfg(test)]
+ #[macro_use]
+ extern crate pest;
+--
+2.35.1.265.g69c8d7142f-goog
+
diff --git a/patches/Android.bp.diff b/patches/Android.bp.diff
new file mode 100644
index 0000000..14c6cb9
--- /dev/null
+++ b/patches/Android.bp.diff
@@ -0,0 +1,12 @@
+diff --git a/Android.bp b/Android.bp
+index 62e8f1f..691d848 100644
+--- a/Android.bp
++++ b/Android.bp
+@@ -12,7 +12,6 @@ rust_library_host {
+ srcs: ["src/lib.rs"],
+ edition: "2015",
+ rustlibs: [
+- "libmaplit",
+ "libpest",
+ ],
+ compile_multilib: "first",
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..b80c596
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,315 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Rule {
+ pub name: String,
+ pub ty: RuleType,
+ pub expr: Expr,
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum RuleType {
+ Normal,
+ Silent,
+ Atomic,
+ CompoundAtomic,
+ NonAtomic,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Expr {
+ /// Matches an exact string, e.g. `"a"`
+ Str(String),
+ /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
+ Insens(String),
+ /// Matches one character in the range, e.g. `'a'..'z'`
+ Range(String, String),
+ /// Matches the rule with the given name, e.g. `a`
+ Ident(String),
+ /// Matches a custom part of the stack, e.g. `PEEK[..]`
+ PeekSlice(i32, Option<i32>),
+ /// Positive lookahead; matches expression without making progress, e.g. `&e`
+ PosPred(Box<Expr>),
+ /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
+ NegPred(Box<Expr>),
+ /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
+ Seq(Box<Expr>, Box<Expr>),
+ /// Matches either of two expressions, e.g. `e1 | e2`
+ Choice(Box<Expr>, Box<Expr>),
+ /// Optionally matches an expression, e.g. `e?`
+ Opt(Box<Expr>),
+ /// Matches an expression zero or more times, e.g. `e*`
+ Rep(Box<Expr>),
+ /// Matches an expression one or more times, e.g. `e+`
+ RepOnce(Box<Expr>),
+ /// Matches an expression an exact number of times, e.g. `e{n}`
+ RepExact(Box<Expr>, u32),
+ /// Matches an expression at least a number of times, e.g. `e{n,}`
+ RepMin(Box<Expr>, u32),
+ /// Matches an expression at most a number of times, e.g. `e{,n}`
+ RepMax(Box<Expr>, u32),
+ /// Matches an expression a number of times within a range, e.g. `e{m, n}`
+ RepMinMax(Box<Expr>, u32, u32),
+ /// Continues to match expressions until one of the strings in the `Vec` is found
+ Skip(Vec<String>),
+ /// Matches an expression and pushes it to the stack, e.g. `push(e)`
+ Push(Box<Expr>),
+}
+
+impl Expr {
+ pub fn iter_top_down(&self) -> ExprTopDownIterator {
+ ExprTopDownIterator::new(self)
+ }
+
+ pub fn map_top_down<F>(self, mut f: F) -> Expr
+ where
+ F: FnMut(Expr) -> Expr,
+ {
+ fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
+ where
+ F: FnMut(Expr) -> Expr,
+ {
+ let expr = f(expr);
+
+ match expr {
+ // TODO: Use box syntax when it gets stabilized.
+ Expr::PosPred(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::PosPred(mapped)
+ }
+ Expr::NegPred(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::NegPred(mapped)
+ }
+ Expr::Seq(lhs, rhs) => {
+ let mapped_lhs = Box::new(map_internal(*lhs, f));
+ let mapped_rhs = Box::new(map_internal(*rhs, f));
+ Expr::Seq(mapped_lhs, mapped_rhs)
+ }
+ Expr::Choice(lhs, rhs) => {
+ let mapped_lhs = Box::new(map_internal(*lhs, f));
+ let mapped_rhs = Box::new(map_internal(*rhs, f));
+ Expr::Choice(mapped_lhs, mapped_rhs)
+ }
+ Expr::Rep(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::Rep(mapped)
+ }
+ Expr::RepOnce(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepOnce(mapped)
+ }
+ Expr::RepExact(expr, max) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepExact(mapped, max)
+ }
+ Expr::RepMin(expr, num) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepMin(mapped, num)
+ }
+ Expr::RepMax(expr, num) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepMax(mapped, num)
+ }
+ Expr::RepMinMax(expr, min, max) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepMinMax(mapped, min, max)
+ }
+ Expr::Opt(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::Opt(mapped)
+ }
+ Expr::Push(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::Push(mapped)
+ }
+ expr => expr,
+ }
+ }
+
+ map_internal(self, &mut f)
+ }
+
+ pub fn map_bottom_up<F>(self, mut f: F) -> Expr
+ where
+ F: FnMut(Expr) -> Expr,
+ {
+ fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
+ where
+ F: FnMut(Expr) -> Expr,
+ {
+ let mapped = match expr {
+ Expr::PosPred(expr) => {
+ // TODO: Use box syntax when it gets stabilized.
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::PosPred(mapped)
+ }
+ Expr::NegPred(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::NegPred(mapped)
+ }
+ Expr::Seq(lhs, rhs) => {
+ let mapped_lhs = Box::new(map_internal(*lhs, f));
+ let mapped_rhs = Box::new(map_internal(*rhs, f));
+ Expr::Seq(mapped_lhs, mapped_rhs)
+ }
+ Expr::Choice(lhs, rhs) => {
+ let mapped_lhs = Box::new(map_internal(*lhs, f));
+ let mapped_rhs = Box::new(map_internal(*rhs, f));
+ Expr::Choice(mapped_lhs, mapped_rhs)
+ }
+ Expr::Rep(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::Rep(mapped)
+ }
+ Expr::RepOnce(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepOnce(mapped)
+ }
+ Expr::RepExact(expr, num) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepExact(mapped, num)
+ }
+ Expr::RepMin(expr, max) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepMin(mapped, max)
+ }
+ Expr::RepMax(expr, max) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepMax(mapped, max)
+ }
+ Expr::RepMinMax(expr, min, max) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::RepMinMax(mapped, min, max)
+ }
+ Expr::Opt(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::Opt(mapped)
+ }
+ Expr::Push(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ Expr::Push(mapped)
+ }
+ expr => expr,
+ };
+
+ f(mapped)
+ }
+
+ map_internal(self, &mut f)
+ }
+}
+
+pub struct ExprTopDownIterator {
+ current: Option<Expr>,
+ next: Option<Expr>,
+ right_branches: Vec<Expr>,
+}
+
+impl ExprTopDownIterator {
+ pub fn new(expr: &Expr) -> Self {
+ let mut iter = ExprTopDownIterator {
+ current: None,
+ next: None,
+ right_branches: vec![],
+ };
+ iter.iterate_expr(expr.clone());
+ iter
+ }
+
+ fn iterate_expr(&mut self, expr: Expr) {
+ self.current = Some(expr.clone());
+ match expr {
+ Expr::Seq(lhs, rhs) => {
+ self.right_branches.push(*rhs);
+ self.next = Some(*lhs);
+ }
+ Expr::Choice(lhs, rhs) => {
+ self.right_branches.push(*rhs);
+ self.next = Some(*lhs);
+ }
+ Expr::PosPred(expr)
+ | Expr::NegPred(expr)
+ | Expr::Rep(expr)
+ | Expr::RepOnce(expr)
+ | Expr::RepExact(expr, _)
+ | Expr::RepMin(expr, _)
+ | Expr::RepMax(expr, _)
+ | Expr::RepMinMax(expr, ..)
+ | Expr::Opt(expr)
+ | Expr::Push(expr) => {
+ self.next = Some(*expr);
+ }
+ _ => {
+ self.next = None;
+ }
+ }
+ }
+}
+
+impl Iterator for ExprTopDownIterator {
+ type Item = Expr;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let result = self.current.take();
+
+ if let Some(expr) = self.next.take() {
+ self.iterate_expr(expr);
+ } else if let Some(expr) = self.right_branches.pop() {
+ self.iterate_expr(expr);
+ }
+
+ result
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn top_down_iterator() {
+ let expr = Expr::Choice(
+ Box::new(Expr::Str(String::from("a"))),
+ Box::new(Expr::Str(String::from("b"))),
+ );
+ let mut top_down = expr.clone().iter_top_down();
+ assert_eq!(top_down.next(), Some(expr));
+ assert_eq!(top_down.next(), Some(Expr::Str(String::from("a"))));
+ assert_eq!(top_down.next(), Some(Expr::Str(String::from("b"))));
+ assert_eq!(top_down.next(), None);
+ }
+
+ #[test]
+ fn identity() {
+ let expr = Expr::Choice(
+ Box::new(Expr::Seq(
+ Box::new(Expr::Ident("a".to_owned())),
+ Box::new(Expr::Str("b".to_owned())),
+ )),
+ Box::new(Expr::PosPred(Box::new(Expr::NegPred(Box::new(Expr::Rep(
+ Box::new(Expr::RepOnce(Box::new(Expr::Opt(Box::new(Expr::Choice(
+ Box::new(Expr::Insens("c".to_owned())),
+ Box::new(Expr::Push(Box::new(Expr::Range(
+ "'d'".to_owned(),
+ "'e'".to_owned(),
+ )))),
+ )))))),
+ )))))),
+ );
+
+ assert_eq!(
+ expr.clone()
+ .map_bottom_up(|expr| expr)
+ .map_top_down(|expr| expr),
+ expr
+ );
+ }
+}
diff --git a/src/grammar.pest b/src/grammar.pest
new file mode 100644
index 0000000..0d03ba8
--- /dev/null
+++ b/src/grammar.pest
@@ -0,0 +1,98 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+grammar_rules = _{ SOI ~ grammar_rule+ ~ EOI }
+
+grammar_rule = {
+ identifier ~ assignment_operator ~ modifier? ~
+ opening_brace ~ expression ~ closing_brace
+}
+
+assignment_operator = { "=" }
+opening_brace = { "{" }
+closing_brace = { "}" }
+opening_paren = { "(" }
+closing_paren = { ")" }
+opening_brack = { "[" }
+closing_brack = { "]" }
+
+modifier = _{
+ silent_modifier |
+ atomic_modifier |
+ compound_atomic_modifier |
+ non_atomic_modifier
+}
+
+silent_modifier = { "_" }
+atomic_modifier = { "@" }
+compound_atomic_modifier = { "$" }
+non_atomic_modifier = { "!" }
+
+expression = { term ~ (infix_operator ~ term)* }
+term = { prefix_operator* ~ node ~ postfix_operator* }
+node = _{ opening_paren ~ expression ~ closing_paren | terminal }
+terminal = _{ _push | peek_slice | identifier | string | insensitive_string | range }
+
+prefix_operator = _{ positive_predicate_operator | negative_predicate_operator }
+infix_operator = _{ sequence_operator | choice_operator }
+postfix_operator = _{
+ optional_operator |
+ repeat_operator |
+ repeat_once_operator |
+ repeat_exact |
+ repeat_min |
+ repeat_max |
+ repeat_min_max
+}
+
+positive_predicate_operator = { "&" }
+negative_predicate_operator = { "!" }
+sequence_operator = { "~" }
+choice_operator = { "|" }
+optional_operator = { "?" }
+repeat_operator = { "*" }
+repeat_once_operator = { "+" }
+
+repeat_exact = { opening_brace ~ number ~ closing_brace }
+repeat_min = { opening_brace ~ number ~ comma ~ closing_brace }
+repeat_max = { opening_brace ~ comma ~ number ~ closing_brace }
+repeat_min_max = { opening_brace ~ number ~ comma ~ number ~ closing_brace }
+
+number = @{ '0'..'9'+ }
+integer = @{ number | "-" ~ "0"* ~ '1'..'9' ~ number? }
+
+comma = { "," }
+
+_push = { "PUSH" ~ opening_paren ~ expression ~ closing_paren }
+peek_slice = { "PEEK" ~ opening_brack ~ integer? ~ range_operator ~ integer? ~ closing_brack }
+
+identifier = @{ !"PUSH" ~ ("_" | alpha) ~ ("_" | alpha_num)* }
+alpha = _{ 'a'..'z' | 'A'..'Z' }
+alpha_num = _{ alpha | '0'..'9' }
+
+string = ${ quote ~ inner_str ~ quote }
+insensitive_string = { "^" ~ string }
+range = { character ~ range_operator ~ character }
+character = ${ single_quote ~ inner_chr ~ single_quote }
+
+inner_str = @{ (!("\"" | "\\") ~ ANY)* ~ (escape ~ inner_str)? }
+inner_chr = @{ escape | ANY }
+escape = @{ "\\" ~ ("\"" | "\\" | "r" | "n" | "t" | "0" | "'" | code | unicode) }
+code = @{ "x" ~ hex_digit{2} }
+unicode = @{ "u" ~ opening_brace ~ hex_digit{2, 6} ~ closing_brace }
+hex_digit = @{ '0'..'9' | 'a'..'f' | 'A'..'F' }
+
+quote = { "\"" }
+single_quote = { "'" }
+range_operator = { ".." }
+
+newline = _{ "\n" | "\r\n" }
+WHITESPACE = _{ " " | "\t" | newline }
+block_comment = _{ "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/" }
+COMMENT = _{ block_comment | ("//" ~ (!newline ~ ANY)*) }
diff --git a/src/grammar.rs b/src/grammar.rs
new file mode 100644
index 0000000..bf94a1e
--- /dev/null
+++ b/src/grammar.rs
@@ -0,0 +1,2 @@
+pub struct PestParser;
+# [ allow ( dead_code , non_camel_case_types ) ] # [ derive ( Clone , Copy , Debug , Eq , Hash , Ord , PartialEq , PartialOrd ) ] pub enum Rule { EOI , grammar_rules , grammar_rule , assignment_operator , opening_brace , closing_brace , opening_paren , closing_paren , opening_brack , closing_brack , modifier , silent_modifier , atomic_modifier , compound_atomic_modifier , non_atomic_modifier , expression , term , node , terminal , prefix_operator , infix_operator , postfix_operator , positive_predicate_operator , negative_predicate_operator , sequence_operator , choice_operator , optional_operator , repeat_operator , repeat_once_operator , repeat_exact , repeat_min , repeat_max , repeat_min_max , number , integer , comma , _push , peek_slice , identifier , alpha , alpha_num , string , insensitive_string , range , character , inner_str , inner_chr , escape , code , unicode , hex_digit , quote , single_quote , range_operator , newline , WHITESPACE , block_comment , COMMENT } # [ allow ( clippy :: all ) ] impl :: pest :: Parser < Rule > for PestParser { fn parse < 'i > ( rule : Rule , input : & 'i str ) -> :: std :: result :: Result < :: pest :: iterators :: Pairs < 'i , Rule > , :: pest :: error :: Error < Rule > > { mod rules { pub mod hidden { use super :: super :: Rule ; # [ inline ] # [ allow ( dead_code , non_snake_case , unused_variables ) ] pub fn skip ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { if state . atomicity ( ) == :: pest :: Atomicity :: NonAtomic { state . sequence ( | state | { state . repeat ( | state | super :: visible :: WHITESPACE ( state ) ) . and_then ( | state | { state . repeat ( | state | { state . sequence ( | state | { super :: visible :: COMMENT ( state ) . and_then ( | state | { state . repeat ( | state | super :: visible :: WHITESPACE ( state ) ) } ) } ) } ) } ) } ) } else { Ok ( state ) } } } pub mod visible { use super :: super :: Rule ; # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn grammar_rules ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . sequence ( | state | { self :: SOI ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { state . sequence ( | state | { self :: grammar_rule ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { state . sequence ( | state | { state . optional ( | state | { self :: grammar_rule ( state ) . and_then ( | state | { state . repeat ( | state | { state . sequence ( | state | { super :: hidden :: skip ( state ) . and_then ( | state | { self :: grammar_rule ( state ) } ) } ) } ) } ) } ) } ) } ) } ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: EOI ( state ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn grammar_rule ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: grammar_rule , | state | { state . sequence ( | state | { self :: identifier ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: assignment_operator ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { state . optional ( | state | { self :: modifier ( state ) } ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: opening_brace ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: expression ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: closing_brace ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn assignment_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: assignment_operator , | state | { state . match_string ( "=" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn opening_brace ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: opening_brace , | state | { state . match_string ( "{" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn closing_brace ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: closing_brace , | state | { state . match_string ( "}" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn opening_paren ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: opening_paren , | state | { state . match_string ( "(" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn closing_paren ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: closing_paren , | state | { state . match_string ( ")" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn opening_brack ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: opening_brack , | state | { state . match_string ( "[" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn closing_brack ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: closing_brack , | state | { state . match_string ( "]" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn modifier ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { self :: silent_modifier ( state ) . or_else ( | state | { self :: atomic_modifier ( state ) } ) . or_else ( | state | { self :: compound_atomic_modifier ( state ) } ) . or_else ( | state | { self :: non_atomic_modifier ( state ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn silent_modifier ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: silent_modifier , | state | { state . match_string ( "_" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn atomic_modifier ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: atomic_modifier , | state | { state . match_string ( "@" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn compound_atomic_modifier ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: compound_atomic_modifier , | state | { state . match_string ( "$" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn non_atomic_modifier ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: non_atomic_modifier , | state | { state . match_string ( "!" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn expression ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: expression , | state | { state . sequence ( | state | { self :: term ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { state . sequence ( | state | { state . optional ( | state | { state . sequence ( | state | { self :: infix_operator ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: term ( state ) } ) } ) . and_then ( | state | { state . repeat ( | state | { state . sequence ( | state | { super :: hidden :: skip ( state ) . and_then ( | state | { state . sequence ( | state | { self :: infix_operator ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: term ( state ) } ) } ) } ) } ) } ) } ) } ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn term ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: term , | state | { state . sequence ( | state | { state . sequence ( | state | { state . optional ( | state | { self :: prefix_operator ( state ) . and_then ( | state | { state . repeat ( | state | { state . sequence ( | state | { super :: hidden :: skip ( state ) . and_then ( | state | { self :: prefix_operator ( state ) } ) } ) } ) } ) } ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: node ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { state . sequence ( | state | { state . optional ( | state | { self :: postfix_operator ( state ) . and_then ( | state | { state . repeat ( | state | { state . sequence ( | state | { super :: hidden :: skip ( state ) . and_then ( | state | { self :: postfix_operator ( state ) } ) } ) } ) } ) } ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn node ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . sequence ( | state | { self :: opening_paren ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: expression ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: closing_paren ( state ) } ) } ) . or_else ( | state | { self :: terminal ( state ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn terminal ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { self :: _push ( state ) . or_else ( | state | { self :: peek_slice ( state ) } ) . or_else ( | state | { self :: identifier ( state ) } ) . or_else ( | state | { self :: string ( state ) } ) . or_else ( | state | { self :: insensitive_string ( state ) } ) . or_else ( | state | { self :: range ( state ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn prefix_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { self :: positive_predicate_operator ( state ) . or_else ( | state | { self :: negative_predicate_operator ( state ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn infix_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { self :: sequence_operator ( state ) . or_else ( | state | { self :: choice_operator ( state ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn postfix_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { self :: optional_operator ( state ) . or_else ( | state | { self :: repeat_operator ( state ) } ) . or_else ( | state | { self :: repeat_once_operator ( state ) } ) . or_else ( | state | { self :: repeat_exact ( state ) } ) . or_else ( | state | { self :: repeat_min ( state ) } ) . or_else ( | state | { self :: repeat_max ( state ) } ) . or_else ( | state | { self :: repeat_min_max ( state ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn positive_predicate_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: positive_predicate_operator , | state | { state . match_string ( "&" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn negative_predicate_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: negative_predicate_operator , | state | { state . match_string ( "!" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn sequence_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: sequence_operator , | state | { state . match_string ( "~" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn choice_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: choice_operator , | state | { state . match_string ( "|" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn optional_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: optional_operator , | state | { state . match_string ( "?" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn repeat_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: repeat_operator , | state | { state . match_string ( "*" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn repeat_once_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: repeat_once_operator , | state | { state . match_string ( "+" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn repeat_exact ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: repeat_exact , | state | { state . sequence ( | state | { self :: opening_brace ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: number ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: closing_brace ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn repeat_min ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: repeat_min , | state | { state . sequence ( | state | { self :: opening_brace ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: number ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: comma ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: closing_brace ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn repeat_max ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: repeat_max , | state | { state . sequence ( | state | { self :: opening_brace ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: comma ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: number ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: closing_brace ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn repeat_min_max ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: repeat_min_max , | state | { state . sequence ( | state | { self :: opening_brace ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: number ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: comma ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: number ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: closing_brace ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn number ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: number , | state | { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { state . sequence ( | state | { state . match_range ( '0' .. '9' ) . and_then ( | state | { state . repeat ( | state | { state . match_range ( '0' .. '9' ) } ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn integer ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: integer , | state | { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { self :: number ( state ) . or_else ( | state | { state . sequence ( | state | { state . match_string ( "-" ) . and_then ( | state | { state . repeat ( | state | { state . match_string ( "0" ) } ) } ) . and_then ( | state | { state . match_range ( '1' .. '9' ) } ) . and_then ( | state | { state . optional ( | state | { self :: number ( state ) } ) } ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn comma ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: comma , | state | { state . match_string ( "," ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn _push ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: _push , | state | { state . sequence ( | state | { state . match_string ( "PUSH" ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: opening_paren ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: expression ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: closing_paren ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn peek_slice ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: peek_slice , | state | { state . sequence ( | state | { state . match_string ( "PEEK" ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: opening_brack ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { state . optional ( | state | { self :: integer ( state ) } ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: range_operator ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { state . optional ( | state | { self :: integer ( state ) } ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: closing_brack ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn identifier ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: identifier , | state | { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { state . sequence ( | state | { state . lookahead ( false , | state | { state . match_string ( "PUSH" ) } ) . and_then ( | state | { state . match_string ( "_" ) . or_else ( | state | { self :: alpha ( state ) } ) } ) . and_then ( | state | { state . repeat ( | state | { state . match_string ( "_" ) . or_else ( | state | { self :: alpha_num ( state ) } ) } ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn alpha ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . match_range ( 'a' .. 'z' ) . or_else ( | state | { state . match_range ( 'A' .. 'Z' ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn alpha_num ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { self :: alpha ( state ) . or_else ( | state | { state . match_range ( '0' .. '9' ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn string ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . atomic ( :: pest :: Atomicity :: CompoundAtomic , | state | { state . rule ( Rule :: string , | state | { state . sequence ( | state | { self :: quote ( state ) . and_then ( | state | { self :: inner_str ( state ) } ) . and_then ( | state | { self :: quote ( state ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn insensitive_string ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: insensitive_string , | state | { state . sequence ( | state | { state . match_string ( "^" ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: string ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn range ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: range , | state | { state . sequence ( | state | { self :: character ( state ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: range_operator ( state ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: character ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn character ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . atomic ( :: pest :: Atomicity :: CompoundAtomic , | state | { state . rule ( Rule :: character , | state | { state . sequence ( | state | { self :: single_quote ( state ) . and_then ( | state | { self :: inner_chr ( state ) } ) . and_then ( | state | { self :: single_quote ( state ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn inner_str ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: inner_str , | state | { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { state . sequence ( | state | { let strings = [ "\"" , "\\" ] ; state . skip_until ( & strings ) . and_then ( | state | { state . optional ( | state | { state . sequence ( | state | { self :: escape ( state ) . and_then ( | state | { self :: inner_str ( state ) } ) } ) } ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn inner_chr ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: inner_chr , | state | { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { self :: escape ( state ) . or_else ( | state | { self :: ANY ( state ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn escape ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: escape , | state | { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { state . sequence ( | state | { state . match_string ( "\\" ) . and_then ( | state | { state . match_string ( "\"" ) . or_else ( | state | { state . match_string ( "\\" ) } ) . or_else ( | state | { state . match_string ( "r" ) } ) . or_else ( | state | { state . match_string ( "n" ) } ) . or_else ( | state | { state . match_string ( "t" ) } ) . or_else ( | state | { state . match_string ( "0" ) } ) . or_else ( | state | { state . match_string ( "'" ) } ) . or_else ( | state | { self :: code ( state ) } ) . or_else ( | state | { self :: unicode ( state ) } ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn code ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: code , | state | { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { state . sequence ( | state | { state . match_string ( "x" ) . and_then ( | state | { self :: hex_digit ( state ) } ) . and_then ( | state | { self :: hex_digit ( state ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn unicode ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: unicode , | state | { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { state . sequence ( | state | { state . match_string ( "u" ) . and_then ( | state | { self :: opening_brace ( state ) } ) . and_then ( | state | { state . sequence ( | state | { self :: hex_digit ( state ) . and_then ( | state | { self :: hex_digit ( state ) } ) . and_then ( | state | { state . optional ( | state | { self :: hex_digit ( state ) } ) } ) . and_then ( | state | { state . optional ( | state | { self :: hex_digit ( state ) } ) } ) . and_then ( | state | { state . optional ( | state | { self :: hex_digit ( state ) } ) } ) . and_then ( | state | { state . optional ( | state | { self :: hex_digit ( state ) } ) } ) } ) } ) . and_then ( | state | { self :: closing_brace ( state ) } ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn hex_digit ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: hex_digit , | state | { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { state . match_range ( '0' .. '9' ) . or_else ( | state | { state . match_range ( 'a' .. 'f' ) } ) . or_else ( | state | { state . match_range ( 'A' .. 'F' ) } ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn quote ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: quote , | state | { state . match_string ( "\"" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn single_quote ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: single_quote , | state | { state . match_string ( "'" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn range_operator ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: range_operator , | state | { state . match_string ( ".." ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn newline ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . match_string ( "\n" ) . or_else ( | state | { state . match_string ( "\r\n" ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn WHITESPACE ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { state . match_string ( " " ) . or_else ( | state | { state . match_string ( "\t" ) } ) . or_else ( | state | { self :: newline ( state ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn block_comment ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . sequence ( | state | { state . match_string ( "/*" ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { state . sequence ( | state | { state . optional ( | state | { self :: block_comment ( state ) . or_else ( | state | { state . sequence ( | state | { state . lookahead ( false , | state | { state . match_string ( "*/" ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: ANY ( state ) } ) } ) } ) . and_then ( | state | { state . repeat ( | state | { state . sequence ( | state | { super :: hidden :: skip ( state ) . and_then ( | state | { self :: block_comment ( state ) . or_else ( | state | { state . sequence ( | state | { state . lookahead ( false , | state | { state . match_string ( "*/" ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { self :: ANY ( state ) } ) } ) } ) } ) } ) } ) } ) } ) } ) } ) . and_then ( | state | { super :: hidden :: skip ( state ) } ) . and_then ( | state | { state . match_string ( "*/" ) } ) } ) } # [ inline ] # [ allow ( non_snake_case , unused_variables ) ] pub fn COMMENT ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . atomic ( :: pest :: Atomicity :: Atomic , | state | { self :: block_comment ( state ) . or_else ( | state | { state . sequence ( | state | { state . match_string ( "//" ) . and_then ( | state | { state . repeat ( | state | { state . sequence ( | state | { state . lookahead ( false , | state | { self :: newline ( state ) } ) . and_then ( | state | { self :: ANY ( state ) } ) } ) } ) } ) } ) } ) } ) } # [ inline ] # [ allow ( dead_code , non_snake_case , unused_variables ) ] pub fn SOI ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . start_of_input ( ) } # [ inline ] # [ allow ( dead_code , non_snake_case , unused_variables ) ] pub fn ANY ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . skip ( 1 ) } # [ inline ] # [ allow ( dead_code , non_snake_case , unused_variables ) ] pub fn EOI ( state : Box < :: pest :: ParserState < Rule >> ) -> :: pest :: ParseResult < Box < :: pest :: ParserState < Rule >> > { state . rule ( Rule :: EOI , | state | state . end_of_input ( ) ) } } pub use self :: visible :: * ; } :: pest :: state ( input , | state | { match rule { Rule :: grammar_rules => rules :: grammar_rules ( state ) , Rule :: grammar_rule => rules :: grammar_rule ( state ) , Rule :: assignment_operator => rules :: assignment_operator ( state ) , Rule :: opening_brace => rules :: opening_brace ( state ) , Rule :: closing_brace => rules :: closing_brace ( state ) , Rule :: opening_paren => rules :: opening_paren ( state ) , Rule :: closing_paren => rules :: closing_paren ( state ) , Rule :: opening_brack => rules :: opening_brack ( state ) , Rule :: closing_brack => rules :: closing_brack ( state ) , Rule :: modifier => rules :: modifier ( state ) , Rule :: silent_modifier => rules :: silent_modifier ( state ) , Rule :: atomic_modifier => rules :: atomic_modifier ( state ) , Rule :: compound_atomic_modifier => rules :: compound_atomic_modifier ( state ) , Rule :: non_atomic_modifier => rules :: non_atomic_modifier ( state ) , Rule :: expression => rules :: expression ( state ) , Rule :: term => rules :: term ( state ) , Rule :: node => rules :: node ( state ) , Rule :: terminal => rules :: terminal ( state ) , Rule :: prefix_operator => rules :: prefix_operator ( state ) , Rule :: infix_operator => rules :: infix_operator ( state ) , Rule :: postfix_operator => rules :: postfix_operator ( state ) , Rule :: positive_predicate_operator => rules :: positive_predicate_operator ( state ) , Rule :: negative_predicate_operator => rules :: negative_predicate_operator ( state ) , Rule :: sequence_operator => rules :: sequence_operator ( state ) , Rule :: choice_operator => rules :: choice_operator ( state ) , Rule :: optional_operator => rules :: optional_operator ( state ) , Rule :: repeat_operator => rules :: repeat_operator ( state ) , Rule :: repeat_once_operator => rules :: repeat_once_operator ( state ) , Rule :: repeat_exact => rules :: repeat_exact ( state ) , Rule :: repeat_min => rules :: repeat_min ( state ) , Rule :: repeat_max => rules :: repeat_max ( state ) , Rule :: repeat_min_max => rules :: repeat_min_max ( state ) , Rule :: number => rules :: number ( state ) , Rule :: integer => rules :: integer ( state ) , Rule :: comma => rules :: comma ( state ) , Rule :: _push => rules :: _push ( state ) , Rule :: peek_slice => rules :: peek_slice ( state ) , Rule :: identifier => rules :: identifier ( state ) , Rule :: alpha => rules :: alpha ( state ) , Rule :: alpha_num => rules :: alpha_num ( state ) , Rule :: string => rules :: string ( state ) , Rule :: insensitive_string => rules :: insensitive_string ( state ) , Rule :: range => rules :: range ( state ) , Rule :: character => rules :: character ( state ) , Rule :: inner_str => rules :: inner_str ( state ) , Rule :: inner_chr => rules :: inner_chr ( state ) , Rule :: escape => rules :: escape ( state ) , Rule :: code => rules :: code ( state ) , Rule :: unicode => rules :: unicode ( state ) , Rule :: hex_digit => rules :: hex_digit ( state ) , Rule :: quote => rules :: quote ( state ) , Rule :: single_quote => rules :: single_quote ( state ) , Rule :: range_operator => rules :: range_operator ( state ) , Rule :: newline => rules :: newline ( state ) , Rule :: WHITESPACE => rules :: WHITESPACE ( state ) , Rule :: block_comment => rules :: block_comment ( state ) , Rule :: COMMENT => rules :: COMMENT ( state ) , Rule :: EOI => rules :: EOI ( state ) } } ) } }
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..28f33b0
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,133 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+#![allow(clippy::range_plus_one)]
+
+#[cfg(test)]
+#[macro_use]
+extern crate pest;
+#[cfg(not(test))]
+extern crate pest;
+
+use std::fmt::Display;
+
+pub mod ast;
+pub mod optimizer;
+pub mod parser;
+pub mod validator;
+
+pub fn unwrap_or_report<T, E>(result: Result<T, E>) -> T
+where
+ E: IntoIterator,
+ E::Item: Display,
+{
+ result.unwrap_or_else(|e| {
+ panic!(
+ "grammar error\n\n".to_owned()
+ + &e.into_iter()
+ .map(|error| format!("{}", error))
+ .collect::<Vec<_>>()
+ .join("\n\n")
+ )
+ })
+}
+
+#[doc(hidden)]
+pub static UNICODE_PROPERTY_NAMES: &[&str] = &[
+ /* BINARY */ "ALPHABETIC",
+ "BIDI_CONTROL",
+ "CASE_IGNORABLE",
+ "CASED",
+ "CHANGES_WHEN_CASEFOLDED",
+ "CHANGES_WHEN_CASEMAPPED",
+ "CHANGES_WHEN_LOWERCASED",
+ "CHANGES_WHEN_TITLECASED",
+ "CHANGES_WHEN_UPPERCASED",
+ "DASH",
+ "DEFAULT_IGNORABLE_CODE_POINT",
+ "DEPRECATED",
+ "DIACRITIC",
+ "EXTENDER",
+ "GRAPHEME_BASE",
+ "GRAPHEME_EXTEND",
+ "GRAPHEME_LINK",
+ "HEX_DIGIT",
+ "HYPHEN",
+ "IDS_BINARY_OPERATOR",
+ "IDS_TRINARY_OPERATOR",
+ "ID_CONTINUE",
+ "ID_START",
+ "IDEOGRAPHIC",
+ "JOIN_CONTROL",
+ "LOGICAL_ORDER_EXCEPTION",
+ "LOWERCASE",
+ "MATH",
+ "NONCHARACTER_CODE_POINT",
+ "OTHER_ALPHABETIC",
+ "OTHER_DEFAULT_IGNORABLE_CODE_POINT",
+ "OTHER_GRAPHEME_EXTEND",
+ "OTHER_ID_CONTINUE",
+ "OTHER_ID_START",
+ "OTHER_LOWERCASE",
+ "OTHER_MATH",
+ "OTHER_UPPERCASE",
+ "PATTERN_SYNTAX",
+ "PATTERN_WHITE_SPACE",
+ "PREPENDED_CONCATENATION_MARK",
+ "QUOTATION_MARK",
+ "RADICAL",
+ "REGIONAL_INDICATOR",
+ "SENTENCE_TERMINAL",
+ "SOFT_DOTTED",
+ "TERMINAL_PUNCTUATION",
+ "UNIFIED_IDEOGRAPH",
+ "UPPERCASE",
+ "VARIATION_SELECTOR",
+ "WHITE_SPACE",
+ "XID_CONTINUE",
+ "XID_START",
+ /* CATEGORY */ "CASED_LETTER",
+ "CLOSE_PUNCTUATION",
+ "CONNECTOR_PUNCTUATION",
+ "CONTROL",
+ "CURRENCY_SYMBOL",
+ "DASH_PUNCTUATION",
+ "DECIMAL_NUMBER",
+ "ENCLOSING_MARK",
+ "FINAL_PUNCTUATION",
+ "FORMAT",
+ "INITIAL_PUNCTUATION",
+ "LETTER",
+ "LETTER_NUMBER",
+ "LINE_SEPARATOR",
+ "LOWERCASE_LETTER",
+ "MARK",
+ "MATH_SYMBOL",
+ "MODIFIER_LETTER",
+ "MODIFIER_SYMBOL",
+ "NONSPACING_MARK",
+ "NUMBER",
+ "OPEN_PUNCTUATION",
+ "OTHER",
+ "OTHER_LETTER",
+ "OTHER_NUMBER",
+ "OTHER_PUNCTUATION",
+ "OTHER_SYMBOL",
+ "PARAGRAPH_SEPARATOR",
+ "PRIVATE_USE",
+ "PUNCTUATION",
+ "SEPARATOR",
+ "SPACE_SEPARATOR",
+ "SPACING_MARK",
+ "SURROGATE",
+ "SYMBOL",
+ "TITLECASE_LETTER",
+ "UNASSIGNED",
+ "UPPERCASE_LETTER",
+];
diff --git a/src/optimizer/concatenator.rs b/src/optimizer/concatenator.rs
new file mode 100644
index 0000000..e0aab7b
--- /dev/null
+++ b/src/optimizer/concatenator.rs
@@ -0,0 +1,34 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use ast::*;
+
+pub fn concatenate(rule: Rule) -> Rule {
+ match rule {
+ Rule { name, ty, expr } => Rule {
+ name,
+ ty,
+ expr: expr.map_bottom_up(|expr| {
+ if ty == RuleType::Atomic {
+ // TODO: Use box syntax when it gets stabilized.
+ match expr {
+ Expr::Seq(lhs, rhs) => match (*lhs, *rhs) {
+ (Expr::Str(lhs), Expr::Str(rhs)) => Expr::Str(lhs + &rhs),
+ (Expr::Insens(lhs), Expr::Insens(rhs)) => Expr::Insens(lhs + &rhs),
+ (lhs, rhs) => Expr::Seq(Box::new(lhs), Box::new(rhs)),
+ },
+ expr => expr,
+ }
+ } else {
+ expr
+ }
+ }),
+ },
+ }
+}
diff --git a/src/optimizer/factorizer.rs b/src/optimizer/factorizer.rs
new file mode 100644
index 0000000..236289e
--- /dev/null
+++ b/src/optimizer/factorizer.rs
@@ -0,0 +1,38 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use ast::*;
+
+pub fn factor(rule: Rule) -> Rule {
+ match rule {
+ Rule { name, ty, expr } => Rule {
+ name,
+ ty,
+ expr: expr.map_top_down(|expr| {
+ // TODO: Use box syntax when it gets stabilized.
+ match expr {
+ Expr::Choice(lhs, rhs) => match (*lhs, *rhs) {
+ (Expr::Seq(l1, r1), Expr::Seq(l2, r2)) => {
+ if l1 == l2 {
+ Expr::Seq(l1, Box::new(Expr::Choice(r1, r2)))
+ } else {
+ Expr::Choice(
+ Box::new(Expr::Seq(l1, r1)),
+ Box::new(Expr::Seq(l2, r2)),
+ )
+ }
+ }
+ (lhs, rhs) => Expr::Choice(Box::new(lhs), Box::new(rhs)),
+ },
+ expr => expr,
+ }
+ }),
+ },
+ }
+}
diff --git a/src/optimizer/mod.rs b/src/optimizer/mod.rs
new file mode 100644
index 0000000..7013c43
--- /dev/null
+++ b/src/optimizer/mod.rs
@@ -0,0 +1,510 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use ast::*;
+use std::collections::HashMap;
+
+#[cfg(test)]
+macro_rules! box_tree {
+ ( $node:ident( $( $child:ident( $($args:tt)* ) ),+ ) ) => (
+ $node ( $( Box::new( box_tree!( $child ( $($args )* ) ) ) ),+ )
+ );
+ ($expr:expr) => ($expr);
+}
+
+mod concatenator;
+mod factorizer;
+mod restorer;
+mod rotater;
+mod skipper;
+mod unroller;
+
+pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
+ let optimized: Vec<OptimizedRule> = rules
+ .into_iter()
+ .map(rotater::rotate)
+ .map(skipper::skip)
+ .map(unroller::unroll)
+ .map(concatenator::concatenate)
+ .map(factorizer::factor)
+ .map(rule_to_optimized_rule)
+ .collect();
+
+ let rules = to_hash_map(&optimized);
+ optimized
+ .into_iter()
+ .map(|rule| restorer::restore_on_err(rule, &rules))
+ .collect()
+}
+
+fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
+ fn to_optimized(expr: Expr) -> OptimizedExpr {
+ match expr {
+ Expr::Str(string) => OptimizedExpr::Str(string),
+ Expr::Insens(string) => OptimizedExpr::Insens(string),
+ Expr::Range(start, end) => OptimizedExpr::Range(start, end),
+ Expr::Ident(ident) => OptimizedExpr::Ident(ident),
+ Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
+ Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
+ Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
+ Expr::Seq(lhs, rhs) => {
+ OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
+ }
+ Expr::Choice(lhs, rhs) => {
+ OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
+ }
+ Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
+ Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
+ Expr::Skip(strings) => OptimizedExpr::Skip(strings),
+ Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
+ Expr::RepOnce(_)
+ | Expr::RepExact(..)
+ | Expr::RepMin(..)
+ | Expr::RepMax(..)
+ | Expr::RepMinMax(..) => unreachable!("No valid transformation to OptimizedRule"),
+ }
+ }
+
+ OptimizedRule {
+ name: rule.name,
+ ty: rule.ty,
+ expr: to_optimized(rule.expr),
+ }
+}
+
+fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> {
+ rules
+ .iter()
+ .map(|r| (r.name.clone(), r.expr.clone()))
+ .collect()
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct OptimizedRule {
+ pub name: String,
+ pub ty: RuleType,
+ pub expr: OptimizedExpr,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum OptimizedExpr {
+ Str(String),
+ Insens(String),
+ Range(String, String),
+ Ident(String),
+ PeekSlice(i32, Option<i32>),
+ PosPred(Box<OptimizedExpr>),
+ NegPred(Box<OptimizedExpr>),
+ Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
+ Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
+ Opt(Box<OptimizedExpr>),
+ Rep(Box<OptimizedExpr>),
+ Skip(Vec<String>),
+ Push(Box<OptimizedExpr>),
+ RestoreOnErr(Box<OptimizedExpr>),
+}
+
+impl OptimizedExpr {
+ pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
+ OptimizedExprTopDownIterator::new(self)
+ }
+
+ pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
+ where
+ F: FnMut(OptimizedExpr) -> OptimizedExpr,
+ {
+ fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
+ where
+ F: FnMut(OptimizedExpr) -> OptimizedExpr,
+ {
+ let expr = f(expr);
+
+ match expr {
+ // TODO: Use box syntax when it gets stabilized.
+ OptimizedExpr::PosPred(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::PosPred(mapped)
+ }
+ OptimizedExpr::NegPred(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::NegPred(mapped)
+ }
+ OptimizedExpr::Seq(lhs, rhs) => {
+ let mapped_lhs = Box::new(map_internal(*lhs, f));
+ let mapped_rhs = Box::new(map_internal(*rhs, f));
+ OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
+ }
+ OptimizedExpr::Choice(lhs, rhs) => {
+ let mapped_lhs = Box::new(map_internal(*lhs, f));
+ let mapped_rhs = Box::new(map_internal(*rhs, f));
+ OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
+ }
+ OptimizedExpr::Rep(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::Rep(mapped)
+ }
+ OptimizedExpr::Opt(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::Opt(mapped)
+ }
+ OptimizedExpr::Push(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::Push(mapped)
+ }
+ expr => expr,
+ }
+ }
+
+ map_internal(self, &mut f)
+ }
+
+ pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
+ where
+ F: FnMut(OptimizedExpr) -> OptimizedExpr,
+ {
+ fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
+ where
+ F: FnMut(OptimizedExpr) -> OptimizedExpr,
+ {
+ let mapped = match expr {
+ OptimizedExpr::PosPred(expr) => {
+ // TODO: Use box syntax when it gets stabilized.
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::PosPred(mapped)
+ }
+ OptimizedExpr::NegPred(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::NegPred(mapped)
+ }
+ OptimizedExpr::Seq(lhs, rhs) => {
+ let mapped_lhs = Box::new(map_internal(*lhs, f));
+ let mapped_rhs = Box::new(map_internal(*rhs, f));
+ OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
+ }
+ OptimizedExpr::Choice(lhs, rhs) => {
+ let mapped_lhs = Box::new(map_internal(*lhs, f));
+ let mapped_rhs = Box::new(map_internal(*rhs, f));
+ OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
+ }
+ OptimizedExpr::Rep(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::Rep(mapped)
+ }
+ OptimizedExpr::Opt(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::Opt(mapped)
+ }
+ OptimizedExpr::Push(expr) => {
+ let mapped = Box::new(map_internal(*expr, f));
+ OptimizedExpr::Push(mapped)
+ }
+ expr => expr,
+ };
+
+ f(mapped)
+ }
+
+ map_internal(self, &mut f)
+ }
+}
+
+pub struct OptimizedExprTopDownIterator {
+ current: Option<OptimizedExpr>,
+ next: Option<OptimizedExpr>,
+ right_branches: Vec<OptimizedExpr>,
+}
+
+impl OptimizedExprTopDownIterator {
+ pub fn new(expr: &OptimizedExpr) -> Self {
+ let mut iter = OptimizedExprTopDownIterator {
+ current: None,
+ next: None,
+ right_branches: vec![],
+ };
+ iter.iterate_expr(expr.clone());
+ iter
+ }
+
+ fn iterate_expr(&mut self, expr: OptimizedExpr) {
+ self.current = Some(expr.clone());
+ match expr {
+ OptimizedExpr::Seq(lhs, rhs) => {
+ self.right_branches.push(*rhs);
+ self.next = Some(*lhs);
+ }
+ OptimizedExpr::Choice(lhs, rhs) => {
+ self.right_branches.push(*rhs);
+ self.next = Some(*lhs);
+ }
+ OptimizedExpr::PosPred(expr)
+ | OptimizedExpr::NegPred(expr)
+ | OptimizedExpr::Rep(expr)
+ | OptimizedExpr::Opt(expr)
+ | OptimizedExpr::Push(expr) => {
+ self.next = Some(*expr);
+ }
+ _ => {
+ self.next = None;
+ }
+ }
+ }
+}
+
+impl Iterator for OptimizedExprTopDownIterator {
+ type Item = OptimizedExpr;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let result = self.current.take();
+
+ if let Some(expr) = self.next.take() {
+ self.iterate_expr(expr);
+ } else if let Some(expr) = self.right_branches.pop() {
+ self.iterate_expr(expr);
+ }
+
+ result
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn rotate() {
+ let rules = {
+ use ast::Expr::*;
+ vec![Rule {
+ name: "rule".to_owned(),
+ ty: RuleType::Normal,
+ expr: box_tree!(Choice(
+ Choice(
+ Choice(Str(String::from("a")), Str(String::from("b"))),
+ Str(String::from("c"))
+ ),
+ Str(String::from("d"))
+ )),
+ }]
+ };
+ let rotated = {
+ use optimizer::OptimizedExpr::*;
+ vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Normal,
+ expr: box_tree!(Choice(
+ Str(String::from("a")),
+ Choice(
+ Str(String::from("b")),
+ Choice(Str(String::from("c")), Str(String::from("d")))
+ )
+ )),
+ }]
+ };
+
+ assert_eq!(optimize(rules), rotated);
+ }
+
+ #[test]
+ fn skip() {
+ let rules = {
+ use ast::Expr::*;
+ vec![Rule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: box_tree!(Rep(Seq(
+ NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
+ Ident(String::from("ANY"))
+ ))),
+ }]
+ };
+ let skipped = vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
+ }];
+
+ assert_eq!(optimize(rules), skipped);
+ }
+
+ #[test]
+ fn concat_strings() {
+ let rules = {
+ use ast::Expr::*;
+ vec![Rule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: box_tree!(Seq(
+ Seq(Str(String::from("a")), Str(String::from("b"))),
+ Seq(Str(String::from("c")), Str(String::from("d")))
+ )),
+ }]
+ };
+ let concatenated = vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: OptimizedExpr::Str(String::from("abcd")),
+ }];
+
+ assert_eq!(optimize(rules), concatenated);
+ }
+
+ #[test]
+ fn unroll_loop_exact() {
+ let rules = vec![Rule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
+ }];
+ let unrolled = {
+ use optimizer::OptimizedExpr::*;
+ vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: box_tree!(Seq(
+ Ident(String::from("a")),
+ Seq(Ident(String::from("a")), Ident(String::from("a")))
+ )),
+ }]
+ };
+
+ assert_eq!(optimize(rules), unrolled);
+ }
+
+ #[test]
+ fn unroll_loop_max() {
+ let rules = vec![Rule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
+ }];
+ let unrolled = {
+ use optimizer::OptimizedExpr::*;
+ vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: box_tree!(Seq(
+ Opt(Str(String::from("a"))),
+ Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
+ )),
+ }]
+ };
+
+ assert_eq!(optimize(rules), unrolled);
+ }
+
+ #[test]
+ fn unroll_loop_min() {
+ let rules = vec![Rule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
+ }];
+ let unrolled = {
+ use optimizer::OptimizedExpr::*;
+ vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: box_tree!(Seq(
+ Str(String::from("a")),
+ Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
+ )),
+ }]
+ };
+
+ assert_eq!(optimize(rules), unrolled);
+ }
+
+ #[test]
+ fn unroll_loop_min_max() {
+ let rules = vec![Rule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
+ }];
+ let unrolled = {
+ use optimizer::OptimizedExpr::*;
+ vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ /* TODO possible room for improvement here:
+ * if the sequences were rolled out in the opposite
+ * order, we could further optimize the strings
+ * in cases like this.
+ Str(String::from(("aa")),
+ Opt(Str(String::from("a")))
+ */
+ expr: box_tree!(Seq(
+ Str(String::from("a")),
+ Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
+ )),
+ }]
+ };
+
+ assert_eq!(optimize(rules), unrolled);
+ }
+
+ #[test]
+ fn concat_insensitive_strings() {
+ let rules = {
+ use ast::Expr::*;
+ vec![Rule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: box_tree!(Seq(
+ Seq(Insens(String::from("a")), Insens(String::from("b"))),
+ Seq(Insens(String::from("c")), Insens(String::from("d")))
+ )),
+ }]
+ };
+ let concatenated = vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Atomic,
+ expr: OptimizedExpr::Insens(String::from("abcd")),
+ }];
+
+ assert_eq!(optimize(rules), concatenated);
+ }
+
+ #[test]
+ fn long_common_sequence() {
+ let rules = {
+ use ast::Expr::*;
+ vec![Rule {
+ name: "rule".to_owned(),
+ ty: RuleType::Silent,
+ expr: box_tree!(Choice(
+ Seq(
+ Ident(String::from("a")),
+ Seq(Ident(String::from("b")), Ident(String::from("c")))
+ ),
+ Seq(
+ Seq(Ident(String::from("a")), Ident(String::from("b"))),
+ Ident(String::from("d"))
+ )
+ )),
+ }]
+ };
+ let optimized = {
+ use optimizer::OptimizedExpr::*;
+ vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Silent,
+ expr: box_tree!(Seq(
+ Ident(String::from("a")),
+ Seq(
+ Ident(String::from("b")),
+ Choice(Ident(String::from("c")), Ident(String::from("d")))
+ )
+ )),
+ }]
+ };
+
+ assert_eq!(optimize(rules), optimized);
+ }
+}
diff --git a/src/optimizer/restorer.rs b/src/optimizer/restorer.rs
new file mode 100644
index 0000000..34a710e
--- /dev/null
+++ b/src/optimizer/restorer.rs
@@ -0,0 +1,157 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+use std::collections::HashMap;
+
+use optimizer::*;
+
+pub fn restore_on_err(
+ rule: OptimizedRule,
+ rules: &HashMap<String, OptimizedExpr>,
+) -> OptimizedRule {
+ match rule {
+ OptimizedRule { name, ty, expr } => {
+ let expr = expr.map_bottom_up(|expr| wrap_branching_exprs(expr, rules));
+
+ OptimizedRule { name, ty, expr }
+ }
+ }
+}
+
+fn wrap_branching_exprs(
+ expr: OptimizedExpr,
+ rules: &HashMap<String, OptimizedExpr>,
+) -> OptimizedExpr {
+ match expr {
+ OptimizedExpr::Opt(expr) => {
+ if child_modifies_state(&expr, rules, &mut HashMap::new()) {
+ OptimizedExpr::Opt(Box::new(OptimizedExpr::RestoreOnErr(expr)))
+ } else {
+ OptimizedExpr::Opt(expr)
+ }
+ }
+ OptimizedExpr::Choice(lhs, rhs) => {
+ let wrapped_lhs = if child_modifies_state(&lhs, rules, &mut HashMap::new()) {
+ Box::new(OptimizedExpr::RestoreOnErr(lhs))
+ } else {
+ lhs
+ };
+ let wrapped_rhs = if child_modifies_state(&rhs, rules, &mut HashMap::new()) {
+ Box::new(OptimizedExpr::RestoreOnErr(rhs))
+ } else {
+ rhs
+ };
+ OptimizedExpr::Choice(wrapped_lhs, wrapped_rhs)
+ }
+ OptimizedExpr::Rep(expr) => {
+ if child_modifies_state(&expr, rules, &mut HashMap::new()) {
+ OptimizedExpr::Rep(Box::new(OptimizedExpr::RestoreOnErr(expr)))
+ } else {
+ OptimizedExpr::Rep(expr)
+ }
+ }
+ _ => expr,
+ }
+}
+
+fn child_modifies_state(
+ expr: &OptimizedExpr,
+ rules: &HashMap<String, OptimizedExpr>,
+ cache: &mut HashMap<String, Option<bool>>,
+) -> bool {
+ expr.iter_top_down().any(|expr| match expr {
+ OptimizedExpr::Push(_) => true,
+ OptimizedExpr::Ident(ref name) if name == "DROP" => true,
+ OptimizedExpr::Ident(ref name) if name == "POP" => true,
+ OptimizedExpr::Ident(ref name) => match cache.get(name).cloned() {
+ Some(option) => match option {
+ Some(cached) => cached,
+ None => {
+ cache.insert(name.to_owned(), Some(false));
+ false
+ }
+ },
+ None => {
+ cache.insert(name.to_owned(), None);
+
+ let result = match rules.get(name) {
+ Some(expr) => child_modifies_state(expr, rules, cache),
+ None => false,
+ };
+
+ cache.insert(name.to_owned(), Some(result));
+
+ result
+ }
+ },
+ _ => false,
+ })
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use optimizer::OptimizedExpr::*;
+
+ #[test]
+ fn restore_no_stack_children() {
+ let rules = vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Normal,
+ expr: box_tree!(Opt(Str("a".to_string()))),
+ }];
+
+ assert_eq!(
+ restore_on_err(rules[0].clone(), &to_hash_map(&rules)),
+ rules[0].clone()
+ );
+ }
+
+ #[test]
+ fn restore_with_child_stack_ops() {
+ let rules = vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Normal,
+ expr: box_tree!(Rep(Push(Str("a".to_string())))),
+ }];
+
+ let restored = OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Normal,
+ expr: box_tree!(Rep(RestoreOnErr(Push(Str("a".to_string()))))),
+ };
+
+ assert_eq!(
+ restore_on_err(rules[0].clone(), &to_hash_map(&rules)),
+ restored
+ );
+ }
+
+ #[test]
+ fn restore_choice_branch_with_and_branch_without() {
+ let rules = vec![OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Normal,
+ expr: box_tree!(Choice(Push(Str("a".to_string())), Str("a".to_string()))),
+ }];
+
+ let restored = OptimizedRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Normal,
+ expr: box_tree!(Choice(
+ RestoreOnErr(Push(Str("a".to_string()))),
+ Str("a".to_string())
+ )),
+ };
+
+ assert_eq!(
+ restore_on_err(rules[0].clone(), &to_hash_map(&rules)),
+ restored
+ );
+ }
+}
diff --git a/src/optimizer/rotater.rs b/src/optimizer/rotater.rs
new file mode 100644
index 0000000..3588738
--- /dev/null
+++ b/src/optimizer/rotater.rs
@@ -0,0 +1,45 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use ast::*;
+
+pub fn rotate(rule: Rule) -> Rule {
+ fn rotate_internal(expr: Expr) -> Expr {
+ match expr {
+ // TODO: Use box syntax when it gets stabilized.
+ Expr::Seq(lhs, rhs) => {
+ let lhs = *lhs;
+ match lhs {
+ Expr::Seq(ll, lr) => {
+ rotate_internal(Expr::Seq(ll, Box::new(Expr::Seq(lr, rhs))))
+ }
+ lhs => Expr::Seq(Box::new(lhs), rhs),
+ }
+ }
+ Expr::Choice(lhs, rhs) => {
+ let lhs = *lhs;
+ match lhs {
+ Expr::Choice(ll, lr) => {
+ rotate_internal(Expr::Choice(ll, Box::new(Expr::Choice(lr, rhs))))
+ }
+ lhs => Expr::Choice(Box::new(lhs), rhs),
+ }
+ }
+ expr => expr,
+ }
+ }
+
+ match rule {
+ Rule { name, ty, expr } => Rule {
+ name,
+ ty,
+ expr: expr.map_top_down(rotate_internal),
+ },
+ }
+}
diff --git a/src/optimizer/skipper.rs b/src/optimizer/skipper.rs
new file mode 100644
index 0000000..f55edc0
--- /dev/null
+++ b/src/optimizer/skipper.rs
@@ -0,0 +1,57 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use ast::*;
+
+pub fn skip(rule: Rule) -> Rule {
+ fn populate_choices(expr: Expr, mut choices: Vec<String>) -> Option<Expr> {
+ match expr {
+ Expr::Choice(lhs, rhs) => {
+ if let Expr::Str(string) = *lhs {
+ choices.push(string);
+ populate_choices(*rhs, choices)
+ } else {
+ None
+ }
+ }
+ Expr::Str(string) => {
+ choices.push(string);
+ Some(Expr::Skip(choices))
+ }
+ _ => None,
+ }
+ }
+
+ match rule {
+ Rule { name, ty, expr } => Rule {
+ name,
+ ty,
+ expr: if ty == RuleType::Atomic {
+ expr.map_top_down(|expr| {
+ // TODO: Use box syntax when it gets stabilized.
+ if let Expr::Rep(expr) = expr.clone() {
+ if let Expr::Seq(lhs, rhs) = *expr.clone() {
+ if let (Expr::NegPred(expr), Expr::Ident(ident)) = (*lhs, *rhs) {
+ if ident == "ANY" {
+ if let Some(expr) = populate_choices(*expr, vec![]) {
+ return expr;
+ }
+ }
+ }
+ }
+ };
+
+ expr
+ })
+ } else {
+ expr
+ },
+ },
+ }
+}
diff --git a/src/optimizer/unroller.rs b/src/optimizer/unroller.rs
new file mode 100644
index 0000000..fff1733
--- /dev/null
+++ b/src/optimizer/unroller.rs
@@ -0,0 +1,67 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use ast::*;
+
+pub fn unroll(rule: Rule) -> Rule {
+ match rule {
+ Rule { name, ty, expr } => Rule {
+ name,
+ ty,
+ expr: expr.map_bottom_up(|expr| match expr {
+ Expr::RepOnce(expr) => Expr::Seq(expr.clone(), Box::new(Expr::Rep(expr))),
+ Expr::RepExact(expr, num) => (1..num + 1)
+ .map(|_| *expr.clone())
+ .rev()
+ .fold(None, |rep, expr| match rep {
+ None => Some(expr),
+ Some(rep) => Some(Expr::Seq(Box::new(expr), Box::new(rep))),
+ })
+ .unwrap(),
+ Expr::RepMin(expr, min) => (1..min + 2)
+ .map(|i| {
+ if i <= min {
+ *expr.clone()
+ } else {
+ Expr::Rep(expr.clone())
+ }
+ })
+ .rev()
+ .fold(None, |rep, expr| match rep {
+ None => Some(expr),
+ Some(rep) => Some(Expr::Seq(Box::new(expr), Box::new(rep))),
+ })
+ .unwrap(),
+ Expr::RepMax(expr, max) => (1..max + 1)
+ .map(|_| Expr::Opt(expr.clone()))
+ .rev()
+ .fold(None, |rep, expr| match rep {
+ None => Some(expr),
+ Some(rep) => Some(Expr::Seq(Box::new(expr), Box::new(rep))),
+ })
+ .unwrap(),
+ Expr::RepMinMax(expr, min, max) => (1..max + 1)
+ .map(|i| {
+ if i <= min {
+ *expr.clone()
+ } else {
+ Expr::Opt(expr.clone())
+ }
+ })
+ .rev()
+ .fold(None, |rep, expr| match rep {
+ None => Some(expr),
+ Some(rep) => Some(Expr::Seq(Box::new(expr), Box::new(rep))),
+ })
+ .unwrap(),
+ expr => expr,
+ }),
+ },
+ }
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..5f6f3b3
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,1488 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use std::char;
+use std::iter::Peekable;
+
+use pest::error::{Error, ErrorVariant};
+use pest::iterators::{Pair, Pairs};
+use pest::prec_climber::{Assoc, Operator, PrecClimber};
+use pest::{Parser, Span};
+
+use ast::{Expr, Rule as AstRule, RuleType};
+use validator;
+
+include!("grammar.rs");
+
+pub fn parse(rule: Rule, data: &str) -> Result<Pairs<Rule>, Error<Rule>> {
+ PestParser::parse(rule, data)
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ParserRule<'i> {
+ pub name: String,
+ pub span: Span<'i>,
+ pub ty: RuleType,
+ pub node: ParserNode<'i>,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ParserNode<'i> {
+ pub expr: ParserExpr<'i>,
+ pub span: Span<'i>,
+}
+
+impl<'i> ParserNode<'i> {
+ pub fn filter_map_top_down<F, T>(self, mut f: F) -> Vec<T>
+ where
+ F: FnMut(ParserNode<'i>) -> Option<T>,
+ {
+ pub fn filter_internal<'i, F, T>(node: ParserNode<'i>, f: &mut F, result: &mut Vec<T>)
+ where
+ F: FnMut(ParserNode<'i>) -> Option<T>,
+ {
+ if let Some(value) = f(node.clone()) {
+ result.push(value);
+ }
+
+ match node.expr {
+ // TODO: Use box syntax when it gets stabilized.
+ ParserExpr::PosPred(node) => {
+ filter_internal(*node, f, result);
+ }
+ ParserExpr::NegPred(node) => {
+ filter_internal(*node, f, result);
+ }
+ ParserExpr::Seq(lhs, rhs) => {
+ filter_internal(*lhs, f, result);
+ filter_internal(*rhs, f, result);
+ }
+ ParserExpr::Choice(lhs, rhs) => {
+ filter_internal(*lhs, f, result);
+ filter_internal(*rhs, f, result);
+ }
+ ParserExpr::Rep(node) => {
+ filter_internal(*node, f, result);
+ }
+ ParserExpr::RepOnce(node) => {
+ filter_internal(*node, f, result);
+ }
+ ParserExpr::RepExact(node, _) => {
+ filter_internal(*node, f, result);
+ }
+ ParserExpr::RepMin(node, _) => {
+ filter_internal(*node, f, result);
+ }
+ ParserExpr::RepMax(node, _) => {
+ filter_internal(*node, f, result);
+ }
+ ParserExpr::RepMinMax(node, ..) => {
+ filter_internal(*node, f, result);
+ }
+ ParserExpr::Opt(node) => {
+ filter_internal(*node, f, result);
+ }
+ ParserExpr::Push(node) => {
+ filter_internal(*node, f, result);
+ }
+ _ => (),
+ }
+ }
+
+ let mut result = vec![];
+
+ filter_internal(self, &mut f, &mut result);
+
+ result
+ }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ParserExpr<'i> {
+ Str(String),
+ Insens(String),
+ Range(String, String),
+ Ident(String),
+ PeekSlice(i32, Option<i32>),
+ PosPred(Box<ParserNode<'i>>),
+ NegPred(Box<ParserNode<'i>>),
+ Seq(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
+ Choice(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
+ Opt(Box<ParserNode<'i>>),
+ Rep(Box<ParserNode<'i>>),
+ RepOnce(Box<ParserNode<'i>>),
+ RepExact(Box<ParserNode<'i>>, u32),
+ RepMin(Box<ParserNode<'i>>, u32),
+ RepMax(Box<ParserNode<'i>>, u32),
+ RepMinMax(Box<ParserNode<'i>>, u32, u32),
+ Push(Box<ParserNode<'i>>),
+}
+
+fn convert_rule(rule: ParserRule) -> AstRule {
+ match rule {
+ ParserRule { name, ty, node, .. } => {
+ let expr = convert_node(node);
+
+ AstRule { name, ty, expr }
+ }
+ }
+}
+
+fn convert_node(node: ParserNode) -> Expr {
+ match node.expr {
+ ParserExpr::Str(string) => Expr::Str(string),
+ ParserExpr::Insens(string) => Expr::Insens(string),
+ ParserExpr::Range(start, end) => Expr::Range(start, end),
+ ParserExpr::Ident(ident) => Expr::Ident(ident),
+ ParserExpr::PeekSlice(start, end) => Expr::PeekSlice(start, end),
+ ParserExpr::PosPred(node) => Expr::PosPred(Box::new(convert_node(*node))),
+ ParserExpr::NegPred(node) => Expr::NegPred(Box::new(convert_node(*node))),
+ ParserExpr::Seq(node1, node2) => Expr::Seq(
+ Box::new(convert_node(*node1)),
+ Box::new(convert_node(*node2)),
+ ),
+ ParserExpr::Choice(node1, node2) => Expr::Choice(
+ Box::new(convert_node(*node1)),
+ Box::new(convert_node(*node2)),
+ ),
+ ParserExpr::Opt(node) => Expr::Opt(Box::new(convert_node(*node))),
+ ParserExpr::Rep(node) => Expr::Rep(Box::new(convert_node(*node))),
+ ParserExpr::RepOnce(node) => Expr::RepOnce(Box::new(convert_node(*node))),
+ ParserExpr::RepExact(node, num) => Expr::RepExact(Box::new(convert_node(*node)), num),
+ ParserExpr::RepMin(node, max) => Expr::RepMin(Box::new(convert_node(*node)), max),
+ ParserExpr::RepMax(node, max) => Expr::RepMax(Box::new(convert_node(*node)), max),
+ ParserExpr::RepMinMax(node, min, max) => {
+ Expr::RepMinMax(Box::new(convert_node(*node)), min, max)
+ }
+ ParserExpr::Push(node) => Expr::Push(Box::new(convert_node(*node))),
+ }
+}
+
+pub fn consume_rules(pairs: Pairs<Rule>) -> Result<Vec<AstRule>, Vec<Error<Rule>>> {
+ let rules = consume_rules_with_spans(pairs)?;
+ let errors = validator::validate_ast(&rules);
+ if errors.is_empty() {
+ Ok(rules.into_iter().map(convert_rule).collect())
+ } else {
+ Err(errors)
+ }
+}
+
+fn consume_rules_with_spans<'i>(
+ pairs: Pairs<'i, Rule>,
+) -> Result<Vec<ParserRule<'i>>, Vec<Error<Rule>>> {
+ let climber = PrecClimber::new(vec![
+ Operator::new(Rule::choice_operator, Assoc::Left),
+ Operator::new(Rule::sequence_operator, Assoc::Left),
+ ]);
+
+ pairs
+ .filter(|pair| pair.as_rule() == Rule::grammar_rule)
+ .map(|pair| {
+ let mut pairs = pair.into_inner().peekable();
+
+ let span = pairs.next().unwrap().as_span();
+ let name = span.as_str().to_owned();
+
+ pairs.next().unwrap(); // assignment_operator
+
+ let ty = if pairs.peek().unwrap().as_rule() != Rule::opening_brace {
+ match pairs.next().unwrap().as_rule() {
+ Rule::silent_modifier => RuleType::Silent,
+ Rule::atomic_modifier => RuleType::Atomic,
+ Rule::compound_atomic_modifier => RuleType::CompoundAtomic,
+ Rule::non_atomic_modifier => RuleType::NonAtomic,
+ _ => unreachable!(),
+ }
+ } else {
+ RuleType::Normal
+ };
+
+ pairs.next().unwrap(); // opening_brace
+
+ let node = consume_expr(pairs.next().unwrap().into_inner().peekable(), &climber)?;
+
+ Ok(ParserRule {
+ name,
+ span,
+ ty,
+ node,
+ })
+ })
+ .collect()
+}
+
+fn consume_expr<'i>(
+ pairs: Peekable<Pairs<'i, Rule>>,
+ climber: &PrecClimber<Rule>,
+) -> Result<ParserNode<'i>, Vec<Error<Rule>>> {
+ fn unaries<'i>(
+ mut pairs: Peekable<Pairs<'i, Rule>>,
+ climber: &PrecClimber<Rule>,
+ ) -> Result<ParserNode<'i>, Vec<Error<Rule>>> {
+ let pair = pairs.next().unwrap();
+
+ let node = match pair.as_rule() {
+ Rule::opening_paren => {
+ let node = unaries(pairs, climber)?;
+ let end = node.span.end_pos();
+
+ ParserNode {
+ expr: node.expr,
+ span: pair.as_span().start_pos().span(&end),
+ }
+ }
+ Rule::positive_predicate_operator => {
+ let node = unaries(pairs, climber)?;
+ let end = node.span.end_pos();
+
+ ParserNode {
+ expr: ParserExpr::PosPred(Box::new(node)),
+ span: pair.as_span().start_pos().span(&end),
+ }
+ }
+ Rule::negative_predicate_operator => {
+ let node = unaries(pairs, climber)?;
+ let end = node.span.end_pos();
+
+ ParserNode {
+ expr: ParserExpr::NegPred(Box::new(node)),
+ span: pair.as_span().start_pos().span(&end),
+ }
+ }
+ other_rule => {
+ let node = match other_rule {
+ Rule::expression => consume_expr(pair.into_inner().peekable(), climber)?,
+ Rule::_push => {
+ let start = pair.clone().as_span().start_pos();
+ let mut pairs = pair.into_inner();
+ pairs.next().unwrap(); // opening_paren
+ let pair = pairs.next().unwrap();
+
+ let node = consume_expr(pair.into_inner().peekable(), climber)?;
+ let end = node.span.end_pos();
+
+ ParserNode {
+ expr: ParserExpr::Push(Box::new(node)),
+ span: start.span(&end),
+ }
+ }
+ Rule::peek_slice => {
+ let mut pairs = pair.clone().into_inner();
+ pairs.next().unwrap(); // opening_brack
+ let pair_start = pairs.next().unwrap(); // .. or integer
+ let start: i32 = match pair_start.as_rule() {
+ Rule::range_operator => 0,
+ Rule::integer => {
+ pairs.next().unwrap(); // ..
+ pair_start.as_str().parse().unwrap()
+ }
+ _ => unreachable!(),
+ };
+ let pair_end = pairs.next().unwrap(); // integer or }
+ let end: Option<i32> = match pair_end.as_rule() {
+ Rule::closing_brack => None,
+ Rule::integer => {
+ pairs.next().unwrap(); // }
+ Some(pair_end.as_str().parse().unwrap())
+ }
+ _ => unreachable!(),
+ };
+ ParserNode {
+ expr: ParserExpr::PeekSlice(start, end),
+ span: pair.as_span(),
+ }
+ }
+ Rule::identifier => ParserNode {
+ expr: ParserExpr::Ident(pair.as_str().to_owned()),
+ span: pair.clone().as_span(),
+ },
+ Rule::string => {
+ let string = unescape(pair.as_str()).expect("incorrect string literal");
+ ParserNode {
+ expr: ParserExpr::Str(string[1..string.len() - 1].to_owned()),
+ span: pair.clone().as_span(),
+ }
+ }
+ Rule::insensitive_string => {
+ let string = unescape(pair.as_str()).expect("incorrect string literal");
+ ParserNode {
+ expr: ParserExpr::Insens(string[2..string.len() - 1].to_owned()),
+ span: pair.clone().as_span(),
+ }
+ }
+ Rule::range => {
+ let mut pairs = pair.into_inner();
+ let pair = pairs.next().unwrap();
+ let start = unescape(pair.as_str()).expect("incorrect char literal");
+ let start_pos = pair.clone().as_span().start_pos();
+ pairs.next();
+ let pair = pairs.next().unwrap();
+ let end = unescape(pair.as_str()).expect("incorrect char literal");
+ let end_pos = pair.clone().as_span().end_pos();
+
+ ParserNode {
+ expr: ParserExpr::Range(
+ start[1..start.len() - 1].to_owned(),
+ end[1..end.len() - 1].to_owned(),
+ ),
+ span: start_pos.span(&end_pos),
+ }
+ }
+ _ => unreachable!(),
+ };
+
+ pairs.fold(
+ Ok(node),
+ |node: Result<ParserNode<'i>, Vec<Error<Rule>>>, pair| {
+ let node = node?;
+
+ let node = match pair.as_rule() {
+ Rule::optional_operator => {
+ let start = node.span.start_pos();
+ ParserNode {
+ expr: ParserExpr::Opt(Box::new(node)),
+ span: start.span(&pair.as_span().end_pos()),
+ }
+ }
+ Rule::repeat_operator => {
+ let start = node.span.start_pos();
+ ParserNode {
+ expr: ParserExpr::Rep(Box::new(node)),
+ span: start.span(&pair.as_span().end_pos()),
+ }
+ }
+ Rule::repeat_once_operator => {
+ let start = node.span.start_pos();
+ ParserNode {
+ expr: ParserExpr::RepOnce(Box::new(node)),
+ span: start.span(&pair.as_span().end_pos()),
+ }
+ }
+ Rule::repeat_exact => {
+ let mut inner = pair.clone().into_inner();
+
+ inner.next().unwrap(); // opening_brace
+
+ let number = inner.next().unwrap();
+ let num = if let Ok(num) = number.as_str().parse::<u32>() {
+ num
+ } else {
+ return Err(vec![Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: "number cannot overflow u32".to_owned(),
+ },
+ number.as_span(),
+ )]);
+ };
+
+ if num == 0 {
+ let error: Error<Rule> = Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: "cannot repeat 0 times".to_owned(),
+ },
+ number.as_span(),
+ );
+
+ return Err(vec![error]);
+ }
+
+ let start = node.span.start_pos();
+ ParserNode {
+ expr: ParserExpr::RepExact(Box::new(node), num),
+ span: start.span(&pair.as_span().end_pos()),
+ }
+ }
+ Rule::repeat_min => {
+ let mut inner = pair.clone().into_inner();
+
+ inner.next().unwrap(); // opening_brace
+
+ let min_number = inner.next().unwrap();
+ let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
+ min
+ } else {
+ return Err(vec![Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: "number cannot overflow u32".to_owned(),
+ },
+ min_number.as_span(),
+ )]);
+ };
+
+ let start = node.span.start_pos();
+ ParserNode {
+ expr: ParserExpr::RepMin(Box::new(node), min),
+ span: start.span(&pair.as_span().end_pos()),
+ }
+ }
+ Rule::repeat_max => {
+ let mut inner = pair.clone().into_inner();
+
+ inner.next().unwrap(); // opening_brace
+ inner.next().unwrap(); // comma
+
+ let max_number = inner.next().unwrap();
+ let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
+ max
+ } else {
+ return Err(vec![Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: "number cannot overflow u32".to_owned(),
+ },
+ max_number.as_span(),
+ )]);
+ };
+
+ if max == 0 {
+ let error: Error<Rule> = Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: "cannot repeat 0 times".to_owned(),
+ },
+ max_number.as_span(),
+ );
+
+ return Err(vec![error]);
+ }
+
+ let start = node.span.start_pos();
+ ParserNode {
+ expr: ParserExpr::RepMax(Box::new(node), max),
+ span: start.span(&pair.as_span().end_pos()),
+ }
+ }
+ Rule::repeat_min_max => {
+ let mut inner = pair.clone().into_inner();
+
+ inner.next().unwrap(); // opening_brace
+
+ let min_number = inner.next().unwrap();
+ let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
+ min
+ } else {
+ return Err(vec![Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: "number cannot overflow u32".to_owned(),
+ },
+ min_number.as_span(),
+ )]);
+ };
+
+ inner.next().unwrap(); // comma
+
+ let max_number = inner.next().unwrap();
+ let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
+ max
+ } else {
+ return Err(vec![Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: "number cannot overflow u32".to_owned(),
+ },
+ max_number.as_span(),
+ )]);
+ };
+
+ if max == 0 {
+ let error: Error<Rule> = Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: "cannot repeat 0 times".to_owned(),
+ },
+ max_number.as_span(),
+ );
+
+ return Err(vec![error]);
+ }
+
+ let start = node.span.start_pos();
+ ParserNode {
+ expr: ParserExpr::RepMinMax(Box::new(node), min, max),
+ span: start.span(&pair.as_span().end_pos()),
+ }
+ }
+ Rule::closing_paren => {
+ let start = node.span.start_pos();
+
+ ParserNode {
+ expr: node.expr,
+ span: start.span(&pair.as_span().end_pos()),
+ }
+ }
+ _ => unreachable!(),
+ };
+
+ Ok(node)
+ },
+ )?
+ }
+ };
+
+ Ok(node)
+ }
+
+ let term = |pair: Pair<'i, Rule>| unaries(pair.into_inner().peekable(), climber);
+ let infix = |lhs: Result<ParserNode<'i>, Vec<Error<Rule>>>,
+ op: Pair<'i, Rule>,
+ rhs: Result<ParserNode<'i>, Vec<Error<Rule>>>| match op.as_rule() {
+ Rule::sequence_operator => {
+ let lhs = lhs?;
+ let rhs = rhs?;
+
+ let start = lhs.span.start_pos();
+ let end = rhs.span.end_pos();
+
+ Ok(ParserNode {
+ expr: ParserExpr::Seq(Box::new(lhs), Box::new(rhs)),
+ span: start.span(&end),
+ })
+ }
+ Rule::choice_operator => {
+ let lhs = lhs?;
+ let rhs = rhs?;
+
+ let start = lhs.span.start_pos();
+ let end = rhs.span.end_pos();
+
+ Ok(ParserNode {
+ expr: ParserExpr::Choice(Box::new(lhs), Box::new(rhs)),
+ span: start.span(&end),
+ })
+ }
+ _ => unreachable!(),
+ };
+
+ climber.climb(pairs, term, infix)
+}
+
+fn unescape(string: &str) -> Option<String> {
+ let mut result = String::new();
+ let mut chars = string.chars();
+
+ loop {
+ match chars.next() {
+ Some('\\') => match chars.next()? {
+ '"' => result.push('"'),
+ '\\' => result.push('\\'),
+ 'r' => result.push('\r'),
+ 'n' => result.push('\n'),
+ 't' => result.push('\t'),
+ '0' => result.push('\0'),
+ '\'' => result.push('\''),
+ 'x' => {
+ let string: String = chars.clone().take(2).collect();
+
+ if string.len() != 2 {
+ return None;
+ }
+
+ for _ in 0..string.len() {
+ chars.next()?;
+ }
+
+ let value = u8::from_str_radix(&string, 16).ok()?;
+
+ result.push(char::from(value));
+ }
+ 'u' => {
+ if chars.next()? != '{' {
+ return None;
+ }
+
+ let string: String = chars.clone().take_while(|c| *c != '}').collect();
+
+ if string.len() < 2 || 6 < string.len() {
+ return None;
+ }
+
+ for _ in 0..string.len() + 1 {
+ chars.next()?;
+ }
+
+ let value = u32::from_str_radix(&string, 16).ok()?;
+
+ result.push(char::from_u32(value)?);
+ }
+ _ => return None,
+ },
+ Some(c) => result.push(c),
+ None => return Some(result),
+ };
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::super::unwrap_or_report;
+ use super::*;
+
+ #[test]
+ fn rules() {
+ parses_to! {
+ parser: PestParser,
+ input: "a = { b } c = { d }",
+ rule: Rule::grammar_rules,
+ tokens: [
+ grammar_rule(0, 9, [
+ identifier(0, 1),
+ assignment_operator(2, 3),
+ opening_brace(4, 5),
+ expression(6, 8, [
+ term(6, 8, [
+ identifier(6, 7)
+ ])
+ ]),
+ closing_brace(8, 9)
+ ]),
+ grammar_rule(10, 19, [
+ identifier(10, 11),
+ assignment_operator(12, 13),
+ opening_brace(14, 15),
+ expression(16, 18, [
+ term(16, 18, [
+ identifier(16, 17)
+ ])
+ ]),
+ closing_brace(18, 19)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn rule() {
+ parses_to! {
+ parser: PestParser,
+ input: "a = ! { b ~ c }",
+ rule: Rule::grammar_rule,
+ tokens: [
+ grammar_rule(0, 15, [
+ identifier(0, 1),
+ assignment_operator(2, 3),
+ non_atomic_modifier(4, 5),
+ opening_brace(6, 7),
+ expression(8, 14, [
+ term(8, 10, [
+ identifier(8, 9)
+ ]),
+ sequence_operator(10, 11),
+ term(12, 14, [
+ identifier(12, 13)
+ ])
+ ]),
+ closing_brace(14, 15)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn expression() {
+ parses_to! {
+ parser: PestParser,
+ input: "_a | 'a'..'b' ~ !^\"abc\" ~ (d | e)*?",
+ rule: Rule::expression,
+ tokens: [
+ expression(0, 35, [
+ term(0, 3, [
+ identifier(0, 2)
+ ]),
+ choice_operator(3, 4),
+ term(5, 14, [
+ range(5, 13, [
+ character(5, 8, [
+ single_quote(5, 6),
+ inner_chr(6, 7),
+ single_quote(7, 8)
+ ]),
+ range_operator(8, 10),
+ character(10, 13, [
+ single_quote(10, 11),
+ inner_chr(11, 12),
+ single_quote(12, 13)
+ ])
+ ])
+ ]),
+ sequence_operator(14, 15),
+ term(16, 24, [
+ negative_predicate_operator(16, 17),
+ insensitive_string(17, 23, [
+ string(18, 23, [
+ quote(18, 19),
+ inner_str(19, 22),
+ quote(22, 23)
+ ])
+ ])
+ ]),
+ sequence_operator(24, 25),
+ term(26, 35, [
+ opening_paren(26, 27),
+ expression(27, 32, [
+ term(27, 29, [
+ identifier(27, 28)
+ ]),
+ choice_operator(29, 30),
+ term(31, 32, [
+ identifier(31, 32)
+ ])
+ ]),
+ closing_paren(32, 33),
+ repeat_operator(33, 34),
+ optional_operator(34, 35)
+ ])
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn repeat_exact() {
+ parses_to! {
+ parser: PestParser,
+ input: "{1}",
+ rule: Rule::repeat_exact,
+ tokens: [
+ repeat_exact(0, 3, [
+ opening_brace(0, 1),
+ number(1, 2),
+ closing_brace(2, 3)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn repeat_min() {
+ parses_to! {
+ parser: PestParser,
+ input: "{2,}",
+ rule: Rule::repeat_min,
+ tokens: [
+ repeat_min(0, 4, [
+ opening_brace(0,1),
+ number(1,2),
+ comma(2,3),
+ closing_brace(3,4)
+ ])
+ ]
+ }
+ }
+
+ #[test]
+ fn repeat_max() {
+ parses_to! {
+ parser: PestParser,
+ input: "{, 3}",
+ rule: Rule::repeat_max,
+ tokens: [
+ repeat_max(0, 5, [
+ opening_brace(0,1),
+ comma(1,2),
+ number(3,4),
+ closing_brace(4,5)
+ ])
+ ]
+ }
+ }
+
+ #[test]
+ fn repeat_min_max() {
+ parses_to! {
+ parser: PestParser,
+ input: "{1, 2}",
+ rule: Rule::repeat_min_max,
+ tokens: [
+ repeat_min_max(0, 6, [
+ opening_brace(0, 1),
+ number(1, 2),
+ comma(2, 3),
+ number(4, 5),
+ closing_brace(5, 6)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn push() {
+ parses_to! {
+ parser: PestParser,
+ input: "PUSH ( a )",
+ rule: Rule::_push,
+ tokens: [
+ _push(0, 10, [
+ opening_paren(5, 6),
+ expression(7, 9, [
+ term(7, 9, [
+ identifier(7, 8)
+ ])
+ ]),
+ closing_paren(9, 10)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn peek_slice_all() {
+ parses_to! {
+ parser: PestParser,
+ input: "PEEK[..]",
+ rule: Rule::peek_slice,
+ tokens: [
+ peek_slice(0, 8, [
+ opening_brack(4, 5),
+ range_operator(5, 7),
+ closing_brack(7, 8)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn peek_slice_start() {
+ parses_to! {
+ parser: PestParser,
+ input: "PEEK[1..]",
+ rule: Rule::peek_slice,
+ tokens: [
+ peek_slice(0, 9, [
+ opening_brack(4, 5),
+ integer(5, 6),
+ range_operator(6, 8),
+ closing_brack(8, 9)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn peek_slice_end() {
+ parses_to! {
+ parser: PestParser,
+ input: "PEEK[ ..-1]",
+ rule: Rule::peek_slice,
+ tokens: [
+ peek_slice(0, 11, [
+ opening_brack(4, 5),
+ range_operator(6, 8),
+ integer(8, 10),
+ closing_brack(10, 11)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn peek_slice_start_end() {
+ parses_to! {
+ parser: PestParser,
+ input: "PEEK[-5..10]",
+ rule: Rule::peek_slice,
+ tokens: [
+ peek_slice(0, 12, [
+ opening_brack(4, 5),
+ integer(5, 7),
+ range_operator(7, 9),
+ integer(9, 11),
+ closing_brack(11, 12)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn identifier() {
+ parses_to! {
+ parser: PestParser,
+ input: "_a8943",
+ rule: Rule::identifier,
+ tokens: [
+ identifier(0, 6)
+ ]
+ };
+ }
+
+ #[test]
+ fn string() {
+ parses_to! {
+ parser: PestParser,
+ input: "\"aaaaa\\n\\r\\t\\\\\\0\\'\\\"\\x0F\\u{123abC}\\u{12}aaaaa\"",
+ rule: Rule::string,
+ tokens: [
+ string(0, 46, [
+ quote(0, 1),
+ inner_str(1, 45),
+ quote(45, 46)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn insensitive_string() {
+ parses_to! {
+ parser: PestParser,
+ input: "^ \"\\\"hi\"",
+ rule: Rule::insensitive_string,
+ tokens: [
+ insensitive_string(0, 9, [
+ string(3, 9, [
+ quote(3, 4),
+ inner_str(4, 8),
+ quote(8, 9)
+ ])
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn range() {
+ parses_to! {
+ parser: PestParser,
+ input: "'\\n' .. '\\x1a'",
+ rule: Rule::range,
+ tokens: [
+ range(0, 14, [
+ character(0, 4, [
+ single_quote(0, 1),
+ inner_chr(1, 3),
+ single_quote(3, 4)
+ ]),
+ range_operator(5, 7),
+ character(8, 14, [
+ single_quote(8, 9),
+ inner_chr(9, 13),
+ single_quote(13, 14)
+ ])
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn character() {
+ parses_to! {
+ parser: PestParser,
+ input: "'\\u{123abC}'",
+ rule: Rule::character,
+ tokens: [
+ character(0, 12, [
+ single_quote(0, 1),
+ inner_chr(1, 11),
+ single_quote(11, 12)
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn number() {
+ parses_to! {
+ parser: PestParser,
+ input: "0123",
+ rule: Rule::number,
+ tokens: [
+ number(0, 4)
+ ]
+ };
+ }
+
+ #[test]
+ fn comment() {
+ parses_to! {
+ parser: PestParser,
+ input: "a ~ // asda\n b",
+ rule: Rule::expression,
+ tokens: [
+ expression(0, 17, [
+ term(0, 2, [
+ identifier(0, 1)
+ ]),
+ sequence_operator(2, 3),
+ term(16, 17, [
+ identifier(16, 17)
+ ])
+ ])
+ ]
+ };
+ }
+
+ #[test]
+ fn wrong_identifier() {
+ fails_with! {
+ parser: PestParser,
+ input: "0",
+ rule: Rule::grammar_rules,
+ positives: vec![Rule::identifier],
+ negatives: vec![],
+ pos: 0
+ };
+ }
+
+ #[test]
+ fn missing_assignment_operator() {
+ fails_with! {
+ parser: PestParser,
+ input: "a {}",
+ rule: Rule::grammar_rules,
+ positives: vec![Rule::assignment_operator],
+ negatives: vec![],
+ pos: 2
+ };
+ }
+
+ #[test]
+ fn wrong_modifier() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = *{}",
+ rule: Rule::grammar_rules,
+ positives: vec![
+ Rule::opening_brace,
+ Rule::silent_modifier,
+ Rule::atomic_modifier,
+ Rule::compound_atomic_modifier,
+ Rule::non_atomic_modifier
+ ],
+ negatives: vec![],
+ pos: 4
+ };
+ }
+
+ #[test]
+ fn missing_opening_brace() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = _",
+ rule: Rule::grammar_rules,
+ positives: vec![Rule::opening_brace],
+ negatives: vec![],
+ pos: 5
+ };
+ }
+
+ #[test]
+ fn empty_rule() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = {}",
+ rule: Rule::grammar_rules,
+ positives: vec![Rule::term],
+ negatives: vec![],
+ pos: 5
+ };
+ }
+
+ #[test]
+ fn missing_rhs() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = { b ~ }",
+ rule: Rule::grammar_rules,
+ positives: vec![Rule::term],
+ negatives: vec![],
+ pos: 10
+ };
+ }
+
+ #[test]
+ fn wrong_op() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = { b % }",
+ rule: Rule::grammar_rules,
+ positives: vec![
+ Rule::opening_brace,
+ Rule::closing_brace,
+ Rule::sequence_operator,
+ Rule::choice_operator,
+ Rule::optional_operator,
+ Rule::repeat_operator,
+ Rule::repeat_once_operator
+ ],
+ negatives: vec![],
+ pos: 8
+ };
+ }
+
+ #[test]
+ fn missing_closing_paren() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = { (b }",
+ rule: Rule::grammar_rules,
+ positives: vec![
+ Rule::opening_brace,
+ Rule::closing_paren,
+ Rule::sequence_operator,
+ Rule::choice_operator,
+ Rule::optional_operator,
+ Rule::repeat_operator,
+ Rule::repeat_once_operator
+ ],
+ negatives: vec![],
+ pos: 9
+ };
+ }
+
+ #[test]
+ fn missing_term() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = { ! }",
+ rule: Rule::grammar_rules,
+ positives: vec![
+ Rule::opening_paren,
+ Rule::positive_predicate_operator,
+ Rule::negative_predicate_operator,
+ Rule::_push,
+ Rule::peek_slice,
+ Rule::identifier,
+ Rule::insensitive_string,
+ Rule::quote,
+ Rule::single_quote
+ ],
+ negatives: vec![],
+ pos: 8
+ };
+ }
+
+ #[test]
+ fn string_missing_ending_quote() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = { \" }",
+ rule: Rule::grammar_rules,
+ positives: vec![Rule::quote],
+ negatives: vec![],
+ pos: 9
+ };
+ }
+
+ #[test]
+ fn insensitive_missing_string() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = { ^ }",
+ rule: Rule::grammar_rules,
+ positives: vec![Rule::quote],
+ negatives: vec![],
+ pos: 8
+ };
+ }
+
+ #[test]
+ fn char_missing_ending_single_quote() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = { \' }",
+ rule: Rule::grammar_rules,
+ positives: vec![Rule::single_quote],
+ negatives: vec![],
+ pos: 8
+ };
+ }
+
+ #[test]
+ fn range_missing_range_operator() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = { \'a\' }",
+ rule: Rule::grammar_rules,
+ positives: vec![Rule::range_operator],
+ negatives: vec![],
+ pos: 10
+ };
+ }
+
+ #[test]
+ fn wrong_postfix() {
+ fails_with! {
+ parser: PestParser,
+ input: "a = { a& }",
+ rule: Rule::grammar_rules,
+ positives: vec![
+ Rule::opening_brace,
+ Rule::closing_brace,
+ Rule::sequence_operator,
+ Rule::choice_operator,
+ Rule::optional_operator,
+ Rule::repeat_operator,
+ Rule::repeat_once_operator
+ ],
+ negatives: vec![],
+ pos: 7
+ };
+ }
+
+ #[test]
+ fn ast() {
+ let input =
+ "rule = _{ a{1} ~ \"a\"{3,} ~ b{, 2} ~ \"b\"{1, 2} | !(^\"c\" | PUSH('d'..'e'))?* }";
+
+ let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
+ let ast = consume_rules_with_spans(pairs).unwrap();
+ let ast: Vec<_> = ast.into_iter().map(|rule| convert_rule(rule)).collect();
+
+ assert_eq!(
+ ast,
+ vec![AstRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Silent,
+ expr: Expr::Choice(
+ Box::new(Expr::Seq(
+ Box::new(Expr::Seq(
+ Box::new(Expr::Seq(
+ Box::new(Expr::RepExact(Box::new(Expr::Ident("a".to_owned())), 1)),
+ Box::new(Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 3))
+ )),
+ Box::new(Expr::RepMax(Box::new(Expr::Ident("b".to_owned())), 2))
+ )),
+ Box::new(Expr::RepMinMax(Box::new(Expr::Str("b".to_owned())), 1, 2))
+ )),
+ Box::new(Expr::NegPred(Box::new(Expr::Rep(Box::new(Expr::Opt(
+ Box::new(Expr::Choice(
+ Box::new(Expr::Insens("c".to_owned())),
+ Box::new(Expr::Push(Box::new(Expr::Range(
+ "d".to_owned(),
+ "e".to_owned()
+ ))))
+ ))
+ ))))))
+ )
+ },]
+ );
+ }
+
+ #[test]
+ fn ast_peek_slice() {
+ let input = "rule = _{ PEEK[-04..] ~ PEEK[..3] }";
+
+ let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
+ let ast = consume_rules_with_spans(pairs).unwrap();
+ let ast: Vec<_> = ast.into_iter().map(|rule| convert_rule(rule)).collect();
+
+ assert_eq!(
+ ast,
+ vec![AstRule {
+ name: "rule".to_owned(),
+ ty: RuleType::Silent,
+ expr: Expr::Seq(
+ Box::new(Expr::PeekSlice(-4, None)),
+ Box::new(Expr::PeekSlice(0, Some(3))),
+ )
+ }],
+ );
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:13
+ |
+1 | rule = { \"\"{4294967297} }
+ | ^--------^
+ |
+ = number cannot overflow u32")]
+ fn repeat_exact_overflow() {
+ let input = "rule = { \"\"{4294967297} }";
+
+ let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
+ unwrap_or_report(consume_rules_with_spans(pairs));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:13
+ |
+1 | rule = { \"\"{0} }
+ | ^
+ |
+ = cannot repeat 0 times")]
+ fn repeat_exact_zero() {
+ let input = "rule = { \"\"{0} }";
+
+ let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
+ unwrap_or_report(consume_rules_with_spans(pairs));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:13
+ |
+1 | rule = { \"\"{4294967297,} }
+ | ^--------^
+ |
+ = number cannot overflow u32")]
+ fn repeat_min_overflow() {
+ let input = "rule = { \"\"{4294967297,} }";
+
+ let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
+ unwrap_or_report(consume_rules_with_spans(pairs));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:14
+ |
+1 | rule = { \"\"{,4294967297} }
+ | ^--------^
+ |
+ = number cannot overflow u32")]
+ fn repeat_max_overflow() {
+ let input = "rule = { \"\"{,4294967297} }";
+
+ let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
+ unwrap_or_report(consume_rules_with_spans(pairs));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:14
+ |
+1 | rule = { \"\"{,0} }
+ | ^
+ |
+ = cannot repeat 0 times")]
+ fn repeat_max_zero() {
+ let input = "rule = { \"\"{,0} }";
+
+ let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
+ unwrap_or_report(consume_rules_with_spans(pairs));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:13
+ |
+1 | rule = { \"\"{4294967297,4294967298} }
+ | ^--------^
+ |
+ = number cannot overflow u32")]
+ fn repeat_min_max_overflow() {
+ let input = "rule = { \"\"{4294967297,4294967298} }";
+
+ let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
+ unwrap_or_report(consume_rules_with_spans(pairs));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:15
+ |
+1 | rule = { \"\"{0,0} }
+ | ^
+ |
+ = cannot repeat 0 times")]
+ fn repeat_min_max_zero() {
+ let input = "rule = { \"\"{0,0} }";
+
+ let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
+ unwrap_or_report(consume_rules_with_spans(pairs));
+ }
+
+ #[test]
+ fn unescape_all() {
+ let string = r"a\nb\x55c\u{111}d";
+
+ assert_eq!(unescape(string), Some("a\nb\x55c\u{111}d".to_owned()));
+ }
+
+ #[test]
+ fn unescape_empty_escape() {
+ let string = r"\";
+
+ assert_eq!(unescape(string), None);
+ }
+
+ #[test]
+ fn unescape_wrong_escape() {
+ let string = r"\w";
+
+ assert_eq!(unescape(string), None);
+ }
+
+ #[test]
+ fn unescape_backslash() {
+ let string = "\\\\";
+ assert_eq!(unescape(string), Some("\\".to_owned()));
+ }
+
+ #[test]
+ fn unescape_return() {
+ let string = "\\r";
+ assert_eq!(unescape(string), Some("\r".to_owned()));
+ }
+
+ #[test]
+ fn unescape_tab() {
+ let string = "\\t";
+ assert_eq!(unescape(string), Some("\t".to_owned()));
+ }
+
+ #[test]
+ fn unescape_null() {
+ let string = "\\0";
+ assert_eq!(unescape(string), Some("\0".to_owned()));
+ }
+
+ #[test]
+ fn unescape_single_quote() {
+ let string = "\\'";
+ assert_eq!(unescape(string), Some("\'".to_owned()));
+ }
+
+ #[test]
+ fn unescape_wrong_byte() {
+ let string = r"\xfg";
+
+ assert_eq!(unescape(string), None);
+ }
+
+ #[test]
+ fn unescape_short_byte() {
+ let string = r"\xf";
+
+ assert_eq!(unescape(string), None);
+ }
+
+ #[test]
+ fn unescape_no_open_brace_unicode() {
+ let string = r"\u11";
+
+ assert_eq!(unescape(string), None);
+ }
+
+ #[test]
+ fn unescape_no_close_brace_unicode() {
+ let string = r"\u{11";
+
+ assert_eq!(unescape(string), None);
+ }
+
+ #[test]
+ fn unescape_short_unicode() {
+ let string = r"\u{1}";
+
+ assert_eq!(unescape(string), None);
+ }
+
+ #[test]
+ fn unescape_long_unicode() {
+ let string = r"\u{1111111}";
+
+ assert_eq!(unescape(string), None);
+ }
+}
diff --git a/src/validator.rs b/src/validator.rs
new file mode 100644
index 0000000..a358dbe
--- /dev/null
+++ b/src/validator.rs
@@ -0,0 +1,851 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use std::collections::{HashMap, HashSet};
+
+use pest::error::{Error, ErrorVariant, InputLocation};
+use pest::iterators::Pairs;
+use pest::Span;
+
+use parser::{ParserExpr, ParserNode, ParserRule, Rule};
+use UNICODE_PROPERTY_NAMES;
+
+#[allow(clippy::needless_pass_by_value)]
+pub fn validate_pairs<'i>(pairs: Pairs<'i, Rule>) -> Result<Vec<&'i str>, Vec<Error<Rule>>> {
+ let mut rust_keywords = HashSet::new();
+ rust_keywords.insert("abstract");
+ rust_keywords.insert("alignof");
+ rust_keywords.insert("as");
+ rust_keywords.insert("become");
+ rust_keywords.insert("box");
+ rust_keywords.insert("break");
+ rust_keywords.insert("const");
+ rust_keywords.insert("continue");
+ rust_keywords.insert("crate");
+ rust_keywords.insert("do");
+ rust_keywords.insert("else");
+ rust_keywords.insert("enum");
+ rust_keywords.insert("extern");
+ rust_keywords.insert("false");
+ rust_keywords.insert("final");
+ rust_keywords.insert("fn");
+ rust_keywords.insert("for");
+ rust_keywords.insert("if");
+ rust_keywords.insert("impl");
+ rust_keywords.insert("in");
+ rust_keywords.insert("let");
+ rust_keywords.insert("loop");
+ rust_keywords.insert("macro");
+ rust_keywords.insert("match");
+ rust_keywords.insert("mod");
+ rust_keywords.insert("move");
+ rust_keywords.insert("mut");
+ rust_keywords.insert("offsetof");
+ rust_keywords.insert("override");
+ rust_keywords.insert("priv");
+ rust_keywords.insert("proc");
+ rust_keywords.insert("pure");
+ rust_keywords.insert("pub");
+ rust_keywords.insert("ref");
+ rust_keywords.insert("return");
+ rust_keywords.insert("Self");
+ rust_keywords.insert("self");
+ rust_keywords.insert("sizeof");
+ rust_keywords.insert("static");
+ rust_keywords.insert("struct");
+ rust_keywords.insert("super");
+ rust_keywords.insert("trait");
+ rust_keywords.insert("true");
+ rust_keywords.insert("type");
+ rust_keywords.insert("typeof");
+ rust_keywords.insert("unsafe");
+ rust_keywords.insert("unsized");
+ rust_keywords.insert("use");
+ rust_keywords.insert("virtual");
+ rust_keywords.insert("where");
+ rust_keywords.insert("while");
+ rust_keywords.insert("yield");
+
+ let mut pest_keywords = HashSet::new();
+ pest_keywords.insert("_");
+ pest_keywords.insert("ANY");
+ pest_keywords.insert("DROP");
+ pest_keywords.insert("EOI");
+ pest_keywords.insert("PEEK");
+ pest_keywords.insert("PEEK_ALL");
+ pest_keywords.insert("POP");
+ pest_keywords.insert("POP_ALL");
+ pest_keywords.insert("PUSH");
+ pest_keywords.insert("SOI");
+
+ let mut builtins = HashSet::new();
+ builtins.insert("ANY");
+ builtins.insert("DROP");
+ builtins.insert("EOI");
+ builtins.insert("PEEK");
+ builtins.insert("PEEK_ALL");
+ builtins.insert("POP");
+ builtins.insert("POP_ALL");
+ builtins.insert("SOI");
+ builtins.insert("ASCII_DIGIT");
+ builtins.insert("ASCII_NONZERO_DIGIT");
+ builtins.insert("ASCII_BIN_DIGIT");
+ builtins.insert("ASCII_OCT_DIGIT");
+ builtins.insert("ASCII_HEX_DIGIT");
+ builtins.insert("ASCII_ALPHA_LOWER");
+ builtins.insert("ASCII_ALPHA_UPPER");
+ builtins.insert("ASCII_ALPHA");
+ builtins.insert("ASCII_ALPHANUMERIC");
+ builtins.insert("ASCII");
+ builtins.insert("NEWLINE");
+ builtins.extend(UNICODE_PROPERTY_NAMES);
+
+ let definitions: Vec<_> = pairs
+ .clone()
+ .filter(|pair| pair.as_rule() == Rule::grammar_rule)
+ .map(|pair| pair.into_inner().next().unwrap().as_span())
+ .collect();
+ let called_rules: Vec<_> = pairs
+ .clone()
+ .filter(|pair| pair.as_rule() == Rule::grammar_rule)
+ .flat_map(|pair| {
+ pair.into_inner()
+ .flatten()
+ .skip(1)
+ .filter(|pair| pair.as_rule() == Rule::identifier)
+ .map(|pair| pair.as_span())
+ })
+ .collect();
+
+ let mut errors = vec![];
+
+ errors.extend(validate_rust_keywords(&definitions, &rust_keywords));
+ errors.extend(validate_pest_keywords(&definitions, &pest_keywords));
+ errors.extend(validate_already_defined(&definitions));
+ errors.extend(validate_undefined(&definitions, &called_rules, &builtins));
+
+ if !errors.is_empty() {
+ return Err(errors);
+ }
+
+ let definitions: HashSet<_> = definitions.iter().map(|span| span.as_str()).collect();
+ let called_rules: HashSet<_> = called_rules.iter().map(|span| span.as_str()).collect();
+
+ let defaults = called_rules.difference(&definitions);
+
+ Ok(defaults.cloned().collect())
+}
+
+#[allow(clippy::implicit_hasher, clippy::ptr_arg)]
+pub fn validate_rust_keywords<'i>(
+ definitions: &Vec<Span<'i>>,
+ rust_keywords: &HashSet<&str>,
+) -> Vec<Error<Rule>> {
+ let mut errors = vec![];
+
+ for definition in definitions {
+ let name = definition.as_str();
+
+ if rust_keywords.contains(name) {
+ errors.push(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: format!("{} is a rust keyword", name),
+ },
+ definition.clone(),
+ ))
+ }
+ }
+
+ errors
+}
+
+#[allow(clippy::implicit_hasher, clippy::ptr_arg)]
+pub fn validate_pest_keywords<'i>(
+ definitions: &Vec<Span<'i>>,
+ pest_keywords: &HashSet<&str>,
+) -> Vec<Error<Rule>> {
+ let mut errors = vec![];
+
+ for definition in definitions {
+ let name = definition.as_str();
+
+ if pest_keywords.contains(name) {
+ errors.push(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: format!("{} is a pest keyword", name),
+ },
+ definition.clone(),
+ ))
+ }
+ }
+
+ errors
+}
+
+#[allow(clippy::ptr_arg)]
+pub fn validate_already_defined<'i>(definitions: &Vec<Span<'i>>) -> Vec<Error<Rule>> {
+ let mut errors = vec![];
+ let mut defined = HashSet::new();
+
+ for definition in definitions {
+ let name = definition.as_str();
+
+ if defined.contains(&name) {
+ errors.push(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: format!("rule {} already defined", name),
+ },
+ definition.clone(),
+ ))
+ } else {
+ defined.insert(name);
+ }
+ }
+
+ errors
+}
+
+#[allow(clippy::implicit_hasher, clippy::ptr_arg)]
+pub fn validate_undefined<'i>(
+ definitions: &Vec<Span<'i>>,
+ called_rules: &Vec<Span<'i>>,
+ builtins: &HashSet<&str>,
+) -> Vec<Error<Rule>> {
+ let mut errors = vec![];
+ let definitions: HashSet<_> = definitions.iter().map(|span| span.as_str()).collect();
+
+ for rule in called_rules {
+ let name = rule.as_str();
+
+ if !definitions.contains(name) && !builtins.contains(name) {
+ errors.push(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: format!("rule {} is undefined", name),
+ },
+ rule.clone(),
+ ))
+ }
+ }
+
+ errors
+}
+
+#[allow(clippy::ptr_arg)]
+pub fn validate_ast<'a, 'i: 'a>(rules: &'a Vec<ParserRule<'i>>) -> Vec<Error<Rule>> {
+ let mut errors = vec![];
+
+ errors.extend(validate_repetition(rules));
+ errors.extend(validate_choices(rules));
+ errors.extend(validate_whitespace_comment(rules));
+ errors.extend(validate_left_recursion(rules));
+
+ errors.sort_by_key(|error| match error.location {
+ InputLocation::Span(span) => span,
+ _ => unreachable!(),
+ });
+
+ errors
+}
+
+fn is_non_progressing<'i>(
+ expr: &ParserExpr<'i>,
+ rules: &HashMap<String, &ParserNode<'i>>,
+ trace: &mut Vec<String>,
+) -> bool {
+ match *expr {
+ ParserExpr::Str(ref string) => string == "",
+ ParserExpr::Ident(ref ident) => {
+ if ident == "soi" || ident == "eoi" {
+ return true;
+ }
+
+ if !trace.contains(ident) {
+ if let Some(node) = rules.get(ident) {
+ trace.push(ident.clone());
+ let result = is_non_progressing(&node.expr, rules, trace);
+ trace.pop().unwrap();
+
+ return result;
+ }
+ }
+
+ false
+ }
+ ParserExpr::PosPred(_) => true,
+ ParserExpr::NegPred(_) => true,
+ ParserExpr::Seq(ref lhs, ref rhs) => {
+ is_non_progressing(&lhs.expr, rules, trace)
+ && is_non_progressing(&rhs.expr, rules, trace)
+ }
+ ParserExpr::Choice(ref lhs, ref rhs) => {
+ is_non_progressing(&lhs.expr, rules, trace)
+ || is_non_progressing(&rhs.expr, rules, trace)
+ }
+ _ => false,
+ }
+}
+
+fn is_non_failing<'i>(
+ expr: &ParserExpr<'i>,
+ rules: &HashMap<String, &ParserNode<'i>>,
+ trace: &mut Vec<String>,
+) -> bool {
+ match *expr {
+ ParserExpr::Str(ref string) => string == "",
+ ParserExpr::Ident(ref ident) => {
+ if !trace.contains(ident) {
+ if let Some(node) = rules.get(ident) {
+ trace.push(ident.clone());
+ let result = is_non_failing(&node.expr, rules, trace);
+ trace.pop().unwrap();
+
+ return result;
+ }
+ }
+
+ false
+ }
+ ParserExpr::Opt(_) => true,
+ ParserExpr::Rep(_) => true,
+ ParserExpr::Seq(ref lhs, ref rhs) => {
+ is_non_failing(&lhs.expr, rules, trace) && is_non_failing(&rhs.expr, rules, trace)
+ }
+ ParserExpr::Choice(ref lhs, ref rhs) => {
+ is_non_failing(&lhs.expr, rules, trace) || is_non_failing(&rhs.expr, rules, trace)
+ }
+ _ => false,
+ }
+}
+
+fn validate_repetition<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec<Error<Rule>> {
+ let mut result = vec![];
+ let map = to_hash_map(rules);
+
+ for rule in rules {
+ let mut errors = rule.node
+ .clone()
+ .filter_map_top_down(|node| match node.expr {
+ ParserExpr::Rep(ref other)
+ | ParserExpr::RepOnce(ref other)
+ | ParserExpr::RepMin(ref other, _) => {
+ if is_non_failing(&other.expr, &map, &mut vec![]) {
+ Some(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message:
+ "expression inside repetition cannot fail and will repeat \
+ infinitely"
+ .to_owned()
+ },
+ node.span.clone()
+ ))
+ } else if is_non_progressing(&other.expr, &map, &mut vec![]) {
+ Some(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message:
+ "expression inside repetition is non-progressing and will repeat \
+ infinitely"
+ .to_owned(),
+ },
+ node.span.clone()
+ ))
+ } else {
+ None
+ }
+ }
+ _ => None
+ });
+
+ result.append(&mut errors);
+ }
+
+ result
+}
+
+fn validate_choices<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec<Error<Rule>> {
+ let mut result = vec![];
+ let map = to_hash_map(rules);
+
+ for rule in rules {
+ let mut errors = rule
+ .node
+ .clone()
+ .filter_map_top_down(|node| match node.expr {
+ ParserExpr::Choice(ref lhs, _) => {
+ let node = match lhs.expr {
+ ParserExpr::Choice(_, ref rhs) => rhs,
+ _ => lhs,
+ };
+
+ if is_non_failing(&node.expr, &map, &mut vec![]) {
+ Some(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message:
+ "expression cannot fail; following choices cannot be reached"
+ .to_owned(),
+ },
+ node.span.clone(),
+ ))
+ } else {
+ None
+ }
+ }
+ _ => None,
+ });
+
+ result.append(&mut errors);
+ }
+
+ result
+}
+
+fn validate_whitespace_comment<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec<Error<Rule>> {
+ let map = to_hash_map(rules);
+
+ rules
+ .iter()
+ .filter_map(|rule| {
+ if rule.name == "WHITESPACE" || rule.name == "COMMENT" {
+ if is_non_failing(&rule.node.expr, &map, &mut vec![]) {
+ Some(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: format!(
+ "{} cannot fail and will repeat infinitely",
+ &rule.name
+ ),
+ },
+ rule.node.span.clone(),
+ ))
+ } else if is_non_progressing(&rule.node.expr, &map, &mut vec![]) {
+ Some(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: format!(
+ "{} is non-progressing and will repeat infinitely",
+ &rule.name
+ ),
+ },
+ rule.node.span.clone(),
+ ))
+ } else {
+ None
+ }
+ } else {
+ None
+ }
+ })
+ .collect()
+}
+
+fn validate_left_recursion<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> Vec<Error<Rule>> {
+ left_recursion(to_hash_map(rules))
+}
+
+fn to_hash_map<'a, 'i: 'a>(rules: &'a [ParserRule<'i>]) -> HashMap<String, &'a ParserNode<'i>> {
+ rules.iter().map(|r| (r.name.clone(), &r.node)).collect()
+}
+
+#[allow(clippy::needless_pass_by_value)]
+fn left_recursion<'a, 'i: 'a>(rules: HashMap<String, &'a ParserNode<'i>>) -> Vec<Error<Rule>> {
+ fn check_expr<'a, 'i: 'a>(
+ node: &'a ParserNode<'i>,
+ rules: &'a HashMap<String, &ParserNode<'i>>,
+ trace: &mut Vec<String>,
+ ) -> Option<Error<Rule>> {
+ match node.expr.clone() {
+ ParserExpr::Ident(other) => {
+ if trace[0] == other {
+ trace.push(other);
+ let chain = trace
+ .iter()
+ .map(|ident| ident.as_ref())
+ .collect::<Vec<_>>()
+ .join(" -> ");
+
+ return Some(Error::new_from_span(
+ ErrorVariant::CustomError {
+ message: format!(
+ "rule {} is left-recursive ({}); pest::prec_climber might be useful \
+ in this case",
+ node.span.as_str(),
+ chain
+ )
+ },
+ node.span.clone()
+ ));
+ }
+
+ if !trace.contains(&other) {
+ if let Some(node) = rules.get(&other) {
+ trace.push(other);
+ let result = check_expr(node, rules, trace);
+ trace.pop().unwrap();
+
+ return result;
+ }
+ }
+
+ None
+ }
+ ParserExpr::Seq(ref lhs, ref rhs) => {
+ if is_non_failing(&lhs.expr, rules, &mut vec![trace.last().unwrap().clone()]) {
+ check_expr(rhs, rules, trace)
+ } else {
+ check_expr(lhs, rules, trace)
+ }
+ }
+ ParserExpr::Choice(ref lhs, ref rhs) => {
+ check_expr(&lhs, rules, trace).or_else(|| check_expr(&rhs, rules, trace))
+ }
+ ParserExpr::Rep(ref node) => check_expr(&node, rules, trace),
+ ParserExpr::RepOnce(ref node) => check_expr(&node, rules, trace),
+ ParserExpr::Opt(ref node) => check_expr(&node, rules, trace),
+ ParserExpr::PosPred(ref node) => check_expr(&node, rules, trace),
+ ParserExpr::NegPred(ref node) => check_expr(&node, rules, trace),
+ ParserExpr::Push(ref node) => check_expr(&node, rules, trace),
+ _ => None,
+ }
+ }
+
+ let mut errors = vec![];
+
+ for (ref name, ref node) in &rules {
+ let name = (*name).clone();
+
+ if let Some(error) = check_expr(node, &rules, &mut vec![name]) {
+ errors.push(error);
+ }
+ }
+
+ errors
+}
+
+#[cfg(test)]
+mod tests {
+ use super::super::parser::{consume_rules, PestParser};
+ use super::super::unwrap_or_report;
+ use super::*;
+ use pest::Parser;
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:1
+ |
+1 | let = { \"a\" }
+ | ^-^
+ |
+ = let is a rust keyword")]
+ fn rust_keyword() {
+ let input = "let = { \"a\" }";
+ unwrap_or_report(validate_pairs(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:1
+ |
+1 | ANY = { \"a\" }
+ | ^-^
+ |
+ = ANY is a pest keyword")]
+ fn pest_keyword() {
+ let input = "ANY = { \"a\" }";
+ unwrap_or_report(validate_pairs(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:13
+ |
+1 | a = { \"a\" } a = { \"a\" }
+ | ^
+ |
+ = rule a already defined")]
+ fn already_defined() {
+ let input = "a = { \"a\" } a = { \"a\" }";
+ unwrap_or_report(validate_pairs(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:7
+ |
+1 | a = { b }
+ | ^
+ |
+ = rule b is undefined")]
+ fn undefined() {
+ let input = "a = { b }";
+ unwrap_or_report(validate_pairs(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ fn valid_recursion() {
+ let input = "a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"b\") ~ a }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:16
+ |
+1 | WHITESPACE = { \"\" }
+ | ^^
+ |
+ = WHITESPACE cannot fail and will repeat infinitely")]
+ fn non_failing_whitespace() {
+ let input = "WHITESPACE = { \"\" }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:13
+ |
+1 | COMMENT = { soi }
+ | ^-^
+ |
+ = COMMENT is non-progressing and will repeat infinitely")]
+ fn non_progressing_comment() {
+ let input = "COMMENT = { soi }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:7
+ |
+1 | a = { (\"\")* }
+ | ^---^
+ |
+ = expression inside repetition cannot fail and will repeat infinitely")]
+ fn non_failing_repetition() {
+ let input = "a = { (\"\")* }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:18
+ |
+1 | a = { \"\" } b = { a* }
+ | ^^
+ |
+ = expression inside repetition cannot fail and will repeat infinitely")]
+ fn indirect_non_failing_repetition() {
+ let input = "a = { \"\" } b = { a* }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:20
+ |
+1 | a = { \"a\" ~ (\"b\" ~ (\"\")*) }
+ | ^---^
+ |
+ = expression inside repetition cannot fail and will repeat infinitely")]
+ fn deep_non_failing_repetition() {
+ let input = "a = { \"a\" ~ (\"b\" ~ (\"\")*) }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:7
+ |
+1 | a = { (\"\" ~ &\"a\" ~ !\"a\" ~ (soi | eoi))* }
+ | ^-------------------------------^
+ |
+ = expression inside repetition is non-progressing and will repeat infinitely")]
+ fn non_progressing_repetition() {
+ let input = "a = { (\"\" ~ &\"a\" ~ !\"a\" ~ (soi | eoi))* }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:20
+ |
+1 | a = { !\"a\" } b = { a* }
+ | ^^
+ |
+ = expression inside repetition is non-progressing and will repeat infinitely")]
+ fn indirect_non_progressing_repetition() {
+ let input = "a = { !\"a\" } b = { a* }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:7
+ |
+1 | a = { a }
+ | ^
+ |
+ = rule a is left-recursive (a -> a); pest::prec_climber might be useful in this case")]
+ fn simple_left_recursion() {
+ let input = "a = { a }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:7
+ |
+1 | a = { b } b = { a }
+ | ^
+ |
+ = rule b is left-recursive (b -> a -> b); pest::prec_climber might be useful in this case
+
+ --> 1:17
+ |
+1 | a = { b } b = { a }
+ | ^
+ |
+ = rule a is left-recursive (a -> b -> a); pest::prec_climber might be useful in this case")]
+ fn indirect_left_recursion() {
+ let input = "a = { b } b = { a }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:39
+ |
+1 | a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"\") ~ a }
+ | ^
+ |
+ = rule a is left-recursive (a -> a); pest::prec_climber might be useful in this case")]
+ fn non_failing_left_recursion() {
+ let input = "a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"\") ~ a }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:13
+ |
+1 | a = { \"a\" | a }
+ | ^
+ |
+ = rule a is left-recursive (a -> a); pest::prec_climber might be useful in this case")]
+ fn non_primary_choice_left_recursion() {
+ let input = "a = { \"a\" | a }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:7
+ |
+1 | a = { \"a\"* | \"a\" | \"b\" }
+ | ^--^
+ |
+ = expression cannot fail; following choices cannot be reached")]
+ fn lhs_non_failing_choice() {
+ let input = "a = { \"a\"* | \"a\" | \"b\" }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:13
+ |
+1 | a = { \"a\" | \"a\"* | \"b\" }
+ | ^--^
+ |
+ = expression cannot fail; following choices cannot be reached")]
+ fn lhs_non_failing_choice_middle() {
+ let input = "a = { \"a\" | \"a\"* | \"b\" }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ #[should_panic(expected = "grammar error
+
+ --> 1:7
+ |
+1 | a = { b | \"a\" } b = { \"b\"* | \"c\" }
+ | ^
+ |
+ = expression cannot fail; following choices cannot be reached
+
+ --> 1:23
+ |
+1 | a = { b | \"a\" } b = { \"b\"* | \"c\" }
+ | ^--^
+ |
+ = expression cannot fail; following choices cannot be reached")]
+ fn lhs_non_failing_nested_choices() {
+ let input = "a = { b | \"a\" } b = { \"b\"* | \"c\" }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+
+ #[test]
+ fn skip_can_be_defined() {
+ let input = "skip = { \"\" }";
+ unwrap_or_report(consume_rules(
+ PestParser::parse(Rule::grammar_rules, input).unwrap(),
+ ));
+ }
+}