在哪里可以找到LR(1)解析器生成器的_simple_,易于理解的实现?

在哪里可以找到LR(1)解析器生成器的简单(尽可能但不简单!)实现?

我不是在寻找性能,只是能够生成LR(1)状态(项目集)。
C ++,C#,Java和Python都适合我。

我在C#中写了一个非常简单的文章,想在这里分享一下。

它基本上填充了action查找表,它告诉您要切换到哪个状态或用于减少的规则。
如果数字是非负数,则表示新状态; 如果它是负数,那么它的一个补码(即~x )表示规则指数。

现在您只需要制作词法分析器并使用操作表进行实际解析。

注1:在生成真实语法的状态时可能会很慢,因此在生产代码中使用它之前可能需要三思而后行!

注意2:您可能需要仔细检查其正确性,因为我只检查了一下。

 using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using size_t = System.UInt32; public class LRParser { private string[] symbols; // index => symbol private IDictionary interned = new SortedDictionary(); // symbol => index private int[/*state*/, /*lookahead*/] actions; // If >= 0, represents new state after shift. If < 0, represents one's complement (ie ~x) of reduction rule. public LRParser(params KeyValuePair[] grammar) { this.interned.Add(string.Empty, new size_t()); foreach (var rule in grammar) { if (!this.interned.ContainsKey(rule.Key)) { this.interned.Add(rule.Key, (size_t)this.interned.Count); } foreach (var symbol in rule.Value) { if (!this.interned.ContainsKey(symbol)) { this.interned.Add(symbol, (size_t)this.interned.Count); } } } this.symbols = this.interned.ToArray().OrderBy(p => p.Value).Select(p => p.Key).ToArray(); var syntax = Array.ConvertAll(grammar, r => new KeyValuePair(this.interned[r.Key], Array.ConvertAll(r.Value, s => this.interned[s]))); var nonterminals = Array.ConvertAll(this.symbols, s => new List()); for (size_t i = 0; i < syntax.Length; i++) { nonterminals[syntax[i].Key].Add(i); } var firsts = Array.ConvertAll(Enumerable.Range(0, this.symbols.Length).ToArray(), s => nonterminals[s].Count > 0 ? new HashSet() : new HashSet() { (size_t)s }); int old; do { old = firsts.Select(l => l.Count).Sum(); foreach (var rule in syntax) { foreach (var i in First(rule.Value, firsts)) { firsts[rule.Key].Add(i); } } } while (old < firsts.Select(l => l.Count).Sum()); var actions = new Dictionary>>(); var states = new Dictionary, int>(HashSet.CreateSetComparer()); var todo = new Stack>(); var root = new Item(0, 0, new size_t()); todo.Push(new HashSet()); Closure(root, todo.Peek(), firsts, syntax, nonterminals); states.Add(new HashSet(todo.Peek()), states.Count); while (todo.Count > 0) { var set = todo.Pop(); var closure = new HashSet(); foreach (var item in set) { Closure(item, closure, firsts, syntax, nonterminals); } var grouped = Array.ConvertAll(this.symbols, _ => new HashSet()); foreach (var item in closure) { if (item.Symbol >= syntax[item.Rule].Value.Length) { IDictionary> map; if (!actions.TryGetValue(states[set], out map)) { actions[states[set]] = map = new Dictionary>(); } IList list; if (!map.TryGetValue(item.Lookahead, out list)) { map[item.Lookahead] = list = new List(); } list.Add(~(int)item.Rule); continue; } var next = item; next.Symbol++; grouped[syntax[item.Rule].Value[item.Symbol]].Add(next); } for (size_t symbol = 0; symbol < grouped.Length; symbol++) { var g = new HashSet(); foreach (var item in grouped[symbol]) { Closure(item, g, firsts, syntax, nonterminals); } if (g.Count > 0) { int state; if (!states.TryGetValue(g, out state)) { state = states.Count; states.Add(g, state); todo.Push(g); } IDictionary> map; if (!actions.TryGetValue(states[set], out map)) { actions[states[set]] = map = new Dictionary>(); } IList list; if (!map.TryGetValue(symbol, out list)) { map[symbol] = list = new List(); } list.Add(state); } } } this.actions = new int[states.Count, this.symbols.Length]; for (int i = 0; i < this.actions.GetLength(0); i++) { for (int j = 0; j < this.actions.GetLength(1); j++) { this.actions[i, j] = int.MinValue; } } foreach (var p in actions) { foreach (var q in p.Value) { this.actions[p.Key, q.Key] = q.Value.Single(); } } foreach (var state in states.OrderBy(p => p.Value)) { Console.WriteLine("State {0}:", state.Value); foreach (var item in state.Key.OrderBy(i => i.Rule).ThenBy(i => i.Symbol).ThenBy(i => i.Lookahead)) { Console.WriteLine( "\t{0}: {1} \xB7 {2} | {3} → {0}", this.symbols[syntax[item.Rule].Key], string.Join(" ", syntax[item.Rule].Value.Take((int)item.Symbol).Select(s => this.symbols[s]).ToArray()), string.Join(" ", syntax[item.Rule].Value.Skip((int)item.Symbol).Select(s => this.symbols[s]).ToArray()), this.symbols[item.Lookahead] == string.Empty ? "\x04" : this.symbols[item.Lookahead], string.Join( ", ", Array.ConvertAll( actions[state.Value][item.Symbol < syntax[item.Rule].Value.Length ? syntax[item.Rule].Value[item.Symbol] : item.Lookahead].ToArray(), a => a >= 0 ? string.Format("state {0}", a) : string.Format("{0} (rule {1})", this.symbols[syntax[~a].Key], ~a)))); } Console.WriteLine(); } } private static void Closure(Item item, HashSet closure /*output*/, HashSet[] firsts, KeyValuePair[] syntax, IList[] nonterminals) { if (closure.Add(item) && item.Symbol >= syntax[item.Rule].Value.Length) { foreach (var r in nonterminals[syntax[item.Rule].Value[item.Symbol]]) { foreach (var i in First(syntax[item.Rule].Value.Skip((int)(item.Symbol + 1)), firsts)) { Closure(new Item(r, 0, i == new size_t() ? item.Lookahead : i), closure, firsts, syntax, nonterminals); } } } } private struct Item : IEquatable { public size_t Rule; public size_t Symbol; public size_t Lookahead; public Item(size_t rule, size_t symbol, size_t lookahead) { this.Rule = rule; this.Symbol = symbol; this.Lookahead = lookahead; } public override bool Equals(object obj) { return obj is Item && this.Equals((Item)obj); } public bool Equals(Item other) { return this.Rule == other.Rule && this.Symbol == other.Symbol && this.Lookahead == other.Lookahead; } public override int GetHashCode() { return this.Rule.GetHashCode() ^ this.Symbol.GetHashCode() ^ this.Lookahead.GetHashCode(); } } private static IEnumerable First(IEnumerable symbols, IEnumerable[] map) { foreach (var symbol in symbols) { bool epsilon = false; foreach (var s in map[symbol]) { if (s == new size_t()) { epsilon = true; } else { yield return s; } } if (!epsilon) { yield break; } } yield return new size_t(); } private static KeyValuePair MakePair(K k, V v) { return new KeyValuePair(k, v); } private static void Main(string[] args) { var sw = Stopwatch.StartNew(); var parser = new LRParser( MakePair("start", new string[] { "exps" }), MakePair("exps", new string[] { "exps", "exp" }), MakePair("exps", new string[] { }), MakePair("exp", new string[] { "INTEGER" }) ); Console.WriteLine(sw.ElapsedMilliseconds); } } 

LRSTAR 9.1是最小的LR(1)和LR(*)解析器生成器。 您可以使用选项’/ s’来validation解析器生成器是否提供了正确的状态。 LRSTAR已经针对HYACC进行了测试,发现它给出了正确的LR(1)状态。 LRSTAR和6个Microsoft Visual Studio项目提供了20个语法。