Update to Finding problematic EOF references in Antlr4 grammars
This is an update to the start-rule problem I outlined in an earlier post. I’ve updated the script to check for additional problems and to output the rules that fail each check.
# Verify that the grammar has an EOF-terminated start rule, e.g.
#   foobar : ('foo' | 'bar')* EOF;
#
# The query selects the RULE_REF (the rule name) of every parserRuleSpec
# whose ruleBlock contains a TOKEN_REF with the text EOF. Empty output from
# the pipeline is treated as "no such rule".
#
# "$@" is quoted so that file names containing spaces or glob characters
# are handed to trparse unchanged.
lines=$(trparse "$@" \
  | trxgrep " //parserRuleSpec[ruleBlock//TOKEN_REF/text()='EOF']/RULE_REF" \
  | trtext)
if [ -z "$lines" ]; then
  echo "$@" does not have an EOF-start rule.
  exit 1
fi
# Verify that no rule has the EOF symbol followed by another grammar
# symbol, e.g.
#   foobar : ('foo' | 'bar')* EOF 'wonderful';
#
# The query selects every parserRuleSpec that has an alternative in which
# an element containing an EOF TOKEN_REF has a following sibling element.
# Any output means at least one offending rule was found.
lines=$(trparse "$@" \
  | trxgrep ' //parserRuleSpec[.//alternative/element[.//TOKEN_REF/text()="EOF"]/following-sibling::element]' \
  | trtext)
if [ -n "$lines" ]; then
  # Print the offending rules verbatim. The expansion must be quoted: rule
  # text routinely contains '*' and '?', which an unquoted $lines would
  # glob-expand against the current directory (and newlines would be lost).
  printf '%s\n' "$lines"
  echo "$@" has an EOF usage followed by another element.
  exit 1
fi
# Verify that no rule has an EOF in one alternative but not in all the
# other alternatives, e.g.
#   newLine: '\n'+ | EOF;
#
# The query selects the enclosing parserRuleSpec of every labeledAlt that
# contains an EOF TOKEN_REF and has at least one sibling labeledAlt.
# NOTE(review): as written, the query does not itself test that a sibling
# alternative lacks EOF, so a rule with EOF in every alternative would
# also match -- confirm whether that is intended.
lines=$(trparse "$@" \
  | trxgrep ' //labeledAlt[.//TOKEN_REF/text()="EOF" and count(../labeledAlt) > 1]/ancestor::parserRuleSpec' \
  | trtext)
if [ -n "$lines" ]; then
  # Print the offending rules verbatim. The expansion must be quoted: rule
  # text routinely contains '*' and '?', which an unquoted $lines would
  # glob-expand against the current directory (and newlines would be lost).
  printf '%s\n' "$lines"
  echo "$@" has an EOF in one alt, but not in another.
  exit 1
fi
# Verify that the start symbol is not used on the right-hand side of
# any rule, e.g.
#   startingRule: 'test' newLine EOF;
#   unusedBadRule: startingRule '}';
#
# The query first collects the name of every parserRuleSpec whose ruleBlock
# contains an EOF TOKEN_REF, then returns every parserRuleSpec whose
# ruleBlock contains a RULE_REF equal to one of those names.
lines=$(trparse "$@" \
  | trxgrep 'for $i in (//parserRuleSpec[ruleBlock//TOKEN_REF/text()="EOF"]/RULE_REF/text() ) return //parserRuleSpec[./ruleBlock//RULE_REF = $i]' \
  | trtext)
if [ -n "$lines" ]; then
  # Print the offending rules verbatim. The expansion must be quoted: rule
  # text routinely contains '*' and '?', which an unquoted $lines would
  # glob-expand against the current directory (and newlines would be lost).
  printf '%s\n' "$lines"
  echo "$@" has start symbol that is used on the RHS of a rule.
  exit 1
fi