lundi 27 avril 2020

Java CUP - How to solve Shift/Reduce conflict in IF ELSE statement?

I'm trying to write a CUP specification for VC - a simplified version of C. Here is the part that troubling me:

if_stmt ::= IF LPAREN expr RPAREN stmt
    | IF LPAREN expr RPAREN stmt ELSE stmt
    ;

When trying to generate parser, it always says that:

Warning : *** Shift/Reduce conflict found in state #135
between else_stmt ::= stmt (*)
and     else_stmt ::= stmt (*) ELSE stmt
under symbol ELSE
Resolved in favor of shifting.

I guess the problem is that, this is due to the stmt after RPAREN. If the stmt later converted to if_stmt and become IF LPAREN expr RPAREN IF LPAREN expr RPAREN stmt ELSE stmt, the parser will not know whether it will be:

IF LPAREN expr RPAREN
    IF LPAREN expr RPAREN
        stmt
    ELSE
        stmt

or

IF LPAREN expr RPAREN
    IF LPAREN expr RPAREN
        stmt
ELSE
    stmt

According to VC specification, the first representation is used. How can I fix this in order to comply with the specification?

Here is what I have written:

// CUP specification for VC Language

// Package and import specifications
import java_cup.runtime.*;

// User Code Components
// Preliminaries to set up and use the scanner.
parser code {:
    public void report_error(String message, Object info) {
        StringBuilder m = new StringBuilder("Error");
        if (info instanceof java_cup.runtime.Symbol)
            m.append("(" + info.toString() + ")");
        m.append(" : " + message);
        System.out.println(m);
    }

    public void report_fatal_error(String message, Object info) {
        report_error(message, info);
        throw new RuntimeException("Fatal Syntax Error");
    }
:};
init with {:
    scanner.init();
:};
scan with {:
    return scanner.next_token();
:};

// Symbol Lists
// Terminals
// Keywords
terminal BOOLEAN, BREAK, CONTINUE, ELSE, FOR, FLOAT, IF, INT, RETURN, VOID, WHILE;

// Literals
terminal Integer INTLITERAL;
terminal Float FLOATLITERAL;
terminal Boolean BOOLLITERAL;
terminal String STRINGLITERAL;
terminal String ID;

// Operators
// Arithmetic Operators
terminal PLUS, MINUS, MULT, DIV;

// Relational Operators
terminal LT, LTEQ, GT, GTEQ;

// Equality Operators
terminal EQEQ, NOTEQ;

// Logical Operators
terminal OROR, ANDAND, NOT;

// Assignment Operator
terminal EQ;

// Separators
terminal LBRACE, RBRACE, LPAREN, RPAREN, LBRACK, RBRACK, SEMICOLON, COMMA;

// Non-terminals
non terminal program;

// Declarations
non terminal func_decl, var_decl, init_declarator_list, init_declarator, declarator, initialiser;

// Primitive types
non terminal type;

// Identifiers
non terminal identifier;

// Statements
non terminal compound_stmt, stmt, if_stmt, for_stmt, while_stmt, break_stmt, continue_stmt, return_stmt, expr_stmt;

// Expressions
non terminal expr, assignment_expr, cond_or_expr, cond_and_expr, equality_expr, rel_expr, additive_expr, multiplicative_expr, unary_expr, primary_expr;

// Parameters
non terminal para_list, proper_para_list, para_decl, arg_list, proper_arg_list, arg;

// Extra
non terminal initialiser_temp1, initialiser_temp2, compound_stmt1, compound_stmt2, expr1, assignment_expr1, proper_para_list1, proper_arg_list1;

// Precedence and Associativity declarations
precedence right EQ; // 8
precedence left OROR; // 7
precedence left ANDAND; // 6
precedence left EQEQ, NOTEQ; // 5
precedence left LT, LTEQ, GT, GTEQ; // 4
precedence left PLUS, MINUS; // 3
precedence left MULT, DIV; // 2
precedence right NOT; // 1

// The Grammar
start with program;

program ::= program func_decl
    |   program var_decl
    |
    ;

// Declarations
func_decl ::= type identifier para_list compound_stmt;
var_decl ::= type init_declarator_list SEMICOLON;
init_declarator_list ::= init_declarator
    | init_declarator COMMA init_declarator_list
    ;
init_declarator ::= declarator
    | declarator EQ initialiser
    ;
declarator ::= identifier
    | identifier LBRACK RBRACK
    | identifier LBRACK INTLITERAL RBRACK
    ;
initialiser ::= expr
    | LBRACE expr initialiser_temp1 RBRACE
    ;
initialiser_temp1 ::= COMMA initialiser_temp2
    |
    ;
initialiser_temp2 ::= expr
    | expr COMMA initialiser_temp2
    ;

// Primitive types
type ::= VOID
    | BOOLEAN
    | INT
    | FLOAT
    ;

// Identifiers
identifier ::= ID;

// Statements
compound_stmt ::= LBRACE compound_stmt1 compound_stmt2 RBRACE;
compound_stmt1 ::= var_decl compound_stmt1
    |
    ;
compound_stmt2 ::= stmt compound_stmt2
    |
    ;
stmt ::= compound_stmt
    | if_stmt
    | for_stmt
    | while_stmt
    | break_stmt
    | continue_stmt
    | return_stmt
    | expr_stmt
    ;
if_stmt ::= IF LPAREN expr RPAREN stmt
    | IF LPAREN expr RPAREN stmt ELSE stmt
    ;
for_stmt ::= FOR LPAREN expr1 SEMICOLON expr1 SEMICOLON expr1 RPAREN stmt;
expr1 ::= expr
    |
    ;
while_stmt ::= WHILE LPAREN expr RPAREN stmt;
break_stmt ::= BREAK SEMICOLON;
continue_stmt ::= CONTINUE SEMICOLON;
return_stmt ::= RETURN expr1 SEMICOLON;
expr_stmt ::= expr1 SEMICOLON;

// Expressions
expr ::= assignment_expr;
assignment_expr ::= assignment_expr1 cond_or_expr;
assignment_expr1 ::= assignment_expr1 cond_or_expr EQ
    |
    ;
cond_or_expr ::= cond_and_expr
    | cond_or_expr OROR cond_and_expr
    ;
cond_and_expr ::= equality_expr
    | cond_and_expr ANDAND equality_expr
    ;
equality_expr ::= rel_expr
    | equality_expr EQEQ rel_expr
    | equality_expr NOTEQ rel_expr
    ;
rel_expr ::= additive_expr
    | rel_expr LT additive_expr
    | rel_expr LTEQ additive_expr
    | rel_expr GT additive_expr
    | rel_expr GTEQ additive_expr
    ;
additive_expr ::= multiplicative_expr
    | additive_expr PLUS multiplicative_expr
    | additive_expr MINUS multiplicative_expr
    ;
multiplicative_expr ::= unary_expr
    | multiplicative_expr MULT unary_expr
    | multiplicative_expr DIV unary_expr
    ;
unary_expr ::= PLUS unary_expr
    | MINUS unary_expr
    | NOT unary_expr
    | primary_expr
    ;
primary_expr ::= identifier
    | identifier arg_list
    | identifier LBRACK expr RBRACK
    | LPAREN expr RPAREN
    | INTLITERAL
    | FLOATLITERAL
    | BOOLLITERAL
    | STRINGLITERAL
    ;

// Parameters
para_list ::= LPAREN proper_para_list RPAREN
    | LPAREN RPAREN
    ;
proper_para_list ::= para_decl proper_para_list1;
proper_para_list1 ::= COMMA para_decl proper_para_list1
    |
    ;
para_decl ::= type declarator;
arg_list ::= LPAREN proper_arg_list RPAREN
    | LPAREN RPAREN
    ;
proper_arg_list ::= arg proper_arg_list1;
proper_arg_list1 ::= COMMA arg proper_arg_list1
    |
    ;
arg ::= expr;

Aucun commentaire:

Enregistrer un commentaire