tutorial 4 -- Calculator β

A calculator that converts sentences to tree structures (AST). So-so difficult.

The AST File

Define AST (abstract semantic tree) as below, and save it as calc1_ast.hpp.

struct Node {
        virtual ~Node(){}
        virtual int calc() = 0;
};

struct Expr : public Node {
};

struct Term : public Node {
};

struct Number : public Node {
        int number;
        Number( int n ) : number( n ) {}
        int calc() { return number; }
};

struct AddExpr : public Expr {
        Expr*   lhs;
        Expr*   rhs;
        AddExpr( Expr* x, Expr* y ) : lhs( x ), rhs( y ) {}
        int calc() { return lhs->calc() + rhs->calc(); }
};

struct SubExpr : public Expr {
        Expr*   lhs;
        Expr*   rhs;
        SubExpr( Expr* x, Expr* y ) : lhs( x ), rhs( y ) {}
        int calc() { return lhs->calc() - rhs->calc(); }
};

struct TermExpr : public Expr {
        Term*   term;
        TermExpr( Term* x )  : term( x ) {}
        int calc() { return term->calc(); }
};

struct MulTerm : public Term {
        Term*   lhs;
        Term*   rhs;
        MulTerm( Term* x, Term* y ) : lhs( x ), rhs( y ) {}
        int calc() { return lhs->calc() * rhs->calc(); }
};

struct DivTerm : public Term {
        Term*   lhs;
        Term*   rhs;
        DivTerm( Term* x, Term* y ) : lhs( x ), rhs( y ) {}
        int calc() { return lhs->calc() / rhs->calc(); }
};

struct NumberTerm : public Term {
        Number* number;
        NumberTerm( Number* x ) : number( x ) {}
        int calc() { return number->calc(); }
};

The Grammar File

%token Number<Number*> Add Sub Mul Div;
%namespace calc;
%dont_use_stl;

Expr<Expr*> : [MakeExpr] Term(0)
            | [MakeAdd] Expr(0) Add Term(1)
            | [MakeSub] Expr(0) Sub Term(1)
            ;

Term<Term*> : [MakeTerm] Number(0)
            | [MakeMul] Term(0) Mul Number(1)
            | [MakeDiv] Term(0) Div Number(1)
            ;

The grammar file was not very changed from calc0, but the types of symbol were changed as below:

Number<Number*>
Expr<Expr*>
Term<Term*>
		

*** We don't think about memory leaks for simplicity! ***

The parser this grammar generates, needs the types of "Number" "Expr" "Term" etc., so you must #include the AST definition before parser is #include'd. So, please create the following file and save it as "calc1.hpp".

#include "cacl1_ast.hpp"
#include "cacl1_parser.hpp"
		

You must #include calc1_ast.hpp before #includ'ing calc1_parser.hpp, that is generated from calc1_parser.cpg. Otherwise compilation error occurs.

The Handling File

#include "calc1.hpp"
#include <iostream>

class unexpected_char : public std::exception {};

template < class It >
class scanner {
public:
        typedef int char_type;

public:
        scanner( It b, It e ) : b_(b), e_(e), c_(b), unget_(EOF) { }

        calc::Token get( Node*& v )
        {
                v = NULL;
                int c;
                do {
                        c = getc();
                } while( isspace( c ) );

                // symbols
                switch( c ) {
                case '+': return calc::token_Add;
                case '-': return calc::token_Sub;
                case '*': return calc::token_Mul;
                case '/': return calc::token_Div;
                case EOF: return calc::token_eof;
                }

                // integers
                if( isdigit( c ) ) {
                        int n = 0;
                        while( c != EOF && isdigit( c ) ) {
                                n *= 10;
                                n += c - '0';
                                c = getc();
                        }
                        ungetc( c );
                        v = new Number( n );
                        return calc::token_Number;
                }


                std::cerr << char(c) << std::endl;
                throw unexpected_char();
        }

private:
        char_type getc()
        {
                int c;
                if( unget_ != EOF ) {
                        c = unget_;
                        unget_ = EOF;
                } else if( c_ == e_ ) {
                        c = EOF; 
                } else {
                        c = *c_++;
                }
                return c;
        }

        void ungetc( char_type c )
        {
                if( c != EOF ) {
                        unget_ = c;
                }
        }

private:
        It              b_;
        It              e_;
        It              c_;
        char_type       unget_;

};

struct SemanticAction {
        void syntax_error(){}
        void stack_overflow(){}

        template < class T >
        void downcast( T*& x, Node* y ) { x = static_cast<T*>( y ); }

        template < class T >
        void upcast( Node*& x, T* y ) { x = y; }

        Expr* MakeExpr( Term* x ) { return new TermExpr( x ); }
        Expr* MakeAdd( Expr* x, Term* y )
        {
                std::cerr << "expr " << x << " + " << y << std::endl;
                return new AddExpr( x, new TermExpr( y ) ) ;
        }
        Expr* MakeSub( Expr* x, Term* y )
        {
                std::cerr << "expr " << x << " - " << y << std::endl;
                return new SubExpr( x, new TermExpr( y ) ) ;
        }
        Term* MakeTerm( Number* x ) { return new NumberTerm( x ); }
        Term* MakeMul( Term* x, Number* y )
        {
                std::cerr << "expr " << x << " * " << y << std::endl;
                return new MulTerm( x, new NumberTerm( y ) ) ;
        }
        Term* MakeDiv( Term* x, Number* y )
        {
                std::cerr << "expr " << x << " / " << y << std::endl;
                return new DivTerm( x, new NumberTerm( y ) ) ;
        }
};

int main( int, char** )
{
        // The scanner
        typedef std::istreambuf_iterator<char> is_iterator;
        is_iterator b( std::cin );
        is_iterator e;
        scanner< is_iterator > s( b, e );

        SemanticAction sa;

        calc::Parser< Node*, SemanticAction > parser( sa );

        calc::Token token;
        for(;;) {
                Node* v;
                token = s.get( v );
                if( parser.post( token, v ) ) { break; }
        }

        Node* v;
        if( parser.accept( v ) ) {
                std::cerr << "accpeted\n";
                std::cerr << v->calc() << std::endl;
        }

        return 0;
}

Oh, it's a little bit long.

At first, the scanner returns the token value as the type of Node* (not int). Concretely, it returns the value of new Number(n) on returning token_Number.

At second, we will explain semantic actions. This time, we use Node* for the type of the whole set of semantic values. And then, as shown at the items of the grammar file, each type of grammar items is different from Node*, so we must convert to adjust them.

This case, the downcast function does convert Node* to Expr*/Term*/Number*. All of Expr/Term/Number are Node classes, so we can handle them by the definition of the following template member function.

template < class T >
void downcast( T*& x, Node* y ) { x = static_cast<T*>( y ); }
		

Inversely, to convert Expr*/Term*/Number* to Node*, all of Expr/Term/Number are Node-derived classes, so we can also handle them by the following way:

template < class T >
void upcast( Node*& x, T* y ) { x = y; }
		

Considering the facts mentioned above, the semantic action will become as below.

Expr* MakeAdd( Expr* x, Term* y )
{
    std::cerr << "expr " << x << " + " << y << std::endl;
    return new AddExpr( x, new TermExpr( y ) ) ;
}

No difficult things. There is no casting code seemingly.

The main function is not very changed. The result of accept is the root node of AST. The calc function is executed to display the calcucation result.