
//
// See the file LICENSE for redistribution information.
//
// Copyright (c) 2002-2003
//	Sleepycat Software.  All rights reserved.
//

//
// XML Path Language (XPath) Version 1.0
// W3C Recommendation 16 November 1999
// http://www.w3.org/TR/1999/REC-xpath-19991116
//

header
{
	#include "dbxml/XmlPortability.hpp"
	#include "dbxml/XmlException.hpp"
	#include <sstream>
	#include <vector>
	#include <cmath>
	#include "QueryContext.hpp"

	using namespace antlr;
	using namespace DbXml;

	// Helper types used by the grammar actions to remember which location
	// steps have been parsed so far.
	//
	// A ParserContext is passed through the production rules while the
	// XPath expression is parsed. It keeps a stack of the steps seen so
	// far, so that a rule can build a name that depends on where its
	// production appears in the expression.
	//

	// One location step: the axis text and the node test name.
	class Step
	{
	public:
		Step(const std::string &axis, const std::string &name)
		 : axis_(axis), name_(name)
		{
		}
		std::string axis_;
		std::string name_;
	};

	// A stack of Step objects. The std::vector is the stack itself; the
	// back of the vector is the top of the stack.
	class StepStack
	{
	public:
		StepStack()
		{
			v_.reserve(16);
		}
		// Pushes a copy of n.
		void push(const Step &n)
		{
			v_.push_back(n);
		}
		// Removes the top step. Popping an empty stack is a no-op.
		void pop()
		{
			if (!v_.empty())
				v_.pop_back();
		}
		bool empty() const
		{
			return v_.empty();
		}
		// Returns the step n places below the top (n==0 is the top).
		// Throws std::out_of_range if fewer than n+1 steps are stacked:
		// the unsigned subtraction wraps to a huge index, which at()
		// rejects, instead of reading outside the vector.
		const Step &top(size_t n= 0) const
		{
			return v_.at(v_.size()-1-n);
		}
		size_t size() const
		{
			return v_.size();
		}
	private:
		std::vector<Step> v_; // Stacked steps, most recent last.
	};

	// The step history handed to every production rule.
	class ParserContext
	{
	public:
		ParserContext()
		{
		}
		// Records a step with axis a and node test name n.
		void push_step(const std::string &a, const std::string &n)
		{
			steps_.push(Step(a, n));
		}
		// Discards the n most recent steps (fewer if the stack runs out).
		void pop_step(size_t n=1)
		{
			for(;n>0;--n)
				steps_.pop();
		}
		// Builds a backslash separated name from the most recent steps:
		//  - fewer than two steps: the empty string;
		//  - ".any", "*", attribute step X: ".any\X";
		//  - ".any", P, X with P other than "*": ".any\P\X";
		//  - otherwise: "P\X" for the two most recent steps P and X.
		std::string step() const
		{
			std::string r;
			const size_t depth= steps_.size();
			if(depth<2)
				return r;
			const Step &last= steps_.top(0);
			const Step &parent= steps_.top(1);
			const bool under_any= depth>=3 && steps_.top(2).name_==".any";
			if(under_any && last.axis_=="attribute" && parent.name_=="*")
			{
				r+= ".any\\";
				r+= last.name_;
			}
			else if(under_any && parent.name_!="*")
			{
				r+= ".any\\";
				r+= parent.name_;
				r+= '\\';
				r+= last.name_;
			}
			else
			{
				r+= parent.name_;
				r+= '\\';
				r+= last.name_;
			}
			return r;
		}
		// Axis of the most recent step, or "" when no step is stacked.
		std::string step_axis() const
		{
			std::string r;
			if(!steps_.empty())
				r= steps_.top().axis_;
			return r;
		}
		// Name of the most recent step, or "" when no step is stacked.
		std::string step_name() const
		{
			std::string r;
			if(!steps_.empty())
				r= steps_.top().name_;
			return r;
		}
		// Number of steps currently stacked.
		size_t size() const
		{
			return steps_.size();
		}
	private:
		StepStack steps_;
	};
}

// File-level ANTLR option: the generated parser is emitted as C++.
options
{
	language="Cpp";
}

// ================================================================================
// P A R S E R
// ================================================================================

{

	// Re-throws a recognition failure as an ANTLRException whose message
	// is assembled from, in order: the optional description, the line
	// number and the column number (each only when positive), and the
	// message carried by ex. This function never returns normally.
	void parserError(RecognitionException &ex, const char *description)
	{
		std::ostringstream msg;
		if(description)
			msg << description << " ";
		if(ex.getLine() > 0)
			msg << "Line " << ex.getLine() << ", ";
		if(ex.getColumn() > 0)
			msg << "Column " << ex.getColumn() << ", ";
		msg << ex.getMessage();
		const std::string text(msg.str());
		throw ANTLRException(text.c_str());
	}

	// Returns the namespace URI that the query context reports for
	// prefix. Throws an XmlException (XPATH_PARSER_ERROR) when the
	// context returns an empty URI for it.
	std::string mapPrefixToUri(QueryContext &context, const std::string &prefix)
	{
		std::string uri(context.getNamespace(prefix));
		if(!uri.empty())
		{
			return uri;
		}
		throw XmlException(XmlException::XPATH_PARSER_ERROR,"The prefix '"+prefix+"' is not bound to a URI.");
	}
}

// The parser class. Its rules take the QueryContext (namespace and function
// lookups) and, below the entry rule, a ParserContext (step history).
class XPathParser extends Parser;

// Parser options: three tokens of lookahead, token vocabulary imported from
// XPathLexer and exported as XPathParser, AST construction switched on, and
// ANTLR's default error handler switched off so that the exception clauses
// in the rules decide how errors are reported.
options
{
    k= 3; // jcm - If I were clever this would be 2.
	importVocab= XPathLexer;
    exportVocab= XPathParser;
    buildAST= true;
	defaultErrorHandler= false; // The default handler sends output to stderr
//    analyzerDebug= true;
//    codeGenDebug= true;
}

// Imaginary tokens: they never come from the lexer and are only used as node
// types in the tree constructors of the rule actions below.
tokens
{
	// A Stanford Lore paper describes their Logical and
	// Physical Query Plan construction...
	// http://www-db.stanford.edu/lore/pubs/qo.pdf
	// I've reused their naming convention for these
	// imaginary tokens.
	//
	XPATH;
	DISCOVER;
	CHAIN;
	ROOT;
	AXIS;
	OPERATION;
	NAME;
	FUNCTION;
	VARIABLE;
	TEXT;
	NODE;
	COMMENT;
	PROCESSING_INSTRUCTION;
	PREDICATE;
}

// Entry rule. Creates a fresh ParserContext seeded with a ".root" step, then
// accepts either a complete expression, which becomes the child of an XPATH
// node labelled "X", or an empty input (EOF), which yields a null AST.
// A RecognitionException raised while matching is handed to parserError(),
// which re-throws it as an ANTLRException with no extra description.
xpath [QueryContext &context]
  {
	ParserContext pc;
	pc.push_step("", ".root");
  }
  : a1:expr[context,pc]
  { ##= #([XPATH,"X"], a1); }  
  | EOF
  { ##= nullAST; }
  ;
  exception
  catch [RecognitionException &ex]
  {
	parserError(ex, 0);
  }
						
// XPath:[1] LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
// Pure dispatch: no action of its own, the AST is whatever the chosen
// alternative builds.
locationPath [QueryContext &context,ParserContext &pc]
  : relativeLocationPath[context,pc]
  | absoluteLocationPath[context,pc]
  ;

// XPath:[2] AbsoluteLocationPath ::= '/' RelativeLocationPath? | AbbreviatedAbsoluteLocationPath
// XPath:[10] AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
//
// AST built here (automatic construction is off because of the '!'):
//   '/'        -> ROOT "R"
//   '/' path   -> CHAIN "C" ( ROOT "R", path )
//   '//' path  -> CHAIN "C" ( ROOT "R", path )
// For '//' an ".any" step is pushed on the ParserContext before the relative
// path is parsed and one step is popped afterwards, so the two forms differ
// only in the names that the step rule derives from the context.
// The optional path after '/' is only attempted when the next token is
// '@', '*', an axis name or an NCNAME.
// Recognition errors inside either relative path are re-thrown through
// parserError() with a description naming the preceding separator.
absoluteLocationPath! [QueryContext &context,ParserContext &pc]
  : SLASH ((AT|STAR|axisName|NCNAME)=>a1:relativeLocationPath[context,pc])?
  // SLASH means select the document root
  {
  	if(#a1==nullAST)
  	{
  		##= #([ROOT,"R"]);
  	}
  	else
  	{
  		##= #([CHAIN,"C"], [ROOT,"R"], a1);
  	}
  }  
  | SLASHSLASH { pc.push_step("", ".any"); } a2:relativeLocationPath[context,pc] { pc.pop_step(); }
  // SLASHSLASH means /descendant-or-self::node()/
  {
  	##= #([CHAIN,"C"], [ROOT,"R"], #a2);
  }
  ;
  exception[a1]
  catch [RecognitionException &ex]
  {
	parserError(ex, "Expecting relative location path after '/'.");
  }
  exception[a2]
  catch [RecognitionException &ex]
  {
	parserError(ex, "Expecting relative location path after '//'.");
  }

// XPath:[3] RelativeLocationPath ::= step | RelativeLocationPath '/' Step | AbbreviatedRelativeLocationPath
// XPath:[11] AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
//
// The left recursion of the specification is replaced by a first step
// followed by the right-recursive 'steps' rule. A single step is returned
// as-is; otherwise the result is CHAIN "C" ( first step, remaining steps ).
relativeLocationPath! [QueryContext &context,ParserContext &pc]
  : a1:step[context,pc] b1:steps[context,pc]
  {
  	if(#b1==nullAST)
  	{
  		##= #a1;
  	}
  	else
  	{
  		##= #([CHAIN,"C"], a1, b1);
  	}
  }  
  ;

// Zero or more further steps, each introduced by '/' or '//'.
//
// Matching nothing yields a null AST. One step with nothing after it is
// returned as-is; otherwise the result is CHAIN "C" ( step, following steps ).
// For '//' an ".any" step is pushed on the ParserContext before the step is
// parsed and one step is popped once the following steps have been parsed.
//
// Each alternative tests its own label for the following steps (b1 for '/',
// b2 for '//'). A label that belongs to the other alternative is always null
// and would cause everything after a '//' step to be dropped from the tree.
steps! [QueryContext &context,ParserContext &pc]
  : SLASH a1:step[context,pc] b1:steps[context,pc]
  {
  	if(#b1==nullAST)
  	{
  		##=#a1;
  	}
  	else
  	{
  		##= #([CHAIN,"C"], a1, b1);
  	}
  }
  | SLASHSLASH { pc.push_step("", ".any"); } a2:step[context,pc] b2:steps[context,pc] { pc.pop_step(); }
  // SLASHSLASH means /descendant-or-self::node()/
  {
  	if(#b2==nullAST)
  	{
  		##= #a2;
  	}
  	else
  	{
  		##= #([CHAIN,"C"], a2, b2);
  	}
  } 
  |
  // It's OK to match nothing.
  ;

// XPath:[4] Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
//
// Three alternatives:
//  1. explicit axis (axis name followed by '::', or '@') then a node test;
//  2. a bare node test, for which the axis is "child";
//  3. an abbreviated step ('.' or '..'), returned unchanged.
// In alternatives 1 and 2, before the predicates are parsed:
//  - a NAME node test is pushed on the ParserContext (with its axis) and its
//    text is replaced by the context-derived name from pc.step();
//  - a TEXT node test has its text replaced by pc.step() without a push.
// The result is DISCOVER "D" ( axis, node test ); when predicates follow, it
// is wrapped as CHAIN "C" ( DISCOVER ..., predicates ).
// Steps pushed here are not popped by this rule; pathExpr restores the
// stack depth once the whole path has been parsed.
step! [QueryContext &context,ParserContext &pc]
  : 
	((axisName COLONCOLON)|AT)=> a1:axisSpecifier 
	b1:nodeTest[context,pc]
  { 
	if(#b1->getType()==NAME)
	{
	  	pc.push_step(#a1->getText(), #b1->getText());
	  	#b1->setText(pc.step());
	}
	else if(#b1->getType()==TEXT)
	{
	  	#b1->setText(pc.step());
	}
  } 
	c1:predicates[context,pc]
  {
  	if(#c1==nullAST)
  	{
  		##=#([DISCOVER,"D"], a1, b1);
		
  	}
  	else
  	{
  		##= #([CHAIN,"C"], #([DISCOVER,"D"], a1, b1), c1);
  	}
  }
  | 
	a2:nodeTest[context,pc]
  {
	if(#a2->getType()==NAME)
	{
	  	pc.push_step("child", #a2->getText()); 
	  	#a2->setText(pc.step());
	}
	else if(#a2->getType()==TEXT)
	{
	  	#a2->setText(pc.step());
	}
  }
	b2:predicates[context,pc]
  {
  	if(#b2==nullAST)
  	{
  		##=#([DISCOVER,"D"], #([AXIS,"child"]), a2);
  	}
  	else
  	{
  		##= #([CHAIN,"C"], #([DISCOVER,"D"], #([AXIS,"child"]), a2), b2);
  	}
  }
  | a3:abbreviatedStep[context, pc]
  { ##= #a3; }
  ;

// Zero or more predicates, right-recursive. Matching nothing yields a null
// AST, a single predicate is returned as-is, and several predicates become
// CHAIN "C" ( first predicate, remaining predicates ).
predicates! [QueryContext &context,ParserContext &pc]
  : a1:predicate[context,pc] b1:predicates[context,pc]
  {
  	if(#b1==nullAST)
  	{
  		##=#a1;
  	}
  	else
  	{
  		##= #([CHAIN,"C"], a1, b1);
  	}
  }
  |
  // It's OK to match nothing.
  ;

// XPath:[5] AxisSpecifier ::= AxisName '::' | AbbreviatedAxisSpecifier
// XPath:[13] AbbreviatedAxisSpecifier ::= '@'?
// We only get here if the syntactic predicate in the 'step' rule is true.
//
// Produces a single AXIS node whose text is the axis name: the matched
// axis token's text, "attribute" for '@', or "child" for the empty
// alternative.
// NOTE(review): given the predicate in 'step', the empty alternative looks
// unreachable from that rule - confirm before relying on it.
axisSpecifier!
  : a1:axisName COLONCOLON
  { ##=#([AXIS,a1_AST->getText()]); }
  | AT 
  { ##=#([AXIS,"attribute"]); } /* Abbreviation for attribute:: */
  | 
  { ##=#([AXIS,"child"]); } /* Abbreviation for child:: */
  ;

// XPath:[6] AxisName ::= 'ancestor' | 'ancestor-or-self' | 'attribute' | 'child' | 'descendant' | 'descendant-or-self' | 'following' | 'following-sibling' | 'namespace' | 'parent' | 'preceding' | 'preceding-sibling' | 'self'
// Matches exactly one of the thirteen axis keyword tokens; the matched token
// becomes the rule's AST through automatic construction.
axisName: 
	LITERAL_ancestor | 
	LITERAL_ancestor_or_self | 
	LITERAL_attribute |
	LITERAL_child |
	LITERAL_descendant |
	LITERAL_descendant_or_self |
	LITERAL_following |
	LITERAL_following_sibling |
	LITERAL_namespace |
	LITERAL_parent |
	LITERAL_preceding |
	LITERAL_preceding_sibling |
	LITERAL_self ;

// XPath:[7] NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
//
// Alternatives and the node each one returns:
//  1. a name test           -> NAME node carrying the name test's text;
//  2. NCNAME '(' ')'        -> COMMENT, TEXT or NODE node when the name is
//                              "comment", "text" or "node";
//  3. NCNAME '(' LITERAL ')'-> PROCESSING_INSTRUCTION node when the name is
//                              "processing-instruction".
// The node type keywords arrive as ordinary NCNAME tokens, so they are
// recognised here by comparing the token text.
// NOTE(review): the 'else' branches (an unknown name in alternative 2 or 3)
// assign nothing, so the automatically built tree is returned instead of an
// error being raised - the "JCM" markers suggest this was left unfinished.
// NOTE(review): alternative 3 builds its node from the name only; the
// LITERAL argument is not copied into it - confirm this is intended.
nodeTest [QueryContext &context,ParserContext &pc]
  :! a1:nameTest[context,pc]
  {
  	##= #([NAME,a1_AST->getText()]);
  }
  | b1:NCNAME LB RB
  {
//  | COMMENT LB RB
	if(#b1->getText().compare("comment")==0)
	{
		##= #([COMMENT,#b1->getText()]);
	}
//  | TEXT LB RB
	else if(#b1->getText().compare("text")==0)
	{
		##= #([TEXT,#b1->getText()]);
	}
//  | NODE LB RB
	else if(#b1->getText().compare("node")==0)
	{
		##= #([NODE,#b1->getText()]);
	}
	else
	{
		// JCM 
	}
  }
  | c1:NCNAME LB LITERAL RB
  {
//  | PROCESSING_INSTRUCTION LB LITERAL RB
	if(#c1->getText().compare("processing-instruction")==0)
	{
		##= #([PROCESSING_INSTRUCTION,#c1->getText()]);
	}
	else
	{
		// JCM
	} 
  }
  ;

// XPath:[8] Predicate ::= '[' PredicateExpr ']'
// XPath:[9] PredicateExpr ::= Expr
// The brackets are dropped; the result is PREDICATE "P" ( expression ).
predicate! [QueryContext &context,ParserContext &pc]
  : LSB a1:expr[context,pc] RSB
  { ##= #([PREDICATE,"P"], a1); }
  ;

// XPath:[12] AbbreviatedStep ::= '.' | '..'
//
// '..' always becomes DISCOVER "D" ( AXIS "parent", NODE ).
// '.' depends on the ParserContext: when the most recent step has a
// non-empty axis, the result repeats that step as
// DISCOVER "D" ( AXIS <that axis>, NAME <pc.step()> ); otherwise it is
// DISCOVER "D" ( AXIS "self", NODE ).
// NOTE(review): the steps pushed by the grammar itself (".root", ".any")
// have an empty axis, so '.' directly after them takes the self::node()
// form - confirm this is the intended distinction.
abbreviatedStep! [QueryContext &context,ParserContext &pc]
  : DOT // Abbreviation for self::node()   
  {
	if(!pc.step_axis().empty())
	{
		##= #([DISCOVER,"D"], [AXIS,pc.step_axis()], [NAME,pc.step()]); 
	}
	else
	{
		##= #([DISCOVER,"D"], [AXIS,"self"], [NODE]); // Abbreviation for self::node()
	}
  }

  | DOTDOT { ##=#([DISCOVER,"D"], [AXIS,"parent"], [NODE]); } // Abbreviation for parent::node()
  ;

// XPath:[14] Expr ::= OrExpr
//
// An expression is evaluated to yield an object, which has one of the following four
// basic types: node-set (an unordered collection of nodes without duplicates), boolean,
// number (a floating point number), string.
//
// The evaluation context is: a node (the context node), a pair of non-zero positive
// integers (the context position and the context size), a set of variable bindings,
// a function library, and the set of namespace declarations in scope for the expression.
//
// Top of the operator precedence chain; simply delegates to orExpr.
expr [QueryContext &context,ParserContext &pc]
  : orExpr[context,pc]
  ;

// XPath:[15] PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal
//                      | Number | FunctionCall
// The parentheses of a grouped expression are excluded from the AST (the
// '!' suffix), so only the inner expression's tree remains.
primaryExpr [QueryContext &context,ParserContext &pc]
  : variableReference[context,pc] 
  | LB! expr[context,pc] RB! 
  | literal 
  | number 
  | functionCall[context,pc]
  ;

// A string literal token; the '^' makes it the root of this rule's tree.
literal
  : LITERAL^
  ;

// A numeric token; the '^' makes it the root of this rule's tree.
number
  : NUMBER^
  ;

// XPath:[16] FunctionCall ::= FunctionName '(' ( Argument ( ',' Argument )* )? ')'
//
// The function name is checked against the query context as soon as it has
// been parsed: an unknown name raises an XmlException (XPATH_PARSER_ERROR)
// before the argument list is looked at. For a known name the result is a
// FUNCTION node carrying the name, with one child per argument in source
// order; parentheses and commas are not part of the tree.
functionCall! [QueryContext &context,ParserContext &pc]
  : a1:functionName[context,pc]
    {
	if(context.isFunction(#a1->getText()))
	{
	    ##=#([FUNCTION,a1_AST->getText()]);
	}
	else
	{
		throw XmlException(XmlException::XPATH_PARSER_ERROR,"The XPath function '"+#a1->getText()+"' is not known.");
	}
    } 
  LB ( b1:argument[context,pc]
    { ##->addChild(b1_AST); } 
  ( COMMA c1:argument[context,pc]
    { ##->addChild(c1_AST); } 
  )* )? RB
  ;

// XPath:[17] Argument ::= Expr
// A function argument is any expression; delegates to expr.
argument [QueryContext &context,ParserContext &pc]
  : expr[context,pc]
  ;

// XPath:[18] UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
// Written as a loop instead of left recursion; each UNION token becomes the
// root of the tree built so far ('^').
unionExpr [QueryContext &context,ParserContext &pc]
  : pathExpr[context,pc] (UNION^ pathExpr[context,pc])*
  ;

// XPath:[19] PathExpr ::= LocationPath | FilterExpr
//                | FilterExpr '/' RelativeLocationPath
//                | FilterExpr '//' RelativeLocationPath
//
// The ParserContext depth is recorded on entry, and every alternative ends
// by popping whatever steps were pushed while it was parsed, so a path
// expression leaves the step stack as it found it.
// Alternatives, in the order they are tried:
//  1. NCNAME '(' where the name is text, comment, node or
//     processing-instruction: a location path starting with a node type test;
//  2. '$', '(', a literal, a number, or NCNAME '(': a filter expression
//     (variable, grouped expression, literal, number, function call);
//  3. '.', '..', '/', '//', '@', '*', an NCNAME or an axis name: a location
//     path;
//  4. a filter expression followed by '/' or '//' and a relative path.
// NOTE(review): every token that can start alternative 4 is already claimed
// by the predicates of alternatives 2 or 3, so alternative 4 appears to be
// unreachable - confirm against the generated parser.
pathExpr [QueryContext &context,ParserContext &pc]
  {
	size_t stack_posn= pc.size();
  }
  : (NCNAME LB)=>
  {
    LT(1)->getText().compare("text")==0 || 
    LT(1)->getText().compare("comment")==0 || 
    LT(1)->getText().compare("node")==0 || 
    LT(1)->getText().compare("processing-instruction")==0
  }? locationPath[context,pc] 
  { pc.pop_step(pc.size()-stack_posn); }
  | (DOLLAR|LB|LITERAL|NUMBER|NCNAME LB)=>filterExpr[context,pc]
  { pc.pop_step(pc.size()-stack_posn); }
  | (DOT|DOTDOT|SLASH|NCNAME|axisName|SLASHSLASH|AT|STAR)=>locationPath[context,pc] 
  { pc.pop_step(pc.size()-stack_posn); }
  | filterExpr[context,pc] ((SLASH|SLASHSLASH) relativeLocationPath[context,pc])
  { pc.pop_step(pc.size()-stack_posn); }
  ;

// XPath:[20] FilterExpr ::= PrimaryExpr | FilterExpr Predicate
// Written as a primary expression followed by any number of predicates.
filterExpr [QueryContext &context,ParserContext &pc]
  : primaryExpr[context,pc] (predicate[context,pc])* 
  ;

// XPath:[21] OrExpr ::= AndExpr | OrExpr 'or' AndExpr
// Loop form of the left-recursive production; each OR token becomes the
// root of the tree built so far ('^').
orExpr [QueryContext &context,ParserContext &pc]
  : andExpr[context,pc] (OR^ andExpr[context,pc])*
  ;

// XPath:[22] AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
// Loop form of the left-recursive production; each AND token becomes the
// root of the tree built so far ('^').
andExpr [QueryContext &context,ParserContext &pc]
  : equalityExpr[context,pc] (AND^ equalityExpr[context,pc])*
  ;

// XPath:[23] EqualityExpr ::= RelationalExpr | EqualityExpr '=' RelationalExpr
//                    | EqualityExpr '!=' RelationalExpr
// jcm - The production rule suggests 'x (op x)*', but 'x (op x)?' would make more sense
// Each EQUAL or NOTEQUAL token becomes the root of the tree built so far.
equalityExpr [QueryContext &context,ParserContext &pc]
  : relationalExpr[context,pc] ((EQUAL^|NOTEQUAL^) relationalExpr[context,pc])*
  ;

// XPath:[24] RelationalExpr ::= AdditiveExpr
//                       | RelationalExpr '<' AdditiveExpr
//                       | RelationalExpr '>' AdditiveExpr
//                       | RelationalExpr '<=' AdditiveExpr
//                       | RelationalExpr '>=' AdditiveExpr
// jcm - The production rule suggests 'x (op x)*', but 'x (op x)?' would make more sense
// Each LTX, GTX, LTE or GTE token becomes the root of the tree built so far.
relationalExpr [QueryContext &context,ParserContext &pc]
  : additiveExpr[context,pc] ((LTX^|GTX^|LTE^|GTE^) additiveExpr[context,pc])*
  ;

// XPath:[25] AdditiveExpr ::= MultiplicativeExpr
//                     | AdditiveExpr '+' MultiplicativeExpr
//                     | AdditiveExpr '-' MultiplicativeExpr
// Each PLUS or MINUS token becomes the root of the tree built so far.
additiveExpr [QueryContext &context,ParserContext &pc]
  : multiplicativeExpr[context,pc] ((PLUS^|MINUS^) multiplicativeExpr[context,pc])*
  ;

// XPath:[26] MultiplicativeExpr ::= UnaryExpr
//                     | MultiplicativeExpr MultiplyOperator UnaryExpr
//                     | MultiplicativeExpr 'div' UnaryExpr
//                     | MultiplicativeExpr 'mod' UnaryExpr
// Each STAR, DIV or MOD token becomes the root of the tree built so far.
multiplicativeExpr [QueryContext &context,ParserContext &pc]
  : unaryExpr[context,pc] ((STAR^|DIV^|MOD^) unaryExpr[context,pc])*
  ;

// XPath:[27] UnaryExpr ::= UnionExpr | '-' UnaryExpr
// A leading MINUS becomes the root of its operand; the rule recurses so
// that several minus signs nest.
unaryExpr [QueryContext &context,ParserContext &pc]
  : unionExpr[context,pc] | MINUS^ unaryExpr[context,pc]
  ;

// XPath:[35] FunctionName ::= QName - NodeType
// Delegates to qName. The exclusion of node type names is not enforced
// here; pathExpr routes those names to a location path before a function
// call is attempted.
functionName [QueryContext &context,ParserContext &pc]
  : qName[context,pc]
  ;

// XPath:[36] VariableReference ::= "$" QName
// The '$' is dropped; the result is a single VARIABLE node whose text is
// the text of the qualified name's tree.
variableReference! [QueryContext &context,ParserContext &pc]
  : DOLLAR a1:qName[context,pc]
  {
   	##=#([VARIABLE,a1_AST->getText()]);
  }
  ;

// XPath:[37] NameTest ::= "*" | NCName ":" "*" | QName
//
// Alternatives:
//  - '*': no action, the automatically built tree is returned;
//  - prefix ':' '*': the prefix is mapped to its namespace URI through
//    mapPrefixToUri() (which throws for an unbound prefix) and the text of
//    the rule's tree is set to "<uri>:*";
//  - a qualified name, handled by qName;
//  - DIV, MOD or an axis keyword used as a name: replaced by an NCNAME node
//    carrying the keyword text (see the comments at each alternative).
nameTest [QueryContext &context,ParserContext &pc]
  : STAR
  {
	// JCM ???
  }
  | (NCNAME COLON STAR)=>a1:NCNAME COLON STAR
  {
	// Lookup the URI for the prefix.
	std::string uri(mapPrefixToUri(context, #a1->getText()));
	// JCM - Should use the STAR token instead of the token text?
	##->setText(uri+":*");
  }
  | qName[context,pc]
  // Both 'div' and 'mod' can be used as operators within a numeric
  // expression, and as node names. The lexer always thinks they are
  // operators, so the parser must turn them back into qname's when
  // they appear where we expect to find a qname.
  | DIV
  {
   	##=#([NCNAME,"div"]);
  }
  | MOD
  {
   	##=#([NCNAME,"mod"]);
  }
  // The axis specifiers (child, parent, etc) can be used as node
  // names, so the parser must turn them back into qname's when
  // they appear where we expect to find a qname.
  | a5:axisName
  {
   	##=#([NCNAME,a5_AST->getText()]);
  }
  ;

// Namespace:[6]  QName ::=  (Prefix ":")? LocalPart 
// Namespace:[7]  Prefix ::=  NCName 
// Namespace:[8]  LocalPart ::=  NCName 
//
// An unprefixed name is returned as its NCNAME node. For a prefixed name the
// prefix is mapped to its namespace URI through mapPrefixToUri() (which
// throws for an unbound prefix) and the text of the rule's tree is set to
// "<uri>:<local part>". The local part may also be the DIV or MOD token,
// giving "<uri>:div" or "<uri>:mod".
// UNUSED(pc) only marks the parameter as deliberately unused; the macro is
// not defined in this file (presumably it comes from an included header).
// NOTE(review): the callers in this grammar read only getText() of the
// returned tree, so the expanded name is what they see.
qName [QueryContext &context,ParserContext &pc]
  : b1:NCNAME
  {
  	UNUSED(pc);
	##=#b1;
  }
  | a2:NCNAME COLON b2:NCNAME
  {
	// Lookup the URI for the prefix.
	std::string uri(mapPrefixToUri(context, #a2->getText()));
	##->setText(uri+":"+#b2->getText());
  }
  | a3:NCNAME COLON DIV
  {
    // Lookup the URI for the prefix.
	std::string uri(mapPrefixToUri(context, #a3->getText()));
   	##->setText(uri+":div");
  }
  | a4:NCNAME COLON MOD
  {
    // Lookup the URI for the prefix.
	std::string uri(mapPrefixToUri(context, #a4->getText()));
   	##->setText(uri+":mod");
  }
  ;
