//
// See the file LICENSE for redistribution information.
//
// Copyright (c) 2002-2003
//	Sleepycat Software.  All rights reserved.
//
// $Id: Indexer.hpp,v 1.42 2003/10/13 16:39:45 merrells Exp $
//

#ifndef __INDEXER_HPP
#define	__INDEXER_HPP

#include <string>
#include <vector>

#if defined(DBXML_DOM_XERCES2)
#include <xercesc/sax2/DefaultHandler.hpp>
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/util/XMLUTF8Transcoder.hpp>
#if defined(XERCES_HAS_CPP_NAMESPACE)
XERCES_CPP_NAMESPACE_USE
#endif
#endif

#include "ID.hpp"
#include "Statistics.hpp"
#include "IndexSpecification.hpp"
#include "Key.hpp"

// Forward declaration (global namespace); this header only uses DbTxn through
// pointers, so the full definition is not needed here.
class DbTxn;

namespace DbXml
{

// Forward declarations of DbXml types. Container and Document are only used
// through pointers or references in this header; Buffer is not referenced by
// any declaration visible in this header.
class Container;
class Buffer;
class Document;

// Growable array of XMLByte; passed by reference as the buffer argument of
// the transcode methods declared below.
typedef std::vector<XMLByte> XMLByteVector;

// Helper that transcodes XMLCh strings into char data, using a caller
// supplied XMLByteVector as the working buffer.
//
// NOTE(review): this class holds a raw XMLTranscoder pointer and declares a
// destructor, but (unlike IndexerState, IndexerStateStack and Indexer) does
// not disable copying. If the destructor releases transcoder_, copying a
// Transcoder would be unsafe - confirm against the implementation.
class Transcoder
{
public:
	Transcoder();
	~Transcoder();
	// Transcodes the string s into the buffer b.
	// Presumably s is null-terminated and the returned pointer refers to the
	// contents of b - confirm against the implementation.
	const char *transcode(XMLByteVector &b, const XMLCh* const s) const;
	// Transcodes a name given as a namespace uri and a qname into the buffer b.
	// How uri and qname are combined is defined by the implementation.
	const char *transcodeName(XMLByteVector &b, const XMLCh* const uri, const XMLCh* const qname) const;
	// Transcodes sourceLength units of s into the buffer b; targetLength is an
	// output parameter (non-const reference), presumably receiving the length
	// of the transcoded result - confirm.
	const char *transcode(XMLByteVector &b, const XMLCh* const s, size_t sourceLength, size_t &targetLength) const;
private:
	// The underlying Xerces transcoder. Presumably a UTF-8 transcoder (the
	// file includes XMLUTF8Transcoder.hpp) created in the constructor - confirm.
	XMLTranscoder* transcoder_;
};

// Per-node state kept while a document is being indexed: the node's name and
// value buffers, the Key under construction, and the IndexVector that
// applies to the node.
//
// Instances are non-copyable (copy constructor and assignment are private).
class IndexerState
{
public:
	IndexerState();
	~IndexerState();
	void initialize();
	// Begins a node identified by an already-resolved Name.
	void startNode(Container &container, const IndexSpecification &indexSpecification, DbTxn *txn, const Name &name);
	// Begins a node identified by a namespace uri and local name given as
	// XMLCh strings; the transcoder is supplied for converting them.
	void startNode(const IndexSpecification &indexSpecification, const Transcoder &transcoder, const XMLCh* const uri, const XMLCh* const localname);
	// Supplies character data for the current node as l chars starting at s.
	void characters(const char *s, size_t l);
	// Supplies character data as length XMLCh units, to be transcoded.
	void characters(const Transcoder &transcoder, const XMLCh* const chars, const unsigned int length);
	// Supplies character data as an XMLCh string without an explicit length
	// (presumably null-terminated - confirm).
	void characters(const Transcoder &transcoder, const XMLCh* const chars);
	bool isIndexed()const;
	Key &getKey(Container &container, DbTxn *txn);
	void reset();
	// Returns the index vector for this node.
	// NOTE(review): dereferences iv_ without a null check, so iv_ must be
	// non-null when this is called; presumably callers check isIndexed()
	// first - confirm.
	const IndexVector &iv() const
	{
		return *iv_;
	}

private:
	// Declared private to disable copying.
	IndexerState(const IndexerState&);
	void operator=(const IndexerState&);

	Key *key_;
	const IndexVector *iv_;
	XMLByteVector *name_;
	XMLByteVector *value_;
};

// Stack of IndexerState objects, backed by a vector of IndexerState pointers
// plus a separate top index.
//
// Instances are non-copyable (copy constructor and assignment are private).
// NOTE(review): keeping top_ separately from v_.size() suggests that popped
// states are retained in v_ and reused by later push() calls - confirm
// against the implementation.
class IndexerStateStack
{
public:
	IndexerStateStack();
	~IndexerStateStack();

	// Makes a new state the top of the stack and returns it.
	IndexerState *push();
	// Returns the state currently on top of the stack.
	// Behaviour on an empty stack is defined by the implementation.
	IndexerState *top();
	void pop();
	// NOTE(review): not declared const, so it cannot be called through a
	// const IndexerStateStack.
	bool empty();

private:
	// Declared private to disable copying.
	IndexerStateStack(const IndexerStateStack&);
	void operator=(const IndexerStateStack &);

	std::vector<IndexerState*> v_;
	size_t top_;
};

/**
 * SAX2 content/error handler that generates index keys for a document.
 *
 * Usage, as described by the member documentation below: call
 * indexDocument() to generate the keys for a document, then call
 * addOrDeleteKeys() to write (or remove) those keys in the index databases.
 *
 * An Indexer is not thread safe; one Indexer is needed per thread.
 * Instances are non-copyable.
 */
class Indexer : public DefaultHandler
{
public:
	/**
	 * \param container The Container that this Indexer indexes for.
	 *
	 * Declared explicit so that a Container pointer can never be silently
	 * converted into an Indexer.
	 */
	explicit Indexer(Container *container);
	~Indexer();

	/**
	 * Generates a set of index keys for the document based on the
	 * indexing specification.
	 *
	 * This method is not thread safe. One indexer is needed per thread.
	 *
	 * After the document is indexed the addOrDeleteKeys method must be
	 * called to write the keys to the index databases.
	 *
	 * \param txn The transaction within which the document is indexed.
	 * \param indexSpecification The index specification to index for.
	 * \param document The document to be indexed.
	 * \return presumably an error code, as for addOrDeleteKeys - confirm
	 * against the implementation.
	 * \exception XmlException
	 */
	int indexDocument(DbTxn *txn, const IndexSpecification &indexSpecification, const Document &document); // throws XmlException
	/**
	 * Writes a set of index keys to the index databases.
	 *
	 * \param context The context within which to perform the update.
	 * \param add True means add the keys to the container.
	 * False means delete the keys from the container.
	 * \return error code.
	 * \exception XmlException
	 */
	int addOrDeleteKeys(OperationContext &context, bool add); // throws XmlException

	void reset();

private:
	// no need for copy and assignment
	Indexer(const Indexer&);
	Indexer &operator=(const Indexer &);

	// SAX2 content handler callbacks (see DefaultHandler).
	// NOTE(review): the 'unsigned int length' parameter of characters()
	// matches the Xerces-C 2.x handler signature; later Xerces-C releases
	// use XMLSize_t, in which case this declaration would no longer
	// override the base class method - confirm against the Xerces version
	// in use.
	void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
	void characters(const XMLCh* const chars, const unsigned int length);
	void endElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname);

	// SAX2 error handler callbacks (see DefaultHandler).
	void warning(const SAXParseException& exception);
	void error(const SAXParseException& exception);
	void fatalError(const SAXParseException& exception);

	// Transcoding helpers with the same parameters as the Transcoder
	// methods of the same names (presumably forwarding to transcoder_ -
	// confirm against the implementation).
	const char *transcode(XMLByteVector &b, const XMLCh* const s);
	const char *transcodeName(XMLByteVector &b, const XMLCh* const uri, const XMLCh* const qname);
	const char *transcode(XMLByteVector &b, const XMLCh* const s, size_t sourceLength, size_t &targetLength);
	// eis / ais: presumably the element and attribute indexer states - confirm.
	void addAttributeValueKey(IndexerState *eis, IndexerState *ais);
	void parse(const char *buffer, size_t length); // throws XmlException
	void generateKeys(const IndexVector &iv, Index::Type pnk, Index::Type mask, Key &key);

	// The SAX parser for scanning the document content.
	SAX2XMLReader* parser_;

	// The transaction within which the index keys are added to the container.
	DbTxn *txn_;

	// Pointer to the Container that this Indexer indexes for.
	Container *container_;

	// The index keys that are to be created.
	const IndexSpecification *indexSpecification_;

	// We check the index specification up front to see if there are any
	// attribute indexes. We can avoid some code if they're not needed.
	bool attributesIndexed_;

	// We check the index specification up front to see if there are any
	// element indexes. We can avoid some code if they're not needed.
	bool elementsIndexed_;

	// The document being indexed.
	const Document *document_;

	// Data structures maintained whilst parsing the XML document.
	IndexerStateStack stateStack_;

	// The index keys are buffered up in a 'keyStash'. Defined in Key.hpp
	KeyStash keyStash_;

	// Key Statistics maintained as we index.
	Statistics statistics_;

	// Cache the debug category lookup.
	bool tracing_;

	// We transcode from XMLCh to UTF8
	Transcoder transcoder_;
};

}

#endif

