//
// See the file LICENSE for redistribution information.
//
// Copyright (c) 2002,2006 Oracle.  All rights reserved.
//
// $Id: Benchmark.cpp,v 1.16 2006/10/30 17:46:10 bostic Exp $
//
#include <dbxml/DbXml.hpp>
#include "Timer.hpp"

#include <string>
#include <vector>
#include <map>
#include <sstream>
#include <fstream>
#include <stdio.h>
#include <time.h>

#if defined(WIN32) && !defined(__CYGWIN__)
#define snprintf _snprintf
#endif

#ifdef HAVE_GETOPT
#include <unistd.h>
#else
extern "C" int getopt(int argc, char * const argv[], const char *optstring);
extern "C" char *optarg;
extern "C" int optind;
#endif

using namespace DbXml;
using namespace std;

// Null-terminated lists of indexing scheme names. main() points iTypes at
// one of these (chosen by the -i option, defaulting to ALL_INDEX_TYPE) and
// walks it until it reaches the terminating 0 entry.
static char *ALL_INDEX_TYPE[] = { "indexed", "notindexed", 0 };
static char *INDEXED_INDEX_TYPE[] = { "indexed", 0 };
static char *NOTINDEXED_INDEX_TYPE[] = { "notindexed", 0 };

// Convenience wrapper: prepares the XQuery text in `query` against a fresh
// query context created from `mgr`, executes it with `val` as the context
// item, and hands back the results.
XmlResults query(XmlManager &mgr, const XmlValue &val, string query)
{
	XmlQueryContext context = mgr.createQueryContext();
	return mgr.prepare(query, context).execute(val, context);
}

// Captures the current local time and renders it in two textual forms.
//
// dateTime - output parameter; receives "YYYY-MM-DDTHH:MM:SSZ".
// Returns the same instant as a compact "YYYYMMDDHHMMSS" string (used by
// StatsReporter to build the statistics file name).
//
// NOTE(review): the fields come from localtime()/localtime_r(), yet the long
// form ends in "Z", which normally designates UTC. The output format is kept
// unchanged here; confirm which of the two is actually intended.
string timestamp(string &dateTime)
{
	time_t tt;
	time(&tt);

	// Both platform branches fill the same local copy, so the formatting
	// code below never depends on which branch was compiled. If the
	// conversion fails the zero-initialised fields are formatted instead of
	// indeterminate memory.
	struct tm now = {};
#ifdef _MSC_VER
	if(const struct tm *shared = localtime(&tt)) {
		now = *shared;
	}
#else
	localtime_r(&tt, &now);
#endif

	char szDate[256];

	snprintf(szDate, sizeof(szDate), "%04d-%02d-%02dT%02d:%02d:%02dZ",
		now.tm_year+1900,
		now.tm_mon+1,
		now.tm_mday,
		now.tm_hour,
		now.tm_min,
		now.tm_sec);
	// Some snprintf variants do not terminate on truncation.
	szDate[sizeof(szDate) - 1] = '\0';
	dateTime = szDate;

	snprintf(szDate, sizeof(szDate), "%04d%02d%02d%02d%02d%02d",
		now.tm_year+1900,
		now.tm_mon+1,
		now.tm_mday,
		now.tm_hour,
		now.tm_min,
		now.tm_sec);
	szDate[sizeof(szDate) - 1] = '\0';
	return szDate;
}

// One benchmark measurement: the identifying labels of the run plus the
// counters and timer that are reset between measurements.
class Stats {
public:
	// Labels describing what was measured. They survive reset().
	string name;     // operation label (the benchmark sets "load" or "query")
	string data;     // data type name taken from the configuration
	string size;     // size name taken from the configuration
	string storage;  // storage method name
	int indexes;     // number of indexes in use, 0 when not indexed

	// Extra <key>value</key> pairs emitted verbatim by StatsReporter.
	map<string, string> info;

	int count;
	Timer timer;

	// The strings and the map start out empty on their own; only the
	// integers need explicit values before the shared reset logic runs.
	Stats() : indexes(0), count(0) {
		reset();
	}

	// Clears the per-measurement state (info, count, timer) while leaving
	// the identifying labels untouched.
	void reset() {
		info.clear();
		count = 0;
		timer.reset();
	}
};

// Writes benchmark results to an XML statistics file and, in verbose mode,
// mirrors every entry as a table row on stdout while accumulating totals
// that are printed when the reporter is destroyed.
class StatsReporter {
public:
	// Opens "<name><compact timestamp>.xml" and writes the opening
	// <statistics> element. Terminates the process with exit(-1) if the file
	// cannot be opened.
	//
	// name    - prefix of the statistics file name
	// verbose - when true, also print a table and totals to stdout
	//
	// The initialisers for fileName_ and file_ rely on timestamp_ being
	// declared (and therefore default-constructed) before them: timestamp()
	// fills timestamp_ with the long form and returns the compact form that
	// goes into the file name.
	StatsReporter(string name, bool verbose)
		: verbose_(verbose),
		  totalTime_(0),
		  documentTime_(0),
		  documentPlusTime_(0),
		  nodeTime_(0),
		  nodePlusTime_(0),
		  textCentricTime_(0),
		  dataCentricTime_(0),
		  multipleDocumentTime_(0),
		  singleDocumentTime_(0),
		  fileName_(name + timestamp(timestamp_) + ".xml"),
		  file_(fileName_.c_str())
	{
		buffer_[0] = '\0';

		if(!file_.is_open()) {
			// Report the name that was actually passed to the stream; it is
			// built from the compact timestamp, not from timestamp_.
			cerr << "Unable to open statistics file: " << fileName_ << endl;
			exit(-1);
		}

		file_ << "<statistics";
		file_ << " timestamp=\"" << timestamp_ << "\"";
		file_ << ">" << endl;

		if(verbose_) {
			cout << "*********************************************************************" << endl;
			cout << "Benchmark Statistics" << endl << endl;
			::snprintf(buffer_, sizeof(buffer_), " %5s | %5s | %9s | %8s | %7s | %10s | %5s ",
				"Name", "Data", "Size", "Storage", "Indexes", "Time/s", "Count");
			// _snprintf (used on WIN32) does not terminate on truncation.
			buffer_[sizeof(buffer_) - 1] = '\0';
			cout << buffer_ << endl;
			cout << "-------+-------+-----------+----------+---------+------------+-------" << endl;
		}
	}

	// Closes the <statistics> element and the file, then prints the
	// accumulated totals when verbose.
	~StatsReporter() {
		file_ << "</statistics>" << endl;
		file_.close();

		if(verbose_) {
			cout << endl;
			cout << "Document Storage Total time/s: " << documentTime_ << endl;
			cout << "DocumentPlus Storage Total time/s: " << documentPlusTime_ << endl;
			cout << "Node Storage Total time/s: " << nodeTime_ << endl;
			cout << "NodePlus Storage Total time/s: " << nodePlusTime_ << endl;
			cout << endl;
			cout << "Text Centric Total time/s: " << textCentricTime_ << endl;
			cout << "Data Centric Total time/s: " << dataCentricTime_ << endl;
			cout << endl;
			cout << "Multiple Document Total time/s: " << multipleDocumentTime_ << endl;
			cout << "Single Document Total time/s: " << singleDocumentTime_ << endl;
			cout << endl;
			cout << "Total time/s: " << totalTime_ << endl;
			cout << "*********************************************************************" << endl;
		}
	}

	// Appends one <entry> element describing `stats` to the file. In verbose
	// mode it also prints a table row and adds the elapsed time to the
	// per-storage, text/data-centric and multiple/single-document totals.
	//
	// NOTE(review): names and info values are written without XML escaping;
	// callers are expected to supply text that is already safe (the query
	// benchmark wraps its action in CDATA) -- confirm for other values.
	void reportStats(const Stats &stats) {
		// Read the timer once so the file, the table and the totals all
		// use the same value.
		const double seconds = stats.timer.durationInSeconds();

		file_ << "  <entry";
		file_ << " name=\"" << stats.name << "\"";
		file_ << ">" << endl;

		file_ << "    <data_type>" << stats.data << "</data_type>" << endl;
		file_ << "    <size>" << stats.size << "</size>" << endl;
		file_ << "    <storage>" << stats.storage << "</storage>" << endl;
		file_ << "    <indexes>" << stats.indexes << "</indexes>" << endl;

		for(map<string, string>::const_iterator i = stats.info.begin();
		    i != stats.info.end(); ++i) {
			file_ << "    <" << i->first << ">" << i->second << "</" << i->first << ">" << endl;
		}

		file_ << "    <count>" << stats.count << "</count>" << endl;
		file_ << "    <time>" << seconds << "</time>" << endl;

		file_ << "  </entry>" << endl;

		if(!verbose_) {
			return;
		}

		::snprintf(buffer_, sizeof(buffer_), " %5s | %5s | %9s | %8s | %7i | %10f | %5i",
			stats.name.c_str(),
			stats.data.c_str(),
			stats.size.c_str(),
			stats.storage.c_str(),
			stats.indexes,
			seconds,
			stats.count);
		// The %Ns widths are minimums, so long labels can truncate the row;
		// make sure the buffer is terminated even with _snprintf.
		buffer_[sizeof(buffer_) - 1] = '\0';

		cout << buffer_ << endl;

		totalTime_ += seconds;

		// Any storage name other than the three listed counts as "node".
		if(stats.storage == "document") {
			documentTime_ += seconds;
		}
		else if(stats.storage == "documentplus") {
			documentPlusTime_ += seconds;
		}
		else if(stats.storage == "nodeplus") {
			nodePlusTime_ += seconds;
		}
		else {
			nodeTime_ += seconds;
		}

		// Data type names containing "TC" are text centric, the rest data
		// centric; names containing "MD" are multiple-document, the rest
		// single-document.
		if(stats.data.find("TC") != string::npos) {
			textCentricTime_ += seconds;
		}
		else {
			dataCentricTime_ += seconds;
		}
		if(stats.data.find("MD") != string::npos) {
			multipleDocumentTime_ += seconds;
		}
		else {
			singleDocumentTime_ += seconds;
		}
	}

private:
	bool verbose_;
	char buffer_[255];   // scratch space for formatted table rows
	double totalTime_;
	double documentTime_;
	double documentPlusTime_;
	double nodeTime_;
	double nodePlusTime_;
	double textCentricTime_;
	double dataCentricTime_;
	double multipleDocumentTime_;
	double singleDocumentTime_;

	// Declaration order matters: timestamp_ must precede fileName_, which
	// must precede file_ (see the constructor).
	string timestamp_;   // long-form timestamp written into <statistics>
	string fileName_;    // name of the statistics file actually opened
	ofstream file_;
};

// Builds the container file name for a benchmark configuration:
// "benchmark_<data>_<size>_<storage>_<indexed|notindexed>.dbxml".
string getContainerName(const Stats &stats) {
	const char *indexTag = "notindexed";
	if(stats.indexes > 0) {
		indexTag = "indexed";
	}

	return "benchmark_" + stats.data
		+ "_" + stats.size
		+ "_" + stats.storage
		+ "_" + indexTag
		+ ".dbxml";
}

// Creates the container for the configuration described by `stats`, adds
// the configured indexes when stats.indexes > 0, loads every data file
// listed in `config` for the current data type and size, and reports the
// accumulated putDocument time under the name "load".
//
// mgr      - manager used to create the container and run config queries
// dataPath - directory prefix (with trailing separator) of the data files
// config   - benchmark configuration document
// stats    - in/out: data, size, storage and indexes are read; count, timer
//            and name are written
// reporter - receives the finished measurement
void loadBenchmarkData(XmlManager &mgr, const string &dataPath, const XmlDocument &config, Stats &stats, StatsReporter &reporter)
{
	const string containerName = getContainerName(stats);

	// Translate the storage name into a container type plus flags. The
	// "...plus" variants additionally index nodes.
	const bool nodeStorage = (stats.storage == "node" || stats.storage == "nodeplus");
	const bool indexNodes = (stats.storage == "nodeplus" || stats.storage == "documentplus");

	XmlContainer::ContainerType cont_type = XmlContainer::WholedocContainer;
	if(nodeStorage) {
		cont_type = XmlContainer::NodeContainer;
	}
	u_int32_t flags = 0;
	if(indexNodes) {
		flags = DBXML_INDEX_NODES;
	}

	// Create the container
	XmlContainer cont = mgr.createContainer(containerName, flags, cont_type);
	XmlUpdateContext uc = mgr.createUpdateContext();

	if(stats.indexes > 0) {
		// The query returns a flat sequence of (uri, name, index) triples,
		// one triple per <index> element of the current data type.
		const string indexQuery = "for $a in /benchmark_data/data_type[@name = '" + stats.data
			+ "']/index return (data($a/@uri), data($a/@name), data($a/@index))";
		XmlResults indexSpecs = query(mgr, config, indexQuery);

		// NOTE(review): the results of the second and third next() calls
		// are not checked, so an incomplete triple reuses the prior value.
		for(XmlValue field; indexSpecs.next(field); ) {
			string uri = field.asString();
			indexSpecs.next(field);
			string name = field.asString();
			indexSpecs.next(field);
			string index = field.asString();

			cont.addIndex(uri, name, index, uc);
		}
	}

	// Find the files
	const string fileQuery = "for $a in /benchmark_data/data_type[@name = '" + stats.data
		+ "']/size[@name = '" + stats.size + "']/file/@name return data($a)";
	XmlResults files = query(mgr, config, fileQuery);
	stats.count = files.size();

	for(XmlValue file; files.next(file); ) {
		string fullpath = dataPath + stats.data + "/" + stats.size + "/" + file.asString();

		// Only the putDocument call itself is timed.
		stats.timer.start();
		cont.putDocument(file.asString(), mgr.createLocalFileInputStream(fullpath), uc);
		stats.timer.stop();
	}

	stats.name = "load";
	reporter.reportStats(stats);
}

// Runs every query configured for the current data type against the
// container described by `stats` and reports one "query" entry per query.
// Does nothing when stats.indexes is 0. Terminates the process with
// exit(-1) if a query raises an XmlException.
//
// mgr      - manager used to open the container and run the queries
// config   - benchmark configuration document
// stats    - in/out: data, size, storage and indexes are read; info, count,
//            timer and name are rewritten for each query
// reporter - receives one measurement per query
void queryBenchmarkData(XmlManager &mgr, const XmlDocument &config, Stats &stats, StatsReporter &reporter)
{
	// Don't run query benchmarks without indexes
	if(stats.indexes == 0) {
		return;
	}

	const string containerName = getContainerName(stats);

	// Open the container
	XmlContainer cont = mgr.openContainer(containerName);

	// Find the queries: a flat sequence of (description, action, position)
	// triples, one per <query> element of the current data type.
	const string queryListing = "for $a at $pos in /benchmark_data/data_type[@name = '" + stats.data
		+ "']/query return (data($a/description), data($a/action), $pos)";
	XmlResults queries = query(mgr, config, queryListing);

	XmlQueryContext qc = mgr.createQueryContext();
	qc.setEvaluationType(XmlQueryContext::Lazy);
	qc.setDefaultCollection(containerName);

	static const char *const legacyCall = "input()";
	const string::size_type legacyLength = 7;

	for(XmlValue field; queries.next(field); ) {
		stats.reset();

		stats.info["description"] = field.asString();
		queries.next(field);
		string action = field.asString();
		queries.next(field);
		stats.info["query_index"] = field.asString();

		// Replace "input()" with the correct "collection()" function call
		for(string::size_type pos = action.find(legacyCall);
		    pos != string::npos;
		    pos = action.find(legacyCall)) {
			action.replace(pos, legacyLength, "collection()");
		}
		stats.info["action"] = "<![CDATA[" + action + "]]>";

		try {
			stats.timer.start();
			{
				// The inner scope keeps the destruction of the expression
				// and its results inside the timed region.
				XmlQueryExpression expr = mgr.prepare(action, qc);
				XmlResults results = expr.execute(qc, DBXML_LAZY_DOCS);
				// Drain the lazily evaluated results.
				for(XmlValue item; results.next(item); ) {
				}
			}
			stats.timer.stop();
			++stats.count;
		}
		catch(XmlException &e) {
			cerr << "Exception while executing query:" << action << endl;
			cerr << "Error message: " << e.what() << endl;
			exit(-1);
		}

		stats.name = "query";
		reporter.reportStats(stats);
	}
}

// Prints the command line help to stderr and terminates the process with
// exit status -1. Never returns.
void usage()
{
	static const char *const helpLines[] = {
		"dbxml_benchmark [OPTIONS] path_to_dbenv path_to_benchmark_configuration",
		"Runs the benchmark tests.",
		"Options:",
		"\t-m - run in the specified mode (default: all)",
		"\t-s - use the specified storage method (default: all)",
		"\t-i - use the specified indexing scheme (default: all)",
		"\t-t - use a transactional environment",
		"\t-v - verbose output",
		"Mode:",
		"\tload - load the benchmark data into containers",
		"\tquery - query the existing benchmark containers",
		"\tall - load the benchmark data into containers, then query them",
		"Storage Method:",
		"\tdocument - document level storage",
		"\tdocumentplus - document level storage with indexed nodes",
		"\tnode - node level storage",
		"\tnodeplus - node level storage with indexed nodes",
		"\tall - both storage models",
		"Indexing Scheme:",
		"\tindexed - with indexes",
		"\tnotindexed - without indexes",
		"\tall - both indexing schemes"
	};
	const size_t lineCount = sizeof(helpLines) / sizeof(helpLines[0]);

	for(size_t line = 0; line < lineCount; ++line) {
		cerr << helpLines[line] << endl;
	}
	exit(-1);
}

int main(int argc, char **argv)
{
	string mode = "all";
	vector<string> sTypes;
	char **iTypes = ALL_INDEX_TYPE;
	string path2DbEnv;
	string configPath;
	string dataPath;
	bool transactional = false;
	bool verbose = false;

	int ch;
	while((ch = getopt(argc, argv, "?hvm:s:i:")) != -1) {
		switch (ch) {
		case 'v': {
			if(verbose) {
				setLogLevel(LEVEL_ALL, true);
				setLogCategory(CATEGORY_ALL, true);
			}
			else verbose = true;
			break;
		}
		case 't': {
			transactional = true;
			break;
		}
		case 's': {
			string arg = optarg;
			if(arg == "document") {
				sTypes.push_back("document");
			}
			else if(arg == "documentplus") {
				sTypes.push_back("documentplus");
			}
			else if(arg == "node") {
				sTypes.push_back("node");
			}
			else if(arg == "nodeplus") {
				sTypes.push_back("nodeplus");
			}
			else if(arg == "all") {
				sTypes.push_back("document");
				sTypes.push_back("documentplus");
				sTypes.push_back("node");
				sTypes.push_back("nodeplus");
			}
			else {
				cerr << "Unknown storage type specified: " << arg << endl;
				usage();
			}
			break;
		}
		case 'i': {
			string arg = optarg;
			if(arg == "indexed") {
				iTypes = INDEXED_INDEX_TYPE;
			}
			else if(arg == "notindexed") {
				iTypes = NOTINDEXED_INDEX_TYPE;
			}
			else if(arg == "all") {
				iTypes = ALL_INDEX_TYPE;
			}
			else {
				cerr << "Unknown index scheme specified: " << arg << endl;
				usage();
			}
			break;
		}
		case 'm': {
			mode = optarg;
			break;
		}
		case 'h':
		case '?': {
			usage();
			break;
		}
		default: {
			cerr << "Unknown option: " << ch << endl;
			usage();
			break;
		}
		}
	}

	argc -= optind;
	argv += optind;
	for (int i = 0; i < argc; i++) {
		if(path2DbEnv.empty()) {
			path2DbEnv = argv[i];
		}
		else if(configPath.empty()) {
			configPath = argv[i];

			string::size_type pos = configPath.rfind("/");
			if(pos != string::npos) {
				dataPath = configPath.substr(0, pos + 1);
			}
		}
		else {
			cerr << "Too many parameters" << endl;
			usage();
		}
	}

	if(path2DbEnv.empty()) {
		cerr << "Path to database environment not specified" << endl;
		usage();
	}

	if(configPath.empty()) {
		cerr << "Path to the benchmark configuration not specified"<< endl;
		usage();
	}

	if(mode != "load" && mode != "query" && mode != "all") {
		cerr << "Unknown mode specified: " << mode << endl;
		usage();
	}

	if(sTypes.empty()) {
		sTypes.push_back("document");
		sTypes.push_back("documentplus");
		sTypes.push_back("node");
		sTypes.push_back("nodeplus");
	}

	try {

		// Open the DbEnv
		DbEnv *dbenv = new DbEnv(0);
		dbenv->set_error_stream(&cerr);
		dbenv->set_cachesize(0, 64 * 1024 * 1024, 1);
		if(mode == "load" || mode == "all") {
			u_int32_t envFlags = DB_CREATE|DB_INIT_MPOOL;
			if(transactional) {
				envFlags = envFlags|DB_INIT_TXN|DB_INIT_LOCK|DB_INIT_LOG;
			}
			dbenv->open(path2DbEnv.c_str(), envFlags, 0);
		}
		else {
			dbenv->open(path2DbEnv.c_str(), DB_JOINENV, 0);
			u_int32_t eflags = 0;
			dbenv->get_open_flags(&eflags);
			transactional = (eflags & DB_INIT_TXN) != 0;
		}

		// Create the XmlManager
		XmlManager mgr(dbenv, DBXML_ADOPT_DBENV);
		if(transactional) {
			mgr.setDefaultContainerFlags(DBXML_TRANSACTIONAL);
		}

		XmlDocument config = mgr.createDocument();
		config.setContentAsXmlInputStream(mgr.createLocalFileInputStream(configPath));

		XmlResults data_types = query(mgr, config, "distinct-values(/benchmark_data/data_type/@name)");
		XmlResults size_types = query(mgr, config, "distinct-values(/benchmark_data/data_type/size/@name)");

		StatsReporter reporter("statistics", verbose);

		Stats stats;
		XmlValue data;
		while(data_types.next(data)) {
			stats.data = data.asString();

			size_types.reset();
			XmlValue size;
			while(size_types.next(size)) {
				stats.size = size.asString();

				for(vector<string>::iterator i = sTypes.begin(); i != sTypes.end(); ++i) {
					stats.storage = *i;

					for(char **index = iTypes; *index != 0; ++index) {

						if(string(*index) == "indexed") {
							// Find the indexes
							XmlResults indexes = query(mgr, config, "/benchmark_data/data_type[@name = '" + stats.data
								+ "']/index");
							stats.indexes = indexes.size();
						}
						else stats.indexes = 0;

					
						if(mode == "load" || mode == "all") {
							stats.reset();
							loadBenchmarkData(mgr, dataPath, config, stats, reporter);
						}

						if(mode == "query" || mode == "all") {
							stats.reset();
							queryBenchmarkData(mgr, config, stats, reporter);
						}
					}
				}
			}
		}
	}
	catch(DbException &e) {
		cerr << "DbException caught: "
		     << e.what() << endl;
		exit(-1);
	}
	catch(XmlException &e) {
		cerr << "XmlException caught: "
		     << e.what() << endl;
		exit(-1);
	}

	return 0;
}

