/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1998,2006 Oracle.  All rights reserved.
 *
 * $Id: perf_work.c,v 1.6 2006/10/30 17:46:17 bostic Exp $
 */

#include "perf_work.h"
#include "perf_os.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Defined outside this file; called below with the addresses of a
 * timer's secs and usecs fields.
 */
void __os_clock(void *, u_int32_t *, u_int32_t *);
/* Helpers private to this file; their definitions appear further down. */
static int run_iterations(struct perf_workload *load, int id);
static void * run_thread(void *arg);
static struct perf_timer **allocate_timers(int nthreads, int iterations);
static void deallocate_timers(struct perf_timer **timers);

/*
 * perf_init_globals --
 *	Zero-fill *globals and record the parameters for a run.
 *
 * globals:    structure to initialize.
 * iterations: workload calls per thread; values below 1 become 1.
 * nthreads:   number of worker threads; values below 1 become 1.
 * timeops:    when non-zero, allocate a timer for every thread/iteration
 *             pair.
 *
 * Returns 0 on success, -1 if the timer table could not be allocated.
 */
int perf_init_globals(struct perf_globals *globals,
		      int iterations, int nthreads,
		      int timeops)
{
  memset(globals, 0, sizeof(*globals));

  /*
   * A negative count would be converted to an enormous unsigned size
   * when allocate_timers computes its allocation, so clamp negatives
   * the same way zero is clamped.
   */
  if (iterations < 1)
    iterations = 1;
  if (nthreads < 1)
    nthreads = 1;
  globals->g_iterations = iterations;
  globals->g_nthreads = nthreads;

  if (timeops) {
    globals->g_timers = allocate_timers(nthreads, iterations);
    if (globals->g_timers == NULL)
      return -1;
  }
  return 0;
}

/*
 * perf_uninit_globals --
 *	Release the timer table attached to *globals, if there is one.
 *
 * The pointer is cleared afterwards so that calling this twice on the
 * same structure does not free the table a second time.
 */
void perf_uninit_globals(struct perf_globals *globals)
{
  if (globals->g_timers) {
    deallocate_timers(globals->g_timers);
    globals->g_timers = NULL;
  }
}

/*
 * perf_run_workload --
 *	Run one workload from setup through cleanup.
 *
 *   1.  Call the workload's setup function.
 *   2.  If setup returned 0, start the total timer and run the work
 *       function g_iterations times: in each of g_nthreads threads
 *       when more than one thread is configured, otherwise directly
 *       in the calling thread.
 *   3.  Stop the total timer and call the cleanup function.
 *
 * globals may be NULL; the work function is then run for a single
 * iteration in the calling thread and no total time is recorded.
 *
 * Returns the sum of the setup, work and cleanup status codes, which is
 * 0 when every stage reported 0.  Work and cleanup are skipped (and
 * contribute 0) when setup fails.
 */
int perf_run_workload(struct perf_workload *load,
		      struct perf_globals *globals)
{
  int ret_setup, ret_work, ret_cleanup;

  ret_work = ret_cleanup = 0;
  ret_setup = load->w_setup(globals);
  if (ret_setup == 0) {
    /* The total timer is a member of globals, so it needs a real object. */
    if (globals)
      perf_start_timer(&globals->g_total_timer);

    /* spawn threads */
    load->w_globals = globals;
    if (globals && (globals->g_nthreads > 1)) {
      void *thds = run_threads(load, run_thread, globals->g_nthreads);
      if (thds)
	ret_work = wait_threads(thds);
      else
	ret_work = -1;
    } else {
      ret_work = run_iterations(load, 0);
    }

    if (globals)
      perf_end_timer(&globals->g_total_timer);
    ret_cleanup = load->w_cleanup(globals);
  }
  return (ret_setup + ret_work + ret_cleanup);
}

/*
 * run_thread --
 *	Thread start routine handed to run_threads.
 *
 * arg points to a struct perf_thread_arg that names the workload and
 * this thread's id.  The integer status from run_iterations is returned
 * packed into the void * result.
 */
static void * run_thread(void * arg)
{
  struct perf_thread_arg *pta = (struct perf_thread_arg *)arg;
  struct perf_workload *load = (struct perf_workload *)pta->pt_load;

  /*
   * Convert through intptr_t: casting an int straight to a pointer of
   * a different width draws a compiler diagnostic.
   */
  return (void *)(intptr_t)run_iterations(load, pta->pt_id);
}

/*
 * run_iterations --
 *	Call the workload's work function once per iteration for one thread.
 *
 * load: workload to run; load->w_globals supplies the iteration count
 *       and the optional per-iteration timers.  With a NULL w_globals
 *       a single untimed iteration is run.
 * id:   thread index; passed to the work function and used to pick this
 *       thread's row of timers.
 *
 * The loop stops at the first non-zero status from the work function.
 * Returns that status, or 0 if every iteration returned 0.
 */
static int run_iterations(struct perf_workload *load, int id)
{
  struct perf_globals *globals = load->w_globals;
  int i, iterations, ret_work = 0;

  iterations = (globals ? globals->g_iterations : 1);
  for (i = 0; i < iterations; i++) {
    if (globals && globals->g_timers)
      perf_start_timer(&(globals->g_timers[id][i]));

    /* The work function is handed a pointer to the loop counter. */
    ret_work = load->w_work(globals, id, &i);

    /* A failed iteration ends the run; its end time is not recorded. */
    if (ret_work != 0)
      break;
    if (globals && globals->g_timers)
      perf_end_timer(&(globals->g_timers[id][i]));
  }

  /* Propagate the failure rather than unconditionally reporting success. */
  return ret_work;
}

/* timers -- unit is usecs unless specified */

/*
 * perf_start_timer --
 *	Have __os_clock fill in the secs and usecs of the timer's
 *	pt_start member.
 *
 * Always returns 0.
 */
int perf_start_timer(struct perf_timer *timer)
{
  __os_clock(NULL, &(timer->pt_start.secs), &(timer->pt_start.usecs));

  return 0;
}

/*
 * perf_end_timer --
 *	Have __os_clock fill in the secs and usecs of the timer's
 *	pt_end member.
 *
 * Always returns 0.
 */
int perf_end_timer(struct perf_timer *timer)
{
  __os_clock(NULL, &(timer->pt_end.secs), &(timer->pt_end.usecs));

  return 0;
}

#define USEC_MULT 1000000
#include <stdio.h>

/*
 * perf_timer_duration --
 *	Elapsed time between a timer's start and end points.
 *
 * Returns pt_end minus pt_start, expressed in microseconds.
 */
long perf_timer_duration(struct perf_timer *timer)
{
  /*
   * Widen to long before subtracting.  The secs/usecs fields are handed
   * to __os_clock as u_int32_t pointers, so subtracting them directly is
   * unsigned arithmetic: an end usecs smaller than the start usecs wraps
   * to a value near 2^32 (which a 64-bit long then adds as a positive
   * number), and the seconds product wraps after roughly 71 minutes.
   */
  long dsecs = (long)timer->pt_end.secs - (long)timer->pt_start.secs;
  long dusecs = (long)timer->pt_end.usecs - (long)timer->pt_start.usecs;

  return dsecs * USEC_MULT + dusecs;
}

/*
 * allocate_timers --
 *	Build a NULL-terminated table holding one row of timers per thread.
 *
 * nthreads:   number of rows in the table.
 * iterations: timers wanted per row; each row is allocated with
 *             iterations + 1 entries.
 *
 * Every timer starts out zero-filled, so a timer that is never started
 * or stopped does not expose uninitialized memory to readers.
 *
 * Returns the table, or NULL if a count is negative or any allocation
 * fails; on failure nothing is left allocated.  Release the table with
 * deallocate_timers.
 */
struct perf_timer **allocate_timers(int nthreads, int iterations)
{
  struct perf_timer **timers;
  size_t row_bytes;
  int i;

  /* A negative count would become an enormous unsigned size below. */
  if (nthreads < 0 || iterations < 0)
    return NULL;

  timers = (struct perf_timer **)
    perf_malloc(((size_t)nthreads + 1) * sizeof(struct perf_timer *));
  if (timers == NULL)
    return NULL;

  row_bytes = ((size_t)iterations + 1) * sizeof(struct perf_timer);
  for (i = 0; i < nthreads; i++) {
    timers[i] = (struct perf_timer *)perf_malloc(row_bytes);
    if (timers[i] == NULL) {
      /* Give back the rows already obtained, not just the table. */
      while (i-- > 0)
	perf_free(timers[i]);
      perf_free(timers);
      return NULL;
    }
    memset(timers[i], 0, row_bytes);
  }

  /* The terminating NULL is what deallocate_timers stops on. */
  timers[nthreads] = NULL;
  return timers;
}

/*
 * deallocate_timers --
 *	Free each row of a NULL-terminated timer table, then the table
 *	itself.
 */
void deallocate_timers(struct perf_timer **timers)
{
  int row;

  /* Rows are walked in order until the terminating null entry. */
  for (row = 0; timers[row] != 0; row++)
    perf_free(timers[row]);

  perf_free(timers);
}

/*
 * perf_dump_timers --
 *	Print timing statistics for a finished run to stdout.
 *
 * When per-operation timers were allocated, prints the minimum, maximum
 * and average duration (in usecs) over every thread and iteration.  The
 * total run duration and the resulting operations per second are
 * printed in either case.
 */
void perf_dump_timers(struct perf_globals *globals)
{
  long total = 0;
  long min = 0;
  long max = 0;
  int niters = globals->g_iterations;
  int nthreads = globals->g_nthreads;
  int nruns = niters * nthreads;
  int seen = 0;
  int i, j;
  long thistime;

  /* g_timers stays NULL unless per-operation timing was requested. */
  if (globals->g_timers) {
    for (i = 0; i < nthreads; i++) {
      for (j = 0; j < niters; j++) {
	thistime = perf_timer_duration(&globals->g_timers[i][j]);
	total += thistime;
	/*
	 * Seed min and max from the first sample.  Using 0 as the
	 * "not set yet" marker would discard a genuine 0 usec reading.
	 */
	if (!seen || thistime < min)
	  min = thistime;
	if (!seen || thistime > max)
	  max = thistime;
	seen = 1;
      }
    }
    printf("%d threads, %d iterations each (total %d iters)\n\t usec min %ld, max %ld, avg %g\n",
	   nthreads, niters,
	   nruns, min, max, (double)total/(double)nruns);
  }

  thistime = perf_timer_duration(&globals->g_total_timer);
  /* Scale operations per usec up to operations per second. */
  printf("Total duration in usecs: %ld, ops/sec %g\n",
	 thistime,
	 (double)nruns/(double)thistime * 1000000);
}
