#!d:/emx/bin/perl.exe
# a little program to generate a table of results
# just read all the RUN-*.log files and format them nicely
# Made by Luuk de Boer
# Patched by Monty

use Getopt::Long;

#
# Default values for the command line options.  GetOptions() stores every
# option in the global $opt_<name>, with '-' in the name replaced by '_'.
#
$opt_server="mysql";
$opt_cmp="mysql,pg,solid";
$opt_dir="output";
$opt_machine="";
# Boolean switches.  $opt_html is initialised here as well so that it has a
# defined value, like the other flags, when --html is not given.
$opt_relative=$opt_same_server=$opt_help=$opt_Information=$opt_skip_count=$opt_no_bars=$opt_html=0;
# Becomes 1 below when the RUN files are found by globbing instead of being
# named on the command line.
$automatic_files=0;

GetOptions("Information","help","server=s","cmp=s","machine=s","relative","same-server","dir=s","skip-count","no-bars","html") || usage();

usage() if ($opt_help || $opt_Information);

# Normalise the server list (sorted, lower case); it is used below as the
# "-cmp-..." part of the file name pattern.
$opt_cmp=lc(join(",",sort(split(',',$opt_cmp))));

# Build the glob pattern for the RUN files: with --same-server only the runs
# whose name starts with $opt_server, otherwise the runs of every server.
if ($opt_same_server)
{
  $files="$opt_dir/RUN-$opt_server*$opt_machine";
}
else
{
  $files="$opt_dir/RUN-*$opt_machine";
}
$files.= "-cmp-$opt_cmp" if (length($opt_cmp));

#
# Go through all RUN files and gather statistics.
#

if ($#ARGV == -1)
{
  # No files named on the command line: use everything the pattern matches.
  @ARGV=glob($files);
  $automatic_files=1;
}
else
{
  # Explicit file list: clear $opt_cmp so that the output routines do not
  # print a "limits from" line or strip a "-cmp-..." suffix.
  $opt_cmp="";
}

#
# Read every file in @ARGV and collect the numbers found in it.
#
#   $tot{file}{'version'}    the part of the file name after "RUN-"
#   $tot{file}{'server'}     text of the "Server version:" line
#   $tot{file}{'arguments'}  cleaned up text of the "Arguments:" line
#   $tot{file}{'comments'}   text of the "Comments:" line
#   $tot{file}{test}         [ seconds, "+" if the time was estimated ]
#   $tot1{file}{operation}   [ 2nd column, 6th column, trailing flag ] of the
#                            lines that follow "Totals per operation:"
#   %op1, %op                names of all tests / operations that were seen
#   @key_order               the files in the order they were read
#
# A named loop variable is used because 'while (<TMP>)' assigns to $_; with
# an implicit $_ loop variable that would overwrite the @ARGV elements.
#
foreach my $file (@ARGV)
{
  # In automatic mode with an empty --cmp list, skip the runs made with --cmp.
  next if (!$opt_cmp && $file =~ /-cmp-/ && $automatic_files);
  $prog=$filename = $file;
  # Fall back to the whole name if the file is not called RUN-something,
  # instead of using whatever $1 happens to contain.
  $tot{$prog}{'version'} = ($file =~ /RUN-(.*)$/) ? $1 : $file;
  push(@key_order,$prog);
  $next = 0;			# 0 = header part, 1 = after "Totals per operation:"
  open(TMP, "<", $filename) || die "Can't open $filename: $!\n";
  while (<TMP>)
  {
    chomp;
    if ($next == 0) {
      if (/Server version:\s+(\S+.*)/i)
      {
        $tot{$prog}{'server'} = $1;
      }
      elsif (/Arguments:\s+(.+)/i)
      {
        # "$1" passes a copy; the regexps in the sub would change $1 itself.
        $tot{$prog}{'arguments'}=clean_arguments("$1");
      }
      elsif (/Comments:\s+(.+)/i) {
        $tot{$prog}{'comments'} = $1;
      } elsif (/^(\S+):\s*(estimated\s|)total\stime:\s+(\d+)\s+secs/i)
      {
        # Copy the captures first: a successful s/// below would reset them.
        my ($test,$estimated,$secs)=($1,$2,$3);
        $test =~ s/://;
        $tot{$prog}{$test} = [ $secs, (length($estimated) ? "+" : "")];
        $op1{$test} = $test;
      } elsif (/Totals per operation:/i) {
        $next = 1;
        next;
      }
    }
    elsif ($next == 1)
    {
      if (/^(\S+)\s+(\d+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s*([+|?])*$/)
      {
        $tot1{$prog}{$1} = [$2,$6,$7];
        $op{$1} = $1;
      }
    }
  }
  close(TMP);
}

#
# Return the argument string of a run with the standard, not informative
# options removed, the --comment option (up to the next "--" or the end of
# the string) cut out, and every run of white space replaced by one space.
#
sub clean_arguments
{
  my ($arguments)=@_;
  my ($start,$end);

  # Remove some standard, not informative arguments
  $arguments =~ s/--log|--use-old\S*|--server=\S+|--cmp=\S+|--user=\S+|--pass=\S+|--machine=\S+|--dir=\S+//g;
  if (($start=index($arguments,"--comment")) >= 0)
  {
    # index(STRING, SUBSTRING, POSITION): look for the next option after the
    # leading "--" of --comment.
    if (($end=index($arguments,"--",$start+2)) >= 0)
    {
      substr($arguments,$start,($end-$start))="";
    }
    else
    {
      $arguments=substr($arguments,0,$start);
    }
  }
  $arguments =~ s/\s+/ /g;
  return $arguments;
}

# %op only gets entries from the lines after "Totals per operation:", so an
# empty %op means that no usable RUN file was read.
unless (%op)
{
  print "Didn't find any files matching: '$files'\n";
  print "Use the --cmp=server,server option to compare benchmarks\n";
  exit 1;
}


# Everything is loaded; what remains is to produce the report.

# Monty: the report is always written to STDOUT.  Redirect it from the shell
# rather than letting the script open a result file itself:
#
# open(RES, ">$resultfile") || die "Can't write to $resultfile: $!\n";
# select(RES)
#

# --html selects the HTML report, otherwise plain text is written.
$opt_html ? html_output() : ascii_output();
exit 0;

#
# some output + format functions;
#

#
# Print the complete comparison report as plain text on STDOUT.
#
# Takes no arguments.  Reads the statistics gathered in %tot, %tot1, %op and
# %op1 and the $opt_* option flags.  Sets the globals $bar, $namewidth,
# $colwidth and $column_count (and re-sorts @key_order when the files were
# found automatically); print_sep(), print_string() and print_value() read
# these when they lay out the table.
#
sub ascii_output {
  print <<EOF;
This is the result file of the different benchmark tests.

The number in () after each tests shows how many SQL commands the particular
test did.  As one test may have many different parameters this gives only
a rough picture of what was done.  Check the source for more information :)

Keep in mind that one can\'t compare benchmarks run with different --cmp
options. The --cmp options sets the all limits according to the worst
limit for all server in the benchmark.

Numbers marked with '+' are estimated according to previous runs because
the query took longer than a given time-limit to finish. The estimation
shouldn\'t be far from the real result thought.

Numbers marked with '?' contains results that gave wrong result. This can only
be used as an indication of how long it took for the server to produce a wrong
result :)

Hope this will give you some idea how each db is performing at what thing ....
Hope you like it .... Luuk & Monty (1997)

EOF

  if ($opt_relative)
  {
    print "Column 1 is in seconds. All other columns are presented relative\n";
    print "to this. 1.00 is the same, bigger numbers indicates slower\n\n";
  }

  if (length($opt_cmp))
  {
    print "The test was run with limits from: $opt_cmp\n\n";
  }
  print "The result logs which where found and the options:\n";
  # Column separator used by all the table printing routines.
  $bar= $opt_no_bars ? " " : "|";

  # Move $opt_server first in array if not filename on command line
  if ($automatic_files)
  {
    @key_order=sort {$a cmp $b} keys %tot;
    for ($i=0; $i <= $#key_order; $i++)
    {
      if ($tot{$key_order[$i]}{'version'} =~ /^$opt_server-/)
      {
        # Put a copy first, then remove the original, which has moved one
        # position up because of the unshift.
        unshift(@key_order,$key_order[$i]);
        splice(@key_order,$i+1,1);
        last;
      }
    }
  }

  # Print header: one numbered line per result file.
  $column_count=0;
  foreach $key (@key_order)
  {
    $tmp=$tot{$key}{'version'};
    $tmp =~ s/-cmp-$opt_cmp// if (length($opt_cmp));
    $column_count++;
    printf "%2d %-30.30s: %s %s\n", $column_count, $tmp,
    $tot{$key}{'server'}, $tot{$key}{'arguments'};
    print "  $tot{$key}{'comments'}\n"
      if ($tot{$key}{'comments'} =~ /\w+/);
  }

  print "\n";

  # The name column is narrower when the counts are not appended to it.
  $namewidth=$opt_skip_count ? 20 :25;
  $colwidth= $opt_relative ? 9 : 6;

  # Table heading: the column numbers, and below them the text in front of
  # the first '-' of each version.
  print_sep("=");
  printf "%-$namewidth.${namewidth}s${bar}", "Operation";
  $count = 1;
  foreach $key (@key_order)
  {
    printf "%${colwidth}d${bar}", $count;
    $count++;
  }
  printf "\n%-$namewidth.${namewidth}s${bar}", "";
  foreach $key (@key_order)
  {
    $ver=$tot{$key}{'version'};
    printf "%${colwidth}.${colwidth}s${bar}", substr($ver,0,index($ver,"-"));
  }
  print "\n";
  print_sep("-");
  print_string($opt_relative ? "Relative results per test (First column is in seconds):" : "Results per test in seconds:");
  print_sep("-");

  # One row per test.  $first is the value of the first column that has
  # one; print_value() shows the later columns relative to it when
  # --relative is given.
  foreach $key (sort {$a cmp $b} keys %op1)
  {
    printf "%-$namewidth.${namewidth}s${bar}", $key;
    $first=undef();
    foreach $server (@key_order)
    {
      print_value($first,$tot{$server}{$key}->[0],$tot{$server}{$key}->[1]);
      $first=$tot{$server}{$key}->[0] if (!defined($first));
    }
    print "\n";
  }

  print_sep("-");
  print_string("The results per operation:");
  print_sep("-");

  # One row per operation; TOTALS is printed last, after its own separator.
  foreach $key (sort {$a cmp $b} keys %op)
  {
    next if ($key =~ /TOTALS/i);
    $tmp=$key;
    # Append the count of the first column unless --skip-count was given.
    $tmp.= " (" . $tot1{$key_order[0]}{$key}->[1] . ")" if (!$opt_skip_count);
    printf "%-$namewidth.${namewidth}s${bar}", $tmp;
    $first=undef();
    foreach $server (@key_order)
    {
      print_value($first,$tot1{$server}{$key}->[0],$tot1{$server}{$key}->[2]);
      $first=$tot1{$server}{$key}->[0] if (!defined($first));
    }
    print "\n";
  }

  print_sep("-");
  $key="TOTALS";
  printf "%-$namewidth.${namewidth}s${bar}", $key;
  $first=undef();
  foreach $server (@key_order)
  {
    print_value($first,$tot1{$server}{$key}->[0],$tot1{$server}{$key}->[2]);
    $first=$tot1{$server}{$key}->[0] if (!defined($first));
  }
  print "\n";
  print_sep("=");
}


#
# Print the report as a HTML page on STDOUT.
#
# Takes no arguments.  The file 'template.html' (opened relative to the
# current directory) is copied line by line; some marker lines are rewritten
# on the way and the report itself is inserted by print_html_body() after
# the line that contains " text of chapter ".
# Dies if the template cannot be opened.
#
sub html_output
{
  my $template="template.html";
  my $title="MySQL Benchmark Results - Compare with $opt_cmp";
  my $image="info.gif";
  $bar="";

  open(TEMPLATE, "<", $template) || die "Can't open $template: $!\n";
  while (<TEMPLATE>)
  {
    if (/<center>/)
    {
      print $_;
      print "<!---- This is AUTOMATICALLY Generated. Do not edit here! ---->\n";
    }
    elsif (/TITLE:SUBTITLE/)
    {
      s|TITLE:SUBTITLE|$title|;
      print $_;
    }
    elsif (m|/images/.gif|)
    {
      # Fill in the image name and use the page title as its alt text.
      s|/images/.gif|/images/$image|;
      s|alt=""|alt="$title"|;
      print $_;
    }
    # Find line to inactivate
    # NOTE(review): $text is not assigned anywhere in the visible source, so
    # it interpolates as an empty string here - confirm that this is intended.
    elsif (m|<img src="/images/${text}1.gif" border="0" width="66" height="20" alt="$text">|)
    {
      # Print inactive thing
      print '<td align="center" bgcolor="#310063"><img src="/images/zero.gif" border="0" width="66" height="20" alt=""></td>';
    }
    elsif (/ subchapter name /)
    {
      # Nothing here for now
      print $_;
    }
    elsif (/ text of chapter /)
    {
      print $_;
      print_html_body();
    }
    else
    {
      print $_;
    }
  }
  close(TEMPLATE);
}


#
# Print the body of the HTML report on STDOUT: the introduction, a table
# with one row per result file and a table with the results per test and
# per operation.
#
# Takes no arguments.  Reads %tot, %tot1, %op, %op1 and the $opt_* flags.
# Sets the globals $column_count, $namewidth and $colwidth and re-sorts
# @key_order when the files were found automatically.  The value cells are
# written by print_value().
#
sub print_html_body
{
  my ($title,$count,$key);
  print <<EOF;
<center>
<font size=+4><b>MySQL Benchmark Results</b></font><br>
<font size=+1><b>Compare with $opt_cmp</b></font><p><p>
</center>
This is the result file of the different benchmark tests.
<p>

The number in () after each tests shows how many SQL commands the particular
test did.  As one test may have many different parameters this gives only
a rough picture of what was done.  Check the source for more information.
<p>
Keep in mind that one can\'t compare benchmarks run with different --cmp
options. The --cmp options sets the all limits according to the worst
limit for all server in the benchmark.
<p>
Numbers marked with '+' are estimated according to previous runs because
the query took longer than a given time-limit to finish. The estimation
shouldn\'t be far from the real result thought.
<p>
Numbers marked with '?' contains results that gave wrong result. This can only
be used as an indication of how long it took for the server to produce a wrong
result :)
<p>
Hope this will give you some idea how each db is performing at what thing ....
<br>
Hope you like it .... Luuk & Monty (1997)
<p><p>
EOF

  if ($opt_relative)
  {
    print "Column 1 is in seconds. All other columns are presented relative<br>\n";
    print "to this. 1.00 is the same, bigger numbers indicates slower<p>\n\n";
  }

  if (length($opt_cmp))
  {
    print "The test was run with limits from: $opt_cmp\n\n";
  }
  print "The result logs which where found and the options:<br>\n";

  # Move $opt_server first in array
  if ($automatic_files)
  {
    @key_order=sort {$a cmp $b} keys %tot;
    for ($i=0; $i <= $#key_order; $i++)
    {
      if ($tot{$key_order[$i]}{'version'} =~ /^$opt_server-/)
      {
        # Put a copy first, then remove the original, which has moved one
        # position up because of the unshift.
        unshift(@key_order,$key_order[$i]);
        splice(@key_order,$i+1,1);
        last;
      }
    }
  }
  # Print header: one numbered table row per result file.
  print "<p><center><table border=1 width=100%>\n";
  $column_count=0;
  foreach $key (@key_order)
  {
    $tmp=$tot{$key}{'version'};
    $tmp =~ s/-cmp-$opt_cmp// if (length($opt_cmp));
    $column_count++;
#    printf "<tr><td>%2d<td>%-30.30s<td>%s %s</tr>\n", $column_count, $tmp,
    printf "<tr><td>%2d</td><td>%s</td><td>%s %s</td></tr>\n",
    $column_count, $tmp, $tot{$key}{'server'}, $tot{$key}{'arguments'};
    print "<tr><td colspan=3>$tot{$key}{'comments'}</td></tr>\n"
      if ($tot{$key}{'comments'} =~ /\w+/);
  }

  print "</table></center><p><center><table border=1 width=100%>\n";

  $namewidth=$opt_skip_count ? 20 :25;
  $colwidth= $opt_relative ? 9 : 6;

  # Heading row: the column number and the text in front of the first '-'
  # of each version.
  printf "<tr><td><b>%s</b></td>\n", "Operation";
  $count = 1;
  foreach $key (@key_order)
  {
    $ver=$tot{$key}{'version'};
    printf "<td align=center><b>%d", $count;
    printf "<br>%${colwidth}.${colwidth}s</b></td>\n", substr($ver,0,index($ver,"-"));
    $count++;
  }
  print "</tr>\n";
  # $count is now the number of result columns plus one, which is the width
  # of the whole table including the name column.
  $title = $opt_relative ? "Relative results per test (First column is in seconds):" : "Results per test in seconds:";
  printf "<tr><td colspan=%d><b>%s</b></td></tr>\n", $count, $title;

  # One row per test.  $first is the value of the first column that has
  # one; print_value() shows the later columns relative to it when
  # --relative is given.
  foreach $key (sort {$a cmp $b} keys %op1)
  {
    printf "<tr><td>%-$namewidth.${namewidth}s</td>", $key;
    $first=undef();
    foreach $server (@key_order)
    {
      print_value($first,$tot{$server}{$key}->[0],$tot{$server}{$key}->[1]);
      $first=$tot{$server}{$key}->[0] if (!defined($first));
    }
    print "</tr>\n";
  }

  $title = "The results per operation:";
  printf "<tr><td colspan=%d><b>%s</b></td></tr>\n", $count, $title;

  # One row per operation; TOTALS is printed last.
  foreach $key (sort {$a cmp $b} keys %op)
  {
    next if ($key =~ /TOTALS/i);
    $tmp=$key;
    # Append the count of the first column unless --skip-count was given.
    $tmp.= " (" . $tot1{$key_order[0]}{$key}->[1] . ")" if (!$opt_skip_count);
    printf "<tr><td>%-$namewidth.${namewidth}s</td>", $tmp;
    $first=undef();
    foreach $server (@key_order)
    {
      print_value($first,$tot1{$server}{$key}->[0],$tot1{$server}{$key}->[2]);
      $first=$tot1{$server}{$key}->[0] if (!defined($first));
    }
    print "</tr>\n";
  }

  $key="TOTALS";
  printf "<tr><td><b>%-$namewidth.${namewidth}s</b></td>", $key;
  $first=undef();
  foreach $server (@key_order)
  {
    print_value($first,$tot1{$server}{$key}->[0],$tot1{$server}{$key}->[2]);
    $first=$tot1{$server}{$key}->[0] if (!defined($first));
  }
  print "</tr>\n</table>\n";
}


#
# Print a separator line, built from the string $sep, that is as wide as
# the whole table.
#
# The width is the name column plus, for every result column, the column
# itself and its bar, plus one for the bar after the name column.  It is
# computed from the globals $namewidth, $colwidth and $column_count.
#
sub print_sep
{
  my $fill = shift;
  my $columns_width = $column_count * ($colwidth + 1);
  my $line_width = $namewidth + $columns_width + 1;

  print $fill x $line_width, "\n";
}


#
# Print one result cell.
#
#   $first   value of the first column of this row, or undef when the cell
#            being printed is itself the first one that has a value
#   $value   the number to show; undef gives an empty cell
#   $flags   optional string; a '+' and/or '?' in it is put before the number
#
# With $opt_relative set, and $first defined, the cell shows $value/$first
# with two decimals (a $first of 0 is treated as 1); otherwise $value is
# shown as an integer.  The text is right aligned to $colwidth characters
# and followed by $bar, or wrapped in a <td> element when $opt_html is set.
#
sub print_value
{
  my ($first,$value,$flags)=@_;
  my $cell = "";

  if (defined($value))
  {
    if ($opt_relative && defined($first))
    {
      # Assume that it took one second instead of 0
      my $base = $first ? $first : 1;
      $cell = sprintf("%.2f", $value / $base);
    }
    else
    {
      $cell = sprintf("%d", $value);
    }
    if (defined($flags))
    {
      $cell = "+" . $cell if (index($flags, "+") >= 0);
      $cell = "?" . $cell if (index($flags, "?") >= 0);
    }
  }

  # Right align: pad on the left up to the column width.
  my $missing = $colwidth - length($cell);
  $cell = (" " x $missing) . $cell if ($missing > 0);

  if ($opt_html)
  {
    print "<td align=right>$cell</td>";
  }
  else
  {
    print $cell . "${bar}";
  }
}


#
# Print a title line inside the table: $str, padded with spaces to the
# width of the table and followed by $bar and a newline.
#
# The width is computed from the globals $namewidth, $colwidth and
# $column_count.  A string that is longer than this is cut to its first
# $width characters.
#
sub print_string
{
  my ($str)=@_;
  my ($width);
  $width=$namewidth + ($colwidth+1)*$column_count;

  # substr() offsets start at 0; an offset of 1 would drop the first
  # character of the title.
  $str=substr($str,0,$width) if (length($str) > $width);
  print($str," " x ($width - length($str)),"${bar}\n");
}


#
# Print the help text, including the current values of the options that
# have a default, on STDOUT and exit with status 0.
#
# Called for --help and --Information and when GetOptions() rejects the
# command line.  Does not return.
#
sub usage
{
    print <<EOF;
$0  Ver 1.1

This program parses all RUN files from old 'run-all-tests --log' scripts
and makes a nice comparable table.

$0 takes currently the following options:

--help or --Information
  Shows this help

--server='server name'  (Default $opt_server)
Put this server in the first result column.

--relative
Show all numbers in times of the first server where the time for the
first server is 1.0

--cmp=server,server,server (Default $opt_cmp)
Compares all runs that are done with the same --cmp options to run-all-tests.
The most normal options are '--cmp=mysql,pg,solid' and '--cmp ""'

--machine='full-machine-name' (Default $opt_machine)
Use only runs that match this machine.

--same-server
Compare all runs for --server=....  The --machine is not used in this case
This is nice to compare how the same server runs on different machines.

--skip-count
Do not write counts after the tests.

--no-bars
Separate the columns with spaces instead of '|'.

--html
Write the result as HTML instead of plain text.  The file 'template.html'
in the current directory is used as the page template.

--dir=...  (Default $opt_dir)
From which directory one should get the runs.  All runs made by
run-all-tests --log is saved in the 'output' directory.
In the 'results' directory you may have some example runs from different
databases.

EOF

  exit(0);
}
