<?php
#
# A Page to monitor the PBS parallel job scheduling queues.
# Uses the pbs.py gschedule module which publishes PBS data
# using Ganglia's gmetric. Ganglia parsing code based on work
# by Matt Massie <massie@cs.berkeley.edu>.
#
# Requires ganglia >= 2.5.1 with garbage collecting.
#
# Changed by: Bas van der Vlies <basv@sara.nl>
# Date      : 22 May 2003
# Desc.     : Made it cluster aware and made it work for our environment.
#             Uses the pbs_stat.py module which publishes PBS data.
#             Look for the HvB tags to see what was changed.
#
# SVN INFO:
#  $Id: job.php 838 2004-11-11 15:36:40Z bas $

# HvB: PHP directive register_globals being off or on.
#      Just use the extract function.
#
# Query-string parameters are imported as global variables so the rest of
# the page behaves the same whether register_globals is on or off.
# EXTR_SKIP keeps a parameter from replacing a variable that already exists
# when this runs.
#
# NOTE(review): every query-string key still becomes a global. Variables
# that are used later without being initialised first can therefore be
# seeded by the client. An explicit whitelist of the expected parameters
# would be safer -- confirm which names the included scripts rely on
# before tightening this.
#
if (!empty($_GET))
{
	extract($_GET, EXTR_SKIP);
}


$GHOME="../..";
include_once "$GHOME/class.TemplatePower.inc.php";
include_once "$GHOME/conf.php";
include_once "./get-pbs.php";
include_once "./get-ganglia.php";
include_once "./functions.php";

# HvB added clusterURL
#
# URL-encoded cluster name, reused in every link that points back to this
# cluster.
$clusterURL = rawurlencode($cluster);

# The job id arrives as a request parameter. Anything that is not a plain
# scalar (missing, or an array such as ?id[]=x) is treated as "no id" so it
# can be printed and used as an array offset further down without errors.
if (!isset($id) || !is_scalar($id)) {
	$id = "";
}
# HTML-escaped copy used for output only; $id itself stays untouched because
# it is the key for the job lookup.
$idHTML = htmlspecialchars((string) $id, ENT_QUOTES);


$tpl = new TemplatePower("templates/job.tpl");
$tpl->assignInclude("header", "templates/header.tpl");
$tpl->prepare();

$self="./job.php";
$tpl->assign("self",$self);
$tpl->assign("title", "Parallel Job $idHTML Detail");

# HvB added clusterURL
#
$tpl->assign("link", "<a href=\"./queue.php?c=$clusterURL\">Back to Job Queue</a>");

$tpl->assign("clusterURL",$clusterURL);
# The cluster name is inserted into the page as text, so escape it.
$tpl->assign("cluster",htmlspecialchars((string) $cluster, ENT_QUOTES));


# An id is "missing" only when it is empty; a numeric "0" is accepted.
if (!is_numeric($id) and !$id) {
	echo "<h1>Missing a Job ID</h1>";
	return;
}

# Look the job up without tripping over an unknown cluster or id.
$job = isset($jobs[$cluster][$id]) ? $jobs[$cluster][$id] : null;
if (!is_array($job)) {
	echo "<h4>PBS Job ID $idHTML does not exist anymore</h4>";

	# HvB added clusterURL
	#
	echo "<a href=\"./queue.php?c=$clusterURL\">Back to job list</a>";
	return;
}

# Pull the fields we need out of the job record up front. Keys are quoted
# strings (bare words are undefined constants), and a missing field becomes
# null instead of an undefined-index lookup.
$jobName  = isset($job['name'])  ? $job['name']  : null;
$jobUser  = isset($job['user'])  ? $job['user']  : null;
$jobState = isset($job['state']) ? $job['state'] : null;
$jobProcs = isset($job['P'])     ? $job['P']     : null;

# Values that end up in the page as text are HTML-escaped first.
$jobIdHTML = htmlspecialchars((string) $id, ENT_QUOTES);

$name = $jobName ? "<i>" . htmlspecialchars((string) $jobName, ENT_QUOTES) . "</i>" : "Job";
$tpl->assign("name",$name);
$tpl->assign("user",htmlspecialchars((string) $jobUser, ENT_QUOTES));
$tpl->assign("id",$jobIdHTML);
$tpl->assign("now",date("r"));
# Show 0 when the job record carries no "P" value.
$tpl->assign("P", $jobProcs ? $jobProcs : 0);


// HvB new time calculation
//
// Time spent in the current state = queue's LOCALTIME minus the job's mtime.
$mtime = isset($job['mtime']) ? $job['mtime'] : 0;
$queue_now = isset($QueueState['LOCALTIME']) ? $QueueState['LOCALTIME'] : 0;
$secs_in_state = $queue_now - $mtime;
$time_in_state = nicetime($secs_in_state);

# Initialise explicitly: request parameters are imported as globals, so an
# unset flag could otherwise be supplied by the client.
$running = 0;

if ($jobState == "R")
	{
		$tpl->assign("status","has been running for $time_in_state.");
		$running=1;
		$tpl->newBlock("running");
	}
else if ($jobState == "Q")
	{
		$tpl->assign("status", "is queued and waiting to run." .
			"<p>This job has been queued for $time_in_state.");

	}
else
	$tpl->assign("status", "is in state " .
		htmlspecialchars((string) $jobState, ENT_QUOTES) . ".");

# Only running jobs get the per-node graphs; everything else is done here.
if (!$running) {
	$tpl->printToScreen();
	return;
}

$tpl->assign("P", $jobProcs);
# I hate how we have to redefine everything inside a new block.

# HvB added cluster aware value
#
$tpl->assign("cluster",htmlspecialchars((string) $cluster, ENT_QUOTES));

$tpl->assign("self",$self);
$tpl->assign("id",$jobIdHTML);

# Get the metric attributes from the first host in our cluster. Assumes at least
# one is up. Choose only metrics that we have graphs for.
$host = (isset($hosts[$cluster]) && is_array($hosts[$cluster]))
	? key($hosts[$cluster])
	: null;

# Always start from an empty list. Request parameters are imported as
# globals, so an uninitialised array could otherwise arrive pre-filled, and
# sort() below needs a real array even when no metric qualifies.
$context_metrics = array();

if ($host !== null && isset($metrics[$cluster]) && is_array($metrics[$cluster])) {
	foreach ($metrics[$cluster] as $m => $M) {
		# Keep metrics whose SLOPE is "both" and whose SOURCE is "gmond".
		if (isset($M[$host]['SLOPE'], $M[$host]['SOURCE'])
			and $M[$host]['SLOPE'] == "both"
			and $M[$host]['SOURCE'] == "gmond")
			$context_metrics[] = $m;
	}
}

# Default metric: used when none was requested, or when the request carried
# something that is not a plain string (e.g. ?metric[]=x).
if (empty($metric) || !is_string($metric)) $metric=$default_metric;

# Build the list of metrics.
$metric_menu = "<B>Metric</B>&nbsp;&nbsp;".
	"<SELECT NAME=metric OnChange=\"metric_form.submit();\">\n";

sort($context_metrics);

foreach ($context_metrics as $m) {
	$metric_menu .= "<OPTION VALUE=\"". rawurlencode($m) ."\" ";
	# Compare as strings so numeric-looking names are not matched loosely.
	if ((string) $m === (string) $metric)
		$metric_menu .= "SELECTED";
	$metric_menu .= ">" . htmlspecialchars((string) $m, ENT_QUOTES) . "\n";
}
$metric_menu .= "</SELECT>\n";
$tpl->assign("metric_menu", $metric_menu);


#
# Function to print out a node box with its hardware and load
# like physical_view.php in ganglia-webfrontend core.
#
# Reads the globals $metrics (metric values per cluster/metric/host),
# $hosts (hosts that are up, per cluster) and $GHOME (base URL of the
# Ganglia frontend).
#
# Parameters:
#   $cluster  cluster the node belongs to
#   $name     host name of the node
#   $P        label printed in front of the host name in the link text
#
# Returns the HTML for one table row (<tr>...</tr>) as a string. A metric
# that is missing for the host is rendered as empty / zero instead of
# raising an error.
#
function nodebox($cluster, $name, $P)
{
	global $metrics, $hosts, $GHOME;
	$load_scalar=0.2;
	$node = "";
	#echo "Cluster is $cluster, host name is $name<br>";

	$M = isset($metrics[$cluster]) ? $metrics[$cluster] : array();

	# Fetch the VAL attribute of each metric we display; null when absent.
	$vals = array();
	foreach (array('mem_total', 'load_one', 'cpu_speed', 'cpu_num') as $key) {
		$vals[$key] = isset($M[$key][$name]['VAL']) ? $M[$key][$name]['VAL'] : null;
	}

	$mem_totalMB = intval(intval($vals['mem_total'])/1024);
	$load_one=$vals['load_one'];
	$cpu_speed=$vals['cpu_speed'];

	# Choose load color.
	$cpu_num=$vals['cpu_num'];
	if (!$cpu_num) { $cpu_num=1; }
	# Work on float copies so a non-numeric value cannot break the division.
	$cpus = floatval($cpu_num);
	if ($cpus <= 0) { $cpus = 1; }
	$loadindex=intval(floatval($load_one)/($load_scalar*$cpus))+1;
	# 10 is currently the highest allowed load index.
	$load= $loadindex > 10 ? "L10" : "L$loadindex";

	# Hosts without an entry in $hosts are drawn with the "down" style.
	$rowclass = !empty($hosts[$cluster][$name]) ? rowStyle() : "down";
	$nameurl=rawurlencode($name);
	$clusterurl=rawurlencode($cluster);
	$node .= "<tr><td class=$rowclass>".
	   "<table width=100% cellpadding=1 cellspacing=0 border=0><tr>".
	   "<td><a href=\"$GHOME/?p=2&c=$clusterurl&h=$nameurl\">$P: $name</a>".
	   "&nbsp;<br>\n";
	$hardware = "<i>$cpu_num x</i> $cpu_speed<em>Mhz</em>, $mem_totalMB<em>MB</em>";

	#
	# Load box.
	#
	$node .= "</td><td align=right valign=top>".
	   "<table cellspacing=1 cellpadding=3 border=0><tr>".
	   "<td class=$load align=right><small>$load_one</small>".
	   "</td></tr></table>";
	$node .= "<tr><td colspan=2>$hardware</td></tr></table>\n";
	$node .= "</td></tr>\n";
	return $node;
}

#
# Build a dictionary of unique compute nodes: node name => number of times
# the node appears in the job's node list.
#
$nodes = array();

$nodelist = decode(
	isset($job['nodes']) ? $job['nodes'] : null,
	isset($job['domain']) ? $job['domain'] : null
);
if (is_array($nodelist))
	# Counts each distinct name, keeping first-occurrence order.
	$nodes = array_count_values($nodelist);
else
	$tpl->assign("_ROOT.error",
		"<h2>No nodes found. Perhaps your queueing system is not well?</h2>");


# Start = the number of seconds ago this job started.
# We make these negative for rrdtool.
$start = $secs_in_state;

# The graphing routine usually takes a range like "hour". Here we give it
# an absolute range, in seconds. One a little bigger than start, and never
# less than one hour.
$r = intval($start * 1.25);
$jobrange = ($start < 3600) ? -3600 : -$r ;

#
# Bring it all together.
#

# First pass to find the max value in all graphs for this
# job, so that every node graph can be drawn on the same scale.
$max=0;
$min=0;
foreach ($nodes as $name => $P) {
	# Read the RRD of the node being visited (not a fixed host).
	$rrd_file = "$gmetad_root/rrds/$cluster/$name/$metric.rrd";

	# Cluster, node and metric names end up on a shell command line, so
	# every argument built from them is passed through escapeshellarg().
	$command = RRDTOOL . " graph - --start " . escapeshellarg((string) $jobrange) .
		" --end N ".
		escapeshellarg("DEF:limits=$rrd_file:sum:AVERAGE") . " ".
		"PRINT:limits:MAX:%le ".
		"PRINT:limits:MIN:%le";
	#echo "Command is $command<br>";

	# exec() appends to an array that already has elements; reset it so
	# the indexes below refer to this node's output and not the first one's.
	$out = array();
	exec($command,$out);

	# Output lines 1 and 2 carry the two PRINT results (MAX, then MIN);
	# line 0 is presumably the image-size line rrdtool prints first.
	# Anything that is not a number (no data, missing file) is ignored.
	$thismax = isset($out[1]) ? trim($out[1]) : null;
	if (is_numeric($thismax) and floatval($max) < floatval($thismax)) {
		$max = $thismax;
	}
	$thismin = isset($out[2]) ? trim($out[2]) : null;
	if (is_numeric($thismin) and floatval($min) > floatval($thismin)) {
		$min = $thismin;
	}
}
# Note: sprintf cannot convert a float in scientific notation
# to a decimal. Therefore we must use %f for all metrics.
$max = sprintf("%.2f",$max);
$min = ($min>0) ? sprintf("%.2f",$min) : $min;

# Second pass: one cell per node holding the node box and its metric graph,
# laid out $cols cells to a template row.
$node = "";
$i=0;
$cols=3;

# The metric name can come from the request: encode it once for URLs and
# once for HTML text instead of printing it raw.
$metricURL  = rawurlencode((string) $metric);
$metricHTML = htmlspecialchars((string) $metric, ENT_QUOTES);

# The limits may be in exponent notation ("1.5e+00"); a bare "+" in a query
# string would be read back as a space, so encode them as well.
$maxURL = rawurlencode((string) $max);
$minURL = rawurlencode((string) $min);

# Same job start time for every graph on the page.
$jobstart = time() - $start;

foreach ($nodes as $name => $P) {
	#echo "Name is $name, metric is $metric<br>";
	# Current value of the metric on this node; empty when unknown.
	$val = isset($metrics[$cluster][$metric][$name]['VAL'])
		? $metrics[$cluster][$metric][$name]['VAL']
		: "";

	$hostURL=rawurlencode($name);
	$hostHTML=htmlspecialchars((string) $name, ENT_QUOTES);

	$graphargs = "z=small&c=$clusterURL&h=$hostURL&v=" . rawurlencode((string) $val) .
		"&x=$maxURL&n=$minURL&m=$metricURL&r=job&jr=$jobrange&js=$jobstart";
	$node .= "<td>";
	$node .= "<table cellspacing=5>";
	$node .= nodebox($cluster, $name, $P);
	$node .= "<tr><td align=center><b>$metricHTML</b></td></tr>";
	$node .= "<tr><td>\n".
		"<a href=\"$GHOME/?c=$clusterURL&h=$hostURL&r=job&jr=$jobrange&js=$jobstart\">".
		"<IMG SRC=\"$GHOME/graph.php?$graphargs\" ".
			"HEIGHT=112 WIDTH=255 ALT=\"$hostHTML\" BORDER=0>".
		"</a></td></tr>\n";
	#$node .="<tr><td>$graphargs</td></tr>";
	$node .= "</table></td>\n";

	# Put $cols nodes in a row.
	$i++;
	if (!($i % $cols)) {
		$tpl->newBlock("noderow");
		$tpl->assign("node_row",$node);
		$node="";
	}
}

# Kind of awkward to use TemplatePower for tables; here we pickup
# short rows.
if ($i % $cols) {
	$tpl->newBlock("noderow");
	$tpl->assign("node_row",$node);
}

$tpl->printToScreen();
?>
 
