#!/usr/bin/env python
#
# Author: Bas van der Vlies <basv@sara.nl>
# Date  : 14 May 2003
# Desc. : This Class/Script collects data from the pbs server and
#         converts it to ganglia pbs data. The ideas are based on
#         a script from the NPACI Rocks toolkit.
#
#
# SVN info:
# $Id: pbs_stat.py 1897 2006-02-23 15:43:58Z bas $
#
#
# Copyright (C) 2003
#
# This file is part of the pbs_stat utils
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
#
import sys
import os
import string
import socket
import time
import getopt
import re

import pbs

class pbs_stats:
  """
  Collect job statistics from a PBS server and publish them through the
  ganglia gmetric command.

  An instance connects to the default PBS server, or to the server given
  to the constructor, reads the job queue and sends one string metric per
  job. The following settings can be changed on an instance:

   channel)
     Multicast channel to send/receive on: <instance>.channel(<number>)
     default: 239.2.11.71 (only passed to gmetric in old style mode)
   port)
     To which port must we send the data: <instance>.port(<number>)
     default: 8649 (only passed to gmetric in old style mode)
   interface)
     On what interface must we send the data: <instance>.interface(<string>)
     default: let the gmetric command decide which interface to use
     (only passed to gmetric in old style mode)
   domain)
     Domain name reported for every job: <instance>.domain(<string>)
     default: derived from the local host name
   tmax)
     Value passed to gmetric with -x: <instance>.tmax(<number>)
     default: 15
   gmetric)
     Location of the gmetric command: <instance>.gmetric(<string>)
     default: /usr/bin/gmetric
   interval)
     How long must we wait for the next run: <instance>.interval(<number>)
     default: 25 seconds
   DEBUG)
     Set the output verbose: <instance>.debug(1)
  """

  VERSION = "0.9.13"
  DEBUG = 0
  DAEMON = 0
  NEW_GMETRIC = 1
  c = 0
  jobs = {}

  # Class level defaults. __init__ gives every instance a private copy so
  # the setter methods do not change the defaults of other instances.
  #
  settings = {}
  settings['channel'] = '239.2.11.71'
  settings['port'] = 8649
  settings['interface'] = None
  settings['domain'] = None
  settings['tmax'] = 15
  settings['interval'] = 25
  settings['gmetric'] = '/usr/bin/gmetric'
  settings['P'] = 0

  # A regular expression to parse node names: a leading run of letters,
  # '-' and '_' is the basename, whatever follows is the rest.
  #
  node_re = re.compile(r"""

    (?P<basename>[a-zA-Z-_]+)
    (?P<rest>.+)

    """, re.VERBOSE)

  def __init__(self, server=None):
    """
    Connect to the given PBS server, or to the default one when server
    is empty, and prepare the list of job attributes to fetch.
    """
    # Private copies: the class level dictionaries would otherwise be
    # shared, and mutated, by every instance.
    #
    self.settings = self.settings.copy()
    self.jobs = {}

    if not server:
      self.pbs_server = pbs.pbs_default()
    else:
      self.pbs_server = server

    self.connect()

    # Only fetch these attributes for the jobs
    #
    wanted = [ pbs.ATTR_mtime, pbs.ATTR_exechost, pbs.ATTR_state,
               pbs.ATTR_owner, pbs.ATTR_name, pbs.ATTR_queue ]

    self.jobq_attrl = pbs.new_attrl(len(wanted))
    for index in range(len(wanted)):
      self.jobq_attrl[index].name = wanted[index]

  def connect(self):
    """
    (Re)connect to the PBS server. An existing connection is closed
    first. The program exits with status 1 when the connect fails.
    """
    # Close current connection
    #
    if self.c:
      pbs.pbs_disconnect(self.c)

    self.c = pbs.pbs_connect(self.pbs_server)
    if self.c < 0:
      self.error(pbs.pbs_geterrmsg(self.c))
      sys.exit(1)

  def debug(self, value):
    """Set the debug level, any true value makes the output verbose."""
    self.DEBUG = value

  def gmetric(self, value):
    """Set the location of the gmetric command."""
    self.settings['gmetric'] = value

  def channel(self, value):
    """Set the multicast channel (old style gmetric only)."""
    self.settings['channel'] = value

  def port(self, value):
    """Set the port to send to (old style gmetric only)."""
    self.settings['port'] = value

  def interface(self, value):
    """Set the interface to send on (old style gmetric only)."""
    self.settings['interface'] = value

  def domain(self, value):
    """Set the domain name that is reported for every job."""
    self.settings['domain'] = value

  def tmax(self, value):
    """Set the value that is passed to gmetric with -x."""
    self.settings['tmax'] = value

  def interval(self, value):
    """Set the number of seconds to sleep between two runs."""
    self.settings['interval'] = value

  def _gmetric_args(self, name, value):
    """
    Return the gmetric command as an argument list, program first.
    """
    args = [ self.settings['gmetric'],
             '-n%s' %(name),
             '-v%s' %(value),
             '-tstring',
             '-x%s' %(self.settings['tmax']) ]

    # The old style gmetric must also be told where to send the data
    #
    if not self.NEW_GMETRIC:
      args.append('-c%s' %(self.settings['channel']))
      args.append('-p%s' %(self.settings['port']))

      if self.settings['interface']:
        args.append('-i%s' %(self.settings['interface']))

    return args

  def gmetric_cmd(self, name, value):
    """
    Run gmetric to publish the string metric 'name' with value 'value'.
    """
    args = self._gmetric_args(name, value)

    if self.DEBUG:
      print(' '.join(args))

    # The value holds job names and owners that come from the PBS server.
    # Pass the arguments directly instead of via a shell command line so
    # quotes or other shell characters in them can not be interpreted.
    #
    os.spawnvp(os.P_WAIT, args[0], args)

  def number_of_processors(self):
    """
    Return the number of processors for this cluster, the sum of the
    'np' attribute of all nodes.
    """
    attrl = pbs.new_attrl(1)
    attrl[0].name = 'np'

    processors = 0
    nodes = pbs.pbs_statnode(self.c, 'NULL', attrl, 'NULL')

    for node in nodes:
      for attrib in node.attribs:
        processors = processors + int(attrib.value)

    if self.DEBUG:
      print("processors = %d" %(processors))

    return processors

  def get_domainname(self):
    """
    Set the 'domain' setting to the name of the local host, as returned
    by socket.gethostbyaddr(), without its first component.
    """
    fqdn = socket.gethostbyaddr(socket.gethostname())[0]
    self.settings['domain'] = '.'.join(fqdn.split('.')[1:])

  def error(self, str):
    """
    Print error message to stdout or, when running as daemon, to syslog.
    Empty messages are ignored.
    """
    # check if str contains valid data
    # else we print a lot of None's
    #
    if not str:
      return

    if self.DAEMON:
      import syslog

      syslog.openlog("pbs_stat.py")
      syslog.syslog(str)
      syslog.closelog()
    else:
      print(str)

  def pack(self, nodes):
    """
    Reduce the number of bytes send for the nodes, gmetric can only
    handle 1400 bytes.

    Node names are grouped by basename, the result has the form
    'base:rest,rest;base:rest'. Names that do not match node_re are
    left out. An empty list gives an empty string.
    """
    basenames = {}
    for node in nodes:
      result = self.node_re.match(node)
      if not result:
        continue

      basename = result.group('basename')
      if basename not in basenames:
        basenames[basename] = []

      basenames[basename].append(result.group('rest'))

    groups = []
    for basename in basenames.keys():
      groups.append('%s:%s' %(basename, ','.join(basenames[basename])))

    return ';'.join(groups)

  def get(self):
    """
    Get the job queue and store the attributes of every job in
    self.jobs, indexed by the job id.
    """

    # Reset the jobs dictionary
    #
    self.jobs = {}
    jobq = pbs.pbs_statjob(self.c, 'NULL', self.jobq_attrl, 'NULL')

    # Nothing came back: report the error, if any, set up a new
    # connection for the next run and stop, there is nothing to
    # walk through or to free.
    #
    if not jobq:
      self.error(pbs.pbs_geterrmsg(self.c))
      self.connect()
      return

    for job in jobq:
      if self.DEBUG:
        print('Job name: %s' %(job.name))

      # We are only interested in the job id and that is our index
      # in the dictionary
      #
      name = job.name.split('.')[0]
      self.jobs[name] = {}

      for attrib in job.attribs:
        self.jobs[name][attrib.name] = attrib.value

      if self.DEBUG:
        print(self.jobs)

    pbs.pbs_statfree(jobq)

  def send(self):
    """
    Convert the PBS values to ganglia values and send them using the
    gmetric command, one metric named 'pbs-job-<id>' for every job.
    """

    # Get domain if not set
    #
    if not self.settings['domain']:
      self.get_domainname()

    for key in self.jobs.keys():

      # This is what the rocks toolkit expect as key
      #
      job_id = "pbs-job-%s" %(key)

      # Now make up the line we send to gmon daemon
      #
      line = 'domain=%s' %(self.settings['domain'])

      resources = self.jobs[key]

      # Convert pbs values to ganglia values
      #
      for res in resources.keys():
        value = resources[res]

        if res == pbs.ATTR_name:
          field = ' name=%s' %(value)

        elif res == pbs.ATTR_mtime:
          field = ' mtime=%s' %(value)

        elif res == pbs.ATTR_state:
          field = ' state=%s' %(value)

        elif res == pbs.ATTR_owner:
          field = ' user=%s' %(value.split('@')[0])

        elif res == pbs.ATTR_queue:
          field = ' %s=%s' %(pbs.ATTR_queue, value)

        elif res == pbs.ATTR_exechost:
          # Every '+' separated entry is counted as one processor, the
          # part in front of the '/' is the name of the host.
          #
          hosts = value.split('+')
          n_proc = len(hosts)

          nodes = []
          for host in hosts:
            # We only want the short hostname
            #
            hostname = host.split('/')[0].split('.')[0]

            # Only add unknown hostnames
            #
            if hostname not in nodes:
              nodes.append(hostname)

          field = ' nodes=%s P=%d' %(self.pack(nodes), n_proc)

        else:
          # An attribute we have no ganglia name for, skip it
          #
          continue

        line = line + field

      if self.DEBUG:
        print(line)

      # Send the line once per job, when it is complete
      #
      self.gmetric_cmd(job_id, line)

  def daemon(self):
    """
    Detach from the terminal and run as daemon forever.
    """
    self.DAEMON = 1

    # Fork the first child
    #
    pid = os.fork()
    if pid > 0:
      sys.exit(0)  # end parent

    # creates a session and sets the process group ID
    #
    os.setsid()

    # Fork the second child
    #
    pid = os.fork()
    if pid > 0:
      sys.exit(0)  # end parent

    # Go to the root directory and set the umask
    #
    os.chdir('/')
    os.umask(0)

    # Point stdin, stdout and stderr to /dev/null. This is done with
    # dup2 on a descriptor that is open for reading and writing, so it
    # does not depend on which descriptor numbers happen to be free and
    # debug output can still be written (and is discarded).
    #
    sys.stdout.flush()
    sys.stderr.flush()

    devnull = os.open('/dev/null', os.O_RDWR)
    for fd in (0, 1, 2):
      os.dup2(devnull, fd)

    if devnull > 2:
      os.close(devnull)

    self.run()

  def run(self):
    """
    Run this program forever and ever: send the job info, sleep for
    'interval' seconds and start again.
    """

    # Also send how many processors there are in the cluster
    # for the statistics
    #
    line = 'P=%s' %(self.number_of_processors())
    self.gmetric_cmd('pbs-state', line)

    while 1:
      self.get()
      self.send()
      time.sleep(self.settings['interval'])

  def once(self):
    """
    Run this program once
    """
    line = 'P=%s' %(self.number_of_processors())
    self.gmetric_cmd('pbs-state', line)
    self.get()
    self.send()

  def check_args(self, argv):
    """
    Usage: pbs_stat.py [options] [cron | daemon]
      [-D|--debug <level>]
      [-v|--version]
      [-h|--help]
      [-i|--interface <name>]
      [-c|--channel <mcast_channel>]
      [-p|--port <number>]
      [-t|--interval <number>]
      [-d|--domain <name>]
      [-o|--old] : use old style for gmetric command

    Options must be given before cron or daemon. Without cron or
    daemon the program runs once with debug output switched on.
    """

    SHORT_LIST = 'hvD:c:p:i:t:d:o'
    LONG_LIST = [ 'help', 'version', 'debug=', 'channel=', 'port=',
                  'interface=', 'interval=', 'domain=', 'old' ]

    try:
      opts, args = getopt.getopt(argv[1:], SHORT_LIST, LONG_LIST)
    except getopt.error:
      # sys.exc_info() is used to get the exception, this works for
      # every python version
      #
      print(self.check_args.__doc__)
      print(sys.exc_info()[1])
      sys.exit(1)

    # Check given options
    #
    for opt, value in opts:
      if opt in ['-D', '--debug']:
        # A number is used as number so '-D 0' switches debug off,
        # anything else is kept as it is and switches debug on
        #
        try:
          value = int(value)
        except ValueError:
          pass
        self.debug(value)
      elif opt in ['-c', '--channel']:
        self.channel(value)
      elif opt in ['-p', '--port']:
        self.port(value)
      elif opt in ['-i', '--interface']:
        self.interface(value)
      elif opt in ['-d', '--domain']:
        self.domain(value)
      elif opt in ['-t', '--interval']:
        try:
          value = int(value)
        except ValueError:
          print('Wrong value for interval: %s' %(value))
          sys.exit(1)
        self.interval(value)
      elif opt in ['-h', '--help']:
        print(self.check_args.__doc__)
        sys.exit(0)
      elif opt in ['-v', '--version']:
        print(self.VERSION)
        sys.exit(0)
      elif opt in ['-o', '--old']:
        self.NEW_GMETRIC = 0

    # How must we start the program
    #
    if len(args) >= 1:
      if args[0] == 'cron':
        self.once()
      elif args[0] == 'daemon':
        self.daemon()
      else:
        print(self.check_args.__doc__)
    else:
      self.debug(1)
      self.once()

if __name__ == "__main__":

  # Create the collector for the default pbs server. The defaults can be
  # overridden here, before the command line is handled, for example:
  #
  #   stats.channel('239.2.11.72')
  #   stats.port(8649)
  #   stats.interface('eth1')
  #
  # or they can be passed on the command line.
  #
  stats = pbs_stats()
  stats.check_args(sys.argv)
