#!/usr/bin/env python
##############################################################################
# Copyright (c) Members of the EGEE Collaboration. 2011.
# See http://www.eu-egee.org/partners/ for details on the copyright
# holders.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# NAME :        check_gridftp_activity
#
# DESCRIPTION : Process the GridFTP log to retrieve activity information.
#
# AUTHORS :     Alexandre.beche@cern.ch
#
##############################################################################

import os
import re
import subprocess
import time
import copy
from datetime    import datetime, timedelta
from lcgdmcommon import *

class check_gridftp_activity:
  """
  Probe summarising recent GridFTP transfer activity from the transfer log.

  The log is read from its end towards its beginning. Entries whose end date
  falls within the last `interval` minutes are accumulated per direction
  (read/write) and per file-size category (small/medium/large), plus a
  "total" category, and reported as a status message and performance data.
  """
  __version__     = "0.0.1"
  __nagios_id__   = "DM-GRIDFTP-ACTIVITY"

  # Defaults
  DEFAULT_MEDIUM   = "100M"
  DEFAULT_LARGE    = "1G"
  DEFAULT_LOG      = "/var/log/dpm-gsiftp/dpm-gsiftp.log"
  DEFAULT_INTERVAL = 10


  # Specific parameters, where key = short, value = long (i.e. {"h":"help", "C:":"command="})
  # getopt format. The long version will be the one passed even when the short is specified
  __additional_opts__ = {"m:"  : "medium=",
                         "l:"  : "large=",
                         "f:": "logfile=",
                         "i:": "interval="}

  # Specific usage information. The interval is documented in minutes because
  # that is how the code uses it (it is multiplied by 60 to obtain seconds).
  __usage__ = """
\t-f, --logfile\tThe dpm-gsiftp log file. Default: %s
\t-i, --interval\tDefault interval to use. In minutes. Default: %d min.
\t-m, --medium\tLimit for minimum size of medium file. Default: %sB.
\t-l, --large\tLimit for minimum size of large file. Default: %sB.
""" % (DEFAULT_LOG, DEFAULT_INTERVAL, DEFAULT_MEDIUM, DEFAULT_LARGE)

  # Methods

  def __init__(self, opt = {}, args = []):
    """
    Constructor

    @param opt  Contains a dictionary with the long option name as the key, and the argument as value
    @param args Contains the arguments not associated with any option
    """
    # NOTE: the mutable defaults are kept so the signature stays unchanged;
    # neither of them is ever mutated here.

    # Size limits per category. real_bytes comes from lcgdmcommon and is
    # expected to turn a human readable size ("100M") into a byte count.
    # "small" has no lower limit, hence None.
    self.range = {
      "medium": real_bytes(opt.get("medium", self.DEFAULT_MEDIUM)),
      "large":  real_bytes(opt.get("large", self.DEFAULT_LARGE)),
      "small":  None,
    }

    # GridFTP logfile to use
    self.log = opt.get("logfile", self.DEFAULT_LOG)

    # Length of the observation window, in minutes
    self.interval = int(opt.get("interval", self.DEFAULT_INTERVAL))

  def find_range(self, file_size):
    """
    Return the file category regarding its size.

    @param file_size Size of the transferred file, in bytes
    @return "small" up to the medium limit (inclusive), "medium" up to the
            large limit (inclusive), "large" above
    """
    if file_size <= self.range["medium"]:
      return "small"
    if file_size <= self.range["large"]:
      return "medium"
    return "large"

  def add_data_in_global_dictionary(self, global_transfer_dictionary, transfer_mode, ellapsed_time, bytes_transfered):
    """
    Account one transfer in both its size category and the "total" category.

    @param global_transfer_dictionary Counters as built by transfer_dictionary_init (updated in place)
    @param transfer_mode              "read" or "write"
    @param ellapsed_time              Duration of the transfer, in seconds
    @param bytes_transfered           Number of bytes moved by the transfer
    @return The same dictionary that was passed in
    """
    file_range = self.find_range(bytes_transfered)

    # A zero duration is counted as one second so that it still contributes
    # to the total time used for the transfer rate.
    if ellapsed_time == 0:
      ellapsed_time = 1

    for index in (file_range, "total"):
      try:
        counters = global_transfer_dictionary[index][transfer_mode]
      except KeyError:
        # Unknown category or transfer mode: report it and stop, as before,
        # but without swallowing unrelated exceptions.
        print("%s %s %s %s" % (index, transfer_mode, bytes_transfered, ellapsed_time))
        break
      counters["transfer-count"] += 1
      counters["transfered-bytes"] += bytes_transfered
      counters["total-time"] += ellapsed_time

    return global_transfer_dictionary

  def get_gsi_timestamp(self, time):
    """
    Return the number of seconds since the start of the month for a date
    whose integer part ends with ddhhmmss.

    The value wraps at every month boundary, so it must not be used to
    compare dates from different months; parse_logfile does not rely on it.

    @param time Date string such as "20110615123045.123456"
    @return day * 86400 + hour * 3600 + minute * 60 + second
    """
    digits = time.split(".")[0]
    day, hour = int(digits[-8:-6]), int(digits[-6:-4])
    minute, second = int(digits[-4:-2]), int(digits[-2:])
    return (day * 24 * 3600) + (hour * 3600) + (minute * 60) + second

  def _parse_gsi_datetime(self, stamp):
    """
    Convert a log date of the form YYYYMMDDHHMMSS[.fraction] into a datetime.

    @param stamp Date string as found in the DATE= and START= fields
    @return The matching naive datetime
    @raise ValueError when the string does not hold a valid date
    """
    pieces = stamp.split(".")
    digits = pieces[0]
    if len(digits) < 14:
      raise ValueError("not a YYYYMMDDHHMMSS date: %r" % (stamp,))
    digits = digits[-14:]
    moment = datetime(int(digits[0:4]), int(digits[4:6]), int(digits[6:8]),
                      int(digits[8:10]), int(digits[10:12]), int(digits[12:14]))
    if len(pieces) > 1 and pieces[1]:
      moment += timedelta(seconds=float("0." + pieces[1]))
    return moment

  def _elapsed_seconds(self, start, end):
    """
    Return the number of seconds (as a float) between two datetimes.
    """
    delta = end - start
    return delta.days * 86400 + delta.seconds + delta.microseconds / 1000000.0

  def transfer_dictionary_init(self):
    """
    Build the zeroed counters for every size category plus "total".

    @return {category: {"read": counters, "write": counters}} where counters
            holds "transfer-count", "transfered-bytes" and "total-time"
    """
    template = {"transfer-count": 0, "transfered-bytes": 0.0, "total-time": 0.0}
    global_transfer_dictionary = {}
    for category in list(self.range.keys()) + ["total"]:
      # Each direction gets its own copy so that counters are not shared.
      global_transfer_dictionary[category] = {"read": dict(template), "write": dict(template)}
    return global_transfer_dictionary

  def parse_logfile(self):
    """
    Accumulate the transfers that ended within the last `interval` minutes.

    The log is read backwards (reversed_lines comes from lcgdmcommon) and the
    scan stops at the first entry older than the window. Lines that do not
    look like a transfer record, or that hold unparsable values, are skipped.

    @return The counters dictionary (see transfer_dictionary_init)
    """
    global_transfer_dictionary = self.transfer_dictionary_init()
    pattern = re.compile(r"^DATE=(\d*\.\d*) .*START=(\d*\.\d*) .*NBYTES=(\d*) .*TYPE=(.*) .*$")

    # Full dates are compared, so the window stays correct across month and
    # year boundaries.
    # NOTE(review): this assumes the log dates use the same clock/timezone as
    # datetime.now() (local time) - confirm against the server configuration.
    window_start = datetime.now() - timedelta(minutes=self.interval)

    gridftp = open(self.log, "r")
    try:
      for line in reversed_lines(gridftp):
        info_line = pattern.search(line)
        if info_line is None:
          continue

        try:
          end_time = self._parse_gsi_datetime(info_line.group(1))
          start_time = self._parse_gsi_datetime(info_line.group(2))
          bytes_transfered = int(info_line.group(3))
        except ValueError:
          continue

        if end_time < window_start:
          break

        # The dates are calendar-encoded, so the duration has to be computed
        # on real datetimes rather than by subtracting the raw numbers.
        ellapsed_time = self._elapsed_seconds(start_time, end_time)
        operation = info_line.group(4)

        if operation == "STOR":
          self.add_data_in_global_dictionary(global_transfer_dictionary, "write", ellapsed_time, bytes_transfered)
        elif operation == "RETR":
          self.add_data_in_global_dictionary(global_transfer_dictionary, "read", ellapsed_time, bytes_transfered)
    finally:
      # Close the log on every exit path, including the early stop above.
      gridftp.close()

    return global_transfer_dictionary

  def main(self):
    """
    Test code itself. May raise exceptions.
    @return A tuple (exit code, message, performance)
    """
    transfer_dictionary = self.parse_logfile()

    message_parts = []
    performance_parts = []

    # Length of the observation window in seconds; guards the throughput
    # computation against a zero interval.
    window_seconds = 60 * self.interval

    for size in self.range:
      for operation in transfer_dictionary[size].keys():
        counters = transfer_dictionary[size][operation]
        transfered_bytes = float(counters["transfered-bytes"])
        if window_seconds > 0:
          throughput = transfered_bytes / window_seconds
        else:
          throughput = 0.0

        performance_parts.append(" %s_%s_transfer-count=%s" % (size, operation, str(counters["transfer-count"])))
        performance_parts.append(" %s_%s_transfered-bytes=%.2fB" % (size, operation, counters["transfered-bytes"]))
        performance_parts.append(" %s_%s_link-throughput=%.2fB/s" % (size, operation, throughput))

    for operation in transfer_dictionary["total"].keys():
      counters = transfer_dictionary["total"][operation]
      total_transfer = int(counters["transfer-count"])
      transfered_bytes = float(counters["transfered-bytes"])
      time_to_transfer = float(counters["total-time"])

      if time_to_transfer == 0:
        transfer_rate = 0
      else:
        transfer_rate = transfered_bytes / time_to_transfer

      message_parts.append(" %d %s operation (%.2fB/s for %.2fB), " % (total_transfer, operation, transfer_rate, transfered_bytes))
      performance_parts.append(" total_%s_transfer-rate=%.2fB/s" % (operation, transfer_rate))

    return (EX_OK, "".join(message_parts), "".join(performance_parts))


# Script entry point: when executed directly (not imported), hand the probe
# class to run(), which is expected to come from the lcgdmcommon star import.
if __name__ == "__main__":
  probe_class = check_gridftp_activity
  run(probe_class)

