/*
*         OpenPBS (Portable Batch System) v2.3 Software License
*
* Copyright (c) 1999-2000 Veridian Information Solutions, Inc.
* All rights reserved.
*
* ---------------------------------------------------------------------------
* For a license to use or redistribute the OpenPBS software under conditions
* other than those described below, or to purchase support for this software,
* please contact Veridian Systems, PBS Products Department ("Licensor") at:
*
*    www.OpenPBS.org  +1 650 967-4675                  sales@OpenPBS.org
*                        877 902-4PBS (US toll-free)
* ---------------------------------------------------------------------------
*
* This license covers use of the OpenPBS v2.3 software (the "Software") at
* your site or location, and, for certain users, redistribution of the
* Software to other sites and locations.  Use and redistribution of
* OpenPBS v2.3 in source and binary forms, with or without modification,
* are permitted provided that all of the following conditions are met.
* After December 31, 2001, only conditions 3-6 must be met:
*
* 1. Commercial and/or non-commercial use of the Software is permitted
*    provided a current software registration is on file at www.OpenPBS.org.
*    If use of this software contributes to a publication, product, or
*    service, proper attribution must be given; see www.OpenPBS.org/credit.html
*
* 2. Redistribution in any form is only permitted for non-commercial,
*    non-profit purposes.  There can be no charge for the Software or any
*    software incorporating the Software.  Further, there can be no
*    expectation of revenue generated as a consequence of redistributing
*    the Software.
*
* 3. Any Redistribution of source code must retain the above copyright notice
*    and the acknowledgment contained in paragraph 6, this list of conditions
*    and the disclaimer contained in paragraph 7.
*
* 4. Any Redistribution in binary form must reproduce the above copyright
*    notice and the acknowledgment contained in paragraph 6, this list of
*    conditions and the disclaimer contained in paragraph 7 in the
*    documentation and/or other materials provided with the distribution.
*
* 5. Redistributions in any form must be accompanied by information on how to
*    obtain complete source code for the OpenPBS software and any
*    modifications and/or additions to the OpenPBS software.  The source code
*    must either be included in the distribution or be available for no more
*    than the cost of distribution plus a nominal fee, and all modifications
*    and additions to the Software must be freely redistributable by any party
*    (including Licensor) without restriction.
*
* 6. All advertising materials mentioning features or use of the Software must
*    display the following acknowledgment:
*
*     "This product includes software developed by NASA Ames Research Center,
*     Lawrence Livermore National Laboratory, and Veridian Information
*     Solutions, Inc.
*     Visit www.OpenPBS.org for OpenPBS software support,
*     products, and information."
*
* 7. DISCLAIMER OF WARRANTY
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT
* ARE EXPRESSLY DISCLAIMED.
*
* IN NO EVENT SHALL VERIDIAN CORPORATION, ITS AFFILIATED COMPANIES, OR THE
* U.S. GOVERNMENT OR ANY OF ITS AGENCIES BE LIABLE FOR ANY DIRECT OR INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This license will be governed by the laws of the Commonwealth of Virginia,
* without reference to its choice of law rules.
*/




/*
 * node_func.c - various functions dealing with nodes, properties and
 *   the following global variables:
 * pbsnlist     - the server's global node list
 * svr_totnodes - total number of pbshost entries
 * svr_clnodes  - number of cluster (space-shared) nodes
 * svr_tsnodes  - number of time-shared nodes, one per host
 *
 * Included functions are:
 * find_nodebyname() - find a node host with a given name
 */
#include <pbs_config.h>   /* the master config generated by configure */
#include "node_func.h"

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <time.h>
#if defined(NTOHL_NEEDS_ARPA_INET_H) && defined(HAVE_ARPA_INET_H)
#include <arpa/inet.h>
#endif

#include "pbs_ifl.h"
#include "libpbs.h"
#include "list_link.h"
#include "attribute.h"
#include "credential.h"
#include "batch_request.h"
#include "server_limits.h"
#include "server.h"
#include "pbs_job.h"
#include "pbs_nodes.h"
#include "pbs_error.h"
#include "log.h"
#include "dis.h"
#include "../lib/Liblog/pbs_log.h"
#include "../lib/Liblog/log_event.h"
#include "pbs_proto.h"
#include "net_connect.h"
#include "utils.h"
#include "u_tree.h"
#include "../lib/Libattr/attr_node_func.h" /* free_prop_list */
#include "req_manager.h" /* mgr_set_node_attr */
#include "../lib/Libutils/u_lock_ctl.h" /* lock_node, unlock_node */
#include "svr_func.h" /* get_svr_attr_* */

#if !defined(H_ERRNO_DECLARED) && !defined(_AIX)
extern int h_errno;
#endif

/* Global Data */

extern hello_container  failures;
extern struct addrinfo  hints;
extern int              svr_totnodes;
extern int              svr_tsnodes;
extern int              svr_clnodes;
extern char            *path_nodes_new;
extern char            *path_nodes;
extern char            *path_nodestate;
extern char            *path_nodenote;
extern int              LOGLEVEL;
extern attribute_def    node_attr_def[];   /* node attributes defs */
extern AvlTree          ipaddrs;
extern AvlTree          streams;
extern dynamic_string  *hierarchy_holder;


/* Functions in this file
 * find_nodebyname()   -     given a node host name, search allnodes
 * find_subnodebyname() -     given a subnode name
 * save_characteristic() - save the characteristics of the node (state, type,
 *  property/status lists and note) for later comparison
 * chk_characteristic() -  check for changes to the node's set of
 *  characteristics and set appropriate flag bits in the "need_todo"
 *  location depending on which characteristics changed
 * status_nodeattrib() -    add status of each requested (or all) node-attribute
 *  to the status reply
 * initialize_pbsnode() -   performs node initialization on a new node
 * effective_node_delete() -  effectively deletes a node from the server's node
 *  list by setting the node's "deleted" bit
 * setup_notification() -   sets mechanism for notifying other hosts about a new
 *  host
 * process_host_name_part() - processes hostname part of a batch request into a
 *  node name, host's IP addresses into an array, and the
 *  node type (cluster/time-shared) into an int variable
 * update_nodes_file() -    used to update the nodes file when certain changes
 *  occur to the server's internal nodes list
 * recompute_ntype_cnts -   Recomputes the current number of cluster nodes and
 *  current number of time-shared nodes
 * create_pbs_node - create basic node structure for adding a node
 */


#include "work_task.h"




/*
 * PGetNodeFromAddr() - use an IP address to look up the matching node structure
 *
 * Walks the hosts handed out by next_host() on allnodes and compares addr
 * against the leading entries of each node's nd_addrs array (at most 10
 * entries are examined, stopping early at the first 0 entry).
 *
 * Returns the first node holding a matching address, or NULL if there is none.
 * Nodes that do not match are passed to unlock_node() before the walk moves
 * on; the matching node is returned without that call, so the caller is
 * presumably responsible for unlocking it.
 */

struct pbsnode *PGetNodeFromAddr(

  pbs_net_t addr)  /* I */

  {
  struct pbsnode *candidate;
  int             host_iter = -1;
  int             slot;

  for (candidate = next_host(&allnodes, &host_iter, NULL);
       candidate != NULL;
       candidate = next_host(&allnodes, &host_iter, NULL))
    {
    slot = 0;

    /* a 0 entry terminates the address list */
    while ((slot < 10) && (candidate->nd_addrs[slot] != 0))
      {
      if (candidate->nd_addrs[slot] == addr)
        return(candidate);

      slot++;
      }

    /* no match on this host - release it and try the next one */
    unlock_node(candidate, __func__, NULL, LOGLEVEL);
    }

  return(NULL);
  }  /* END PGetNodeFromAddr() */




/*
 * bad_node_warning() - log a rate-limited alert that a node cannot be reached
 *
 * addr           - address used to look the node up when node_possessed is NULL
 * node_possessed - node to report on, or NULL to have this function find (and
 *                  afterwards unlock) the node itself
 *
 * The alert is written the first time the node is reported (nd_warnbad == 0)
 * and again whenever at least 3600 seconds have passed since the last alert;
 * nd_warnbad records the time of the most recent alert.
 */

void bad_node_warning(

  pbs_net_t       addr,           /* I */
  struct pbsnode *node_possessed) /* I */

  {
  time_t          now;
  time_t          last;
  char            log_buf[LOCAL_LOG_BUF_SIZE];

  struct pbsnode *pnode = NULL;

  if (node_possessed == NULL)
    pnode = PGetNodeFromAddr(addr);
  else
    pnode = node_possessed;

  if (pnode == NULL)
    {
    /* no matching node - nothing to report */
    return;
    }

  now  = time(NULL);
  last = pnode->nd_warnbad;

  /* warn on the first failure and then at most once per hour.
   * (the previous test, !last && ..., could only ever fire once) */
  if ((last == 0) || (now - last >= 3600))
    {
    snprintf(log_buf, sizeof(log_buf),
      "ALERT: unable to contact node %s",
      pnode->nd_name);

    log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, "WARNING", log_buf);

    pnode->nd_warnbad = now;
    }

  /* only release the mutex if we obtained it in this function */
  if (node_possessed == NULL)
    unlock_node(pnode, "bad_node_warning", "attained in function", LOGLEVEL);

  } /* END bad_node_warning() */




/*
 * addr_ok() - decide whether the node behind an address looks usable
 *
 * addr  - address to look up when pnode is NULL
 * pnode - node to examine, or NULL to search allnodes for the node whose
 *         first address (nd_addrs[0]) equals addr
 *
 * Returns 1 when no node matches addr, or when the node's state is acceptable.
 * Returns 0 when the node is INUSE_UNKNOWN, or when it is INUSE_DOWN and its
 * last update (nd_lastupdate, if non-zero) is at least chk_len seconds old.
 * chk_len starts at 300 and is handed to get_svr_attr_l(SRV_ATR_check_rate),
 * which presumably overwrites it when the server attribute is set.
 *
 * A node located by this function is unlocked before returning; a node passed
 * in by the caller is not unlocked here.
 */

int addr_ok(

  pbs_net_t       addr,  /* I */
  struct pbsnode *pnode) /* I */

  {
  int           healthy     = 1;      /* assume destination host is healthy */
  int           locked_here = FALSE;  /* TRUE when the search below found pnode */
  time_t        time_now    = time(NULL);

  node_iterator iter;

  if (pnode == NULL)
    {
    /* caller did not supply a node - search for one by address */
    reinitialize_node_iterator(&iter);

    /* NOTE:  should walk thru all nd_addrs for multi-homed hosts */
    /* NOTE:  deleted node may have already freed nd_addrs - check should be redundant */
    for (pnode = next_node(&allnodes, pnode, &iter);
         pnode != NULL;
         pnode = next_node(&allnodes, pnode, &iter))
      {
      if ((pnode->nd_addrs != NULL) &&
          (pnode->nd_addrs[0] == addr))
        {
        /* node matches addr */
        break;
        }
      }

    if (pnode == NULL)
      return(healthy);

    locked_here = TRUE;
    }

  if ((pnode->nd_state & INUSE_UNKNOWN) != 0)
    {
    /* definitely not ok */
    healthy = 0;
    }
  else if ((pnode->nd_state & INUSE_DOWN) != 0)
    {
    /* a down node is still ok if it has been heard from recently */
    long chk_len = 300;

    get_svr_attr_l(SRV_ATR_check_rate, &chk_len);

    if ((pnode->nd_lastupdate != 0) &&
        (pnode->nd_lastupdate <= (time_now - chk_len)))
      {
      healthy = 0;
      }
    }

  if (locked_here == TRUE)
    unlock_node(pnode, __func__, "release_mutex = TRUE", LOGLEVEL);

  return(healthy);
  }  /* END addr_ok() */




/*
 * find_nodebyname() - find a node host by its name
 *
 * nodename - name to look up.  Anything from the first '/' onward is ignored.
 *            The buffer is modified in place while searching (the '/' and,
 *            for the second lookup, the last '-' are overwritten with NUL)
 *            and restored before returning, so it must be writable.
 *
 * The name is first looked up as-is in the allnodes hash table.  If that
 * fails and the name contains a '-', the text after the last '-' is read with
 * atoi() as an index, the text before it is looked up as the parent node, and
 * the entry for that index is fetched from the parent's node_boards tree.
 *
 * Returns the node found, on which lock_node() has been called, or NULL.
 * The whole search runs with allnodes.allnodes_mutex held.
 */

struct pbsnode *find_nodebyname(

  char *nodename) /* I */

  {
  char           *slash;
  char           *last_dash;

  struct pbsnode *found = NULL;
  struct pbsnode *board = NULL;

  int             slot;
  int             board_index;

  /* hide any "/..." suffix for the duration of the lookup */
  slash = strchr(nodename, (int)'/');

  if (slash != NULL)
    *slash = '\0';

  pthread_mutex_lock(allnodes.allnodes_mutex);

  slot = get_value_hash(allnodes.ht, nodename);

  if (slot >= 0)
    found = (struct pbsnode *)allnodes.ra->slots[slot].item;

  if (found != NULL)
    {
    lock_node(found, __func__, NULL, LOGLEVEL);
    }
  else if ((last_dash = strrchr(nodename, '-')) != NULL)
    {
    /* check if it was a numa node: <parent>-<index> */
    *last_dash  = '\0';
    board_index = atoi(last_dash + 1);

    slot = get_value_hash(allnodes.ht, nodename);

    if (slot >= 0)
      found = (struct pbsnode *)allnodes.ra->slots[slot].item;

    if (found != NULL)
      {
      /* hold the parent only long enough to fetch and lock the NUMA node */
      lock_node(found, __func__, NULL, LOGLEVEL);

      board = AVL_find(board_index, found->nd_mom_port, found->node_boards);

      if (board != NULL)
        lock_node(board, __func__, NULL, LOGLEVEL);

      unlock_node(found, __func__, NULL, LOGLEVEL);

      found = board;
      }

    *last_dash = '-'; /* restore the dash */
    }

  pthread_mutex_unlock(allnodes.allnodes_mutex);

  if (slash != NULL)
    *slash = '/'; /* restore the slash */

  return(found);
  }  /* END find_nodebyname() */



/*
 * save_characteristic() - snapshot the fields of a node that
 *       chk_characteristic() later compares against
 *
 * pnode - node to snapshot
 * nci   - receives the node's state, type, property/status counts and list
 *         heads, plus a strdup() copy of the note (NULL if the node has none).
 *         chk_characteristic() frees that copy.
 */

void save_characteristic(

  struct pbsnode  *pnode,
  node_check_info *nci)

  {
  /* the note is duplicated; everything else is copied by value */
  nci->note = (pnode->nd_note != NULL) ? strdup(pnode->nd_note) : NULL;

  nci->first_status = pnode->nd_f_st;
  nci->first        = pnode->nd_first;
  nci->nstatus      = pnode->nd_nstatus;
  nci->nprops       = pnode->nd_nprops;
  nci->ntype        = pnode->nd_ntype;
  nci->state        = pnode->nd_state;
  }  /* END save_characteristic() */





/*
 * chk_characteristic() -  check the value of the characteristics against
 *   that which was saved earlier by save_characteristic().
 *
 *   pnode      - node in its current condition
 *   nci        - snapshot taken earlier; its note copy is freed here and the
 *                pointer reset to NULL, so the snapshot's note is consumed
 *   pneed_todo - bits are OR-ed in depending on what changed:
 *                  WRITENODE_STATE     - INUSE_OFFLINE was set or cleared
 *                  WRITE_NEW_NODESFILE - node type, property count or first
 *                                        property pointer changed
 *                  WRITENODE_NOTE      - the note was added, removed or edited
 *
 *   Returns:
 *    PBSE_NONE always.  The "returned" bits in *pneed_todo get used by the
 *    caller.
 */


int chk_characteristic(

  struct pbsnode  *pnode,      /* I */
  node_check_info *nci,        /* I */
  int             *pneed_todo) /* O */

  {
  const char *offline_change = NULL;  /* description of an offline transition */
  char        log_buf[LOCAL_LOG_BUF_SIZE];

  if (pnode->nd_state != nci->state)
    {
    int now_offline = ((pnode->nd_state & INUSE_OFFLINE) != 0);
    int was_offline = ((nci->state & INUSE_OFFLINE) != 0);

    if (now_offline && !was_offline)
      {
      *pneed_todo |= WRITENODE_STATE;  /*marked offline */

      offline_change = "offline set";
      }
    else if (!now_offline && was_offline)
      {
      *pneed_todo |= WRITENODE_STATE;  /*removed offline*/

      offline_change = "offline cleared";
      }

    if ((offline_change != NULL) && (LOGLEVEL >= 3))
      {
      /* bounded: the node name length is not limited here */
      snprintf(log_buf, sizeof(log_buf), "node %s state modified (%s)\n",
        pnode->nd_name,
        offline_change);

      log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,"chk_characteristic",log_buf);
      }
    }

  if (pnode->nd_ntype != nci->ntype)
    *pneed_todo |= WRITE_NEW_NODESFILE;

  if ((nci->nprops != pnode->nd_nprops) ||
      (nci->first != pnode->nd_first))
    *pneed_todo |= WRITE_NEW_NODESFILE;

  if (pnode->nd_note != nci->note)    /* not both NULL or with the same address */
    {
    if (pnode->nd_note == NULL || nci->note == NULL)
      *pneed_todo |= WRITENODE_NOTE;        /*node's note changed*/
    else if (strcmp(pnode->nd_note, nci->note))
      *pneed_todo |= WRITENODE_NOTE;        /*node's note changed*/
    }

  /* release the snapshot's note copy and clear the pointer so that a second
   * check (or cleanup) of the same snapshot cannot double-free it */
  free(nci->note);
  nci->note = NULL;

  return(PBSE_NONE);
  }  /* END chk_characteristic() */





/* status_nodeattrib() - add status of each requested (or all) node-attribute to
 *    the status reply
 *
 *    A temporary attribute array is filled from the node's fields, one slot
 *    per definition in padef[0 .. ND_ATR_LAST-1]; the NUMA/gpu string
 *    attributes (num_node_boards, numa_str, gpus_str) are left unset.  Each
 *    selected attribute is then encoded onto phead with its definition's
 *    at_encode routine, provided the definition's flags intersect the
 *    requester's read privilege (priv masked with ATR_DFLAG_RDACC).
 *
 *    When pal is NULL every definition below limit is considered, except
 *    those flagged ATR_DFLAG_NOSTAT.
 *
 *      Returns:     0 is success
 *                != 0 is error:
 *                     PBSE_UNKNODEATR if a requested name is not found (*bad
 *                     is set to its 1-based position in the pal list) or if a
 *                     definition name is not recognized (*bad is set to 0);
 *                     otherwise the negated negative code from at_encode
 */

int status_nodeattrib(

  svrattrl        *pal,   /*an svrattrl from the request  */
  attribute_def   *padef, /*the defined node attributes   */
  struct pbsnode  *pnode, /*no longer an attribute ptr */
  int              limit, /*number of array elts in padef */
  int              priv,  /*requester's privilege  */

  tlist_head       *phead, /*heads list of svrattrl structs that hang */
  /*off the brp_attr member of the status sub*/
  /*structure in the request's "reply area"  */

  int             *bad)   /*if node-attribute error, record it's*/
/*list position here                 */

  {
  int            i;
  int            rc;
  int            index;
  int            nth;     /* 1-based position in the request list */

  const char    *aname;   /* name of the definition being filled in */
  attribute_def *def;     /* definition being encoded */

  attribute      atemp[ND_ATR_LAST]; /*temporary array of attributes   */

  memset(atemp, 0, sizeof(atemp));

  priv &= ATR_DFLAG_RDACC;    /* user-client privilege          */

  /* phase 1: load the temporary attributes from the node */
  for (i = 0; i < ND_ATR_LAST; i++)
    {
    aname = padef[i].at_name;

    if (strcmp(aname, ATTR_NODE_state) == 0)
      atemp[i].at_val.at_short = pnode->nd_state;
    else if (strcmp(aname, ATTR_NODE_properties) == 0)
      atemp[i].at_val.at_arst = pnode->nd_prop;
    else if (strcmp(aname, ATTR_NODE_status) == 0)
      atemp[i].at_val.at_arst = pnode->nd_status;
    else if (strcmp(aname, ATTR_NODE_ntype) == 0)
      atemp[i].at_val.at_short = pnode->nd_ntype;
    else if (strcmp(aname, ATTR_NODE_jobs) == 0)
      atemp[i].at_val.at_jinfo = pnode;
    else if (strcmp(aname, ATTR_NODE_np) == 0)
      atemp[i].at_val.at_long = pnode->nd_nsn;
    else if (strcmp(aname, ATTR_NODE_note) == 0)
      atemp[i].at_val.at_str = pnode->nd_note;
    else if (strcmp(aname, ATTR_NODE_mom_port) == 0)
      atemp[i].at_val.at_long = pnode->nd_mom_port;
    else if (strcmp(aname, ATTR_NODE_mom_rm_port) == 0)
      atemp[i].at_val.at_long = pnode->nd_mom_rm_port;
    else if ((strcmp(aname, ATTR_NODE_num_node_boards) == 0) ||
             (strcmp(aname, ATTR_NODE_numa_str) == 0) ||
             (strcmp(aname, ATTR_NODE_gpus_str) == 0))
      {
      /* skip NUMA attributes - slot stays zeroed and unset */
      continue;
      }
    else if (strcmp(aname, ATTR_NODE_gpustatus) == 0)
      atemp[i].at_val.at_arst = pnode->nd_gpustatus;
    else if (strcmp(aname, ATTR_NODE_gpus) == 0)
      atemp[i].at_val.at_long = pnode->nd_ngpus;
    else
      {
      /*we don't ever expect this*/
      *bad = 0;

      return(PBSE_UNKNODEATR);
      }

    atemp[i].at_flags = ATR_VFLAG_SET; /*artificially set the value's flags*/
    }

  /* phase 2: encode the selected attributes onto the reply list */
  if (pal != NULL)
    {
    /*caller has requested status on specific node-attributes*/
    for (nth = 1; pal != NULL; pal = (svrattrl *)GET_NEXT(pal->al_link), nth++)
      {
      index = find_attr(padef, pal->al_name, limit);

      if (index < 0)
        {
        *bad = nth;  /*name in this position can't be found*/

        return(PBSE_UNKNODEATR);
        }

      def = padef + index;

      if ((def->at_flags & priv) == 0)
        continue;   /* not readable by this requester */

      rc = def->at_encode(
             &atemp[index],
             phead,
             def->at_name,
             NULL,
             ATR_ENCODE_CLIENT,
             0);

      if (rc < 0)
        return(-rc);
      }
    }
  else
    {
    /* non-specific request, return all readable attributes */
    for (index = 0; index < limit; index++)
      {
      def = padef + index;

      if (((def->at_flags & priv) == 0) ||
          ((def->at_flags & ATR_DFLAG_NOSTAT) != 0))
        continue;

      rc = def->at_encode(
             &atemp[index],
             phead,
             def->at_name,
             NULL,
             ATR_ENCODE_CLIENT,
             0);

      if (rc < 0)
        return(-rc);
      }
    }

  /* every attempted encoding was successful */
  return(0);
  }  /* END status_nodeattrib() */


/*
 * initialize_pbsnode - carries out initialization on a new
 * pbs node.  The assumption is that all the parameters are valid.
 *
 * pnode - structure to initialize; it is zeroed first
 * pname - node name; the pointer is stored as-is (not copied)
 * pul   - zero-terminated array of host byte order addresses; stored as-is
 * ntype - time-shared or cluster
 *
 * The node starts in state INUSE_NEEDS_HELLO_PING | INUSE_DOWN with the
 * default MOM service and manager ports, and with one init_prop() entry
 * (named after the node) heading each of the property and status lists.
 *
 * Returns PBSE_NONE on success, ENOMEM if the node mutex cannot be allocated.
 * The results of init_prop() are not checked here.
*/

static int initialize_pbsnode(

  struct pbsnode *pnode,
  char           *pname, /* node name */
  u_long         *pul,  /* host byte order array */
  /* ipaddrs for this node */
  int             ntype) /* time-shared or cluster */

  {
  static char *func_name = "initialize_pbsnode";

  memset(pnode, 0, sizeof(struct pbsnode));

  /* identity and addressing */
  pnode->nd_name        = pname;
  pnode->nd_addrs       = pul;       /* list of host byte order */
  pnode->nd_ntype       = ntype;
  pnode->nd_mom_port    = PBS_MOM_SERVICE_PORT;
  pnode->nd_mom_rm_port = PBS_MANAGER_SERVICE_PORT;

  /* nothing is known about the node until it answers */
  pnode->nd_state       = INUSE_NEEDS_HELLO_PING | INUSE_DOWN;
  pnode->nd_warnbad     = 0;

  /* subnode bookkeeping */
  pnode->nd_psn         = NULL;
  pnode->nd_nsn         = 0;
  pnode->nd_nsnfree     = 0;
  pnode->nd_needed      = 0;
  pnode->nd_order       = 0;

  /* property list: a single entry named after the node */
  pnode->nd_prop        = NULL;
  pnode->nd_nprops      = 0;
  pnode->nd_first       = init_prop(pnode->nd_name);
  pnode->nd_last        = pnode->nd_first;

  /* status list: same shape as the property list */
  pnode->nd_status      = NULL;
  pnode->nd_nstatus     = 0;
  pnode->nd_f_st        = init_prop(pnode->nd_name);
  pnode->nd_l_st        = pnode->nd_f_st;

  pnode->nd_note        = NULL;

  /* gpus */
  pnode->nd_ngpus       = 0;
  pnode->nd_gpustatus   = NULL;
  pnode->nd_ngpustatus  = 0;

  /* per-node mutex lives on the heap */
  pnode->nd_mutex = (pthread_mutex_t *)calloc(1, sizeof(pthread_mutex_t));

  if (pnode->nd_mutex == NULL)
    {
    log_err(ENOMEM,func_name,"Could not allocate memory for the node's mutex");

    return(ENOMEM);
    }

  pthread_mutex_init(pnode->nd_mutex,NULL);

  return(PBSE_NONE);
  }  /* END initialize_pbsnode() */


/*
 * subnode_delete - delete the specified subnode
 * by marking it deleted
 *
 * Every jobinfo entry on the subnode's job list is freed, the subnode's
 * host/jobs/next links are cleared and its inuse field is set to
 * INUSE_DELETED.  The pbssubn structure itself is not freed here.
 */

static void subnode_delete(

  struct pbssubn *psubn)

  {
  struct jobinfo *cur = psubn->jobs;
  struct jobinfo *following;

  /* release the job list, remembering each successor before the free */
  while (cur != NULL)
    {
    following = cur->next;

    free(cur);

    cur = following;
    }

  psubn->inuse = INUSE_DELETED;
  psubn->host  = NULL;
  psubn->jobs  = NULL;
  psubn->next  = NULL;
  }


/*
 * effective_node_delete() - remove a node from the server and release it
 *
 * The node is taken out of allnodes, unlocked, and then torn down: its mutex
 * storage is freed, each subnode is run through subnode_delete(), the
 * property list starting at nd_first is freed, each address in nd_addrs is
 * deleted from the ipaddrs tree before the array is freed, and finally the
 * name and the node structure itself are freed.  pnode must not be used
 * after this call.
 *
 * NOTE(review): the pbssubn structures are only marked deleted by
 * subnode_delete(); nothing visible here frees them - confirm whether that
 * is intended.
 */

void effective_node_delete(

  struct pbsnode *pnode)

  {
  struct pbssubn *sub;
  struct pbssubn *following;
  u_long         *ip;

  remove_node(&allnodes,pnode);
  unlock_node(pnode, "effective_node_delete", NULL, LOGLEVEL);
  free(pnode->nd_mutex);

  /* mark every subnode deleted; grab the successor first because
   * subnode_delete() clears the next link */
  for (sub = pnode->nd_psn; sub != NULL; sub = following)
    {
    following = sub->next;

    subnode_delete(sub);
    }

  pnode->nd_last->next = NULL;      /* just in case */

  free_prop_list(pnode->nd_first);

  pnode->nd_first = NULL;

  if (pnode->nd_addrs != NULL)
    {
    /* del node's IP addresses from tree (array is 0-terminated) */
    ip = pnode->nd_addrs;

    while (*ip != 0)
      {
      ipaddrs = AVL_delete_node(*ip, pnode->nd_mom_port, ipaddrs);

      ip++;
      }

    /* remove array of IP addresses */
    free(pnode->nd_addrs);

    pnode->nd_addrs = NULL;
    }

  free(pnode->nd_name);

  free(pnode);
  }  /* END effective_node_delete() */





/**
 * process_host_name_part() - resolve the host name part of a node entry
 *
 * objname - host name as given, optionally ending in ":ts"
 * pul     - receives a calloc/realloc'ed, 0-terminated array of the host's
 *           addresses in host byte order
 * pname   - receives a strdup() copy of objname with any ":ts" removed
 * ntype   - receives NTYPE_TIMESHARED if objname ended in ":ts", otherwise
 *           NTYPE_CLUSTER
 *
 * The name is resolved with getaddrinfo() using the global hints; the
 * canonical name must then resolve again on its own.  If the NodeSuffix
 * server attribute is set (it is read once and cached), the canonical name
 * with the suffix inserted before the first '.' (or appended if there is no
 * '.') must resolve as well, and the address list from the first lookup is
 * appended to *pul a second time.
 *
 * Returns:
 *   PBSE_NONE    - success; *pul, *pname and *ntype are set and the caller
 *                  owns *pul and *pname
 *   PBSE_UNKNODE - objname is empty or a lookup failed
 *   PBSE_SYSTEM  - pul is NULL, memory could not be allocated, or the first
 *                  lookup returned no canonical name
 *
 * On every error return the memory allocated here has been released and, if
 * pul was usable, *pul is NULL.  *pname is only written on success.
 *
 * NOTE:  an earlier comment said pul can return NULL even on SUCCESS; as the
 *        code stands *pul is always non-NULL when PBSE_NONE is returned.
 */

static int process_host_name_part(

  char   *objname, /* node to be's name */
  u_long **pul,  /* 0 terminated host addrs array */
  char  **pname, /* node name w/o any :ts         */
  int   *ntype) /* node type; time-shared, not   */

  {
  static char      id[] = "process_host_name_part";
  static int       NodeSuffixIsSet = 0;
  static char     *NodeSuffix;

  char             log_buf[LOCAL_LOG_BUF_SIZE];

  struct addrinfo *addr_info = NULL;  /* result of the primary lookup */
  struct addrinfo *addr_iter;

  char            *phostname;  /* caller supplied hostname   */
  int              ipcount = 0;       /* addresses in addr_info */
  int              len;
  int              totalipcount = 0;  /* entries written to *pul so far */

  char             hname[MAXLINE];
  char             tmpHName[MAXLINE];
  char            *hptr;

  int              hindex;
  int              size = 0;
  u_long          *tmp = NULL;

  len = strlen(objname);

  if (len == 0)
    {
    return(PBSE_UNKNODE);
    }

  /* validate before allocating so nothing can leak on this path */
  if (pul == NULL)
    {
    return(PBSE_SYSTEM);
    }

  phostname = strdup(objname);

  if (phostname == NULL)
    {
    return(PBSE_SYSTEM);
    }

  *ntype = NTYPE_CLUSTER;

  *pul = NULL;

  if ((len >= 3) && !strcmp(&phostname[len - 3], ":ts"))
    {
    phostname[len - 3] = '\0';
    *ntype = NTYPE_TIMESHARED;
    }

  if (getaddrinfo(phostname, NULL, &hints, &addr_info) != 0)
    {
    snprintf(log_buf, sizeof(log_buf), "host %s not found", objname);

    log_err(PBSE_UNKNODE, id, log_buf);

    free(phostname);

    return(PBSE_UNKNODE);
    }

  /* the canonical name is required below; check it before it is used */
  if (addr_info->ai_canonname == NULL)
    {
    freeaddrinfo(addr_info);
    free(phostname);

    return(PBSE_SYSTEM);
    }

  if (LOGLEVEL >= 6)
    {
    char tmpLine[MAXLINE];

    snprintf(tmpLine, sizeof(tmpLine),
      "successfully loaded host structure for '%s'->'%s'",
      phostname,
      addr_info->ai_canonname);

    log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, id, tmpLine);
    }

  snprintf(hname, sizeof(hname), "%s", addr_info->ai_canonname);

  /* fetch and cache the node suffix the first time through */
  if (NodeSuffixIsSet == 0)
    {
    char *node_suffix = NULL;
    get_svr_attr_str(SRV_ATR_NodeSuffix, &node_suffix);

    if (node_suffix != NULL)
      {
      NodeSuffix = strdup(node_suffix);
      }

    NodeSuffixIsSet = 1;
    }

  if (NodeSuffix != NULL)
    {
    /* build <host><suffix>[.<domain>] */
    char *ptr = strchr(hname, '.');

    if (ptr != NULL)
      {
      *ptr = '\0';

      snprintf(tmpHName, sizeof(tmpHName), "%s%s.%s",
        hname,
        NodeSuffix,
        ptr + 1);

      *ptr = '.';
      }
    else
      {
      snprintf(tmpHName, sizeof(tmpHName), "%s%s", hname, NodeSuffix);
      }
    }

  /* count host ipaddrs; addr_info does not change below, so once is enough */
  for (addr_iter = addr_info; addr_iter != NULL; addr_iter = addr_iter->ai_next)
    ipcount++;

  for (hindex = 0;hindex < 2;hindex++)
    {
    if (hindex == 0)
      {
      hptr = hname;
      }
    else if (NodeSuffix != NULL)
      {
      hptr = tmpHName;
      }
    else
      {
      continue;
      }

    /* this lookup only verifies that the name resolves; its result is
     * discarded and the addresses copied below come from addr_info */
    if (getaddrinfo(hptr, NULL, NULL, &addr_iter) != 0)
      {
      snprintf(log_buf, sizeof(log_buf), "bad cname %s, h_errno=%d errno=%d (%s)",
        hptr,
        h_errno,
        errno,
        pbs_strerror(errno));

      log_err(PBSE_UNKNODE, id, log_buf);

      freeaddrinfo(addr_info);

      free(*pul);
      *pul = NULL;

      free(phostname);

      return(PBSE_UNKNODE);
      }

    freeaddrinfo(addr_iter);

    if (*pul == NULL)
      {
      size = sizeof(u_long) * (ipcount + 1);

      tmp = (u_long *)calloc(1, size);  /* zero-terminate list */
      }
    else
      {
      size += sizeof(u_long) * ipcount;

      tmp = (u_long *)realloc(*pul, size);
      }

    if (tmp == NULL)
      {
      /* realloc leaves the old array intact on failure - release it */
      free(*pul);
      *pul = NULL;

      freeaddrinfo(addr_info);
      free(phostname);

      return(PBSE_SYSTEM);
      }

    *pul = tmp;

    for (addr_iter = addr_info; addr_iter != NULL; addr_iter = addr_iter->ai_next)
      {
      /* NOTE(review): assumes every entry is an IPv4 sockaddr_in, which
       * presumably is enforced by the global hints - confirm */
      struct in_addr addr = ((struct sockaddr_in *)addr_iter->ai_addr)->sin_addr;

      (*pul)[totalipcount++] = ntohl(addr.s_addr);
      }

    (*pul)[totalipcount] = 0;  /* zero-term array ip addrs */
    }  /* END for (hindex) */

  freeaddrinfo(addr_info);

  *pname = phostname;   /* return node name     */

  return(PBSE_NONE);    /* function successful      */
  }  /* END process_host_name_part() */





/*
 * update_nodes_file - When called, this function will update
 *       the nodes file.  Specifically, it will
 *       walk the server's array of pbsnodes
 *       constructing for each entry a nodes file
 *       line if that entry is not marked as deleted.
 *       These are written to a temporary file.
 *       Upon successful conclusion that file replaces
 *       the nodes file.
*/

int update_nodes_file(
    
  struct pbsnode *held)

  {
#ifndef NDEBUG
  static char id[] = "update_nodes_file";
#endif

  struct pbsnode  *np;
  int              j;
  int              iter = -1;
  FILE            *nin;

  if (LOGLEVEL >= 2)
    {
    DBPRT(("%s: entered\n",
           id))
    }

  if ((nin = fopen(path_nodes_new, "w")) == NULL)
    {
    log_event(
      PBSEVENT_ADMIN,
      PBS_EVENTCLASS_SERVER,
      "nodes",
      "Node description file update failed");

    return(-1);
    }

  if ((svr_totnodes == 0))
    {
    log_event(
      PBSEVENT_ADMIN,
      PBS_EVENTCLASS_SERVER,
      "nodes",
      "Server has empty nodes list");

    fclose(nin);

    return(-1);
    }

  /* for each node ... */
  /* NOTE: DO NOT change this loop to iterate over numa nodes. Since they
   * aren't real hosts they should NOT appear in the nodes file */

  while ((np = next_host(&allnodes,&iter,held)) != NULL)
    {
    /* ... write its name, and if time-shared, append :ts */
    fprintf(nin, "%s", np->nd_name); /* write name */

    if (np->nd_ntype == NTYPE_TIMESHARED)
      fprintf(nin, ":ts");

    /* if number of subnodes is gt 1, write that; if only one,   */
    /* don't write to maintain compatability with old style file */
    if (np->nd_nsn > 1)
      fprintf(nin, " %s=%d", ATTR_NODE_np, np->nd_nsn);

    /* if number of gpus is gt 0, write that; if none,   */
    /* don't write to maintain compatability with old style file */
    if (np->nd_ngpus > 0)
      fprintf(nin, " %s=%d", ATTR_NODE_gpus, np->nd_ngpus);

    /* write out the numa attributes if needed */
    if (np->num_node_boards > 0)
      {
      fprintf(nin, " %s=%d",
        ATTR_NODE_num_node_boards,
        np->num_node_boards);
      }

    if ((np->numa_str != NULL) &&
        (np->numa_str[0] != '\0'))
      fprintf(nin, " %s=%s", ATTR_NODE_numa_str, np->numa_str);

    /* write out the ports if needed */
    if (np->nd_mom_port != PBS_MOM_SERVICE_PORT)
      fprintf(nin, " %s=%d", ATTR_NODE_mom_port, np->nd_mom_port);

    if (np->nd_mom_rm_port != PBS_MANAGER_SERVICE_PORT)
      fprintf(nin, " %s=%d", ATTR_NODE_mom_rm_port, np->nd_mom_rm_port);

    if ((np->gpu_str != NULL) &&
        (np->gpu_str[0] != '\0'))
      fprintf(nin, " %s=%s", ATTR_NODE_gpus_str, np->gpu_str);

    /* write out properties */
    for (j = 0;j < np->nd_nprops - 1;++j)
      fprintf(nin, " %s", np->nd_prop->as_string[j]);

    /* finish off line with new-line */
    fprintf(nin, "\n");

    fflush(nin);

    if (ferror(nin))
      {
      log_event(
        PBSEVENT_ADMIN,
        PBS_EVENTCLASS_SERVER,
        "nodes",
        "Node description file update failed");

      fclose(nin);
    
      if (held != np)
        unlock_node(np, "update_nodes_file", "error", LOGLEVEL);

      return(-1);
      }
    
    if (held != np)
      unlock_node(np, "update_nodes_file", "loop", LOGLEVEL);
    } /* for each node */

  fclose(nin);

  if (rename(path_nodes_new, path_nodes) != 0)
    {
    log_event(
      PBSEVENT_ADMIN,
      PBS_EVENTCLASS_SERVER,
      "nodes",
      "replacing old nodes file failed");

    return(-1);
    }

  return(PBSE_NONE);
  }  /* END update_nodes_file() */





/*
 * recompute_ntype_cnts - Recomputes the current number of cluster
 *          nodes and current number of time-shared nodes
 */
void recompute_ntype_cnts(void)

  {
  int              svr_loc_clnodes = 0;
  int              svr_loc_tsnodes = 0;

  struct pbsnode  *pnode = NULL;

  node_iterator iter;

  reinitialize_node_iterator(&iter);

  if (svr_totnodes)
    {
    while ((pnode = next_node(&allnodes, pnode, &iter)) != NULL)
      {
      /* count normally */
      if (pnode->nd_ntype == NTYPE_CLUSTER)
        svr_loc_clnodes += pnode->nd_nsn;
      else if (pnode->nd_ntype == NTYPE_TIMESHARED)
        svr_loc_tsnodes++;
      }

    svr_clnodes = svr_loc_clnodes;

    svr_tsnodes = svr_loc_tsnodes;
    }
  } /* END recompute_ntype_cnts() */





/*
 * init_prop - allocate and initialize a prop struct
 *
 * pname points to the property string; the pointer is stored in the new
 * struct as-is (the string is not copied).
 *
 * Returns the new, unlinked and unmarked prop, or NULL if the allocation
 * fails.
 */

struct prop *init_prop(

        char *pname) /* I */

  {
  struct prop *fresh = (struct prop *)calloc(1, sizeof(struct prop));

  if (fresh == NULL)
    return(NULL);

  fresh->name = pname;
  fresh->mark = 0;
  fresh->next = 0;

  return(fresh);
  }  /* END init_prop() */




/*
 * create_subnode - create a subnode entry and link it to its parent node
 *
 * The new entry is zero-allocated, pointed back at pnode, numbered with
 * pnode's current nd_nsn and appended to the tail of the pnode->nd_psn
 * list.  As a side effect pnode->nd_nsn and pnode->nd_nsnfree each grow
 * by one and the INUSE_JOB / INUSE_JOBSHARE bits are cleared from
 * pnode->nd_state.
 *
 * Returns the new subnode, or NULL when the allocation fails (pnode is
 * not modified in that case).
 */

static struct pbssubn *create_subnode(

        struct pbsnode *pnode)

  {
  struct pbssubn **tail;
  struct pbssubn  *fresh;

  fresh = (struct pbssubn *)calloc(1, sizeof(struct pbssubn));

  if (fresh == NULL)
    return(NULL);

  /* fill in the new entry */
  fresh->host    = pnode;
  fresh->next    = NULL;
  fresh->jobs    = NULL;
  fresh->flag    = okay;
  fresh->inuse   = 0;
  fresh->allocto = (resource_t)0;
  fresh->index   = pnode->nd_nsn++;

  pnode->nd_nsnfree++;

  /* masking out bits that are already clear changes nothing, so the
   * job-related bits are simply cleared unconditionally */
  pnode->nd_state &= ~(INUSE_JOB | INUSE_JOBSHARE);

  /* find the link that terminates the list and hang the entry there;
   * for an empty list that link is pnode->nd_psn itself */
  tail = &pnode->nd_psn;

  while (*tail != NULL)
    tail = &(*tail)->next;

  *tail = fresh;

  return(fresh);
  }  /* END create_subnode() */




int create_a_gpusubnode(
    
  struct pbsnode *pnode)

  {
  static char *id = "create_a_gpusubnode";
  struct gpusubn *tmp = calloc((1 + pnode->nd_ngpus), sizeof(struct gpusubn));

  if (tmp == NULL)
    {
    log_err(ENOMEM,id,"Couldn't allocate memory for a subnode. EPIC FAILURE");
    return(ENOMEM);
    }

  if (pnode->nd_ngpus > 0)
    {
    /* copy old memory to the new place */
    memcpy(tmp,pnode->nd_gpusn,(sizeof(struct gpusubn) * pnode->nd_ngpus));
    }

  /* now use the new memory */
  free(pnode->nd_gpusn);
  pnode->nd_gpusn = tmp;

  /* initialize the node */
  pnode->nd_gpus_real = FALSE;
  pnode->nd_gpusn[pnode->nd_ngpus].inuse = FALSE;
  pnode->nd_gpusn[pnode->nd_ngpus].mode = gpu_normal;
  pnode->nd_gpusn[pnode->nd_ngpus].state = gpu_unallocated;
  pnode->nd_gpusn[pnode->nd_ngpus].flag = 0;
  pnode->nd_gpusn[pnode->nd_ngpus].index = pnode->nd_ngpus;
  pnode->nd_gpusn[pnode->nd_ngpus].gpuid = NULL;

  /* increment the number of gpu subnodes and gpus free */
  pnode->nd_ngpus++;
  pnode->nd_ngpus_free++;

  return(PBSE_NONE);
  } /* END create_a_gpusubnode() */




/*
 * copy the properties of node src to node dest
 *
 * @param dest - the node where the properties will be copied to
 * @param src  - the node whose properties will be copied from
 */
int copy_properties(

  struct pbsnode *dest, /* I */
  struct pbsnode *src)  /* O */

  {
  int                    need;
  int                    i;

  struct prop           *pdest;
  struct prop          **plink;

  struct array_strings  *sub;
  struct array_strings  *main_node;

  /* copy features/properties */
  if (src->nd_prop == NULL)
    return(PBSE_NONE);

  main_node = src->nd_prop;
 
  /* allocate the properties for the numa node */
  need = sizeof(struct array_strings) + main_node->as_npointers - 1;
  dest->nd_prop = (struct array_strings *)calloc(1, need);
  sub  = dest->nd_prop;

  /* copy simple values */
  sub->as_npointers = main_node->as_npointers;
  sub->as_usedptr   = main_node->as_usedptr;
  sub->as_bufsize   = main_node->as_bufsize;

  /* allocate the buffer */
  sub->as_buf = (char *)calloc(1, sub->as_bufsize);
  memcpy(sub->as_buf,main_node->as_buf,sub->as_bufsize);

  /* set sub's offset to the same as main_nodes. Ugly and convoluted
   * but it works. Same process below when setting sub's as_string 
   * values */
  sub->as_next= sub->as_buf + (main_node->as_next - main_node->as_buf);

  plink = &dest->nd_first;

  for (i = 0; i < src->nd_nprops-1; i++)
    {
    sub->as_string[i] = sub->as_buf + (main_node->as_string[i] - main_node->as_buf);

    pdest = init_prop(sub->as_string[i]);

    *plink = pdest;
    plink = &pdest->next;
    }

  /* now add in name as last prop */
  pdest  = init_prop(dest->nd_name);
  *plink = pdest;
  dest->nd_last = pdest;

  return(PBSE_NONE);
  } /* END copy_properties() */




/*
 * accepts a string of numbers separated by commas. it places the
 * leading number in val and advances the string to the next number
 * past the comma
 *
 * When the string holds no further comma, *str is left where it is, so
 * a later call reads the same (last) number again.
 *
 * @param val - O: receives the number at the start of *str (atoi rules)
 * @param str - I/O: cursor into the comma separated list
 *
 * @return PBSE_NONE on success, -1 if val, str or *str is NULL
 */

int read_val_and_advance(

  int   *val,
  char **str)

  {
  char *comma;

  /* str itself has to be checked before it is dereferenced */
  if ((str == NULL) ||
      (*str == NULL) ||
      (val == NULL))
    return(-1);

  *val = atoi(*str);

  comma = strchr(*str,',');

  if (comma != NULL)
    *str = comma + 1;

  return(PBSE_NONE);
  } /* END read_val_and_advance() */





/* creates the private numa nodes on this node
 *
 * One pbsnode named "<parent name>-<index>" is created per node board.
 * Each gets the parent's mom ports, its share of subnodes and gpu
 * subnodes, a copy of the parent's properties, and is stored in
 * pnode->node_boards keyed by its index.
 *
 * The per-board counts come from the comma separated lists numa_str
 * (cores) and gpu_str (gpus) when those are set; the n-th entry applies
 * to the n-th board and the last entry is reused once the list runs out.
 * Without a list the parent's total is divided evenly between boards.
 *
 * @param pnode - the node that will house the numa nodes
 * @param pul   - address array handed to initialize_pbsnode() for every
 *                numa node
 *
 * @return PBSE_NONE on success or when pnode is not a numa node, -1 if
 *         pnode is NULL, otherwise PBSE_SYSTEM or the error returned by
 *         initialize_pbsnode() / copy_properties()
 */
int setup_node_boards(

  struct pbsnode *pnode,
  u_long         *pul)

  {
  int             i;
  int             j;
  struct pbsnode *pn;
  char            pname[MAX_LINE];
  char           *np_ptr = NULL;
  char           *gp_ptr = NULL;
  char           *allocd_name;
  int             np = 0;
  int             gpus = 0;
  int             rc;

  char           *id = "setup_node_boards";
  char            log_buf[LOCAL_LOG_BUF_SIZE];

  if (pnode == NULL)
    return(-1);

  pnode->numa_parent = NULL;

  /* if this isn't a numa node, return no error */
  if ((pnode->num_node_boards == 0) &&
      (pnode->numa_str == NULL))
    {
    return(PBSE_NONE);
    }

  /* determine the number of cores for the first board */
  if (pnode->numa_str != NULL)
    {
    np_ptr = pnode->numa_str;
    read_val_and_advance(&np,&np_ptr);
    }
  else
    {
    /* numa_str is NULL here, so the check above guarantees that
     * num_node_boards is not zero */
    np = pnode->nd_nsn / pnode->num_node_boards;
    }

  /* determine the number of gpus for the first board */
  if (pnode->gpu_str != NULL)
    {
    gp_ptr = pnode->gpu_str;
    read_val_and_advance(&gpus,&gp_ptr);
    }
  else if (pnode->num_node_boards != 0)
    {
    /* num_node_boards can be zero when only numa_str is set; gpus then
     * stays 0 and the loop below does not run anyway */
    gpus = pnode->nd_ngpus / pnode->num_node_boards;
    }

  for (i = 0; i < pnode->num_node_boards; i++)
    {
    pn = (struct pbsnode *)calloc(1, sizeof(struct pbsnode));

    if (pn == NULL)
      {
      log_err(PBSE_SYSTEM,id,"Cannot allocate memory for numa node\n");

      return(PBSE_SYSTEM);
      }

    /* each numa node just has a number for a name */
    snprintf(pname,sizeof(pname),"%s-%d",
      pnode->nd_name,
      i);

    allocd_name = strdup(pname);
    if (allocd_name == NULL)
      {
      /* no memory error */
      log_err(PBSE_SYSTEM,id,"Cannot allocate memory for node name\n");

      free(pn);

      return(PBSE_SYSTEM);
      }

    rc = initialize_pbsnode(pn,allocd_name,pul,NTYPE_CLUSTER);
    if (rc != PBSE_NONE)
      {
      /* same cleanup create_partial_pbs_node() performs when the
       * initialization fails; pul belongs to the parent and is kept */
      free(allocd_name);
      free(pn);

      return(rc);
      }

    /* NOTE(review): the failure paths below leave pn allocated; no
     * teardown helper for a half-built numa node is visible here */

    /* make sure the server communicates on the correct ports */
    pn->nd_mom_port = pnode->nd_mom_port;
    pn->nd_mom_rm_port = pnode->nd_mom_rm_port;

    /* create the subnodes for this node */
    for (j = 0; j < np; j++)
      {
      if (create_subnode(pn) == NULL)
        {
        /* ERROR */
        return(PBSE_SYSTEM);
        }
      }

    /* create the gpu subnodes for this node */
    for (j = 0; j < gpus; j++)
      {
      if (create_a_gpusubnode(pn) != PBSE_NONE)
        {
        /* ERROR */
        return(PBSE_SYSTEM);
        }
      }

    /* move on to the next board's counts only after this board has been
     * populated, otherwise the first list entry would never be used */
    if (np_ptr != NULL)
      read_val_and_advance(&np,&np_ptr);

    if (gp_ptr != NULL)
      read_val_and_advance(&gpus,&gp_ptr);

    rc = copy_properties(pn,pnode);
    if (rc != PBSE_NONE)
      return(rc);

    /* add the node to the private tree */
    pnode->node_boards = AVL_insert(i,
        pn->nd_mom_port,
        pn,
        pnode->node_boards);

    /* set my parent node pointer */
    pn->numa_parent = pnode;
    } /* END for each node_board */

  if (LOGLEVEL >= 3)
    {
    snprintf(log_buf,sizeof(log_buf),
      "Successfully created %d numa nodes for node %s\n",
      pnode->num_node_boards,
      pnode->nd_name);

    log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_NODE,id,log_buf);
    }

  return(PBSE_NONE);
  } /* END setup_node_boards() */


/* recheck_for_node :
 * This function is called whenever an entry in the nodes file does
 * not resolve on server initialization. This function is called
 * periodically to see if the node is now resolvable and if so
 * add it to the list of available MOM nodes.
 *
 * ptask->wt_parm1 carries the node_info saved by create_pbs_node().
 * Both the task (with its mutex) and that node_info are released here,
 * whatever create_pbs_node() returns.
 */

void recheck_for_node(

  struct work_task *ptask)

  {
  node_info *host_info;
  int        bad;

  host_info = ptask->wt_parm1;

  if (host_info != NULL)
    {
    /* The result is deliberately not inspected: if the name still does
     * not resolve, create_pbs_node() builds a fresh node_info and
     * schedules a new check itself. */
    create_pbs_node(host_info->nodename, host_info->plist, host_info->perms, &bad);

    /* This node_info is finished either way.  It used to be released on
     * failure only and was leaked once the node had been created;
     * setup_nodes() likewise frees its attribute list after a successful
     * create_pbs_node(), so nothing in it is kept by the callee. */
    free_attrlist(&host_info->atrlist);
    free(host_info->nodename);
    free(host_info);
    }

  free(ptask->wt_mutex);
  free(ptask);
  } /* END recheck_for_node() */





/*
 * create_pbs_node - create pbs node structure, i.e. add a node
 *
 * objname is the node name as written by the administrator; it is split
 * into name, type and address list by process_host_name_part().
 *
 * If that step fails, the name, attribute list and permissions are
 * saved in a node_info and recheck_for_node() is scheduled to try again
 * 30 seconds later; the failure code is still returned to the caller.
 *
 * Otherwise the node is allocated and initialized, gets its first
 * subnode, has the attributes in plist applied, is registered in ipaddrs
 * under each of its addresses, receives its numa node boards and is
 * finally inserted into allnodes.
 *
 * @param objname - I: node name
 * @param plist   - I: attributes to set on the node (may be NULL)
 * @param perms   - I: permissions used when applying the attributes
 * @param bad     - O: passed through to mgr_set_node_attr()
 *
 * @return PBSE_NONE on success, PBSE_NODEEXIST if a node of that name
 *         already exists, PBSE_SYSTEM on allocation failure or when no
 *         address was found, otherwise the error of the failing step
 */

int create_pbs_node(

  char     *objname,
  svrattrl *plist,
  int       perms,
  int      *bad)

  {
  static char     *id = "create_pbs_node";
  struct pbsnode  *pnode = NULL;
  char             log_buf[LOCAL_LOG_BUF_SIZE];

  int              ntype; /* node type; time-shared, not */
  char            *pname; /* node name w/o any :ts       */
  u_long          *pul;  /* 0 terminated host adrs array*/
  int              rc;
  node_info        *host_info;
  int              i;
  u_long           addr;
  time_t           time_now = time(NULL);

  if ((rc = process_host_name_part(objname, &pul, &pname, &ntype)) != 0)
    {
    svrattrl *pal, *pattrl;

    /* the host name in the nodes file did not resolve.
       We will set up a process to check periodically
       to see if the node will resolve later */
    host_info = (node_info *)calloc(1, sizeof(node_info));

    if (host_info == NULL)
      {
      log_err(-1, id, "create_pbs_node calloc failed");
      return(PBSE_SYSTEM);
      }

    CLEAR_HEAD(host_info->atrlist);

    /* allocate and copy the objname plist and perms */
    host_info->perms = perms;
    pal = plist;

    while (pal != NULL)
      {
      pattrl = attrlist_create(pal->al_atopl.name, 0, strlen(pal->al_atopl.value) + 1);
      if (pattrl == NULL)
        {
        log_err(-1, id, "cannot create node attribute");

        /* drop what has been copied so far */
        free_attrlist(&host_info->atrlist);
        free(host_info);

        return(PBSE_SYSTEM);
        }

      strcpy(pattrl->al_value, pal->al_atopl.value);
      pattrl->al_flags = SET;

      append_link(&host_info->atrlist, &pattrl->al_link, pattrl);
      pal = GET_NEXT(pal->al_link);
      }

    pattrl = GET_NEXT(host_info->atrlist);
    host_info->plist = pattrl;

    if (objname != NULL)
      {
      host_info->nodename = (char *)calloc(1, strlen(objname)+1);

      if (host_info->nodename == NULL)
        {
        free_attrlist(&host_info->atrlist);
        free(host_info);
        log_err(-1, id, "create_pbs_node calloc failed");
        return(PBSE_SYSTEM);
        }

      strcpy(host_info->nodename, objname);
      }

    /* try again in 30 seconds */
    set_task(WORK_Timed, time_now + 30, recheck_for_node, host_info, FALSE);

    return(rc);
    }

  if (pul == NULL)
    {
    free(pname);

    snprintf(log_buf, sizeof(log_buf),
      "no valid IP addresses found for '%s' - check name service",
      objname);

    log_err(-1, "process_host_name_part", log_buf);

    return(PBSE_SYSTEM);
    }

  if ((pnode = find_nodebyname(pname)) != NULL)
    {
    free(pname);
    free(pul);

    unlock_node(pnode, "create_pbs_node", NULL, LOGLEVEL);

    return(PBSE_NODEEXIST);
    }

  if ((pnode = (struct pbsnode *)calloc(1, sizeof(struct pbsnode))) == NULL)
    {
    free(pul);
    free(pname);

    return(PBSE_SYSTEM);
    }

  if ((rc = initialize_pbsnode(pnode, pname, pul, ntype)) != PBSE_NONE)
    {
    /* same cleanup as create_partial_pbs_node() */
    free(pul);
    free(pname);
    free(pnode);

    return(rc);
    }

  /* create and initialize the first subnode to go with the parent node */
  if (create_subnode(pnode) == NULL)
    {
    /* same cleanup as create_partial_pbs_node() */
    free(pul);
    free(pname);
    free(pnode->nd_mutex);
    free(pnode);

    return(PBSE_SYSTEM);
    }

  rc = mgr_set_node_attr(
         pnode,
         node_attr_def,
         ND_ATR_LAST,
         plist,
         perms,
         bad,
         (void *)pnode,
         ATR_ACTION_ALTER);

  if (rc != 0)
    {
    /* NOTE(review): create_partial_pbs_node() takes the node lock before
     * calling effective_node_delete(); confirm whether that is required
     * here as well */
    effective_node_delete(pnode);

    return(rc);
    }

  for (i = 0; pul[i]; i++)
    {
    if (LOGLEVEL >= 6)
      {
      /* the octets are unsigned long values, hence %lu */
      snprintf(log_buf, sizeof(log_buf),
        "node '%s' allows trust for ipaddr %lu.%lu.%lu.%lu\n",
        pnode->nd_name,
        (pul[i] & 0xff000000) >> 24,
        (pul[i] & 0x00ff0000) >> 16,
        (pul[i] & 0x0000ff00) >> 8,
        (pul[i] & 0x000000ff));

      log_record(PBSEVENT_SCHED,PBS_EVENTCLASS_REQUEST,id,log_buf);
      }

    addr = pul[i];
    ipaddrs = AVL_insert(addr, pnode->nd_mom_port, pnode, ipaddrs);
    }  /* END for (i) */

  if ((rc = setup_node_boards(pnode,pul)) != PBSE_NONE)
    {
    /* NOTE(review): the node is already referenced from ipaddrs at this
     * point but is never inserted into allnodes nor released */
    return(rc);
    }

  insert_node(&allnodes,pnode);

  svr_totnodes++;

  recompute_ntype_cnts();

  /* SUCCESS */
  return(PBSE_NONE);
  } /* End create_pbs_node() */






/*
 * parse_node_token - parse tokens in the nodes file
 *
 * Token is returned, if null then there was none.
 * If there is an error, then "err" is set non-zero.
 * On following call, with argument "start" as null pointer, then
 * resume where left off.
 *
 * If "cok" is true, then this is first token (node name) and ':' is
 * allowed and '=' is not.   For following tokens, allow '=' as separator
 * between "keyword" and "value".  Will get value as next token.
 *
 * A token consists of letters, digits and the characters "-._[]", plus
 * ':' when "cok" is set and ',' when "comma" is set.  It ends at
 * whitespace, at '=' (only when "cok" is not set) or at the end of the
 * string.  Any other character sets "err" but stays part of the token.
 * The terminating character is reported through "term" and overwritten
 * with a NUL in the caller's buffer.
 *
 * The resume position lives in a static variable, so the function is not
 * reentrant.
 */

static char *parse_node_token(

  char *start, /* if null, restart where left off */
  int   cok, /* flag - non-zero if colon ":" allowed in token */
  int   comma, /* flag - non-zero if comma ',' allowed in token */
  int  *err, /* RETURN: non-zero if error */
  char *term) /* RETURN: character terminating token */

  {
  static char *pt;
  char        *ts;

  *err = 0;

  if (start)
    pt = start;

  /* asked to resume although nothing has been parsed yet */
  if (pt == NULL)
    return(NULL);

  /* The ctype functions are only defined for EOF and unsigned char
   * values, so every character is converted before it is classified. */

  while (*pt && isspace((unsigned char)*pt)) /* skip leading whitespace */
    pt++;

  if (*pt == '\0')
    {
    return (NULL);  /* no token */
    }

  ts = pt;

  /* test for legal characters in token */

  for (;pt[0] != '\0';pt++)
    {
    if (isalnum((unsigned char)*pt) || strchr("-._[]", *pt))
      continue;

    if (isspace((unsigned char)*pt))
      break;

    if (cok && (*pt == ':'))
      continue;

    if (comma && (*pt == ','))
      continue;

    if (!cok && (*pt == '='))
      break;

    /* illegal character: flag it and keep scanning to the token's end */
    *err = 1;
    }  /* END for() */

  *term = *pt;

  if (*pt != '\0')
    {
    *pt++ = '\0';
    }

  return(ts);
  }  /* END parse_node_token() */





/*
 * Read the file, "nodes", containing the list of properties for each node.
 * The list of nodes is formed and stored in allnodes.
 * Return -1 on error, 0 otherwise.
 *
 * Read the node state file, "node_state", for any "offline"
 * conditions which should be set in the nodes.
 *
 * Each non-comment line of the nodes file is a node name followed by
 * "keyword=value" attributes and/or bare (old style) properties.  A name
 * of the form prefix[x-y]suffix expands to one node per number in x..y,
 * zero padded to the width of x.  The note file is read last and fills
 * in nd_note.
 *
 * A nodes file that cannot be opened is logged and reported as success
 * (0).  A malformed line or a duplicate node aborts the whole load with
 * -1.  A node that merely fails to be created is logged and skipped.
 */

int setup_nodes(void)

  {
  static char *id = "setup_nodes";

  FILE  *nin;
  char   line[MAXLINE << 4];
  char   note[MAX_NOTE+1];
  char  *nodename;
  char   propstr[256];
  char  *token;
  char  *open_bracket;
  char  *close_bracket;
  char  *dash;
  char   tmp_node_name[MAX_LINE];
  char   log_buf[LOCAL_LOG_BUF_SIZE];
  int    bad;
  int    num;
  int    linenum;
  int    err;
  int    start = -1;
  int    end = -1;

  struct pbsnode *np;
  char     *val;
  char      xchar;
  svrattrl *pal;
  int   perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR;
  tlist_head atrlist;

  extern char server_name[];
  extern resource_t next_resource_tag;

  snprintf(log_buf, sizeof(log_buf), "%s()", id);

  log_record(PBSEVENT_SCHED,PBS_EVENTCLASS_REQUEST,id,log_buf);

  CLEAR_HEAD(atrlist);

  if ((nin = fopen(path_nodes, "r")) == NULL)
    {
    snprintf(log_buf, sizeof(log_buf),
      "cannot open node description file '%s' in setup_nodes()\n",
      path_nodes);

    log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,server_name,log_buf);

    return(0);
    }

  next_resource_tag = time(0); /* initialize next resource handle */

  svr_totnodes = 0;

  /* clear out line so we don't have residual data if there is no LF */

  memset(line, '\0', sizeof(line));

  for (linenum = 1;fgets(line, sizeof(line), nin);linenum++)
    {
    if (line[0] == '#') /* comment */
      continue;

    /* first token is the node name, may have ":ts" appended */

    propstr[0] = '\0';

    token = parse_node_token(line, 1, 0, &err, &xchar);

    if (token == NULL)
      continue; /* blank line */

    /* every message is built with snprintf: tokens come straight from
     * the file and can be far longer than log_buf */

    if (err != 0)
      {
      snprintf(log_buf, sizeof(log_buf),
        "invalid character in token \"%s\" on line %d", token, linenum);

      goto errtoken2;
      }

    if (!isalpha((unsigned char)*token))
      {
      snprintf(log_buf, sizeof(log_buf),
        "token \"%s\" doesn't start with alpha on line %d", token, linenum);

      goto errtoken2;
      }

    nodename = token;

    /* now process remaining tokens (if any), they may be either */
    /* attributes (keyword=value) or old style properties        */

    while (1)
      {
      token = parse_node_token(NULL, 0, 0, &err, &xchar);

      if (err != 0)
        goto errtoken1;

      if (token == NULL)
        break;

      if (xchar == '=')
        {
        /* have new style attribute, keyword=value */

        val = parse_node_token(NULL, 0, 1, &err, &xchar);

        if ((val == NULL) || (err != 0) || (xchar == '='))
          goto errtoken1;

        pal = attrlist_create(token, 0, strlen(val) + 1);

        if (pal == NULL)
          {
          /* errtoken2 writes the message to the log */
          snprintf(log_buf, sizeof(log_buf), "%s", "cannot create node attribute");

          goto errtoken2;
          }

        strcpy(pal->al_value, val);

        pal->al_flags = SET;

        append_link(&atrlist, &pal->al_link, pal);
        }
      else
        {
        /* old style property: append to the comma separated list, but
         * never write past the end of propstr */
        size_t used  = strlen(propstr);
        size_t extra = strlen(token) + ((used > 0) ? 1 : 0);

        if (used + extra >= sizeof(propstr))
          {
          snprintf(log_buf, sizeof(log_buf),
            "property list too long for node \"%s\" on line %d",
            nodename,
            linenum);

          goto errtoken2;
          }

        if (used > 0)
          strcat(propstr, ",");

        strcat(propstr, token);
        }
      }    /* END while(1) */

    /* if any properties, create property attr and add to list */

    if (propstr[0] != '\0')
      {
      pal = attrlist_create(ATTR_NODE_properties, 0, strlen(propstr) + 1);

      if (pal == NULL)
        {
        /* FAILURE - leave through errtoken2 so that the message is
         * logged and the file and attribute list are released */
        snprintf(log_buf, sizeof(log_buf), "%s", "cannot create node attribute");

        goto errtoken2;
        }

      strcpy(pal->al_value, propstr);

      pal->al_flags = SET;

      append_link(&atrlist, &pal->al_link, pal);
      }

    /* now create node and subnodes */

    pal = GET_NEXT(atrlist);

    if ((open_bracket = strchr(nodename,'[')) != NULL)
      {
      int num_digits;

      start = atoi(open_bracket+1);

      dash = strchr(open_bracket,'-');
      close_bracket = strchr(open_bracket,']');

      if ((dash == NULL) ||
          (close_bracket == NULL))
        {
        snprintf(log_buf, sizeof(log_buf),
          "malformed nodename with range: %s, must be of form [x-y]\n",
          nodename);

        log_err(-1,id,log_buf);

        goto errtoken2;
        }

      end = atoi(dash+1);

      /* nullify the open bracket */
      *open_bracket = '\0';

      /* width of the first number as written, used for zero padding */
      num_digits = dash - open_bracket - 1;

      /* move past the closing bracket */
      close_bracket++;

      while (start <= end)
        {
        /* prefix, zero padded number, suffix - in one bounded step */
        int written = snprintf(tmp_node_name, sizeof(tmp_node_name), "%s%0*d%s",
          nodename,
          num_digits,
          start,
          close_bracket);

        if ((written < 0) || ((size_t)written >= sizeof(tmp_node_name)))
          {
          snprintf(log_buf, sizeof(log_buf),
            "node name too long in range \"%s\" on line %d",
            nodename,
            linenum);

          goto errtoken2;
          }

        err = create_pbs_node(tmp_node_name,pal,perm,&bad);

        if (err != 0)
          break;

        start++;
        }
      }
    else
      {
      err = create_pbs_node(nodename, pal, perm, &bad);
      }

    if (err == PBSE_NODEEXIST)
      {
      snprintf(log_buf, sizeof(log_buf), "duplicate node \"%s\"on line %d",
        nodename,
        linenum);

      log_record(PBSEVENT_SCHED,PBS_EVENTCLASS_REQUEST,id,log_buf);

      goto errtoken2;
      }

    if (err != 0)
      {
      snprintf(log_buf, sizeof(log_buf), "could not create node \"%s\", error = %d",
        nodename,
        err);

      log_record(PBSEVENT_SCHED,PBS_EVENTCLASS_REQUEST,id,log_buf);

      free_attrlist(&atrlist);
      continue;
      }

    if (LOGLEVEL >= 3)
      {
      snprintf(log_buf, sizeof(log_buf),
        "node '%s' successfully loaded from nodes file", nodename);

      log_record(PBSEVENT_SCHED,PBS_EVENTCLASS_REQUEST,id,log_buf);
      }

    free_attrlist(&atrlist);
    }  /* END for (linenum) */

  fclose(nin);

  nin = fopen(path_nodestate, "r");

  if (nin != NULL)
    {
    /* NOTE(review): "%s" carries no field width; a width derived from
     * sizeof(line) would bound the read */
    while (fscanf(nin, "%s %d",
                  line,
                  &num) == 2)
      {
      int iter = -1;

      while ((np = next_host(&allnodes,&iter,NULL)) != NULL)
        {
        if (strcmp(np->nd_name, line) == 0)
          {
          np->nd_state = num | INUSE_NEEDS_HELLO_PING;

          /* exclusive bits are calculated later in set_old_nodes() */
          np->nd_state &= ~(INUSE_JOB | INUSE_JOBSHARE);

          unlock_node(np, __func__, "match", LOGLEVEL);

          break;
          }

        unlock_node(np, __func__, "no match", LOGLEVEL);
        }
      }

    fclose(nin);
    }

  /* initialize note attributes */
  nin = fopen(path_nodenote, "r");

  if (nin != NULL)
    {

    while (fscanf(nin, "%s %" MAX_NOTE_STR "[^\n]",
                  line,
                  note) == 2)
      {
      if ((np = find_nodebyname(line)) != NULL)
        {
        np->nd_note = strdup(note);

        if (np->nd_note == NULL)
          {
          snprintf(log_buf, sizeof(log_buf),
            "couldn't allocate space for note (node = %s)", np->nd_name);
          log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, id, log_buf);
          }

        unlock_node(np, __func__, "init - no note", LOGLEVEL);
        }
      }

    fclose(nin);
    }

  /* SUCCESS */

  return(0);

errtoken1:

  snprintf(log_buf, sizeof(log_buf),
    "token \"%s\" in error on line %d of file nodes",
    token,
    linenum);

errtoken2:

  log_record(PBSEVENT_SCHED,PBS_EVENTCLASS_REQUEST,id,log_buf);

  free_attrlist(&atrlist);

  fclose(nin);

  /* FAILURE */

  return(-1);
  }  /* END setup_nodes() */





/*
 * delete_a_subnode - remove the last subnode entry from a node's list
 *
 * The tail of pnode->nd_psn is unlinked, handed to subnode_delete() and
 * freed.  pnode->nd_nsnfree is decremented when the removed subnode was
 * not marked INUSE_JOB or INUSE_JOBSHARE.  pnode->nd_nsn is not changed
 * here.  Nothing happens when the node has no subnodes.
 */

static void delete_a_subnode(

  struct pbsnode *pnode)

  {

  struct pbssubn *psubn;

  struct pbssubn *pprior = NULL;

  psubn = pnode->nd_psn;

  if (psubn == NULL)
    return;  /* no subnode to delete */

  /* walk to the tail, remembering its predecessor */

  while (psubn->next)
    {
    pprior = psubn;
    psubn = psubn->next;
    }

  /* Unlink the tail.  Without a predecessor the tail is the only entry
   * and the list becomes empty; otherwise the predecessor becomes the new
   * tail and the head pointer must stay as it is. */

  if (pprior == NULL)
    pnode->nd_psn = NULL;
  else
    pprior->next = NULL;

  /*
   * found last subnode in list for given node, mark it deleted
   * note, have to update nd_nsnfree using pnode rather than psubn->host
   * because it point to the real node rather than the the copy (pnode)
   * and the real node is overwritten by the copy
   */

  if ((psubn->inuse & (INUSE_JOB | INUSE_JOBSHARE)) == 0)
    pnode->nd_nsnfree--;

  subnode_delete(psubn);

  /* overwrite the entry before freeing it so stale users stand out */
  memset(psubn, 252, sizeof(struct pbssubn));
  free(psubn);
  return;
  }  /* END delete_a_subnode() */




/*
 * deletes the last gpu subnode
 * decrements the number of gpu subnodes and, when the removed one was
 * not in use, the number of free gpus
 *
 * The gpu subnodes live in one array (see create_a_gpusubnode()), so a
 * single element cannot be handed to free().  The array is released only
 * once its last element is gone.  Otherwise it is kept and the slot past
 * the new count is simply no longer used.
 */
static void delete_a_gpusubnode(

  struct pbsnode *pnode)

  {
  struct gpusubn *last;

  if ((pnode->nd_ngpus < 1) ||
      (pnode->nd_gpusn == NULL))
    {
    /* ERROR, can't free non-existent subnodes */
    return;
    }

  last = pnode->nd_gpusn + (pnode->nd_ngpus - 1);

  if (last->inuse == FALSE)
    pnode->nd_ngpus_free--;

  /* decrement the number of gpu subnodes (the count, not the array
   * pointer) */
  pnode->nd_ngpus--;

  /* NOTE(review): last->gpuid is not released here because its ownership
   * cannot be established from this function */

  if (pnode->nd_ngpus == 0)
    {
    /* the array is empty now, give it back */
    free(pnode->nd_gpusn);
    pnode->nd_gpusn = NULL;
    }

  /* DONE */
  } /* END delete_a_gpusubnode() */





/*
 * node_np_action - action routine for node's np attribute
 *
 * ATR_ACTION_NEW   - report the node's current subnode count in "new"
 * ATR_ACTION_ALTER - add or remove subnodes until the node has as many
 *                    as "new" asks for
 *
 * Returns 0 on success (and for any other action mode), PBSE_BADATVAL
 * when the requested count is not positive or does not fit the counter
 * type, and PBSE_SYSTEM when a subnode cannot be allocated; in that case
 * nd_nsn reflects the subnodes that do exist.
 */

int node_np_action(

  attribute *new,  /*derive props into this attribute*/
  void *pobj,      /*pointer to a pbsnode struct     */
  int actmode)     /*action mode; "NEW" or "ALTER"   */

  {

  struct pbsnode *pnode = (struct pbsnode *)pobj;
  short  old_np;
  short  new_np;

  switch (actmode)
    {

    case ATR_ACTION_NEW:
      new->at_val.at_long = pnode->nd_nsn;
      break;

    case ATR_ACTION_ALTER:
      old_np = pnode->nd_nsn;
      new_np = (short)new->at_val.at_long;

      /* a value that changes when narrowed would silently turn into a
       * different count, so it is rejected together with non-positive
       * ones */
      if ((new_np <= 0) ||
          ((long)new_np != new->at_val.at_long))
        return PBSE_BADATVAL;

      while (new_np != old_np)
        {

        if (new_np < old_np)
          {
          delete_a_subnode(pnode);
          old_np--;
          }
        else
          {
          if (create_subnode(pnode) == NULL)
            {
            /* record what was actually created before giving up */
            pnode->nd_nsn = old_np;

            return PBSE_SYSTEM;
            }

          old_np++;
          }
        }

      /* delete_a_subnode() does not adjust nd_nsn, so set it here */
      pnode->nd_nsn = old_np;

      break;
    }

  return 0;
  } /* END node_np_action */


/*
 * node_mom_port_action - action routine for node's port attribute
 *
 * ATR_ACTION_NEW copies the node's nd_mom_port into the attribute,
 * ATR_ACTION_ALTER copies the attribute's value into nd_mom_port.
 *
 * Returns 0 for those two modes and PBSE_INTERNAL for any other.
 */

int node_mom_port_action(

    attribute *new, /*derive props into this attribute*/
    void *pobj, /*pointer to a pbsnode struct     */
    int actmode) /*action mode; "NEW" or "ALTER"   */

  {
  struct pbsnode *target = (struct pbsnode *)pobj;

  if (actmode == ATR_ACTION_NEW)
    {
    /* node -> attribute */
    new->at_val.at_long = target->nd_mom_port;

    return 0;
    }

  if (actmode == ATR_ACTION_ALTER)
    {
    /* attribute -> node */
    target->nd_mom_port = new->at_val.at_long;

    return 0;
    }

  return PBSE_INTERNAL;
  }

/*
 * node_mom_rm_port_action - action routine for node's rm port attribute
 *
 * ATR_ACTION_NEW copies the node's nd_mom_rm_port into the attribute,
 * ATR_ACTION_ALTER copies the attribute's value into nd_mom_rm_port.
 *
 * Returns 0 for those two modes and PBSE_INTERNAL for any other.
 *
 * Defined with a prototype-style parameter list, like the other action
 * routines, instead of the obsolescent identifier-list form.
 */

int node_mom_rm_port_action(

    attribute *new, /*derive props into this attribute*/
    void *pobj, /*pointer to a pbsnode struct     */
    int actmode) /*action mode; "NEW" or "ALTER"   */

  {

  struct pbsnode *pnode = (struct pbsnode *)pobj;
  int rc = 0;

  switch (actmode)
    {

    case ATR_ACTION_NEW:
      new->at_val.at_long = pnode->nd_mom_rm_port;
      break;

    case ATR_ACTION_ALTER:
      pnode->nd_mom_rm_port = new->at_val.at_long;
      break;

    default:

      rc = PBSE_INTERNAL;
    }

  return rc;
  }



/*
 * node_gpus_action - action routine for node's gpus attribute
 *
 * ATR_ACTION_NEW   - report the node's current gpu subnode count in "new"
 * ATR_ACTION_ALTER - add or remove gpu subnodes until the node has as
 *                    many as "new" asks for
 *
 * Returns 0 on success, PBSE_BADATVAL when the requested count is not
 * positive or does not fit an int, PBSE_SYSTEM when a gpu subnode cannot
 * be created and PBSE_INTERNAL for any other action mode.
 */
int node_gpus_action(

  attribute *new,
  void      *pnode,
  int        actmode)

  {
  struct pbsnode *np = (struct pbsnode *)pnode;
  int             old_gp;
  int             new_gp;
  int             rc = 0;

  switch (actmode)
    {
    case ATR_ACTION_NEW:
      new->at_val.at_long = np->nd_ngpus;
      break;

    case ATR_ACTION_ALTER:
      old_gp = np->nd_ngpus;
      new_gp = (int)new->at_val.at_long;

      /* reject non-positive counts and values changed by the narrowing */
      if ((new_gp <= 0) ||
          ((long)new_gp != new->at_val.at_long))
        return PBSE_BADATVAL;

      while (new_gp != old_gp)
        {

        if (new_gp < old_gp)
          {
          delete_a_gpusubnode(np);
          old_gp--;
          }
        else
          {
          /* stop at the first failure instead of counting a gpu subnode
           * that was never created */
          if (create_a_gpusubnode(np) != PBSE_NONE)
            return(PBSE_SYSTEM);

          old_gp++;
          }
        }

      break;

    default:
      rc = PBSE_INTERNAL;
    }

  return(rc);
  } /* END node_gpus_action() */




/*
 * node_numa_action - action routine for the node board count
 *
 * ATR_ACTION_NEW copies the node's num_node_boards into the attribute,
 * ATR_ACTION_ALTER copies the attribute's value into num_node_boards.
 *
 * Returns 0 for those two modes and PBSE_INTERNAL for any other.
 */
int node_numa_action(

  attribute *new,           /*derive status into this attribute*/
  void      *pnode,         /*pointer to a pbsnode struct     */
  int        actmode)       /*action mode; "NEW" or "ALTER"   */

  {
  struct pbsnode *target = (struct pbsnode *)pnode;

  if (actmode == ATR_ACTION_NEW)
    {
    /* node -> attribute */
    new->at_val.at_long = target->num_node_boards;

    return(0);
    }

  if (actmode == ATR_ACTION_ALTER)
    {
    /* attribute -> node */
    target->num_node_boards = new->at_val.at_long;

    return(0);
    }

  return(PBSE_INTERNAL);
  } /* END node_numa_action */




/*
 * numa_str_action - action routine for the node's numa_str string
 *
 * ATR_ACTION_NEW   - the attribute receives a private copy of the node's
 *                    numa_str (NULL when the node has none)
 * ATR_ACTION_ALTER - the node's numa_str becomes a private copy of the
 *                    attribute's string (NULL when the attribute has none)
 *
 * Returns 0 on success, PBSE_SYSTEM when the copy cannot be allocated
 * (the destination is NULL in that case) and PBSE_INTERNAL for any other
 * action mode.
 *
 * NOTE(review): the pointer previously held by the destination is
 * overwritten without being freed; confirm who owns it before adding a
 * free() here.
 */
int numa_str_action(

  attribute *new,           /*derive status into this attribute*/
  void      *pnode,         /*pointer to a pbsnode struct     */
  int        actmode)       /*action mode; "NEW" or "ALTER"   */

  {
  struct pbsnode *np = (struct pbsnode *)pnode;
  const char     *from;
  char           *copy = NULL;
  size_t          nbytes;

  if ((actmode != ATR_ACTION_NEW) && (actmode != ATR_ACTION_ALTER))
    return(PBSE_INTERNAL);

  /* NEW reads from the node, ALTER reads from the attribute */
  from = (actmode == ATR_ACTION_NEW) ? np->numa_str : new->at_val.at_str;

  if (from != NULL)
    {
    nbytes = strlen(from) + 1;
    copy = (char *)malloc(nbytes);

    if (copy != NULL)
      memcpy(copy, from, nbytes);
    }

  /* store the result (possibly NULL) on the opposite side */
  if (actmode == ATR_ACTION_NEW)
    new->at_val.at_str = copy;
  else
    np->numa_str = copy;

  if ((from != NULL) && (copy == NULL))
    return(PBSE_SYSTEM);

  return(0);
  } /* END numa_str_action() */




/*
 * gpu_str_action - action routine for the node's gpu_str string
 *
 * ATR_ACTION_NEW   - the attribute receives a private copy of the node's
 *                    gpu_str (NULL when the node has none)
 * ATR_ACTION_ALTER - the node's gpu_str becomes a private copy of the
 *                    attribute's string (NULL when the attribute has none)
 *
 * Returns PBSE_NONE on success, PBSE_SYSTEM when the copy cannot be
 * allocated (the destination is NULL in that case) and PBSE_INTERNAL for
 * any other action mode.
 *
 * NOTE(review): the pointer previously held by the destination is
 * overwritten without being freed; confirm who owns it before adding a
 * free() here.
 */
int gpu_str_action(

  attribute *new,
  void      *pnode,
  int        actmode)

  {
  struct pbsnode *np = (struct pbsnode *)pnode;
  int             is_new = (actmode == ATR_ACTION_NEW);
  const char     *source;
  char           *dup = NULL;
  size_t          size;

  if (!is_new && (actmode != ATR_ACTION_ALTER))
    return(PBSE_INTERNAL);

  /* pick the side that is being read */
  if (is_new)
    source = np->gpu_str;
  else
    source = new->at_val.at_str;

  if (source != NULL)
    {
    size = strlen(source) + 1;
    dup = (char *)malloc(size);

    if (dup != NULL)
      memcpy(dup, source, size);
    }

  /* hand the copy (or NULL) to the side that is being written */
  if (is_new)
    new->at_val.at_str = dup;
  else
    np->gpu_str = dup;

  if ((source != NULL) && (dup == NULL))
    return(PBSE_SYSTEM);

  return(PBSE_NONE);
  } /* END gpu_str_action() */




/* create_partial_pbs_node - similar to create_pbs_node except there will
   only be a name for the new node and no attributes or properties

   The node is created as a cluster node with the single address "addr",
   gets its first subnode, and is registered in allnodes and in ipaddrs.

   nodename - I: name of the node (copied)
   addr     - I: the node's address
   perms    - I: permissions passed on to mgr_set_node_attr()

   Returns PBSE_NONE on success, PBSE_SYSTEM on allocation failure,
   otherwise the error of initialize_pbsnode() or mgr_set_node_attr(). */

int create_partial_pbs_node(

  char *nodename,
  unsigned long addr,
  int perms)

  {
  int              ntype; /* node type; time-shared, not */
  int              rc;
  int              bad = 0;
  svrattrl        *plist = NULL;
  struct pbsnode  *pnode = NULL;
  u_long          *pul = NULL;
  char            *pname = NULL;

  pnode = (struct pbsnode *)calloc(1, sizeof(struct pbsnode));

  if (pnode == NULL)
    {
    return(PBSE_SYSTEM);
    }

  ntype = NTYPE_CLUSTER;

  /* two slots: the address and the 0 that terminates the array (calloc
   * already zeroes both) */
  pul = calloc(2, sizeof(u_long));
  if (!pul)
    {
    free(pnode);
    return(PBSE_SYSTEM);
    }

  *pul = addr;

  pname = strdup(nodename);

  if (pname == NULL)
    {
    free(pul);
    free(pnode);

    return(PBSE_SYSTEM);
    }

  if ((rc = initialize_pbsnode(pnode, pname, pul, ntype)) != PBSE_NONE)
    {
    free(pul);
    free(pname);
    free(pnode);

    return(rc);
    }

  /* create and initialize the first subnode to go with the parent node */

  if (create_subnode(pnode) == NULL)
    {
    free(pul);
    free(pname);
    free(pnode->nd_mutex);
    free(pnode);

    return(PBSE_SYSTEM);
    }

  rc = mgr_set_node_attr(
         pnode,
         node_attr_def,
         ND_ATR_LAST,
         plist,
         perms,
         &bad,
         (void *)pnode,
         ATR_ACTION_ALTER);

  if (rc != 0)
    {
    lock_node(pnode, __func__, NULL, LOGLEVEL);
    effective_node_delete(pnode);

    return(rc);
    }

  insert_node(&allnodes,pnode);

  /* AVL_insert() hands back the tree root, which has to be stored just
   * as create_pbs_node() does; dropping it can lose the insertion */
  ipaddrs = AVL_insert(addr, pnode->nd_mom_port, pnode, ipaddrs);

  svr_totnodes++;
  recompute_ntype_cnts();

  return(PBSE_NONE);     /*create completely successful*/
  } /* END create_partial_pbs_node */





/*
 * Allocates a node iterator with both of its indices set to -1.
 *
 * @return a pointer to an initialized node iterator, or NULL when the
 *         allocation fails
 */
node_iterator *get_node_iterator()

  {
  node_iterator *fresh;

  fresh = (node_iterator *)calloc(1, sizeof(node_iterator));

  if (fresh == NULL)
    return(NULL);

  fresh->node_index = -1;
  fresh->numa_index = -1;

  return(fresh);
  } /* END get_node_iterator() */





/*
 * puts an already allocated node iterator back into its initial state
 * (both indices -1); a NULL iterator is ignored
 */
void reinitialize_node_iterator(

  node_iterator *iter)

  {
  if (iter == NULL)
    return;

  iter->numa_index = -1;
  iter->node_index = -1;
  } /* END reinitialize_node_iterator() */




/*
 * Advances iter->numa_index by one and looks that index up, together with
 * the parent's nd_mom_port, in the parent's node_boards tree.
 *
 * The parent is always unlocked by this call. If a board is found, it is
 * locked before being returned.
 *
 * @param iter  - iterator whose numa_index is advanced
 * @param pnode - the parent node; unlocked here (presumably locked by the
 *                caller on entry - verify against callers)
 * @return the board that was found (locked), or NULL if AVL_find() found none
 */
struct pbsnode *get_my_next_node_board(

  node_iterator  *iter,
  struct pbsnode *pnode)

  {
  struct pbsnode *board;

  ++iter->numa_index;
  board = AVL_find(iter->numa_index, pnode->nd_mom_port, pnode->node_boards);

  /* release the parent first, only then take the board's lock */
  unlock_node(pnode, __func__, "pnode", LOGLEVEL);

  if (board == NULL)
    return(NULL);

  lock_node(board, __func__, "numa", LOGLEVEL);

  return(board);
  } /* END get_my_next_node_board() */




/*
 * Steps through the nodes stored in an all_nodes container, descending into
 * a node's node boards (numa subnodes) whenever num_node_boards > 0.
 *
 * Call with current == NULL to start; on later calls pass the node returned
 * by the previous call. The iterator carries the position: node_index is the
 * cursor handed to next_thing(), numa_index is the board index within the
 * current parent (-1 when not inside a parent's boards).
 *
 * Locking as done in this function: the node passed in as current is
 * unlocked here (directly, or via its numa parent), and the node returned
 * has been locked here or in get_my_next_node_board().
 *
 * NOTE(review): on the first call the new node is locked while
 * allnodes_mutex is still held, on later calls it is locked after the mutex
 * has been released - confirm that this difference is intentional.
 *
 * @param an      - the container being iterated
 * @param current - the node returned by the previous call, or NULL
 * @param iter    - iteration state, updated by this call
 * @return the next node, from 0->end, accounting for numa nodes; NULL when
 *         next_thing() / get_my_next_node_board() yield nothing
 */
struct pbsnode *next_node(

  all_nodes      *an,
  struct pbsnode *current,
  node_iterator  *iter)

  {
  struct pbsnode *next;
  struct pbsnode *tmp;

  if (current == NULL)
    {
    pthread_mutex_lock(an->allnodes_mutex);

    /* the first call to next_node */
    next = next_thing(an->ra,&iter->node_index);
    if (next != NULL)
      lock_node(next, "next_node", "next != NULL", LOGLEVEL);

    pthread_mutex_unlock(an->allnodes_mutex);

    if (next != NULL)
      {
      /* if I have node_boards, look at those and not me */
      if (next->num_node_boards > 0)
        {
        /* unlocks the parent and returns its next board, locked (or NULL) */
        next = get_my_next_node_board(iter,next);
        }
      }
    } /* END first iteration */
  else
    {
    /* if current is a numa subnode, go back to the parent */
    if (iter->numa_index >= 0)
      {
      /* swap the lock on the board for the lock on its parent */
      tmp = current->numa_parent;
      unlock_node(current, "next_node", "current == NULL && numa_index > 0", LOGLEVEL);
      lock_node(tmp, "next_node", "tmp && numa_index > 0", LOGLEVEL);
      current = tmp;
      }

    /* move to the next host or get my next node board? */
    if (iter->numa_index + 1 >= current->num_node_boards)
      {
      /* no boards left under current (or it has none at all) */

      /* reset the numa_index to -1 */
      iter->numa_index = -1;

      /* go to the next node in all nodes */
      unlock_node(current, "next_node", "next == NULL && numa_index+1", LOGLEVEL);
      pthread_mutex_lock(an->allnodes_mutex);

      next = next_thing(an->ra, &iter->node_index);

      pthread_mutex_unlock(an->allnodes_mutex);

      if (next != NULL)
        {
        lock_node(next, "next_node", "next != NULL && numa_index+1", LOGLEVEL);

        if (next->num_node_boards > 0)
          {
          /* descend into the new parent's first board */
          next = get_my_next_node_board(iter, next);
          }
        }
      }
    else
      {
      /* still boards left under the same parent */
      next = get_my_next_node_board(iter, current);
      }
    } /* END all other iterations */

  return(next);
  } /* END next_node() */




/*
 * Prepares an all_nodes container for use: creates its resizable array and
 * its hash, then allocates and initializes the mutex used around them.
 *
 * If the mutex cannot be allocated, the failure is logged and
 * an->allnodes_mutex is left NULL instead of being handed to
 * pthread_mutex_init().
 *
 * @param an - the container to initialize
 */
void initialize_all_nodes_array(

  all_nodes *an)

  {
  an->ra = initialize_resizable_array(INITIAL_NODE_SIZE);
  an->ht = create_hash(INITIAL_HASH_SIZE);

  an->allnodes_mutex = calloc(1, sizeof(pthread_mutex_t));

  if (an->allnodes_mutex == NULL)
    {
    /* never pass a NULL mutex to pthread_mutex_init() */
    log_err(ENOMEM, __func__, "Cannot allocate the allnodes mutex");

    return;
    }

  pthread_mutex_init(an->allnodes_mutex,NULL);
  } /* END initialize_all_nodes_array() */




/*
 * Adds a node to the container while holding allnodes_mutex. The node is
 * appended to the array with insert_thing(), and the value insert_thing()
 * returns is recorded in the hash under the node's name.
 *
 * @param an    - the container to insert into
 * @param pnode - the node to be inserted
 * @return PBSE_NONE on success, ENOMEM (after logging) if insert_thing()
 *         returned -1
 */

int insert_node(

  all_nodes      *an,    /* M */
  struct pbsnode *pnode) /* I */

  {
  int slot;
  int status = PBSE_NONE;

  pthread_mutex_lock(an->allnodes_mutex);

  slot = insert_thing(an->ra,pnode);

  if (slot != -1)
    {
    add_hash(an->ht,slot,pnode->nd_name);
    }
  else
    {
    status = ENOMEM;
    log_err(status,__func__,"No memory to resize the array...SYSTEM FAILURE");
    }

  pthread_mutex_unlock(an->allnodes_mutex);

  return(status);
  } /* END insert_node() */




/*
 * Takes a node out of the container's array.
 *
 * allnodes_mutex is first tried without blocking. If it is busy, the node's
 * own lock is released, the mutex is waited for, and the node lock is taken
 * again before the removal.
 *
 * @param an    - the container to remove from
 * @param pnode - the node to remove
 * @return the result of remove_thing(); PBSE_NONE is expected when the node
 *         was removed
 */

int remove_node(

  all_nodes      *an,
  struct pbsnode *pnode)

  {
  int removed;

  if (pthread_mutex_trylock(an->allnodes_mutex) != 0)
    {
    /* back off: node lock -> container mutex -> node lock */
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    pthread_mutex_lock(an->allnodes_mutex);
    lock_node(pnode, __func__, NULL, LOGLEVEL);
    }

  removed = remove_thing(an->ra,pnode);

  pthread_mutex_unlock(an->allnodes_mutex);

  return(removed);
  } /* END remove_node() */




/*
 * Fetches the next entry from the container's node array, advancing *iter.
 *
 * @param an   - the container being iterated
 * @param iter - iteration cursor, passed through to next_thing()
 * @param held - a node the caller already has, or NULL (presumably locked by
 *               the caller - verify against callers)
 * @return the entry produced by next_thing(), or NULL if there is none
 *
 * Locking as visible in this function:
 *  - allnodes_mutex obtained immediately, or held == NULL: the returned node
 *    is locked here unless it is the very node passed in as held.
 *  - allnodes_mutex busy and held != NULL: held is unlocked and its name is
 *    saved before waiting for the mutex. The returned node is then NOT locked
 *    here; if it differs from held, held is looked up again by name.
 *
 * NOTE(review): in the contended path the returned node is never passed to
 * lock_node(), and when it equals held nothing re-acquires the lock that was
 * dropped - confirm what callers expect.
 * NOTE(review): the result of find_nodebyname() is stored only in the local
 * parameter held, so the caller cannot see whether the lookup failed.
 * NOTE(review): the strdup() result is not checked; if it returns NULL the
 * function behaves as in the uncontended path although held was unlocked.
 */
struct pbsnode *next_host(

  all_nodes      *an,    /* I */
  int            *iter,  /* M */
  struct pbsnode *held)  /* I */

  {
  struct pbsnode *pnode;
  char           *name = NULL;   /* non-NULL only if held was unlocked below */

  if (pthread_mutex_trylock(an->allnodes_mutex))
    {
    /* mutex is busy: let go of the held node before blocking on it */
    if (held != NULL)
      {
      name = strdup(held->nd_name);
      unlock_node(held, __func__, NULL, 0);
      }
    pthread_mutex_lock(an->allnodes_mutex);
    }

  pnode = next_thing(an->ra,iter);
  /* lock the new node only if it is not held and held was never released */
  if ((pnode != NULL) &&
      ((pnode != held) && 
       (name == NULL)))
    lock_node(pnode, __func__, NULL, LOGLEVEL);

  pthread_mutex_unlock(an->allnodes_mutex);

  /* held was released above: look it up again by the saved name */
  if ((held != pnode) &&
      (name != NULL))
    held = find_nodebyname(name);

  if (name != NULL)
    free(name);

  return(pnode);
  } /* END next_host() */




/*
 * Thread task that sends the mom hierarchy to the node named in a hello_info.
 *
 * The node is looked up by name to read its nd_mom_rm_port, then unlocked
 * before any network I/O is done. If the send fails and fewer than 3 retries
 * have been made, the hello_info is stamped with the current time and put
 * into the failures container so that it can be picked up again.
 *
 * Ownership: this function takes ownership of vp. Unless the hello_info was
 * re-queued into failures, both hi->name and hi are freed before returning -
 * including the case where the retries are used up, which previously leaked.
 *
 * @param vp - a hello_info * (as void * for the thread interface)
 * @return always NULL
 */
void *send_hierarchy_threadtask(

  void *vp)

  {
  hello_info     *hi = (hello_info *)vp;
  struct pbsnode *pnode = find_nodebyname(hi->name);
  char            log_buf[LOCAL_LOG_BUF_SIZE];
  unsigned short  port;
  int             requeued = 0;

  if (pnode != NULL)
    {
    /* copy the port and release the node before talking to the network */
    port = pnode->nd_mom_rm_port;
    unlock_node(pnode, __func__, NULL, 0);

    if (send_hierarchy(hi->name, port) != PBSE_NONE)
      {
      if (hi->num_retries < 3)
        {
        hi->num_retries++;
        hi->last_retry = time(NULL);

        /* NOTE(review): add_hello_info() returns either insert_thing()'s
         * value or ENOMEM, which cannot be told apart reliably here, so the
         * result is not checked - confirm and tighten if possible */
        add_hello_info(&failures, hi);
        requeued = 1;
        }
      }
    else if (LOGLEVEL >= 3)
      {
      snprintf(log_buf, sizeof(log_buf),
        "Successfully sent hierarchy to %s", hi->name);
      log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, __func__, log_buf);
      }
    }

  /* anything not handed over to the failures container is ours to free */
  if (!requeued)
    {
    free(hi->name);
    free(hi);
    }

  return(NULL);
  } /* END send_hierarchy_threadtask() */




/*
 * Connects to the named host and sends it the mom hierarchy stored in
 * hierarchy_holder.
 *
 * The name is resolved with getaddrinfo() and the first AF_INET result is
 * used (the lookup is made without hints, so the list may contain other
 * address families). The message consists of IS_PROTOCOL, IS_PROTOCOL_VER
 * and IS_CLUSTER_ADDRS, then every NUL-terminated string packed back to back
 * in hierarchy_holder->str (the list ends at an empty string), then
 * IS_EOL_MESSAGE. The terminator is written and the stream flushed only if
 * everything before it was written successfully, so a truncated list is
 * never terminated as if it were complete. The connection is closed before
 * returning.
 *
 * @param name - host name to send to
 * @param port - port to connect to, in host byte order
 * @return PBSE_BADHOST if no IPv4 address could be found for name, -1 if the
 *         connection failed, otherwise the status of the last DIS write
 *         (DIS_SUCCESS when the whole message was written)
 */
int send_hierarchy(

  char           *name,
  unsigned short  port)

  {
  char                log_buf[LOCAL_LOG_BUF_SIZE];
  char               *string;
  int                 ret;
  int                 sock;
  struct addrinfo    *addr_info = NULL;
  struct addrinfo    *cur;
  struct sockaddr_in  sa;

  if (getaddrinfo(name, NULL, NULL, &addr_info) != 0)
    {
    snprintf(log_buf, sizeof(log_buf),
      "Can't get address information for %s", name);
    log_err(PBSE_BADHOST, __func__, log_buf);

    return(PBSE_BADHOST);
    }

  /* only an AF_INET entry may be read through a struct sockaddr_in */
  for (cur = addr_info; cur != NULL; cur = cur->ai_next)
    {
    if ((cur->ai_family == AF_INET) &&
        (cur->ai_addr != NULL))
      break;
    }

  if (cur == NULL)
    {
    freeaddrinfo(addr_info);

    snprintf(log_buf, sizeof(log_buf),
      "No IPv4 address found for %s", name);
    log_err(PBSE_BADHOST, __func__, log_buf);

    return(PBSE_BADHOST);
    }

  memset(&sa, 0, sizeof(sa));
  sa.sin_addr = ((struct sockaddr_in *)cur->ai_addr)->sin_addr;
  sa.sin_family = AF_INET;
  sa.sin_port = htons(port);
  freeaddrinfo(addr_info);

  /* for now we'll only try once as this is going to be tried once each time in the loop */
  sock = tcp_connect_sockaddr((struct sockaddr *)&sa, sizeof(sa));

  if (sock < 0)
    {
    /* could not connect */
    snprintf(log_buf, sizeof(log_buf),
      "Could not send mom hierarchy to host %s:%d",
      name, port);
    log_err(-1, __func__, log_buf);

    return(-1);
    }

  /* NOTE(review): the address is converted with ntohl() but sa.sin_port is
   * still in network byte order here - confirm which order add_conn() expects */
  add_conn(sock, ToServerDIS, ntohl(sa.sin_addr.s_addr), sa.sin_port, PBS_SOCK_INET, NULL);

  DIS_tcp_setup(sock);

  /* write the protocol, version and command */
  if ((ret = diswsi(sock, IS_PROTOCOL)) == DIS_SUCCESS)
    {
    if ((ret = diswsi(sock, IS_PROTOCOL_VER)) == DIS_SUCCESS)
      {
      ret = diswsi(sock, IS_CLUSTER_ADDRS);
      }
    }

  if (ret == DIS_SUCCESS)
    {
    /* the hierarchy is a run of strings laid end to end; stop at the empty one */
    for (string = hierarchy_holder->str; string != NULL && *string != '\0'; string += strlen(string) + 1)
      {
      if ((ret = diswst(sock, string)) != DIS_SUCCESS)
        break;
      }
    }

  /* terminate the list only if all of it went out */
  if (ret == DIS_SUCCESS)
    ret = diswst(sock, IS_EOL_MESSAGE);

  if (ret == DIS_SUCCESS)
    {
    /* NOTE(review): the result of DIS_tcp_wflush() is not checked - confirm
     * its failure convention before propagating it */
    DIS_tcp_wflush(sock);
    }
  else
    {
    if (ret > 0)
      {
      snprintf(log_buf, sizeof(log_buf),
        "Could not send mom hierarchy to host %s - %s",
        name, dis_emsg[ret]);
      }
    else
      {
      snprintf(log_buf, sizeof(log_buf),
        "Unknown error when sending mom hierarchy to host %s",
        name);
      }

    log_err(-1, __func__, log_buf);
    }

  close_conn(sock, FALSE);

  return(ret);
  } /* END send_hierarchy() */




/*
 * Prepares a hello container for use: creates its resizable array, then
 * allocates and initializes the mutex used around it.
 *
 * If the mutex cannot be allocated, the failure is logged and
 * hc->hello_mutex is left NULL instead of being handed to
 * pthread_mutex_init().
 *
 * @param hc - the container to initialize
 */
void initialize_hello_container(

  hello_container *hc)

  {
  hc->ra = initialize_resizable_array(INITIAL_NODE_SIZE);

  hc->hello_mutex = calloc(1, sizeof(pthread_mutex_t));

  if (hc->hello_mutex == NULL)
    {
    /* never pass a NULL mutex to pthread_mutex_init() */
    log_err(ENOMEM, __func__, "Cannot allocate the hello container mutex");

    return;
    }

  pthread_mutex_init(hc->hello_mutex, NULL);
  } /* END initialize_hello_container() */




/*
 * Reports whether the hello container holds an entry for the given node.
 *
 * The container stores hello_info entries (see add_hello()), so the lookup
 * compares node_name against each entry's name, the same way remove_hello()
 * does, rather than handing the name string itself to a presence check on
 * the array. hello_mutex is held for the duration of the search.
 *
 * @param hc        - the container to search
 * @param node_name - the node name to look for
 * @return non-zero if an entry with that name is present, 0 otherwise
 */
int needs_hello(

  hello_container *hc,
  char            *node_name)

  {
  int         needs = 0;
  int         iter = -1;
  hello_info *hi;

  pthread_mutex_lock(hc->hello_mutex);

  while ((hi = (hello_info *)next_thing(hc->ra, &iter)) != NULL)
    {
    if ((hi->name != NULL) &&
        (strcmp(hi->name, node_name) == 0))
      {
      needs = 1;

      break;
      }
    }

  pthread_mutex_unlock(hc->hello_mutex);

  return(needs);
  } /* END needs_hello */




/*
 * Wraps node_name in a new hello_info and inserts it into the container
 * while holding hello_mutex.
 *
 * Ownership: node_name is taken over by this call. On success the new entry
 * keeps the pointer; on any failure (allocation of the entry or insertion
 * into the array) node_name is freed here.
 *
 * @param hc        - the container to insert into
 * @param node_name - heap-allocated node name, handed over to the container
 * @return ENOMEM on failure, otherwise the value returned by insert_thing()
 *         (NOTE(review): that looks like a slot index rather than PBSE_NONE -
 *         confirm what callers expect)
 */
int add_hello(

  hello_container *hc,
  char            *node_name)

  {
  int         rc;
  hello_info *hi = calloc(1, sizeof(hello_info));

  if (hi == NULL)
    {
    /* same contract as the insert failure below: the name is released */
    free(node_name);

    return(ENOMEM);
    }

  hi->name = node_name;

  pthread_mutex_lock(hc->hello_mutex);

  if ((rc = insert_thing(hc->ra, hi)) == -1)
    {
    rc = ENOMEM;
    free(hi->name);
    free(hi);
    }

  pthread_mutex_unlock(hc->hello_mutex);

  return(rc);
  } /* END add_hello() */




/*
 * Inserts an existing hello_info into the container while holding
 * hello_mutex. Nothing is freed here, whatever the outcome.
 *
 * @param hc - the container to insert into
 * @param hi - the entry to insert
 * @return ENOMEM if insert_thing() returned -1, otherwise the value
 *         insert_thing() returned
 */
int add_hello_info(

  hello_container *hc,
  hello_info      *hi)

  {
  int result;

  pthread_mutex_lock(hc->hello_mutex);

  result = insert_thing(hc->ra, hi);

  if (result == -1)
    {
    result = ENOMEM;
    }

  pthread_mutex_unlock(hc->hello_mutex);

  return(result);
  } /* END add_hello_info() */




/*
 * Pops the first entry of the hello container, but only when it is due:
 * the entry is taken with pop_thing() if the time elapsed since its
 * last_retry exceeds HELLO_RESEND_WAIT_TIME. hello_mutex is held throughout.
 *
 * @param hc - the container to pop from
 * @return the popped entry, or NULL if the container is empty or its first
 *         entry is not due yet
 */
hello_info *pop_hello(

  hello_container *hc)

  {
  hello_info *popped = NULL;
  hello_info *head;
  int         first;

  pthread_mutex_lock(hc->hello_mutex);

  /* the always-empty slot's next link names the first occupied slot */
  first = hc->ra->slots[ALWAYS_EMPTY_INDEX].next;

  if (first != ALWAYS_EMPTY_INDEX)
    {
    /* peek at the head before deciding whether to take it */
    head = (hello_info *)hc->ra->slots[first].item;

    if (time(NULL) - head->last_retry > HELLO_RESEND_WAIT_TIME)
      popped = (hello_info *)pop_thing(hc->ra);
    }

  pthread_mutex_unlock(hc->hello_mutex);

  return(popped);
  } /* END pop_hello() */




/*
 * Removes the first entry whose name equals node_name from the hello
 * container and frees it. hello_mutex is held throughout.
 *
 * prev_index tracks the slot of the entry currently being examined. It is
 * refreshed from iter after every non-matching entry; without that, a match
 * anywhere in the list removed the first slot instead of the matching one.
 * (Assumes next_thing() leaves iter at the slot it will return on the next
 * call - TODO confirm against the resizable array implementation.)
 *
 * Ownership: the container owns its entries, so a successfully removed
 * entry and its name are freed here. If the removal reports an error the
 * entry is left untouched.
 *
 * @param hc        - the container to remove from
 * @param node_name - name of the node whose entry should be removed
 * @return PBSE_NONE if nothing matched, otherwise the result of
 *         remove_thing_from_index()
 */
int remove_hello(

  hello_container *hc,
  char            *node_name)

  {
  int         rc = PBSE_NONE;
  int         iter = -1;
  int         prev_index = -1;
  hello_info *hi;

  pthread_mutex_lock(hc->hello_mutex);

  while ((hi = (hello_info *)next_thing(hc->ra, &iter)) != NULL)
    {
    if (!strcmp(hi->name, node_name))
      {
      /* a match on the very first entry: its slot is the head of the list */
      if (prev_index == -1)
        prev_index = hc->ra->slots[ALWAYS_EMPTY_INDEX].next;

      rc = remove_thing_from_index(hc->ra, prev_index);

      /* only release the entry once it is really out of the array */
      if (rc == PBSE_NONE)
        {
        free(hi->name);
        free(hi);
        }

      break;
      }

    /* remember the slot of the entry the next pass will look at */
    prev_index = iter;
    }

  pthread_mutex_unlock(hc->hello_mutex);

  return(rc);
  } /* END remove_hello() */

