#!/usr/bin/python
#   Copyright 2008 Red Hat, Inc.
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

import os
import sys
import logging
import signal
import time
import socket
import threading
import getopt
import Queue
import random
import tempfile
import shutil
import stat
from qmf.console import Session, Console
from condorutils.log import *
from condorutils.readconfig import *
from wallabyclient import WallabyHelpers
from wallabyclient.exceptions import WallabyUnsupportedAPI
try:
   import pwd
except:
   pass


class Timer (threading.Thread):
   """Daemon thread that calls a function repeatedly at a fixed interval.

   The thread waits `interval` seconds, calls
   `function(*args, **kwargs)` and repeats until stop() is called.
   """

   def __init__ (self, interval, function, name=None, args=None, kwargs=None):
      """Prepare the thread without starting it.

      interval -- seconds to wait between two calls
      function -- callable invoked after each interval
      name     -- optional thread name
      args     -- positional arguments for function (default: none)
      kwargs   -- keyword arguments for function (default: none)
      """
      threading.Thread.__init__(self)
      if name is not None:
         self.name = name
      self.interval = interval
      self.function = function
      # Create the containers per instance.  Mutable default arguments are
      # evaluated once and would be shared by every Timer
      if args is None:
         args = []
      if kwargs is None:
         kwargs = {}
      self.args = args
      self.kwargs = kwargs
      self.daemon = True
      self.finished = threading.Event()


   def stop (self):
      """Ask the thread to finish and wait until it has exited."""
      self.finished.set()
      # A thread cannot join itself (RuntimeError), so only wait when called
      # from another thread
      if threading.current_thread() is not self:
         self.join()


   def run (self):
      """Wait/call loop executed in the thread."""
      while not self.finished.is_set():
         # wait() returns early when stop() sets the event
         self.finished.wait(self.interval)
         if not self.finished.is_set():
            self.function(*self.args, **self.kwargs)


class EventConsole(Console):
   """Console callbacks that track the configuration store agent and queue
   the configuration versions announced for this node.
   """

   STORE_LABEL = 'com.redhat.grid.config:Store'

   def __init__(self):
      self.agent = None
      self.node_name = ''
      self.logger_name = ''


   def config(self, node_name, log_name):
      """Remember the node to filter events for and the logger to use."""
      self.logger_name = log_name
      self.node_name = node_name


   def brokerConnected(self, broker):
      message = 'Connected to broker "%s"' % broker.getFullUrl()
      log(logging.DEBUG, self.logger_name, message)


   def brokerDisconnected(self, broker):
      message = 'Lost connection to broker "%s"' % broker.getFullUrl()
      log(logging.DEBUG, self.logger_name, message)


   def newAgent(self, agent):
      """Keep a reference to the agent when it is the configuration store."""
      if agent.label != self.STORE_LABEL:
         return
      log(logging.DEBUG, self.logger_name, 'Established connection to the configuration store')
      self.agent = agent


   def delAgent(self, agent):
      """Forget the store agent; the loss is only logged while running."""
      global stop_running

      if agent.label != self.STORE_LABEL:
         return
      if stop_running == False:
         log(logging.DEBUG, self.logger_name, 'Lost connection to the configuration store')
      self.agent = None


   def event(self, broker, event):
      """Queue the version carried by a NodeUpdatedNotice that lists this
      node (or '*').  All other events are ignored.
      """
      global version_queue

      if event.getClassKey().getClassName() != 'NodeUpdatedNotice':
         return
      log(logging.DEBUG, self.logger_name, 'Received a NodeUpdatedNotice')

      try:
         args = event.getArguments()
         nodes = args['nodes']
      except:
         log(logging.ERROR, self.logger_name, 'Unable to determine affected nodes.  Discarding')
         return

      for_this_node = self.node_name in nodes or '*' in nodes
      if not for_this_node:
         return
      log(logging.DEBUG, self.logger_name, 'The event is for this node')

      try:
         event_ver = int(args['version'])
      except:
         log(logging.INFO, self.logger_name, 'Unable to determine configuration version in event.  Discarding')
         return

      # Non-blocking put; a full queue drops the event
      try:
         version_queue.put(event_ver, False)
      except Queue.Full:
         log(logging.INFO, self.logger_name, 'Configuration version queue full.  Discarding event for version "%d"' % event_ver)


   def get_store_agent(self):
      """Return the store agent, or None when no store agent is known."""
      return self.agent


class Service:
   """Connects to the QMF broker and installs the configuration the store
   holds for this node into the managed condor configuration file.
   """

   # Parameters delivered by the store that are never written to the managed
   # file.  Kept in lower case because names are lower-cased before the
   # comparison
   _IGNORED_PARAMS = ('wallaby_force_config_pull', 'wallaby_force_restart')

   def __init__(self):
      self.broker = None
      self.console = None
      self.session = None
      self.interval = 0
      self.logger_name = ''
      self.filename = ''
      self.node_name = ''
      self.timer = None
      self.managedfile = ''
      self.config_dir = ''
      self.override_dir = ''
      self.old_config = ''


   def init(self, node, name):
      """Create the QMF console/session and bind to the store events.

      node -- name of the node whose configuration is managed
      name -- logger name used for all messages
      """
      self.console = EventConsole()
      self.session = Session(console=self.console, manageConnections=True, rcvObjects=True, rcvHeartbeats=False, rcvEvents=True, userBindings=True)
      self.session.bindEvent('com.redhat.grid.config', 'NodeUpdatedNotice')
      self.session.bindAgent(label='com.redhat.grid.config:Store')
      self.logger_name = name
      self.node_name = node
      self.console.config(self.node_name, self.logger_name)


   def config(self, filename, user='', password=''):
      """Read the configd settings and (re)connect to the QMF broker.

      filename -- file to manage; '' means use LOCAL_CONFIG_FILE
      user, password -- optional broker credentials

      Returns True once the broker has been added to the session.  Returns
      False when init() has not been called, a required setting is missing
      or unusable, or the broker could not be added.
      """
      global stop_running

      if self.session is None:
         return False

      # Drop any existing broker before reconnecting
      if self.broker is not None:
         self.session.delBroker(self.broker)
         self.broker = None

      try:
         broker_ip = read_condor_config('CONFIGD', ['QMF_BROKER_HOST'])['qmf_broker_host']
      except ConfigError as error:
         # Broker host not defined, so fall back to the local host
         log(logging.WARNING, self.logger_name, '%s' % error.msg)
         log(logging.WARNING, self.logger_name, 'Using default (127.0.0.1)')
         broker_ip = '127.0.0.1'
      except Exception as e:
         log(logging.WARNING, self.logger_name, '%s' % e)
         log(logging.WARNING, self.logger_name, 'Using default (127.0.0.1)')
         broker_ip = '127.0.0.1'

      try:
         port = read_condor_config('CONFIGD', ['QMF_BROKER_PORT'])['qmf_broker_port']
      except ConfigError as error:
         log(logging.DEBUG, self.logger_name, '%s. Using default (5672)' % error.msg)
         port = 5672
      except Exception as e:
         log(logging.DEBUG, self.logger_name, '%s. Using default (5672)' % e)
         port = 5672

      try:
         broker_port = int(port)
      except:
         log(logging.DEBUG, self.logger_name, 'Invalid broker port.  Using default (5672)')
         broker_port = 5672

      try:
         val = read_condor_config('CONFIGD', ['QMF_BROKER_AUTH_MECHANISM'])
         broker_auth_methods = str(val['qmf_broker_auth_mechanism']).replace(',', ' ')
      except ConfigError as error:
         log(logging.DEBUG, self.logger_name, '%s. Using defaults' % error.msg)
         broker_auth_methods = 'ANONYMOUS PLAIN GSSAPI'
      except Exception as e:
         log(logging.DEBUG, self.logger_name, '%s. Using defaults' % e)
         broker_auth_methods = 'ANONYMOUS PLAIN GSSAPI'

      try:
         val = read_condor_config('CONFIGD', ['CHECK_INTERVAL'], permit_param_only = False)
         self.interval = int(val['check_interval'])
      except ConfigError as error:
         log(logging.INFO, self.logger_name, error.msg)
         self.interval = 0
      except Exception as e:
         log(logging.INFO, self.logger_name, e)
         self.interval = 0

      if self.interval <= 0:
         log(logging.INFO, self.logger_name, 'Invalid CHECK_INTERVAL. Periodic node check-in disabled')

      if filename == '':
         try:
            self.managedfile = read_condor_config('', ['LOCAL_CONFIG_FILE'])['local_config_file']
         except ConfigError as error:
            log(logging.ERROR, self.logger_name, '%s.  Unable to write configuration file' % error.msg)
            return False
         except Exception as e:
            log(logging.ERROR, self.logger_name, e)
            log(logging.ERROR, self.logger_name, 'Invalid value for LOCAL_CONFIG_FILE')
            return False
      else:
         self.managedfile = filename
      self.managedfile = os.path.normpath(self.managedfile)
      log(logging.DEBUG, self.logger_name, 'Writing configuration file to "%s"' % self.managedfile)

      # The configuration directory is required
      try:
         self.config_dir = read_condor_config('', ['LOCAL_CONFIG_DIR'])['local_config_dir']
         if os.access(self.config_dir, os.F_OK) != True or os.access(self.config_dir, os.R_OK) != True:
            log(logging.ERROR, self.logger_name, '"%s" either is not a directory or has incorrect permissions.  Exiting' % self.config_dir)
            return False
      except ConfigError as error:
         log(logging.ERROR, self.logger_name, '%s.  Unable to read configuration files.  Exiting' % error.msg)
         return False
      except Exception as e:
         log(logging.ERROR, self.logger_name, e)
         log(logging.ERROR, self.logger_name, 'Unable to read configuration files.  Exiting')
         return False

      # The override directory is optional; any problem simply disables it
      try:
         self.override_dir = read_condor_config('CONFIGD', ['OVERRIDE_DIR'], permit_param_only = False)['override_dir']
         if os.access(self.override_dir, os.F_OK) != True or os.access(self.override_dir, os.R_OK) != True:
            log(logging.WARNING, self.logger_name, '"%s" either is not a directory or has incorrect permissions.  Configuration overrides will not take affect' % self.override_dir)
            self.override_dir = ''
      except:
         self.override_dir = ''

      # broker_str is handed to the session; broker_log_str is the same URL
      # with the password masked so credentials never reach the log
      if user != '' and password != '':
         broker_str = '%s/%s@%s:%d' % (user, password, broker_ip, broker_port)
         broker_log_str = '%s/****@%s:%d' % (user, broker_ip, broker_port)
      elif user != '':
         broker_str = '%s@%s:%d' % (user, broker_ip, broker_port)
         broker_log_str = broker_str
      else:
         broker_str = '%s:%d' % (broker_ip, broker_port)
         broker_log_str = broker_str

      try:
         self.broker = self.session.addBroker('amqp://%s' % broker_str, mechanisms=broker_auth_methods)
      except:
         if not stop_running:
            log(logging.CRITICAL, self.logger_name, 'Unable to connect to broker "%s"' % broker_log_str)
         return False

      log(logging.DEBUG, self.logger_name, 'Connecting to broker "%s"' % broker_log_str)
      return True


   def get_store_obj(self):
      """Return the store object, or None when it cannot be obtained."""
      global stop_running

      if self.console is None:
         return None

      agent = self.console.get_store_agent()
      try:
         # agent is None while no store agent is known; the resulting
         # error is handled like any other failure to reach the store
         objs = agent.getObjects(_class='Store', _package='com.redhat.grid.config')
      except:
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Failed to contact configuration store')
         return None

      if not objs:
         # An empty result must not be indexed
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Configuration store returned no Store object')
         return None
      return objs[0]


   def get_node_obj(self):
      """Return the store's object for this node, or None on any failure."""
      global stop_running

      store = self.get_store_obj()
      if store is None:
         return None

      try:
         result = store.getNode(self.node_name)
      except:
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Exception retrieving node object')
         return None

      if result.status != 0:
         if not stop_running:
            log(logging.ERROR, self.logger_name, '(%d, %s): Store does not know about this node' % (result.status, result.text))
         return None

      try:
         agent = self.console.get_store_agent()
         objs = agent.getObjects(_objectId=result.outArgs['obj'])
      except:
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Unable to get node information object')
         return None

      if not objs:
         # An empty result must not be indexed
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Unable to get node information object')
         return None
      return objs[0]


   def clear_timers(self):
      """Forget the interval timer without stopping its thread."""
      self.timer = None


   def setup_timers(self):
      """Start the periodic version check unless it exists or is disabled."""
      global interval_thread_name

      # Setup the timer for checking configuration version
      if self.timer is None and self.interval > 0:
         self.timer = Timer(self.interval, self.check_config_ver, interval_thread_name)
         self.timer.start()


   def shutdown(self):
      """Stop the interval timer and remove the broker from the session."""
      if self.timer is not None:
         self.timer.stop()
         self.timer = None
      if self.broker is not None:
         self.session.delBroker(self.broker)
         self.broker = None


   def get_interval(self):
      """Return the configured check interval (0 or less means disabled)."""
      return self.interval


   def check_config_ver(self, ver=0, force=False):
      """Install configuration version `ver` if the node is not running it.

      ver   -- version to install; 0 means the node's last_updated_version
      force -- install even when the running version already matches

      Returns False when the node object or the configuration could not
      be retrieved, True otherwise.
      """
      log(logging.DEBUG, self.logger_name, 'Checking version of configuration')
      node_obj = self.get_node_obj()
      if node_obj is None:
         return False

      # If the passed version is 0, look at the last_updated_version
      if ver == 0:
         ver = int(node_obj.last_updated_version)

      # Install the new configuration (if needed).  Logging of problems is
      # handled by the get_config function
      if self.get_config(ver, force) == False:
         return False
      return True


   def process_version_q(self):
      """Drain the version queue and act on the last version pulled from it.

      Returns the queue size seen after the last successful get, plus one
      if the version was put back because installing it failed.
      """
      global version_queue

      # Pull all versions off the queue and only process the latest
      version = -1
      size = 0
      while version_queue.empty() == False:
         try:
            version = version_queue.get(False)
            size = version_queue.qsize()
         except Queue.Empty:
            size = 0
            break

      if version != -1:
         # There was a new version in the queue
         if self.check_config_ver(version) == False:
            if version_queue.empty() == True:
               # There hasn't been another version to look at, so put
               # this one back as there was an error installing it
               version_queue.put(version, False)
               size += 1
      return size


   def _config_lines(self, config):
      """Return the lines to write for the parameters in `config`.

      Ignored parameters are skipped and DC_DAEMON_LIST is emitted with
      the '=+' operator instead of '='.
      """
      lines = []
      for key in config:
         name = key.strip().lower()
         if name in self._IGNORED_PARAMS:
            continue
         if name == 'dc_daemon_list':
            lines.append('%s =+ %s\n' % (key, config[key]))
         else:
            lines.append('%s = %s\n' % (key, config[key]))
      return lines


   def _discard_temp_file(self, file_hdl, file_name):
      """Best-effort close and removal of a temporary file.

      Either argument may be None when that part does not exist (yet).
      """
      if file_hdl is not None:
         try:
            os.close(file_hdl)
         except OSError:
            pass
      if file_name is not None:
         try:
            os.remove(file_name)
         except OSError:
            pass


   def get_config(self, version=0, force=False):
      """Check in with the store and install configuration `version`.

      version -- version to retrieve; 0 or less requests the config
                 without naming a version
      force   -- install even if WALLABY_CONFIG_VERSION already matches

      Returns True when nothing had to be done or when the configuration
      was installed and the condor commands succeeded, False otherwise.
      """
      global replacing_file, stop_running, old_config_filename

      # Check in with the store
      log(logging.DEBUG, self.logger_name, 'Performing a checkin with the store')
      node_obj = self.get_node_obj()
      if node_obj is None:
         return False

      try:
         node_obj.checkin(_timeout=20)
      except Exception as error:
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Failed to check in with the store')
            log(logging.ERROR, self.logger_name, error)
         return False
      log(logging.DEBUG, self.logger_name, 'Checked in with the store')

      # Get the current WALLABY_CONFIG_VERSION.  If the system is running the
      # version as what was received, then there's no need to do anything
      (retval, running_version, err) = run_cmd('condor_config_val WALLABY_CONFIG_VERSION')
      try:
         running_version = int(running_version.strip())
      except:
         running_version = 0

      if version == running_version and force == False:
         log(logging.DEBUG, self.logger_name, 'The system is already running configuration version "%d"' % version)
         return True

      log(logging.INFO, self.logger_name, 'Retrieving configuration version "%d" from the store' % version)

      # Save the current configuration (minus the local config file/dir
      # settings).  send_condor_command() points CONDOR_CONFIG at this file
      (old_hdl, self.old_config) = tempfile.mkstemp('.tmp', old_config_filename, text=True)
      try:
         (retval, config_dump, err) = run_cmd('condor_config_val -dump')
         for line in config_dump.split('\n'):
            lowered = line.lower()
            if 'local_config_file' not in lowered and \
               'local_config_dir' not in lowered:
               os.write(old_hdl, '%s\n' % line)
      finally:
         os.close(old_hdl)

      # Retrieve the node's configuration
      try:
         if version <= 0:
            result = node_obj.getConfig({})
         else:
            result = node_obj.getConfig({'version':version})
      except Exception as error:
         # Something has gone away
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Exception when attempting to retrieve configuration version "%d" from the store' % version)
            log(logging.ERROR, self.logger_name, error)
         return False

      if result.status != 0:
         # Problem getting the configuration, so do nothing
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Failed to retrieve configuration "%d" from the store (%d, %s)' % (version, result.status, result.text))
         return False
      config = result.outArgs['config']

      tmp_hdl = None
      tmp_name = None
      try:
         (tmp_hdl, tmp_name) = tempfile.mkstemp('.tmp', 'condor_config.local', text=True)

         # Write the config from the store into the file
         for line in self._config_lines(config):
            os.write(tmp_hdl, line)

         # Add the wallaby group/feature information
         store = self.get_store_obj()
         if store is not None:
            features = WallabyHelpers.get_node_features(node_obj, self.session, store)
         else:
            features = []

         feature_str = ','.join(['%s' % feat for feat in features])
         os.write(tmp_hdl, 'WallabyFeatures = "%s"\n' % feature_str)

         group_str = ','.join(['%s' % group for group in node_obj.memberships])
         os.write(tmp_hdl, 'WallabyGroups = "%s"\n' % group_str)
         os.write(tmp_hdl, 'MASTER_ATTRS = $(MASTER_ATTRS), WallabyFeatures, WallabyGroups\n')
         os.write(tmp_hdl, 'STARTD_ATTRS = $(STARTD_ATTRS), WallabyFeatures, WallabyGroups\n')

         # Now append the configuration from the configd configuration
         # file
         cfg_name = os.path.normpath('%s/99configd.config' % self.config_dir)
         cfg_file = open(cfg_name, 'r')
         try:
            for line in cfg_file:
               os.write(tmp_hdl, line)
         finally:
            cfg_file.close()

         # Process any overridden parameters
         if self.override_dir != '':
            entries = os.listdir(self.override_dir)
            entries.sort()
            for entry in entries:
               fname = os.path.normpath('%s/%s' % (self.override_dir, entry))
               if os.access(fname, os.R_OK) != True:
                  log(logging.ERROR, self.logger_name, 'Unable to access "%s".  Ignoring override file' % fname)
                  continue

               # Open separately so a failed open never reaches close()
               try:
                  hdl = open(fname, 'r')
               except:
                  log(logging.ERROR, self.logger_name, 'Problem reading file "%s"' % fname)
                  continue

               try:
                  try:
                     os.write(tmp_hdl, '# Override from "%s"\n' % fname)
                     for line in hdl:
                        os.write(tmp_hdl, line)
                  except:
                     log(logging.ERROR, self.logger_name, 'Problem reading file "%s"' % fname)
               finally:
                  hdl.close()

         # Ensure permissions for restart/reconfig
         try:
            subs = self.console.get_store_agent().getObjects(_class='Subsystem', _package='com.redhat.grid.config')

            dlist = []
            if 'DAEMON_LIST' in config:
               for d in config['DAEMON_LIST'].split(','):
                  d = d.strip()
                  if d != '':
                     dlist.append(d.upper())

            for sub in subs:
               name = sub.name.upper()
               if name in dlist:
                  os.write(tmp_hdl, '%s.SEC_ADMINISTRATOR_AUTHENTICATION_METHODS = $(%s.SEC_ADMINISTRATOR_AUTHENTICATION_METHODS), FS, NTLM, CLAIMTOBE\n' % (name, name))
         except Exception as error:
            log(logging.ERROR, self.logger_name, 'Store: %s' % error)
            log(logging.WARNING, self.logger_name, 'Failed to retrieve subsystem list.  Configuration could break restart/reconfig functionality')

         os.close(tmp_hdl)
      except IOError:
         self._discard_temp_file(tmp_hdl, tmp_name)
         log(logging.ERROR, self.logger_name, 'Failed to read configd configuration/override file(s).  Not writing configuration file')
         return False
      except:
         self._discard_temp_file(tmp_hdl, tmp_name)
         log(logging.ERROR, self.logger_name, 'Failed to write configuration to temp file')
         return False

      # Verify the config file is valid
      (retval, out, err) = run_cmd('condor_config_val -dump', environ={'CONDOR_CONFIG':'%s' % tmp_name})
      if retval != 0:
         log(logging.ERROR, self.logger_name, 'Configuration is invalid.  Discarding')
         os.remove(tmp_name)
         return False

      # Install the file for condor to use.  The finally clause guarantees
      # the replacing_file flag is cleared however this section is left
      replacing_file = True
      try:
         try:
            if os.path.exists(self.managedfile):
               os.remove(self.managedfile)
            shutil.move(tmp_name, self.managedfile)
         except:
            log(logging.ERROR, self.logger_name, 'Error installing new configuration file')
            self._discard_temp_file(None, tmp_name)
            return False

         if os.name in ('nt', 'ce'):
            try:
               (retval, out, err) = run_cmd('icacls %s /grant Everyone:(R)' % self.managedfile)
               if retval != 0:
                  log(logging.ERROR, self.logger_name, 'Failed to set read permissions on "%s" (stdout = %s, stderr = %s)' % (self.managedfile, out, err))
            except Exception as e:
               # The command exists, and is needed, on newer versions of
               # windows that need to have the permissions changed.  If it
               # fails or doesn't exist, log data and move on
               log(logging.WARNING, self.logger_name, e)
               log(logging.WARNING, self.logger_name, 'Problem running icacls to set permissions')
         else:
            os.chmod(self.managedfile, stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH)
      finally:
         replacing_file = False
      log(logging.INFO, self.logger_name, 'Retrieved configuration from the store')

      # Have the store tell us which subsystems to restart/reconfig
      try:
         result = node_obj.whatChanged(running_version, version)
      except:
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Unable to retrieve configuration differences.  The configuration will not take affect')
         return False

      if result.status != 0:
         if not stop_running:
            log(logging.ERROR, self.logger_name, 'Store error: %d, %s' % (result.status, result.text))
            log(logging.ERROR, self.logger_name, 'Failed to retrive differences between versions "%d" and "%d".  The configuration will not take affect' % (running_version, version))
         return False
      restart_list = result.outArgs['restart']
      reconfig_list = result.outArgs['affected']

      log(logging.DEBUG, self.logger_name, 'Daemons to restart: %s' % restart_list)
      log(logging.DEBUG, self.logger_name, 'Daemons to reconfig: %s' % reconfig_list)

      # Determine the list of daemons the master is running, and only send
      # events to these daemons.  Any new daemons that should be running (or
      # any daemons that should be stopped) will be handled by commands sent
      # to the master
      (retval, daemons, err) = run_cmd('condor_config_val -master DAEMON_LIST')
      daemon_list = []
      if daemons is not None:
         for daemon in daemons.split(','):
            daemon = daemon.strip()
            if daemon != '':
               daemon_list.append(daemon.lower())

      # Process the subsystem lists and act upon them.  Start by processing
      # the daemons to restart, and if master is listed then don't process
      # anything else because restarting the master will take care of
      # everything
      cmd = 'condor_restart'
      if 'master' in restart_list:
         return self.send_condor_command(cmd, 'master')

      success = self.act_upon_subsys_list(cmd, restart_list, daemon_list)

      # Send a reconfig to the master daemon so all subsystems are
      # running with the current config values
      return (self.send_condor_command('condor_reconfig', 'master') and success)


   def act_upon_subsys_list(self, command, list, running_daemons):
      """Send `command` to every listed subsystem that is running.

      command         -- condor command to send
      list            -- subsystem names to act upon
      running_daemons -- lower-case names of the running daemons

      Returns True only if every command that had to succeed did.
      """
      nondc_daemons = ['ll_daemon', 'configd']
      all_success = True

      for subsys in list:
         subsys = str(subsys.strip())
         if subsys.lower() not in running_daemons:
            log(logging.DEBUG, self.logger_name, 'Not sending "%s" to subsystem "%s" since it is not currently running' % (command, subsys))
            continue

         if subsys.lower() in nondc_daemons:
            # Non-daemoncore daemon, so send an off first then an on
            # command.  It's possible the daemon isn't running for some
            # reason, and that is ok.  The important thing is to ensure
            # it is started
            self.send_condor_command('condor_off', subsys)
            if self.send_condor_command('condor_on', subsys) == False:
               all_success = False
         elif self.send_condor_command(command, subsys) == False:
            all_success = False
      return all_success


   def send_condor_command(self, command, subsystem):
      """Run `command -subsystem subsystem` with CONDOR_CONFIG set to the
      saved old configuration.  Returns True when the command exits with 0.
      """
      log(logging.DEBUG, self.logger_name, 'Sending command "%s" to subsystem "%s"' % (command, subsystem))
      (retval, out, err) = run_cmd('%s -subsystem %s' % (command, subsystem), environ={'CONDOR_CONFIG':'%s' % self.old_config})
      if retval != 0:
         log(logging.ERROR, self.logger_name, 'Failed to send command "%s" to subsystem "%s" (retval: %d, stdout: "%s", stderr: "%s")' % (command, subsystem, retval, out, err))
         return False

      log(logging.DEBUG, self.logger_name, 'Sent command "%s" to subsystem "%s"' % (command, subsystem))
      return True


def exit_signal_handler(signum, frame):
   """Shut the service down and raise the stop flag.

   Registered as a signal handler and also called directly; neither
   argument is used.
   """
   global service, stop_running, logger_name

   if service is not None:
      for message in ('Shutting down', 'Closing QMF connections'):
         log(logging.DEBUG, logger_name, message)
      service.shutdown()
      del service
      service = None
      log(logging.DEBUG, logger_name, 'Closed QMF connections')

   if stop_running == True:
      return
   log(logging.DEBUG, logger_name, 'Setting stop flag')
   stop_running = True


def test_for_shutdown():
   """Shut down when the pid file exists, removing the file first."""
   global pidfile, logger_name

   log(logging.DEBUG, logger_name, 'Testing for shutdown file')
   if not os.path.isfile(pidfile):
      return

   log(logging.DEBUG, logger_name, 'Found shutdown file')
   os.remove(pidfile)
   # Same path a SIGTERM would take
   exit_signal_handler(signal.SIGTERM, None)


def stop(code):
   """Shut everything down, flush logging and hand `code` back unchanged."""
   global logger_name

   # Same path a SIGTERM would take
   exit_signal_handler(signal.SIGTERM, None)

   log(logging.INFO, logger_name, 'Exiting')
   logging.shutdown()

   return code


def monitor_timers():
   """Restart the timer threads that should be running but are not.

   The interval timer is restarted when no thread carries its name and
   the service has a positive check interval.  On Windows the shutdown
   timer is restarted the same way.
   """
   global interval_thread_name, service, logger_name
   global windows_shutdown_timer_name

   on_windows = os.name in ('nt', 'ce')
   running = [thread.name for thread in threading.enumerate()]

   # exit_signal_handler() sets service to None, possibly while this
   # function runs, so work on a snapshot and skip the check when the
   # service is already gone
   svc = service

   # Start the timers that are expected to be running but aren't
   if svc is not None and interval_thread_name not in running and svc.interval > 0:
      log(logging.DEBUG, logger_name, 'Restarting interval timer')
      svc.clear_timers()
      svc.setup_timers()

   # windows_shutdown_timer_name only exists on Windows, so it must not be
   # evaluated elsewhere
   if on_windows and windows_shutdown_timer_name not in running:
      log(logging.DEBUG, logger_name, 'Restarting windows shutdown timer')
      clear_shutdown_timer()
      start_shutdown_timer()


def clear_shutdown_timer():
   """Drop the reference to the shutdown timer; the thread is not stopped."""
   global shutdown_timer

   if shutdown_timer is not None:
      del shutdown_timer

   # Rebind the name so later checks find None
   shutdown_timer = None


def start_shutdown_timer():
   """Start the timer that polls for the shutdown file, unless one exists."""
   global windows_shutdown_timer_name, shutdown_timer, shutdown_interval

   if shutdown_timer != None:
      return

   shutdown_timer = Timer(shutdown_interval, test_for_shutdown, windows_shutdown_timer_name)
   shutdown_timer.start()


# Flags: stop_running is raised by exit_signal_handler(), replacing_file is
# set while get_config() installs the managed file
stop_running = False
replacing_file = False

# Versions queued by EventConsole.event() and drained by
# Service.process_version_q()
version_queue = Queue.Queue()
service = Service()

logger_name = os.path.basename(sys.argv[0])
# File whose presence makes test_for_shutdown() shut the daemon down
pidfile = os.path.normpath('%s/.pid%d' % (os.getcwd(), os.getpid()))
old_config_filename = 'condor.old_config'
interval_thread_name = 'Thread for Interval Timer'

# The shutdown timer state only exists on Windows
if os.name in ('nt', 'ce'):
   shutdown_interval = 0
   shutdown_timer = None
   windows_shutdown_timer_name = 'Thread for Windows Shutdown Timer'


def main(argv=None):
   """Run the configuration daemon, or a one-shot retrieval, and return an exit status.

   Recognised options (short/long):
      -d, --debug         log at DEBUG instead of INFO
      -h, --hostname=     node name to use instead of socket.gethostname()
      -l, --logfile=      log file; when absent the location is read with
                          read_condor_config('CONFIGD', ['LOG'])
      -m, --managedfile=  configuration file handed to service.config()
      -P, --password=     password handed to service.config()
      -r, --retrieve      force a single configuration check, then exit
      -U, --user=         user name handed to service.config()

   argv -- argument list including the program name; defaults to sys.argv

   Returns 0 after a clean shutdown or a successful retrieval.  Returns 1 on
   a usage error, when logging cannot be set up, when the service cannot be
   configured, when a forced retrieval fails, or when an unexpected
   exception ends the run before a shutdown was requested.
   """
   global service, stop_running, replacing_file, version_queue, logger_name, old_config_filename, shutdown_interval

   if argv is None:
      argv = sys.argv

   try:
      log_settings = {}
      retrieve = False
      node_name = socket.gethostname()
      config_file = ''
      username = ''
      passwd = ''
      log_level = logging.INFO

      # Exponential backoff state for configuration installs
      num_attempts = 0
      last_attempt = 0
      next_attempt_interval = 0

      # Text of the last error reported by the main loop, used to avoid
      # logging the same failure once per second
      last_loop_error = None

      long_opts = ['debug', 'hostname=', 'logfile=', 'managedfile=',
                   'password=', 'retrieve', 'user=']

      # Set signal handlers
      signal.signal(signal.SIGINT, exit_signal_handler)
      signal.signal(signal.SIGTERM, exit_signal_handler)
      signal.signal(signal.SIGABRT, exit_signal_handler)
      signal.signal(signal.SIGILL, exit_signal_handler)
      signal.signal(signal.SIGFPE, exit_signal_handler)
      signal.signal(signal.SIGSEGV, exit_signal_handler)
      if os.name != 'nt' and os.name != 'ce':
         # These aren't available on windows
         signal.signal(signal.SIGQUIT, exit_signal_handler)
         signal.signal(signal.SIGHUP, exit_signal_handler)

         # Run as the user condor if running as root
         if os.geteuid() == 0:
            # Get the uid and gid for the condor user.  The group is
            # changed first, while the process still has the privilege
            # to do so.
            pw_data = pwd.getpwnam('condor')
            os.setregid(pw_data[3], pw_data[3])
            os.setreuid(pw_data[2], pw_data[2])

      try:
         opts, args = getopt.gnu_getopt(argv[1:], 'dh:l:m:rP:U:', long_opts)
      except getopt.GetoptError, error:
         print str(error)
         return(1)

      for option, arg in opts:
         if option in ('-d', '--debug'):
            log_level = logging.DEBUG
         if option in ('-h', '--hostname'):
            node_name = arg
         if option in ('-l', '--logfile'):
            log_settings['log'] = arg
         if option in ('-m', '--managedfile'):
            config_file = arg
         if option in ('-P', '--password'):
            passwd = arg
         if option in ('-r', '--retrieve'):
            retrieve = True
         if option in ('-U', '--user'):
            username = arg


      # Configure the logging system.  A log file given on the command line
      # wins; otherwise the location comes from condor's configuration.
      if 'log' not in log_settings:
         try:
            log_settings = read_condor_config('CONFIGD', ['LOG'], permit_param_only = False)
         except ConfigError, error:
            print 'Error: %s.  Exiting' % error.msg
            return(1)

      # The remaining settings are optional: any failure to read or convert
      # one of them falls back to its default
      try:
         size = int(read_condor_config('', ['MAX_CONFIGD_LOG'])['max_configd_log'])
      except:
         size = 1000000

      try:
         backoff_factor = int(read_condor_config('CONFIGD', ['BACKOFF_FACTOR'])['backoff_factor'])
         if backoff_factor < 0:
            backoff_factor = 2
      except:
         backoff_factor = 2

      try:
         backoff_const = int(read_condor_config('CONFIGD', ['BACKOFF_CONSTANT'])['backoff_constant'])
         if backoff_const < 0:
            backoff_const = 9
      except:
         backoff_const = 9

      try:
         connect_timeout = int(read_condor_config('CONFIGD', ['WALLABY_CONNECT_TIMEOUT'])['wallaby_connect_timeout'])
      except:
         connect_timeout = 60

      # Create the log file
      try:
         base_logger = create_file_logger(logger_name, log_settings['log'], log_level, size=size)
      except Exception, e:
         print e
         print 'Failed to open log file.  Exiting'
         return(1)

      log(logging.INFO, logger_name, 'Starting Up')
      log(logging.INFO, logger_name, 'Hostname is "%s"' % node_name)

      if os.name == 'nt' or os.name == 'ce':
         # Need to set a Timer for shutdown on windows
         log(logging.INFO, logger_name, 'Setting windows shutdown Timer')
         try:
            shutdown_interval = int(read_condor_config('CONFIGD', ['WIN_INTERVAL'], permit_param_only = False)['win_interval'])
         except ConfigError, error:
            log(logging.WARNING, logger_name, '%s' % error.msg)
            log(logging.WARNING, logger_name, 'Using default (5)')
            shutdown_interval = 5
         except:
            log(logging.WARNING, logger_name, 'WIN_INTERVAL must have a valid value')
            log(logging.WARNING, logger_name, 'Using default (5)')
            shutdown_interval = 5

         # Ensure there is a valid interval above 0 as this timer can not be
         # disabled
         if shutdown_interval <= 0:
            log(logging.WARNING, logger_name, 'Invalid WIN_INTERVAL (%d).  Using default (5)' % shutdown_interval)
            shutdown_interval = 5
         start_shutdown_timer()

      # Remove all temp configuration files
      log(logging.INFO, logger_name, 'Cleaning up temporary configuration files')
      temp_dir = tempfile.gettempdir()
      for entry in os.listdir(temp_dir):
         full_path = os.path.normpath('%s/%s' % (temp_dir, entry))
         if old_config_filename in entry:
            log(logging.DEBUG, logger_name, 'Deleting temporary configuration file %s' % full_path)
            try:
               os.remove(full_path)
            except Exception, e:
               # A leftover file is not fatal; note it and carry on
               log(logging.WARNING, logger_name, e)

      # Retrieve the broker information from condor's configuration file
      if service != None:
         service.init(node_name, logger_name)
      if service != None and service.config(config_file, username, passwd) != True:
         if retrieve == True:
            print 'Error: Unable to retrieve configuration'
            log(logging.CRITICAL, logger_name, 'Unable to retrieve configuration')
         return(stop(1))

      # Wait for connection to the store, giving up after connect_timeout
      # seconds or as soon as a shutdown is requested
      log(logging.DEBUG, logger_name, 'Looking for the store agent')
      start = time.time()
      while service.console.get_store_agent() == None and stop_running == False:
         if time.time() - start >= connect_timeout:
            log(logging.INFO, logger_name, 'Timed out looking for store agent')
            break
         time.sleep(1)

      if service.console.get_store_agent() != None:
         log(logging.DEBUG, logger_name, 'Found the store agent')

      if service != None:
         if retrieve == True:
            # One-shot mode: report failure through the exit status as well
            # as the message so calling scripts can detect it
            if service.check_config_ver(force=True) == False:
               print 'Error: Unable to retrieve configuration'
               log(logging.CRITICAL, logger_name, 'Unable to retrieve configuration')
               return(stop(1))
            return(stop(0))

         # Delay initial checkin after startup
         random.seed()
         time.sleep(random.randint(0, 10))
         service.check_config_ver()
         service.setup_timers()

      # Loop forever until told to shutdown.  A file replacement that is in
      # progress keeps the loop alive even after a shutdown request.
      while stop_running == False or replacing_file == True:
         try:
            time.sleep(1)

            # Check to see if there is a new configuration version to install
            if version_queue.qsize() > 0:
               # There is a new configuration version, so trying to install it.
               # Record the time the last installation attempt was tried for
               # exponential backoff.
               if last_attempt == 0:
                  last_attempt = time.time()
               check_interval = service.get_interval()

               # The first attempt is immediate; retries wait out the
               # backoff interval computed after the previous failure
               if num_attempts == 0 or \
                  (time.time() - last_attempt) >= next_attempt_interval:

                  if service.process_version_q() > 0:
                     # There's still a version in the queue, so there was a
                     # problem installing the configuration.  This could be
                     # an OS hiccup, a store communication issue, or that the
                     # configuration is just not valid.

                     # Subtract a second to account for the sleep in the
                     # main loop
                     next_attempt_interval = (backoff_const + pow(backoff_factor, num_attempts))-1

                     # Maximum backoff time is the check_interval if defined
                     if next_attempt_interval > check_interval and \
                        check_interval > 0:
                        next_attempt_interval = check_interval
                     num_attempts += 1
                  else:
                     # Installation was successful
                     num_attempts = 0
                  last_attempt = 0

            # Verify timers are still running
            monitor_timers()
         except:
            # Best effort: nothing raised here may end the daemon, so every
            # exception is still caught.  Record it instead of discarding it,
            # but only once per distinct error and not during shutdown.
            loop_error = repr(sys.exc_info()[1])
            if stop_running == False and loop_error != last_loop_error:
               log(logging.WARNING, logger_name, 'Error in main loop: %s' % loop_error)
            last_loop_error = loop_error
         else:
            # A clean pass means a later recurrence is worth reporting again
            last_loop_error = None

      return(stop(0))
   except Exception, e:
      # An exception raised while shutting down is expected to be a side
      # effect of the shutdown and is not reported as a failure
      if stop_running == False:
         log(logging.INFO, logger_name, e)
      if stop_running == True:
         return(stop(0))
      else:
         return(stop(1))


# When executed as a script, run main() and use its return value as the
# process exit status
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
