#!/usr/bin/python3.3

# Copyright (C) 2010,2011  Internet Systems Consortium.
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
# DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
# INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
# FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""
This file implements the b10-init program.

Its purpose is to start up the BIND 10 system, and then manage the
processes, by starting and stopping processes, plus restarting
processes that exit.

To start the system, it first runs the c-channel program (msgq), then
connects to that. It then runs the configuration manager, and reads
its own configuration. Then it proceeds to starting other modules.

The Python subprocess module is used for starting processes, but
because this is not efficient for managing groups of processes,
SIGCHLD signals are caught and processed using the signal module.

Most of the logic is contained in the Init class. However, since Python
requires that signal processing happen in the main thread, we do
signal handling outside of that class, in the code running for
__main__.
"""

import sys; sys.path.append ('/usr/lib/python3.3/site-packages')
import os

# Location of the module specification file handed to ModuleCCSession.
#
# If B10_FROM_SOURCE is set in the environment, we use data files
# from a directory relative to that, otherwise we use the ones
# installed on the system
if "B10_FROM_SOURCE" in os.environ:
    SPECFILE_LOCATION = os.environ["B10_FROM_SOURCE"] +\
                        "/src/bin/bind10/init.spec"
else:
    # The literal path below contains neither "${datarootdir}" nor
    # "${prefix}", so both replace() calls leave it unchanged.
    # NOTE(review): they look like leftovers of a build-time template
    # substitution -- confirm before removing.
    PREFIX = "/usr"
    DATAROOTDIR = "${prefix}/share"
    SPECFILE_LOCATION = "/usr/share/bind10/init.spec"\
                         .replace("${datarootdir}", DATAROOTDIR)\
                         .replace("${prefix}", PREFIX)

import subprocess
import signal
import re
import errno
import time
import select
import random
import socket
from optparse import OptionParser, OptionValueError
import io
import pwd
import posix
import copy

from bind10_config import LIBEXECPATH
import bind10_config
import isc.cc
import isc.util.process
import isc.net.parse
import isc.log
from isc.log_messages.init_messages import *
import isc.bind10.component
import isc.bind10.special_component
import isc.bind10.socket_cache
import libutil_io_python
import tempfile

# Initialize logging for this program and create the module-wide logger.
# NOTE(review): buffer=True presumably holds log output back until the
# logging configuration is known -- confirm against isc.log.
isc.log.init("b10-init", buffer=True)
logger = isc.log.Logger("init")

# Pending system-wide debug level definitions, the ones we
# use here are hardcoded for now
DBG_PROCESS = logger.DBGLVL_TRACE_BASIC
DBG_COMMANDS = logger.DBGLVL_TRACE_DETAIL

# Messages sent over the unix domain socket to indicate whether a real
# socket follows
CREATOR_SOCKET_OK = b"1\n"
CREATOR_SOCKET_UNAVAILABLE = b"0\n"

# RCodes of known exceptions for the get_token command
# NOTE(review): command_handler() in this file handles "get_socket" and
# "drop_socket" but no "get_token"; this probably means "get_socket" --
# confirm.
CREATOR_SOCKET_ERROR = 2
CREATOR_SHARE_ERROR = 3

# Assign this process some longer name
isc.util.process.rename()

# This is the version that gets displayed to the user.
# The VERSION string consists of the module name, the module version
# number, and the overall BIND 10 version number (set in configure.ac).
VERSION = "bind10 20110223 (BIND 10 1.0.0)"

# Captured once when this module is loaded (in UTC); reported as
# 'boot_time' by Init._get_stats_data().
_BASETIME = time.gmtime()

# Detailed error message commonly used on startup failure, possibly due to
# permission issue regarding log lock file.  We dump verbose message because
# it may not be clear exactly what to do if it simply says
# "failed to open <filename>: permission denied"
# Init.startup() appends this text (with newlines turned into spaces) to
# the error string it returns when changing user or group fails.
NOTE_ON_LOCK_FILE = """\
TIP: if this is about permission error for a lock file, check if the directory
of the file is writable for the user of the bind10 process; often you need
to start bind10 as a super user.  Also, if you specify the -u option to
change the user and group, the directory must be writable for the group,
and the created lock file must be writable for that user. Finally, make sure
the lock file is not left in the directory before restarting.
"""

class ProcessInfoError(Exception):
    """Exception type named for ProcessInfo-related failures."""

class ChangeUserError(Exception):
    '''Signal that switching to the requested user or group failed.

    Init.change_user() raises this when setgid/setuid fails.  It is meant
    to travel up through the component management modules to the top level,
    where Init.startup() turns it into a fatal error message for the user.
    '''

class ProcessInfo:
    """Describe a child program and start it through subprocess.Popen.

    Attributes:
        name: label of the process (only stored by this class).
        args: argument list passed to subprocess.Popen.
        env: extra environment variables layered on top of os.environ
            for the child.
        dev_null_stdout: if true, the child's stdout goes to os.devnull.
        dev_null_stderr: if true, the child's stderr goes to os.devnull.
        process: the subprocess.Popen object, None until spawned.
        pid: PID of the spawned child, None until spawned.
    """

    # One write handle on the null device, shared by all instances and
    # used for the stdout/stderr redirections.
    dev_null = open(os.devnull, "w")

    def __init__(self, name, args, env=None, dev_null_stdout=False,
                 dev_null_stderr=False):
        self.name = name
        self.args = args
        # Give each instance its own dict when none is supplied; a mutable
        # default argument would be shared by all such instances.
        self.env = {} if env is None else env
        self.dev_null_stdout = dev_null_stdout
        self.dev_null_stderr = dev_null_stderr
        self.process = None
        self.pid = None

    def _preexec_work(self):
        """Run in the child between fork and exec (Popen's preexec_fn)."""
        # Put the child into a separate process group so it does not get
        # SIGINT signals on Ctrl-C (b10-init will shut everything down by
        # other means).
        os.setpgrp()

    def _spawn(self):
        """Start the program and record the Popen object and its PID."""
        spawn_stdout = self.dev_null if self.dev_null_stdout else None
        spawn_stderr = self.dev_null if self.dev_null_stderr else None
        # Environment variables for the child process will be a copy of those
        # of the b10-init process with any additional specific variables given
        # on construction (self.env).
        #
        # os.environ.copy() returns a plain dict.  A copy.deepcopy() of
        # os.environ is still an os._Environ object whose item assignment
        # calls putenv(), so the child-specific changes made below would
        # leak into the process environment of b10-init itself.
        spawn_env = os.environ.copy()
        spawn_env.update(self.env)
        # Look for the BIND 10 executables first; cope with an unset PATH.
        if 'PATH' in spawn_env:
            spawn_env['PATH'] = LIBEXECPATH + ':' + spawn_env['PATH']
        else:
            spawn_env['PATH'] = LIBEXECPATH
        self.process = subprocess.Popen(self.args,
                                        stdin=subprocess.PIPE,
                                        stdout=spawn_stdout,
                                        stderr=spawn_stderr,
                                        close_fds=True,
                                        env=spawn_env,
                                        preexec_fn=self._preexec_work)
        self.pid = self.process.pid

    # spawn() and respawn() are the same for now, but in the future they
    # may have different functionality
    def spawn(self):
        """Start the program for the first time."""
        self._spawn()

    def respawn(self):
        """Start the program again."""
        self._spawn()

class CChannelConnectError(Exception):
    """Raised by Init.start_msgq() when the c-channel cannot be reached."""

class ProcessStartError(Exception):
    """Raised by Init.start_cfgmgr() when the process does not report in."""

class Init:
    """Init of BIND class."""

    def __init__(self, msgq_socket_file=None, data_path=None,
                 config_filename=None, clear_config=False,
                 verbose=False, nokill=False, setuid=None, setgid=None,
                 username=None, cmdctl_port=None, wait_time=10):
        """
            Initialize the Init of BIND. This is a singleton (only one can run).

            The msgq_socket_file specifies the UNIX domain socket file that the
            msgq process listens on.  If verbose is True, then b10-init reports
            what it is doing (the logger is switched to DEBUG severity).

            Data path and config filename are passed through to config manager
            (if provided) and specify the config file to be used.  If
            clear_config is true, "--clear-config" is passed to it as well.

            If nokill is true, shutdown() does not send TERM/KILL signals to
            the remaining processes.

            setuid and setgid are the user and group IDs that change_user()
            switches to; None means "do not change".  username is only
            stored by this method.

            The cmdctl_port is passed to cmdctl and specify on which port it
            should listen.

            wait_time controls the amount of time (in seconds) that Init waits
            for selected processes to initialize before continuing with the
            initialization.  Currently this is only the configuration manager.
        """
        self.cc_session = None
        self.ccs = None
        # Name of the process most recently announced by log_starting();
        # used in the error message when startup fails.
        self.curproc = None
        self.msgq_socket_file = msgq_socket_file
        self.component_config = {}
        # Some time in future, it may happen that a single component has
        # multiple processes (like a pipeline-like component). If so happens,
        # name "components" may be inappropriate. But as the code isn't probably
        # completely ready for it, we leave it at components for now. We also
        # want to support multiple instances of a single component. If it turns
        # out that we'll have a single component with multiple same processes
        # or if we start multiple components with the same configuration (we do
        # this now, but it might change) is an open question.
        #
        # Maps PID -> component (see register_process()).
        self.components = {}
        # Simply list of components that died and need to wait for a
        # restart. Components manage their own restart schedule now
        self.components_to_restart = []
        # False until startup() succeeds and again once shutdown is requested.
        self.runnable = False
        self.__uid = setuid
        self.__gid = setgid
        self.username = username
        self.verbose = verbose
        self.nokill = nokill
        self.data_path = data_path
        self.config_filename = config_filename
        self.clear_config = clear_config
        self.cmdctl_port = cmdctl_port
        self.wait_time = wait_time
        # Seconds start_msgq() keeps trying to connect to the c-channel.
        self.msgq_timeout = 5

        # _run_under_unittests is only meant to be used when testing. It
        # bypasses execution of some code to help with testing.
        self._run_under_unittests = False

        self._component_configurator = isc.bind10.component.Configurator(self,
            isc.bind10.special_component.get_specials())
        # The priorities here make them start in the correct order. First
        # the socket creator (which would drop root privileges by then),
        # then message queue and after that the config manager (which is
        # waited for over the message queue; see start_cfgmgr())
        self.__core_components = {
            'sockcreator': {
                'kind': 'core',
                'special': 'sockcreator',
                'priority': 200
            },
            'msgq': {
                'kind': 'core',
                'special': 'msgq',
                'priority': 199
            },
            'cfgmgr': {
                'kind': 'core',
                'special': 'cfgmgr',
                'priority': 198
            }
        }
        # Set to True at the end of a successful startup().
        self.__started = False
        self.exitcode = 0

        # If -v was set, enable full debug logging.
        if self.verbose:
            logger.set_severity("DEBUG", 99)
        # This is set in init_socket_srv
        self._socket_path = None
        self._socket_cache = None
        self._tmpdir = None
        self._srv_socket = None
        self._unix_sockets = {}

    def __propagate_component_config(self, config):
        """Pass a component configuration on to the configurator.

        The core components are merged into a copy of config first, so they
        stay alive across the reconfiguration; config itself is not
        modified.

        Raises Exception if config names one of the core components.
        """
        merged = dict(config)
        for core_name, core_spec in self.__core_components.items():
            if core_name in merged:
                raise Exception(core_name + " is core component managed by " +
                                "b10-init, do not set it")
            merged[core_name] = core_spec
        self._component_configurator.reconfigure(merged)

    def change_user(self):
        '''Switch to the group and user given on construction.

        A component is expected to call this on initial startup, once every
        component that needs the privileges of the original user has
        started.  The group is changed first, then the user; either step is
        skipped when the corresponding ID is None.

        Raises ChangeUserError if changing the group or the user (or the
        logging around it) fails.
        '''
        target_gid = self.__gid
        if target_gid is not None:
            try:
                logger.info(BIND10_SETGID, target_gid)
                posix.setgid(target_gid)
            except Exception as ex:
                raise ChangeUserError('failed to change group: ' + str(ex))

        target_uid = self.__uid
        if target_uid is not None:
            try:
                posix.setuid(target_uid)
                # Log through a one-shot logger only after setuid, so that
                # any logging permission problem caused by the new user
                # shows up at the earliest opportunity.
                isc.log.Logger("b10-init").info(BIND10_SETUID, target_uid)
            except Exception as ex:
                raise ChangeUserError('failed to change user: ' + str(ex))

    def config_handler(self, new_config):
        """Apply a new configuration received for this module.

        Nothing is done (and None is returned) while the instance is not
        runnable: the initial update is left to startup.  Otherwise a
        'components' entry, if present, is propagated to the configurator
        and an answer is returned: code 0 on success, code 1 with the
        exception text on failure.
        """
        if self.runnable:
            logger.debug(DBG_COMMANDS, BIND10_RECEIVED_NEW_CONFIGURATION,
                         new_config)
            try:
                if 'components' in new_config:
                    self.__propagate_component_config(
                        new_config['components'])
                result = isc.config.ccsession.create_answer(0)
            except Exception as e:
                result = isc.config.ccsession.create_answer(1, str(e))
            return result

    def get_processes(self):
        """Return [pid, name, address] for each component, ordered by PID."""
        return [[pid,
                 self.components[pid].name(),
                 self.components[pid].address()]
                for pid in sorted(self.components)]

    def _get_stats_data(self):
        """Return the statistics dict: 'boot_time' as an ISO 8601 UTC string."""
        boot_time = time.strftime('%Y-%m-%dT%H:%M:%SZ', _BASETIME)
        return {'boot_time': boot_time}

    def command_handler(self, command, args):
        """Handle a command sent to this module and return the answer.

        Known commands are "shutdown", "getstats", "ping",
        "show_processes", "get_socket" and "drop_socket".

        Parameters:
            command: the command name; anything that is not a string is
                answered with "bad command".
            args: the command parameters.  Only "get_socket" and
                "drop_socket" look at them.

        Returns an answer built by isc.config.ccsession.create_answer():
        code 0 (with data where applicable) on success, code 1 with a
        message on error.
        """
        logger.debug(DBG_COMMANDS, BIND10_RECEIVED_COMMAND, command)
        create_answer = isc.config.ccsession.create_answer
        if not isinstance(command, str):
            return create_answer(1, "bad command")
        if command == "shutdown":
            # The main loop is expected to notice this and stop.
            self.runnable = False
            return create_answer(0)
        if command == "getstats":
            return create_answer(0, self._get_stats_data())
        if command == "ping":
            return create_answer(0, "pong")
        if command == "show_processes":
            return create_answer(0, self.get_processes())
        if command == "get_socket":
            return self._get_socket(args)
        if command == "drop_socket":
            # args may be None (presumably when the command is sent without
            # parameters -- confirm); a membership test on None would raise
            # TypeError instead of producing an error answer.
            if args is None or "token" not in args:
                return create_answer(1, "Missing token parameter")
            try:
                self._socket_cache.drop_socket(args["token"])
                return create_answer(0)
            except Exception as e:
                return create_answer(1, str(e))
        return create_answer(1, "Unknown command")

    def kill_started_components(self):
        """
            Forcefully kill every component started so far and forget them.

            startup() calls this from its exception handling when starting
            the components fails.
        """
        logger.info(BIND10_KILLING_ALL_PROCESSES)
        self.__kill_children(True)
        # Start over with an empty PID -> component map.
        self.components = dict()

    def _read_bind10_config(self):
        """
            Read the configuration of the Init module itself, i.e. the list
            of components to start, and propagate it to the configurator.

            This could be folded into the startup sequence, but it is kept
            separate for historical reasons and because the tests sometimes
            replace this method.
        """
        logger.info(BIND10_READING_INIT_CONFIGURATION)

        full_config = self.ccs.get_full_config()
        component_section = full_config['components']
        self.__propagate_component_config(component_section)

    def log_starting(self, process, port = None, address = None):
        """
            Log (at INFO level) that a process is being started.  Keeping
            this in one method makes the message form consistent across
            all processes.

            The process name is stored in self.curproc, which identifies the
            failing process if startup goes wrong and is reused by
            log_started() on success.  The optional port and address are
            included in the message when given: a port alone selects the
            "port" message, an address selects the "port and address" one.
        """
        self.curproc = process
        if address is not None:
            logger.info(BIND10_STARTING_PROCESS_PORT_ADDRESS,
                        self.curproc, address, port)
        elif port is not None:
            logger.info(BIND10_STARTING_PROCESS_PORT, self.curproc,
                        port)
        else:
            logger.info(BIND10_STARTING_PROCESS, self.curproc)

    def log_started(self, pid = None):
        """
            Log (at debug level DBG_PROCESS) that the process named in
            self.curproc has started, including its PID when one is given.
            As with log_starting(), this keeps the format consistent.
        """
        if pid is not None:
            logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS_PID,
                         self.curproc, pid)
            return
        logger.debug(DBG_PROCESS, BIND10_STARTED_PROCESS, self.curproc)

    def process_running(self, msg, who):
        """
            Some processes return a message to the Init after they have
            started to indicate that they are running.  The form of the
            message is a dictionary with contents {"running": "<process>"}.

            Parameters:
                msg: the received message, or None if there was none.
                who: the name of the process expected to report in.

            Returns True if msg reports that "who" is running.  Returns
            False in all other cases: silently when msg is None, and after
            logging an error when the message has no usable "running"
            entry or names a different process.
        """
        if msg is None:
            return False
        # Only the lookup is guarded, and only against ordinary errors, so
        # that KeyboardInterrupt/SystemExit and failures of the logging
        # calls are not silently swallowed.
        try:
            running = msg["running"]
        except Exception:
            logger.error(BIND10_STARTUP_UNRECOGNISED_MESSAGE, msg)
            return False
        if running == who:
            return True
        logger.error(BIND10_STARTUP_UNEXPECTED_MESSAGE, msg)
        return False

    # The next few methods start the individual processes of BIND-10.  They
    # are used while start_all_components() runs.  If any fail, an exception
    # is raised which is caught by the caller of start_all_components(); this
    # kills processes started up to that point before terminating the program.

    def _make_process_info(self, name, args, env,
                           dev_null_stdout=False, dev_null_stderr=False):
        """
            Build the ProcessInfo for a program.  All arguments are handed
            to ProcessInfo() unchanged; having this factory lets tests
            override ProcessInfo() creation.
        """
        info = ProcessInfo(name, args, env, dev_null_stdout, dev_null_stderr)
        return info

    def start_msgq(self):
        """
            Start the message queue and connect to the command channel.

            b10-msgq is spawned with stdout sent to the null device, and
            stderr as well unless verbose is set.  The method then retries
            connecting to the c-channel every 0.1 seconds until it succeeds
            or self.msgq_timeout seconds have passed, and finally subscribes
            to the "Init" group.

            Returns the process information of the started b10-msgq.

            Raises CChannelConnectError if no connection could be made in
            time; the spawned process is killed first.
        """
        self.log_starting("b10-msgq")
        msgq_proc = self._make_process_info("b10-msgq", ["b10-msgq"],
                                            self.c_channel_env,
                                            True, not self.verbose)
        msgq_proc.spawn()
        self.log_started(msgq_proc.pid)

        # Now connect to the c-channel
        cc_connect_start = time.time()
        while self.cc_session is None:
            # if we are run under unittests, break
            if self._run_under_unittests:
                break

            # if we have been trying for "a while" give up
            if (time.time() - cc_connect_start) > self.msgq_timeout:
                if msgq_proc.process:
                    msgq_proc.process.kill()
                logger.error(BIND10_CONNECTING_TO_CC_FAIL)
                # Report the timeout actually in effect rather than a
                # hard-coded number of seconds.
                raise CChannelConnectError(
                    "Unable to connect to c-channel after " +
                    str(self.msgq_timeout) + " seconds")

            # try to connect, and if we can't wait a short while
            try:
                self.cc_session = isc.cc.Session(self.msgq_socket_file)
            except isc.cc.session.SessionError:
                time.sleep(0.1)

        # Subscribe to the message queue.  The only messages we expect to
        # receive on this channel are ones relating to process startup.
        if self.cc_session is not None:
            self.cc_session.group_subscribe("Init")

        return msgq_proc

    def start_cfgmgr(self):
        """
            Start the configuration manager and wait until it reports in.

            The data path, the config file name and the clear-config flag
            are passed on the command line when set.  Subsequent
            initialization cannot proceed without the configuration
            manager, so this method polls the c-channel once per second,
            for up to self.wait_time seconds, for a message saying that
            "ConfigManager" is running.

            Returns the process information of the started b10-cfgmgr.

            Raises ProcessStartError if no such message arrived in time.
        """
        self.log_starting("b10-cfgmgr")
        cfgmgr_args = ["b10-cfgmgr"]
        if self.data_path is not None:
            cfgmgr_args += ["--data-path=" + self.data_path]
        if self.config_filename is not None:
            cfgmgr_args += ["--config-filename=" + self.config_filename]
        if self.clear_config:
            cfgmgr_args += ["--clear-config"]
        cfgmgr = self._make_process_info("b10-cfgmgr", cfgmgr_args,
                                         self.c_channel_env)
        cfgmgr.spawn()
        self.log_started(cfgmgr.pid)

        # The time to wait can be set on the command line.
        seconds_left = self.wait_time
        msg, _env = self.cc_session.group_recvmsg()
        while seconds_left > 0 and \
                not self.process_running(msg, "ConfigManager"):
            logger.debug(DBG_PROCESS, BIND10_WAIT_CFGMGR)
            time.sleep(1)
            seconds_left -= 1
            msg, _env = self.cc_session.group_recvmsg()

        if self.process_running(msg, "ConfigManager"):
            return cfgmgr
        raise ProcessStartError("Configuration manager process has not started")

    def start_ccsession(self, c_channel_env):
        """
            Create and start the module CC session, stored as self.ccs.

            The session is built from the module specification file, with
            config_handler() and command_handler() as callbacks, and uses
            the msgq socket file given on construction.

            c_channel_env is not used; it only keeps the argument list in
            line with the other start_xxx methods.

            The CC session is not a process, so log_starting()/log_started()
            are not used here.
        """
        logger.info(BIND10_STARTING_CC)
        session = isc.config.ModuleCCSession(
            SPECFILE_LOCATION,
            self.config_handler,
            self.command_handler,
            socket_file = self.msgq_socket_file)
        self.ccs = session
        self.ccs.start()
        logger.debug(DBG_PROCESS, BIND10_STARTED_CC)

    # A couple of utility methods for starting processes...

    def start_process(self, name, args, c_channel_env, port=None, address=None):
        """
            Spawn a process from the given command arguments, logging
            before and after, and return its process information.

            c_channel_env holds the extra environment variables for the
            process.  The port and address arguments are for log messages
            only.
        """
        self.log_starting(name, port, address)
        started = self._make_process_info(name, args, c_channel_env)
        started.spawn()
        self.log_started(started.pid)
        return started

    def register_process(self, pid, component):
        """
        Make b10-init watch over one more process, keyed by its PID.

        When the process dies while b10-init is running, reap_children()
        calls component.failed() with the exit status.

        The component is expected to be an
        isc.bind10.component.BaseComponent subclass (or anything having
        the same interface).
        """
        watched = self.components
        watched[pid] = component

    def start_simple(self, name):
        """
            Most of the BIND-10 processes are started with the command:

                <process-name> [-v]

            ... where -v is appended if verbose is enabled.  This method
            builds those arguments from the name, starts the process via
            start_process() and returns its result.
        """
        command_line = [name] + (['-v'] if self.verbose else [])
        return self.start_process(name, command_line, self.c_channel_env)

    # The next few methods start up the rest of the BIND-10 processes.
    # Although many of these methods are little more than a call to
    # start_simple, they are retained (a) for testing reasons and (b) as a place
    # where modifications can be made if the process start-up sequence changes
    # for a given process.

    def start_auth(self):
        """
            Start the Authoritative server (b10-auth), with -v appended
            when verbose is enabled, and return the result of
            start_process().
        """
        command_line = ['b10-auth']
        if self.verbose:
            command_line.append('-v')
        return self.start_process("b10-auth", command_line,
                                  self.c_channel_env)

    def start_resolver(self):
        """
            Start the Resolver (b10-resolver), with -v appended when
            verbose is enabled, and return the result of start_process().

            At present the arguments and switches are pure speculation.  As
            with the auth daemon, they should be read from the
            configuration database.
        """
        self.curproc = "b10-resolver"
        # XXX: this must be read from the configuration manager in the future
        command_line = ['b10-resolver'] + (['-v'] if self.verbose else [])
        return self.start_process("b10-resolver", command_line,
                                  self.c_channel_env)

    def start_cmdctl(self):
        """
            Start the command control process (b10-cmdctl) and return the
            result of start_process().

            "--port=<port>" is passed when a cmdctl port was given on
            construction, and "-v" when verbose is enabled.  The port is
            also handed to start_process() for the log message.
        """
        port = self.cmdctl_port
        command_line = ["b10-cmdctl"]
        if port is not None:
            command_line += ["--port=" + str(port)]
        if self.verbose:
            command_line += ["-v"]
        return self.start_process("b10-cmdctl", command_line,
                                  self.c_channel_env, port)

    def start_all_components(self):
        """
            Starts up all the components.  Any exception generated during the
            starting of the components is handled by the caller.
        """
        # Start the real core (sockcreator, msgq, cfgmgr)
        self._component_configurator.startup(self.__core_components)

        # Connect to the msgq. This is not a process, so it's not handled
        # inside the configurator.
        self.start_ccsession(self.c_channel_env)

        # Extract the parameters associated with Init.  This can only be
        # done after the CC Session is started.  Note that the logging
        # configuration may override the "-v" switch set on the command line.
        self._read_bind10_config()

        # TODO: Return the dropping of privileges

    def startup(self):
        """
            Start the Init instance.

            First checks that no msgq is already listening on the socket
            file, then starts all components.  If starting fails, the
            components started so far are killed.

            Returns None if successful, otherwise a string describing the
            problem.
        """
        # Try to connect to the c-channel daemon, to see if it is already
        # running
        c_channel_env = {}
        if self.msgq_socket_file is not None:
            c_channel_env["BIND10_MSGQ_SOCKET_FILE"] = self.msgq_socket_file
        logger.debug(DBG_PROCESS, BIND10_CHECK_MSGQ_ALREADY_RUNNING)
        try:
            self.cc_session = isc.cc.Session(self.msgq_socket_file)
            logger.fatal(BIND10_MSGQ_ALREADY_RUNNING)
            return "b10-msgq already running, or socket file not cleaned , cannot start"
        except isc.cc.session.SessionError:
            # this is the case we want, where the msgq is not running
            pass

        # Start all components.  If any one fails to start, kill all started
        # components and exit with an error indication.
        try:
            self.c_channel_env = c_channel_env
            self.start_all_components()
        except ChangeUserError as e:
            self.kill_started_components()
            return str(e) + '; ' + NOTE_ON_LOCK_FILE.replace('\n', ' ')
        except Exception as e:
            self.kill_started_components()
            # curproc is still None when the failure happened before any
            # log_starting() call; concatenating None would raise a
            # TypeError here and hide the real error.
            failed = self.curproc
            if failed is None:
                failed = "(unknown component)"
            return "Unable to start " + failed + ": " + str(e)

        # Started successfully
        self.runnable = True
        self.__started = True
        return None

    def stop_process(self, process, recipient, pid):
        """
        Ask a process to stop, friendly-like, by sending it a 'shutdown'
        command over the c-channel.

        The process is the name it has (in logs, etc), the recipient is
        the address on msgq. The pid is the pid of the process; it is sent
        along so that, with several processes of the same name, each can
        tell whether the command is meant for it.
        """
        logger.info(BIND10_STOP_PROCESS, process)
        shutdown_cmd = isc.config.ccsession.create_command('shutdown',
                                                           {'pid': pid})
        self.cc_session.group_sendmsg(shutdown_cmd, recipient, recipient)

    def component_shutdown(self, exitcode=0):
        """
        Stop the Init instance on request of a component.  The exitcode is
        the desired exit code and is recorded in every case.

        If startup has already completed, the instance is just marked as
        no longer runnable, so it terminates soon.  If it has not, an
        exception is raised; it is meant to propagate through the
        component and configurator to the startup routine and abort the
        startup immediately.
        """
        self.exitcode = exitcode
        if self.__started:
            self.runnable = False
            return
        raise Exception("Component failed during startup")

    def shutdown(self):
        """Stop the Init instance.

        The components are first asked to stop through the configurator.
        Unless nokill was set on construction, the remaining ones are then
        sent SIGTERM, followed by repeated SIGKILLs until none are left.
        """
        logger.info(BIND10_SHUTDOWN)
        # If ccsession is still there, inform rest of the system this module
        # is stopping. Since everything will be stopped shortly, this is not
        # really necessary, but this is done to reflect that b10-init is also
        # 'just' a module.
        if self.ccs is not None:
            self.ccs.send_stopping()

        # try using the BIND 10 request to stop
        try:
            self._component_configurator.shutdown()
        except Exception:
            # Deliberately best effort: whatever is still alive is dealt
            # with by the signals below.
            pass
        # XXX: some delay probably useful... how much is uncertain
        # I have changed the delay from 0.5 to 1, but sometime it's
        # still not enough.
        time.sleep(1)
        self.reap_children()

        # Send TERM and KILL signals to modules if we're not prevented
        # from doing so
        if not self.nokill:
            # next try sending a SIGTERM
            self.__kill_children(False)
            # finally, send SIGKILL (unmaskable termination) until everybody
            # dies
            while self.components:
                # XXX: some delay probably useful... how much is uncertain
                time.sleep(0.1)
                self.reap_children()
                self.__kill_children(True)
            logger.info(BIND10_SHUTDOWN_COMPLETE)

    def __kill_children(self, forceful):
        '''Terminate the remaining subprocesses by sending them a signal.

        The forceful parameter is passed on to Component.kill(); it also
        selects whether the log messages talk about SIGKILL or SIGTERM.
        This is a dedicated subroutine, just to unify similar cases.

        '''
        if forceful:
            logmsg, signame = BIND10_SEND_SIGKILL, "SIGKILL"
        else:
            logmsg, signame = BIND10_SEND_SIGTERM, "SIGTERM"
        # Iterate over a snapshot, as the components may be modified in
        # the loop.
        for component in tuple(self.components.values()):
            logger.info(logmsg, component.name(), component.pid())
            try:
                component.kill(forceful)
            except OSError as ex:
                # Other OSErrors are ignored apart from being logged
                # (usually ESRCH because the child finally exited).
                logger.info(BIND10_SEND_SIGNAL_FAIL, signame,
                            component.name(), component.pid(), ex)
                if ex.errno == errno.EPERM:
                    # It makes no sense to keep trying after EPERM, so
                    # the component is forgotten.
                    del self.components[component.pid()]

    def _get_process_exit_status(self):
        """Poll, without blocking, for an exited child: os.waitpid() result."""
        any_child = -1
        return os.waitpid(any_child, os.WNOHANG)

    def reap_children(self):
        """Collect the exit status of every child that has terminated.

        Keeps polling _get_process_exit_status() until it reports that
        nothing more is pending (pid 0) or that there are no children at
        all (ECHILD).  A known component is removed from self.components;
        if it still considers itself running and we are not shutting down,
        it is told that it failed and, when it did not restart right away,
        it is queued in self.components_to_restart.
        """
        while True:
            try:
                pid, exit_status = self._get_process_exit_status()
            except OSError as err:
                # XXX: should be impossible to get any other error here
                if err.errno != errno.ECHILD:
                    raise
                return
            if pid == 0:
                return

            if pid not in self.components:
                logger.info(BIND10_UNKNOWN_CHILD_PROCESS_ENDED, pid)
                continue

            # One of the components we know about.
            component = self.components.pop(pid)
            logger.info(BIND10_PROCESS_ENDED, component.name(), pid,
                        exit_status)
            # Report the failure only if it matters: we are not shutting
            # down and the component considers itself to be running.
            if not (component.is_running() and self.runnable):
                continue
            # failed() returns False when the component wants to be
            # restarted, but not just yet.
            if not component.failed(exit_status):
                self.components_to_restart.append(component)

    def restart_processes(self):
        """
            Try to restart the components queued for a restart:

            * Returns the earliest restart time among the components that
              could not be restarted yet.
            * If the server is shutting down, returns 0.
            * If nothing is left waiting, returns None.

            The values returned can be safely passed into select() as the
            timeout value.

        """
        if not self.runnable:
            return 0

        current_time = time.time()
        waiting = []
        # Earliest moment at which the queue has to be looked at again,
        # if at all.
        earliest = None
        for candidate in self.components_to_restart:
            if not self._component_configurator.has_component(candidate):
                # Removed from the configurator since it was scheduled for
                # a restart: log it and let the object be dropped.
                logger.info(BIND10_RESTART_COMPONENT_SKIPPED,
                            candidate.name())
                continue
            if candidate.restart(current_time):
                continue
            waiting.append(candidate)
            due = candidate.get_restart_time()
            if earliest is None or earliest > due:
                earliest = due
        self.components_to_restart = waiting

        return earliest

    def _get_socket(self, args):
        """
        Handler of the get_socket CC command.

        Validates the request parameters (address, port, protocol,
        share_mode, share_name), asks the socket cache for a token and
        returns an answer carrying the token and the path of the socket
        the requester should connect to.  Every failure is turned into an
        error answer instead of an exception.
        """
        try:
            try:
                address = isc.net.parse.addr_parse(args['address'])
                port_number = isc.net.parse.port_parse(args['port'])
                proto = args['protocol']
                if proto not in ('UDP', 'TCP'):
                    raise ValueError("Protocol must be either UDP or TCP")
                mode = args['share_mode']
                if mode not in ('ANY', 'SAMEAPP', 'NO'):
                    raise ValueError(
                        "Share mode must be one of ANY, SAMEAPP or NO")
                name = args['share_name']
            except KeyError as missing:
                return isc.config.ccsession.create_answer(
                    1, "Missing parameter " + str(missing))

            # FIXME: This call contains blocking IPC. It is expected to be
            # short, but if it turns out to be problem, we'll need to do
            # something about it.
            token = self._socket_cache.get_token(proto, address, port_number,
                                                 mode, name)
            reply = {'token': token, 'path': self._socket_path}
            return isc.config.ccsession.create_answer(0, reply)
        except isc.bind10.socket_cache.SocketError as err:
            return isc.config.ccsession.create_answer(CREATOR_SOCKET_ERROR,
                                                      str(err))
        except isc.bind10.socket_cache.ShareError as err:
            return isc.config.ccsession.create_answer(CREATOR_SHARE_ERROR,
                                                      str(err))
        except Exception as err:
            # Anything else (including the ValueErrors raised above) is
            # reported with the generic error code 1.
            return isc.config.ccsession.create_answer(1, str(err))

    def socket_request_handler(self, token, unix_socket):
        """
        Serve one token received over a unix-domain socket.

        The token (bytes) is decoded, the matching socket is looked up in
        the _socket_cache, and its descriptor is sent back over
        unix_socket after a CREATOR_SOCKET_OK marker.  If anything goes
        wrong the problem is logged and CREATOR_SOCKET_UNAVAILABLE is sent
        instead.
        """
        try:
            token = str(token, 'ASCII') # bytes -> str
            consumer_fd = unix_socket.fileno()
            granted_fd = self._socket_cache.get_socket(token, consumer_fd)
            # FIXME: The two sends below are blocking in their nature. An
            # OS-level buffer is likely to be large enough to hold all the
            # data, but if it wasn't and the remote application got stuck,
            # we would have a problem. If such problems appear, we should
            # do something about it.
            unix_socket.sendall(CREATOR_SOCKET_OK)
            libutil_io_python.send_fd(consumer_fd, granted_fd)
        except Exception as err:
            logger.info(BIND10_NO_SOCKET, token, err)
            # NOTE(review): an error raised by this send is not caught
            # here and propagates to the caller.
            unix_socket.sendall(CREATOR_SOCKET_UNAVAILABLE)

    def socket_consumer_dead(self, unix_socket):
        """
        React to a unix_socket having been closed by its peer.

        All sockets that were handed out over it are then considered
        closed, which is signalled to the _socket_cache.
        """
        consumer_fd = unix_socket.fileno()
        logger.info(BIND10_LOST_SOCKET_CONSUMER, consumer_fd)
        try:
            self._socket_cache.drop_application(consumer_fd)
        except ValueError:
            # The application holds no sockets.  Harmless, and it does
            # happen in real life: for example, it requests a socket,
            # get_socket doesn't find it, and the application dies.  It
            # should be rare, though.
            pass

    def set_creator(self, creator):
        """
        Register a socket creator with b10-init.

        The creator is never used directly, only through a cache, and that
        cache is created here.

        Raises ValueError if a creator has already been registered.
        """
        if self._socket_cache is None:
            self._socket_cache = isc.bind10.socket_cache.Cache(creator)
            return
        raise ValueError("A creator was inserted previously")

    def init_socket_srv(self):
        """
        Create the unix-domain socket over which sockets are handed out,
        and start listening on it.

        This method should be called after switching user, or the switched
        applications won't be able to access the socket.
        """
        listener = socket.socket(socket.AF_UNIX)
        self._srv_socket = listener
        # The socket lives in a freshly created temporary directory: it is
        # somewhere safe and unique, nobody has to pick a location, and on
        # some platforms the private directory also secures the socket.
        self._tmpdir = tempfile.mkdtemp(prefix='sockcreator-')
        self._socket_path = os.path.join(self._tmpdir, "sockcreator")
        listener.bind(self._socket_path)
        listener.listen(5)

    def remove_socket_srv(self):
        """
        Undo init_socket_srv(): close the listening socket, then remove
        the socket file and the directory that holds it, since we created
        both.

        Nothing happens if _srv_socket is not set (e.g. it was not yet
        initialized).
        """
        listener = self._srv_socket
        if listener is None:
            return
        listener.close()
        socket_file = self._socket_path
        if os.path.exists(socket_file):
            os.remove(socket_file)
        holder_dir = self._tmpdir
        if os.path.isdir(holder_dir):
            os.rmdir(holder_dir)

    def _srv_accept(self):
        """
        Accept one pending connection on the unix-domain server socket and
        start tracking it.

        The connection is stored in _unix_sockets under its file
        descriptor, together with an empty buffer of received bytes.
        """
        connection, _peer = self._srv_socket.accept()
        self._unix_sockets[connection.fileno()] = (connection, b'')

    def _socket_data(self, socket_fileno):
        """
        Service the tracked socket identified by socket_fileno.

        Bytes are read one at a time without blocking.  Each newline
        completes a token, which is handed to socket_request_handler();
        anything read after the last newline is kept for the next call.
        When the socket turns out to be closed it is removed from
        _unix_sockets, reported through socket_consumer_dead() and closed.
        """
        (sock, pending) = self._unix_sockets[socket_fileno]
        while True:
            try:
                chunk = sock.recv(1, socket.MSG_DONTWAIT)
            except socket.error as err:
                # EAGAIN and EWOULDBLOCK might differ on some systems.
                if err.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                    # Nothing more to read right now; remember the
                    # partial token and come back later.
                    self._unix_sockets[socket_fileno] = (sock, pending)
                    return
                # Any other error is handled as if the peer had closed.
                chunk = b''

            if not chunk:
                # End of stream.
                del self._unix_sockets[socket_fileno]
                self.socket_consumer_dead(sock)
                sock.close()
                return

            if chunk == b"\n":
                # A complete token: handle it and start a new one.
                self.socket_request_handler(pending, sock)
                pending = b''
            else:
                pending += chunk

    def run(self, wakeup_fd):
        """
        The main loop: wait for sockets, commands and dead processes,
        for as long as self.runnable stays true.

        The wakeup_fd descriptor is the read end of the pipe the CHLD
        signal handler writes to.
        """
        ccs_fd = self.ccs.get_socket().fileno()
        while self.runnable:
            # Collect children that exited, then see what can be restarted
            # and how long we may sleep until the next restart is due.
            self.reap_children()
            restart_at = self.restart_processes()
            if restart_at is not None:
                timeout = max(restart_at - time.time(), 0)
            else:
                timeout = None

            # select() can fail with EINTR when a signal arrives, even a
            # resumable one; that is treated as "nothing is ready".
            try:
                watched = [wakeup_fd, ccs_fd, self._srv_socket.fileno()]
                watched = watched + list(self._unix_sockets.keys())
                (readable, _writable, exceptional) = \
                    select.select(watched, [], [], timeout)
            except select.error as err:
                if err.args[0] != errno.EINTR:
                    logger.fatal(BIND10_SELECT_ERROR, err)
                    break
                readable, exceptional = [], []

            for fd in readable + exceptional:
                if fd == ccs_fd:
                    try:
                        self.ccs.check_command()
                    except isc.cc.session.ProtocolError:
                        logger.fatal(BIND10_MSGQ_DISAPPEARED)
                        self.runnable = False
                        break
                elif fd == wakeup_fd:
                    # Drain the wakeup bytes; the children themselves are
                    # reaped at the top of the next iteration.
                    os.read(wakeup_fd, 32)
                elif fd == self._srv_socket.fileno():
                    self._srv_accept()
                elif fd in self._unix_sockets:
                    self._socket_data(fd)

# Global variables, needed for signal handlers.  Both start out as None and
# are rebound in main(): options to the parsed command line options and
# b10_init to the Init instance (the object fatal_signal() tells to stop).
options = None
b10_init = None

def reaper(signal_number, stack_frame):
    """Handler for SIGCHLD (a child process has died).

    Deliberately does nothing: Python's signal machinery has been set up
    to write down a pipe when a signal arrives, and that write is what
    wakes up our select().
    """
    return None

def get_signame(signal_number):
    """Return the symbolic name for a signal.

    The signal module is searched for the first attribute, in dir()
    order, whose name is "SIG" followed by an alphanumeric character
    (which leaves out SIG_DFL and friends) and whose value equals
    signal_number.  If there is none, "Unknown signal <number>" is
    returned.
    """
    matching = (name for name in dir(signal)
                if name.startswith("SIG") and name[3].isalnum()
                and getattr(signal, name) == signal_number)
    found = next(matching, None)
    if found is None:
        return "Unknown signal %d" % signal_number
    return found

# XXX: perhaps register atexit() function and invoke that instead
def fatal_signal(signal_number, stack_frame):
    """Handler for the signals that ask us to exit (SIGINT or SIGTERM).

    Logs which signal was received, puts SIGCHLD back to its default
    disposition and clears the runnable flag of the global b10_init
    object.
    """
    signame = get_signame(signal_number)
    logger.info(BIND10_RECEIVED_SIGNAL, signame)
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    # b10_init is only read here (an attribute is set on the object it
    # refers to), so no 'global' declaration is needed.
    b10_init.runnable = False

def process_rename(option, opt_str, value, parser):
    """
    Option parser callback that renames the process when requested.

    parse_args() registers it as the callback of the --pretty-name option.
    Only value is used: it is passed to isc.util.process.rename().  The
    other parameters are part of the callback signature and are ignored.
    """
    isc.util.process.rename(value)

def parse_args(args=sys.argv[1:], Parser=OptionParser):
    """
    Parse the command line arguments and return the resulting options
    object.

    args is the list of arguments to parse and Parser the parser class to
    instantiate.  An invalid --cmdctl-port is reported through the
    parser's error() method; any leftover positional argument makes the
    function print the help text and exit with status 1.
    """
    parser = Parser(version=VERSION)

    # (option strings, keyword arguments) pairs, registered in this order.
    option_table = (
        (("-m", "--msgq-socket-file"),
         dict(dest="msgq_socket_file", type="string", default=None,
              help="UNIX domain socket file the b10-msgq daemon will use")),
        (("-i", "--no-kill"),
         dict(action="store_true", dest="nokill", default=False,
              help="do not send SIGTERM and SIGKILL signals to modules "
                   "during shutdown")),
        (("-u", "--user"),
         dict(dest="user", type="string", default=None,
              help="Change user after startup (must run as root)")),
        (("-v", "--verbose"),
         dict(dest="verbose", action="store_true",
              help="display more about what is going on")),
        (("--pretty-name",),
         dict(type="string", action="callback", callback=process_rename,
              help="Set the process name (displayed in ps, top, ...)")),
        (("-c", "--config-file"),
         dict(action="store", dest="config_file", default=None,
              help="Configuration database filename")),
        (("--clear-config",),
         dict(action="store_true", dest="clear_config", default=False,
              help="Create backup of the configuration file and "
                   "start with a clean configuration")),
        (("-p", "--data-path"),
         dict(dest="data_path", default=None,
              help="Directory to search for configuration files")),
        (("--cmdctl-port",),
         dict(dest="cmdctl_port", type="int", default=None,
              help="Port of command control")),
        (("--pid-file",),
         dict(dest="pid_file", type="string", default=None,
              help="file to dump the PID of the BIND 10 process")),
        (("-w", "--wait"),
         dict(dest="wait_time", type="int", default=10,
              help="Time (in seconds) to wait for config manager to "
                   "start up")),
    )
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    (parsed, leftover) = parser.parse_args(args)

    if parsed.cmdctl_port is not None:
        try:
            isc.net.parse.port_parse(parsed.cmdctl_port)
        except ValueError as err:
            parser.error(err)

    # Positional arguments are not accepted.
    if leftover:
        parser.print_help()
        sys.exit(1)

    return parsed

def dump_pid(pid_file):
    """
    Dump the PID of the current process to the specified file.

    If the given file is None this function does nothing.  If the file
    already exists, the existing content will be removed.  If a system
    error happens in creating or writing to the file, the corresponding
    exception will be propagated to the caller.
    """
    if pid_file is None:
        return
    # The 'with' block closes the file even when write() raises, so the
    # descriptor is never leaked on the error path.
    with open(pid_file, "w") as pid_stream:
        pid_stream.write('%d\n' % os.getpid())

def unlink_pid_file(pid_file):
    """
    Remove the given file, which is basically expected to be the PID file
    created by dump_pid().

    If pid_file is None this function does nothing.  The specified file
    may or may not exist; if it doesn't, this function does nothing
    either.  Other system level errors in removing the file will be
    propagated as the corresponding exception.
    """
    if pid_file is None:
        return
    try:
        os.unlink(pid_file)
    except OSError as error:
        # Compare by value: errno codes are plain integers, and identity
        # ('is') is not a reliable equality test for them.
        if error.errno != errno.ENOENT:
            raise

def remove_lock_files():
    """
    Remove various lock files which were created by code such as in the
    logger. This function should be called after BIND 10 shutdown.

    The lock files are looked for in bind10_config.DATA_PATH, unless one
    of the environment variables checked below overrides that directory.
    """
    lock_dir = bind10_config.DATA_PATH
    # Checked in this order; a later variable overrides an earlier one.
    for env_name in ("B10_FROM_BUILD",
                     "B10_FROM_SOURCE_LOCALSTATEDIR",
                     "B10_LOCKFILE_DIR_FROM_BUILD"):
        if env_name in os.environ:
            lock_dir = os.environ[env_name]

    for basename in ["logger_lockfile"]:
        candidate = lock_dir + '/' + basename
        if not os.path.isfile(candidate):
            continue
        try:
            os.unlink(candidate)
        except OSError as err:
            # Permission related errors on unlink are ignored.  They can
            # happen if bind10 was started with -u, created a lock file as
            # a privileged user, but the directory is not writable for the
            # changed user.  Such a setup causes an immediate start
            # failure with a verbose error message that mentions the
            # leftover lock file, so ignoring it here should be acceptable
            # (note that it doesn't make sense to log this event at this
            # point).
            if err.errno not in (errno.EPERM, errno.EACCES):
                raise

def main():
    """
    Entry point of the b10-init program.

    Parses the command line, resolves the optional --user argument to a
    uid/gid/user name, installs the signal handlers together with the
    wakeup pipe, then creates the Init object, starts it up and runs its
    main loop until it returns.  On the way out the PID file, the lock
    files and the socket-creator socket are removed, and the process exits
    with the Init object's exitcode (or with 1 on the early error paths).
    """
    # Imported here because it is only needed to set up the wakeup pipe.
    import fcntl

    global options
    global b10_init
    # Enforce line buffering on stdout, even when not a TTY
    sys.stdout = io.TextIOWrapper(sys.stdout.detach(), line_buffering=True)

    options = parse_args()

    # Announce startup.  Making this is the first log message.
    try:
        logger.info(BIND10_STARTING, VERSION)
    except RuntimeError as e:
        sys.stderr.write('ERROR: failed to write the initial log: %s\n' %
                         str(e))
        sys.stderr.write(NOTE_ON_LOCK_FILE)
        sys.exit(1)

    # Check user ID.
    setuid = None
    setgid = None
    username = None
    if options.user:
        # Try getting information about the user, assuming UID passed.
        # ValueError: not a number; KeyError: no user with that UID.
        try:
            pw_ent = pwd.getpwuid(int(options.user))
            setuid = pw_ent.pw_uid
            setgid = pw_ent.pw_gid
            username = pw_ent.pw_name
        except (ValueError, KeyError):
            pass

        # Next try getting information about the user, assuming user name
        # passed.
        # If the information is both a valid user name and user number, we
        # prefer the name because we try it second. A minor point, hopefully.
        try:
            pw_ent = pwd.getpwnam(options.user)
            setuid = pw_ent.pw_uid
            setgid = pw_ent.pw_gid
            username = pw_ent.pw_name
        except KeyError:
            pass

        if setuid is None:
            logger.fatal(BIND10_INVALID_USER, options.user)
            sys.exit(1)

    # Create wakeup pipe for signal handlers.  The descriptor handed to
    # set_wakeup_fd() has to be non-blocking: the signal machinery writes
    # to it, and that write must never stall the process when the pipe is
    # full (newer Python versions refuse a blocking descriptor outright).
    wakeup_pipe = os.pipe()
    write_flags = fcntl.fcntl(wakeup_pipe[1], fcntl.F_GETFL)
    fcntl.fcntl(wakeup_pipe[1], fcntl.F_SETFL, write_flags | os.O_NONBLOCK)
    signal.set_wakeup_fd(wakeup_pipe[1])

    # Set signal handlers for catching child termination, as well
    # as our own demise.
    signal.signal(signal.SIGCHLD, reaper)
    signal.siginterrupt(signal.SIGCHLD, False)
    signal.signal(signal.SIGINT, fatal_signal)
    signal.signal(signal.SIGTERM, fatal_signal)

    # Ignore SIGPIPE, as we don't want it to end this process
    signal.signal(signal.SIGPIPE, signal.SIG_IGN)

    try:
        b10_init = Init(options.msgq_socket_file, options.data_path,
                        options.config_file, options.clear_config,
                        options.verbose, options.nokill,
                        setuid, setgid, username, options.cmdctl_port,
                        options.wait_time)
        startup_result = b10_init.startup()
        if startup_result:
            logger.fatal(BIND10_STARTUP_ERROR, startup_result)
            sys.exit(1)
        b10_init.init_socket_srv()
        logger.info(BIND10_STARTUP_COMPLETE)
        dump_pid(options.pid_file)

        # Let it run
        b10_init.run(wakeup_pipe[0])

        # shutdown
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
        b10_init.shutdown()
    finally:
        # Clean up the filesystem.  This also runs when sys.exit() or an
        # exception leaves the block above, in which case b10_init may
        # still be None.
        unlink_pid_file(options.pid_file)
        remove_lock_files()
        if b10_init is not None:
            b10_init.remove_socket_srv()
    sys.exit(b10_init.exitcode)

# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
