#!/usr/bin/env oo-ruby

#--
# Copyright 2012 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++


require 'rubygems'
require 'optparse'
require 'socket'

# Command-line options shared by the whole script. Verbose output is off
# unless -v/--verbose is given.
$OPTIONS = { :verbose => false }

optparse = OptionParser.new do |opts|
  opts.banner = "\nUsage: #{$0}\n\nExample: #{$0} -v\n\n"

  opts.on('-v', '--verbose', 'Print verbose statements') do |flag|
    $OPTIONS[:verbose] = flag
  end
end
# Consumes the recognised switches from ARGV.
optparse.parse!

# Write an informational line to stdout, but only when the verbose option
# is enabled in $OPTIONS.
#
# msg - text to print after the "INFO: " prefix
def verbose(msg)
    return unless $OPTIONS[:verbose]
    $stdout.write("INFO: #{msg}\n")
end

###### extension points - may be affected by loading of extension ######

# Name of the selinux policy module that check_selinux expects to be loaded.
# Extension point: the optional extension file may redefine this method.
def selinux_policy_name
    return 'openshift-origin'
end

# Extra package names to verify in addition to the default package list.
# Extension point: the optional extension file may redefine this method.
# The default is an empty list.
def ext_packages
    Array.new
end

###### end extension points #####

# Load the optional extension file, which may redefine the extension-point
# methods above. A missing file is not an error: the defaults stay in place.
begin
  require '/usr/libexec/openshift/lib/admin-accept-node-ext'
rescue LoadError
  verbose("using default accept-node extensions")
end

# Traffic-control checks are switched off; check_tc_config and the per-user
# tc class check only run when $TC_CHECK is true.
$TC_CHECK = false
# Snapshot of the tc class lines that mention a parent, taken once at load.
$TC_DATA = `tc clas show dev eth0`.split("\n").grep(/parent/)
# Snapshot of the quota report, one entry per output line.
$ALL_QUOTAS = `repquota -a`.split("\n")

# Locations of the configuration files and directories inspected below.
$CONF_DIR = "/etc/openshift"
$NODE_CONF_FILE = File.join($CONF_DIR, "node.conf")
$RESOURCE_LIMITS_FILE = File.join($CONF_DIR, "resource_limits.conf")
$LIMITS_CONF_DIR = "/etc/security/limits.d"
#
# Control variables.  Override for testing
#
# Defaults used when the matching environment variable is not set.
DEFAULT_SEBOOL_LIST = "httpd_can_network_connect:on"
DEFAULT_PACKAGES = "rubygem-openshift-origin-node openshift-origin-node-util " +
                   "    rubygem-openshift-origin-common selinux-policy selinux-policy-targeted"
DEFAULT_SERVICES = "mcollective cgconfig cgred httpd openshift-port-proxy"
DEFAULT_MINSEM = 512

# Until the override handling below runs, each global holds the NAME of the
# environment variable that is consulted for it.
$SEBOOL_LIST = "SEBOOL_LIST"
$PACKAGES = "PACKAGES"
$SERVICES = "SERVICES"
$MINSEM = "MINSEM"


####### GLOBAL VARS ########
# Cleared by check_cgroup_config / check_tc_config when those checks fail,
# so check_users can skip the matching per-user checks.
$CGROUP_PASS = true
$TC_PASS = true
# Usernames collected by check_users for the later directory/httpd checks.
$ALL_USERS = Array.new
###########################

# Resolve each control variable: on entry every global holds the NAME of the
# environment variable to consult. When that variable is set, its value is
# used (with a warning on stderr); otherwise the built-in default applies.
#
# The warnings are written to $stderr directly because the eputs helper is
# only defined further down the file and is not callable yet at this point.
env_sebool_list = ENV[$SEBOOL_LIST]
if env_sebool_list.nil?
    $SEBOOL_LIST=DEFAULT_SEBOOL_LIST
else
    $stderr.write("WARNING: ENV overrides SEBOOL_LIST\n")
    $SEBOOL_LIST=env_sebool_list
end

env_packages = ENV[$PACKAGES]
if env_packages.nil?
    $PACKAGES=DEFAULT_PACKAGES
else
    $stderr.write("WARNING: ENV overrides PACKAGES\n")
    $PACKAGES=env_packages
end

env_services = ENV[$SERVICES]
if env_services.nil?
    $SERVICES=DEFAULT_SERVICES
else
    $stderr.write("WARNING: ENV overrides SERVICES\n")
    $SERVICES=env_services
end

env_minsem = ENV[$MINSEM]
if env_minsem.nil?
    $MINSEM=DEFAULT_MINSEM
else
    $stderr.write("WARNING: ENV overrides MINSEM\n")
    # Environment values are strings; check_semaphores compares numerically.
    $MINSEM=env_minsem.to_i
end


# Write one line to standard error.
#
# msg - text to emit; a newline is appended
def eputs(msg)
    line = "#{msg}\n"
    $stderr.write(line)
end

######## UTILITIES ########

# Report a failed check on stderr and count it in $STATUS.
#
# msg - description of the failure; printed after a "FAIL: " prefix
#
# $STATUS is only initialised by the main section at the bottom of the
# file, so a nil counter (e.g. when this file is loaded rather than run)
# is treated as zero instead of raising.
def do_fail(msg)
    eputs("FAIL: #{msg}")
    $STATUS = $STATUS.to_i + 1
end

# Read simple NAME=value settings from a shell-style file into ENV.
#
# file - path of the file to read
#
# For every line:
#   * anything from the first "#" onwards is discarded as a comment
#   * blank and whitespace-only lines are skipped
#   * the remainder is split at the FIRST "=" only, so values may contain
#     "=" and spaces
#   * double quotes are removed from the value and it is stripped
# Lines without "=", with an empty name, or with nothing after the "="
# are ignored.
def source_file(file)
    IO.foreach(file) { |line|
        setting = line.split("#", 2)[0].to_s.strip
        next if setting.empty?

        name,value = setting.split("=", 2)
        next if name.nil? or value.nil?
        name = name.strip
        next if name.empty? or value.empty?

        ENV[name]=value.gsub('"','').strip
    }
end

###############################

# Lines of /etc/passwd whose text contains ":<GEAR_GECOS>:", where
# GEAR_GECOS is read from ENV. The list is computed on first use and cached
# in $USERS for every later call.
def users
    return $USERS if $USERS
    passwd_lines = %x[grep ":#{ENV['GEAR_GECOS']}:" /etc/passwd]
    $USERS = passwd_lines.split("\n")
end

# Load the node configuration and resource-limit files into ENV and check
# that the required settings are present.
#
# Sets $GEAR_BASE_DIR and $GEAR_HTTPD_DIR for the later checks. Every
# problem is reported through do_fail; a missing file is reported and then
# skipped rather than being passed to source_file.
def load_node_conf
    verbose("loading node configuration file #{$NODE_CONF_FILE}")
    if File.exist? $NODE_CONF_FILE
        # Parsed in Ruby rather than sourced by a shell
        source_file($NODE_CONF_FILE)
    else
        # do_fail already adds the "FAIL: " prefix
        do_fail("No configuration file: #{$NODE_CONF_FILE}")
    end

    # make sure required values are set in the conf file
    %w[
      GEAR_BASE_DIR GEAR_SKEL_DIR GEAR_SHELL GEAR_GECOS
      GEAR_MIN_UID GEAR_MAX_UID CARTRIDGE_BASE_PATH
      PROXY_MIN_PORT_NUM PROXY_PORTS_PER_GEAR OPENSHIFT_HTTP_CONF_DIR
      CLOUD_DOMAIN BROKER_HOST PUBLIC_IP PUBLIC_HOSTNAME
    ].each do |value|
      next unless ENV[value].nil? || ENV[value].empty?
      do_fail("SEVERE: in #{$NODE_CONF_FILE}, #{value} not defined")
    end

    $GEAR_BASE_DIR=ENV['GEAR_BASE_DIR']
    # GEAR_BASE_DIR may be unset here (already reported above); guard the
    # nil so the directory test does not raise.
    unless $GEAR_BASE_DIR && File.directory?($GEAR_BASE_DIR)
        do_fail("GEAR_BASE_DIR does not exist or is not a directory: #{$GEAR_BASE_DIR}")
    end
    $GEAR_HTTPD_DIR=File.join($GEAR_BASE_DIR.to_s,"/.httpd.d")

    verbose("loading resource limit file #{$RESOURCE_LIMITS_FILE}")

    if File.exist? $RESOURCE_LIMITS_FILE
        source_file($RESOURCE_LIMITS_FILE)
    else
        do_fail("No resource limits file: #{$RESOURCE_LIMITS_FILE}")
    end
end

# Check that the hostnames configured in ENV resolve sensibly.
#
#   * BROKER_HOST must resolve (localhost is tolerated)
#   * PUBLIC_HOSTNAME must resolve as a fully-qualified name, must not be a
#     loopback address, and must resolve to PUBLIC_IP or to a globally
#     scoped address reported by `ip addr show`
#   * PUBLIC_IP must not be a loopback address
#
# Only StandardError is rescued around the lookups, so an interrupt or
# exit request is not swallowed and misreported as a resolution failure.
def check_node_public_resolution
    verbose("checking node public hostname resolution")

    localhost = %w[127.0.0.1 ::1]
    pubhost = ENV["PUBLIC_HOSTNAME"]
    pubip = ENV["PUBLIC_IP"]
    broker = ENV["BROKER_HOST"]

    # check that the broker host resolves at all
    begin
      # on devenv or livecd, BROKER_HOST=localhost is considered legit, so
      # the resolved address itself is not inspected.
      IPSocket.getaddress(broker)
    rescue StandardError => e
      do_fail("#{$NODE_CONF_FILE}: BROKER_HOST #{broker} does not resolve (#{e})")
    end

    # attempt to resolve the node public hostname
    begin
      # must resolve as a FQDN, so should be the full name
      # (the "." at the end blocks adding a search domain)
      resolved_host = IPSocket.getaddress(pubhost + ".")
    rescue StandardError => e
      do_fail("#{$NODE_CONF_FILE}: PUBLIC_HOSTNAME #{pubhost} does not resolve as a FQDN (#{e})")
      return
    end

    # make sure public settings resolve correctly to a non-localhost IP
    do_fail("#{$NODE_CONF_FILE}: PUBLIC_HOSTNAME #{pubhost} should be public, not localhost") if localhost.member? resolved_host
    do_fail("#{$NODE_CONF_FILE}: PUBLIC_IP #{pubip} should be public, not localhost") if localhost.member? pubip

    # it would be nice to check that the PUBLIC_IP actually ends up at a NIC on this host.
    # however in settings like EC2, there's no good way to do that.
    #
    # but we can check PUBLIC_HOSTNAME resolves to either a NIC on this host or PUBLIC_IP.
    # in EC2, hostname resolves differently internal vs. external, so will match NIC.
    # in most places, PUBLIC_HOSTNAME will resolve to PUBLIC_IP
    my_ips = `ip addr show scope global`.scan(/^\s+inet\s+([\d\.]+)/).flatten.push(pubip).uniq
    do_fail("#{$NODE_CONF_FILE}: PUBLIC_HOSTNAME #{pubhost} resolves to #{resolved_host}; expected #{my_ips.join '|'}") unless my_ips.member? resolved_host
end

# Check selinux: enforcing mode, the expected policy module, the booleans
# listed in $SEBOOL_LIST ("name:value" pairs separated by whitespace), and
# the labels on $GEAR_BASE_DIR.
#
# The label check runs `restorecon -n -v`, which changes nothing and prints
# what it would relabel; any output is therefore treated as a labelling
# problem.
def check_selinux()
    verbose("checking selinux status")

    policy_name = selinux_policy_name

    if %x[getenforce] =~ /Enforcing/
        verbose("checking selinux #{policy_name} policy")
        loaded_modules = %x[/usr/bin/timeout -s9 10 /usr/sbin/semodule -l]
        do_fail("selinux #{policy_name} policy is not loaded") unless loaded_modules =~ /#{policy_name}[\d\.\W]+$/
    else
        do_fail("selinux is not enabled")
    end

    verbose("checking selinux booleans")
    $SEBOOL_LIST.split.each { |bool|
        name,value = bool.split(':')
        result = %x[getsebool #{name}]
        do_fail("selinux boolean #{name} should be #{value}") unless result =~ /#{name} --> #{value}/
    }

    pending_relabels = %x[restorecon -n -v #{$GEAR_BASE_DIR}]
    do_fail("invalid selinux labels in OPENSHIFT_DIR #{$GEAR_BASE_DIR}") unless pending_relabels.strip.empty?
end

# Check that every package in $PACKAGES and ext_packages is installed.
#
# All packages are queried with a single `rpm -q`; each output line saying
# a package "is not installed" is reported as a failure. Matching the whole
# phrase keeps installed packages whose name merely contains "not" from
# being reported.
def check_packages
    verbose("checking package list")
    packages = "#{$PACKAGES}  #{ext_packages.join(' ')}"
    missing = %x[rpm -q #{packages}].split("\n").grep(/is not installed/)
    missing.each { |pack_not_installed|
        do_fail(pack_not_installed)
    }
end

# Check that every service named in $SERVICES (whitespace separated)
# reports a zero exit status for `service <name> status`.
#
# Output is discarded with the POSIX redirection ">/dev/null 2>&1". The
# bash-only "&>" form is avoided: a plain POSIX shell reads it as "run in
# the background", which makes the exit status always zero.
def check_services
    verbose("checking services")
    $SERVICES.split.each { |service|
        %x[service #{service} status >/dev/null 2>&1]
        do_fail("service #{service} not running") unless $?.exitstatus == 0
    }
end


# Check that the fourth tab-separated field of `sysctl kernel.sem` is at
# least $MINSEM.
#
# A value equal to $MINSEM passes, matching both the ">=" announced in the
# verbose message and the "<" shown in the failure message.
def check_semaphores
    verbose("checking kernel semaphores >= #{$MINSEM}")
    semcount=%x[sysctl kernel.sem | cut -f4].strip.to_i
    do_fail("kernel.sem semaphores too low: #{semcount} < #{$MINSEM}") if semcount < $MINSEM
end

#
# Check cgroup config
#
# Counts the lines lscgroup prints for the /openshift group. When there are
# none, records a failure and clears $CGROUP_PASS so per-user cgroup checks
# are skipped.
def check_cgroup_config
    verbose("checking cgroups configuration")

    line_count = %x[lscgroup cpu,cpuacct,memory,freezer,net_cls:/openshift 2>/dev/null | wc -l]
    return unless line_count.strip.to_i < 1

    do_fail("lscgroup /openshift path does not exist")
    $CGROUP_PASS=false
end

#
# Check tc config
#
# Three checks on eth0, stopping at the first one that fails: the root htb
# qdisc, at least one cgroup filter, and at least one htb class. A failure
# is recorded and $TC_PASS is cleared.
def check_tc_config
    verbose("checking presence of tc qdisc")
    expected_qdisc = "qdisc htb 1: root"

    # Only grep's exit status matters here; -q prints nothing.
    %x[tc qdisc show dev eth0 | grep -q "#{expected_qdisc}"]
    unless $?.exitstatus == 0
        do_fail("tc htb qdisc not configured")
        $TC_PASS=false
        return false
    end

    verbose("checking for cgroup filter")
    filter_count = %x[tc filter show dev eth0 | grep 'cgroup handle' | uniq | wc -l].strip.to_i
    if filter_count < 1
        do_fail("no cgroup filter configured")
        $TC_PASS=false
        return false
    end

    verbose("checking presence of tc classes")
    class_count = %x[tc class show dev eth0 | grep 'class htb' | wc -l].strip.to_i
    return nil unless class_count < 1

    do_fail("no htb classes configured")
    $TC_PASS=false
end

# Check that user quotas are switched on for the filesystem holding
# $GEAR_BASE_DIR and that its aquota.user file exists with a quota_db_t
# selinux type.
#
# The device and mount point are the first and sixth whitespace-separated
# fields of the last line of `df -P`; df is run once for both.
def check_quotas
    verbose("checking filesystem quotas")
    df_fields = %x[df -P #{$GEAR_BASE_DIR} | tail -1].split
    oo_device = df_fields[0]
    oo_mount = df_fields[5]
    if oo_mount.nil?
        # Without a mount point none of the checks below can run.
        do_fail "unable to determine the filesystem holding #{$GEAR_BASE_DIR}"
        return
    end

    unless %x[quotaon -u -p #{oo_mount} 2>&1].strip == "user quota on #{oo_mount} (#{oo_device}) is on"
        do_fail "quotas are not enabled on #{oo_mount} (#{oo_device})"
    end

    quota_db_file=File.join(oo_mount,"/aquota.user")
    if File.exist? quota_db_file
        verbose("checking quota db file selinux label")
        quota_db_type = %x[secon -f #{quota_db_file} | grep type: ]
        if quota_db_type !~ /quota_db_t/
            do_fail("quota db file: selinux type is incorrect: #{quota_db_type}")
        end
    else
        do_fail("quota db file #{quota_db_file} does not exist")
    end
end

# Check every gear account returned by users (passwd-format lines).
#
# For each account: the home directory and the PAM limits file
# "<$LIMITS_CONF_DIR>/84-<username>.conf" must exist, the cgroup and tc
# checks run when enabled and not already failed globally, and the quota
# report must show non-zero values in fields 4 and 7 of the user's line.
# Each username is appended to $ALL_USERS for the later checks.
def check_users
    verbose("checking #{users.length} user accounts")
    conf_dir_templ = "#{$LIMITS_CONF_DIR}/84-%s.conf"
    users.each do |user|
        username,_password,uid,_gid,_gecos,home_dir,_shell = user.split(":")
        $ALL_USERS << username #store it so we can use it later

        # home_dir is nil for a passwd line with too few fields
        do_fail("user #{username} does not have a home directory #{home_dir}") unless home_dir && File.exist?(home_dir)
        do_fail("user #{username} does not have a PAM limits file") unless File.exist?(conf_dir_templ % username)
        if $CGROUP_PASS
          check_user_cgroups username
        end
        if $TC_CHECK && $TC_PASS
            hex_uid = uid.to_i.to_s(16)
            # Word boundaries keep class 1:1f4 from also matching 1:1f40.
            tc_results = $TC_DATA.grep(/\b1:#{hex_uid}\b/)
            do_fail("user #{username} must have 1 tc class entry: actual=#{tc_results.length}") if tc_results.length != 1
        end

        # Match the username as the whole leading name of the quota line so
        # "bob" does not pick up the line belonging to "bobby".
        quota_pattern = /^#{Regexp.escape(username)}(\s|[-+]{2}\s)/
        quota_line = $ALL_QUOTAS.find { |line| line =~ quota_pattern }
        quota_fields = quota_line.to_s.split
        if quota_line.nil? ||
           quota_fields[4].to_i == 0 ||
           quota_fields[7].to_i == 0
            do_fail("user #{username} does not have quotas imposed")
        end
    end

end

# Verify that the user has a cgroup in all subsystems and no others
#
# username - gear account name; its cgroup path is /openshift/<username>
def check_user_cgroups(username)
  expected = %w[cpu cpuacct freezer memory net_cls]
  cgroup_path = "/openshift/#{username}"

  # One "subsystem:path" argument per expected subsystem.
  query = expected.map { |subsystem| "#{subsystem}:#{cgroup_path}" }
  reply = %x[#{"lscgroup " + query.join(' ')}]

  # Every whitespace-separated token of the reply is one group. The text
  # before its first colon is a comma-joined subsystem list; collect those
  # names from all tokens.
  found = []
  reply.split.each do |group|
    found.concat(group[/([^:]+)/, 1].split(','))
  end

  do_fail("user #{username} must have all cgroup subsystems") unless found.uniq.sort == expected
end

# Check that every "*.conf" file in $GEAR_HTTPD_DIR belongs to a known user.
#
# The owner is the part of the file name before the first underscore and
# must appear in $ALL_USERS. A file name with no underscore has no such
# prefix, so it is reported as having no associated user. Dot files and
# files not ending in ".conf" are ignored.
def check_system_httpd_configs
    verbose("checking system httpd configs")
    Dir.foreach($GEAR_HTTPD_DIR) { |cfile|
        next if cfile[0,1] == "."
        next if cfile[-5,5] != ".conf"
        owner = cfile.split('_', 2)[0]
        unless $ALL_USERS.include?(owner)
            do_fail("httpd config file #{cfile} doesn't have an associated user")
        end
    }
end

# Check the real directories directly under $GEAR_BASE_DIR.
#
# Dot entries, 'lost+found', 'last_access.log', symlinks and non-directories
# are skipped. Every remaining directory must contain the four dot
# directories listed below and must match a name in $ALL_USERS; each name
# can be matched by one directory only.
def check_app_dirs
    verbose("checking application dirs")
    unclaimed_users = $ALL_USERS.dup
    skipped_names = ['lost+found', 'last_access.log']
    required_dotdirs = [ ".ssh", ".env", ".sandbox", ".tmp" ]

    Dir.foreach($GEAR_BASE_DIR) do |entry|
        next if entry[0,1] == "." || skipped_names.include?(entry)

        gear_dir = File.join($GEAR_BASE_DIR, entry)
        next if File.symlink?(gear_dir) || !File.directory?(gear_dir)

        required_dotdirs.each do |dotdir|
            next if File.directory? File.join(gear_dir, dotdir)
            do_fail("directory #{entry} doesn't have a #{dotdir} directory")
        end

        position = unclaimed_users.index(entry)
        if position.nil?
          do_fail("directory #{entry} doesn't have an associated user")
        else
          unclaimed_users.delete_at(position)
        end
    end
end

# Run every check when the file is executed directly (not when loaded).
# Prints "PASS" when nothing failed, otherwise the failure count on stderr,
# and exits with the failure count as the process status.
if __FILE__ == $0
    $STATUS=0
    load_node_conf
    check_node_public_resolution
    check_selinux
    check_packages
    check_services
    check_semaphores
    check_cgroup_config
    check_tc_config if $TC_CHECK
    check_quotas
    check_users
    check_app_dirs
    check_system_httpd_configs

    if $STATUS == 0
        puts "PASS"
    else
        eputs "#{$STATUS} ERRORS"
    end
    # An exit status is a single byte: capping at 255 keeps a failure count
    # that is a multiple of 256 from being reported as success (0).
    exit([$STATUS, 255].min)
end

