#!/usr/bin/env oo-ruby

#--
# Copyright 2012 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#++

require 'rubygems'
require 'getoptlong'
require 'time'

# Prints the synopsis and the list of supported options to standard output,
# then terminates the script with exit status 255. Never returns to the caller.
def usage
  script_name = $0
  help_text = <<USAGE
== Synopsis

#{script_name}: Check all user applications

== Usage

#{script_name} OPTIONS

Options:
-v|--verbose
    Print information about each check being performed
-h|--help
    Show Usage info
USAGE
  puts help_text
  exit 255
end

# Collect the recognised command-line switches into a hash keyed by the long
# option name. Any option-parsing error prints the usage text and exits.
args = {}
begin
  opts = GetoptLong.new(
    ["--verbose", "-v", GetoptLong::NO_ARGUMENT],
    ["--help",    "-h", GetoptLong::NO_ARGUMENT]
  )
  opts.each do |name, value|
    args[name] = value
  end
rescue GetoptLong::Error
  usage
end

# --help short-circuits everything else; otherwise remember the verbosity flag.
usage if args["--help"]
verbose = args["--verbose"]

# Load the broker's environment file. This happens after option parsing, so
# --help and invalid options have already exited (via usage) before this point.
# NOTE(review): presumably this is what defines Rails and the OpenShift::*
# classes used below — confirm.
require "/var/www/openshift/broker/config/environment"
# Disable analytics for admin scripts
Rails.configuration.analytics[:enabled] = false
# NOTE(review): presumably the mcollective discovery timeout (in seconds) used
# when collecting gears from the nodes — confirm unit and intent.
Rails.configuration.msg_broker[:rpc_options][:disctimeout] = 20

# Re-checks mongo for a gear, used to confirm a suspected inconsistency.
#
# Every call increments $false_positive_check_cnt. Once that counter reaches
# FALSE_POSITIVE_CHECK_LIMIT no further queries are issued and the method
# simply answers true.
#
# gear_uuid - uuid of the gear to look up.
# login     - optional; when given, the lookup is restricted to the user
#             document whose _id equals this value.
#
# Returns true if a matching user document exists (or the limit has been
# reached), false otherwise.
def datastore_has_gear?(gear_uuid, login=nil)
  $false_positive_check_cnt += 1
  return true unless $false_positive_check_cnt < FALSE_POSITIVE_CHECK_LIMIT

  criteria = { "apps.group_instances.gears.uuid" => gear_uuid }
  criteria['_id'] = login if login
  users = OpenShift::DataStore.instance.user_collection
  match = users.find_one(criteria, {:fields => []})
  !match.nil?
end

# Failure messages collected during the checks; printed together at the end.
summary = []
# Flipped to false by any failed check; decides the final message and exit code.
no_error = true
# Number of datastore_has_gear? calls made so far.
$false_positive_check_cnt = 0
# Once the counter above reaches this value, datastore_has_gear? stops
# querying mongo and answers true.
FALSE_POSITIVE_CHECK_LIMIT = 4000

# Build an index of every gear recorded in mongo, keyed by gear uuid. Each
# value is [login, creation_time, uid, server_identity]. While walking the
# user documents, also verify that each user's consumed_gears value equals the
# number of gears actually found under that user's applications.
datastore_hash = {}
puts "Started at: #{Time.now}"
start_time = (Time.now.to_f * 1000).to_i

# Select user documents that either have a first gear element in some group
# instance or carry a non-zero consumed_gears value.
query = {
  "$or" => [
    {"apps.group_instances.gears.0" => {"$exists" => true}},
    {"consumed_gears" => {"$ne" => 0}}
  ]
}
# Restrict the returned fields to the ones read below.
options = {
  :fields => [
    "apps.name",
    "apps.creation_time",
    "apps.group_instances.gears.uuid",
    "apps.group_instances.gears.uid",
    "apps.group_instances.gears.server_identity",
    "apps.group_instances.name",
    "consumed_gears",
    "login"
  ],
  :timeout => false
}
# NOTE(review): ret is never read anywhere else in this file.
ret = []
OpenShift::DataStore.instance.user_collection.find(query, options) do |mcursor|
  mcursor.each do |user|
    user_login = user['login']
    gear_count = 0

    (user['apps'] || []).each do |app|
      # Parsed before the group-instance check, exactly as the gears below need it.
      creation_time = Time.parse(app['creation_time'])

      group_instances = app['group_instances']
      unless group_instances
        puts "ERROR: Application: '#{app['name']}' for user '#{user_login}' doesn't have any group instances"
        no_error = false
        next
      end

      group_instances.each do |group|
        gears = group['gears']
        unless gears
          puts "ERROR: Group instance '#{group['name']}' for application: '#{app['name']}' for user '#{user_login}' doesn't have any gears"
          no_error = false
          next
        end

        gears.each do |gear|
          gear_count += 1
          datastore_hash[gear['uuid']] = [user_login, creation_time, gear['uid'], gear['server_identity']]
        end
      end
    end

    if user['consumed_gears'] == gear_count
      puts "OK: user #{user_login} has consumed_gears equal to actual gears (#{gear_count})!" if verbose
    else
      msg = "ERROR: user #{user_login} has a mismatch in consumed gears (#{user['consumed_gears']}) and actual gears (#{gear_count})!"
      puts msg if verbose
      summary << msg
      no_error = false
    end
  end
end

# Elapsed time is tracked in whole milliseconds and printed in seconds.
total_time = (Time.now.to_f * 1000).to_i - start_time
puts "Time to fetch mongo data: #{total_time.to_f/1000}s"
puts "Total gears found in mongo: #{datastore_hash.length}"

# Collect the gears reported by the nodes and print how long that took.
# node_hash is looked up by gear uuid and sender_hash by server identity in
# the checks that follow.
nodes_query_started_ms = (Time.now.to_f * 1000).to_i
node_hash, sender_hash = OpenShift::ApplicationContainerProxy.get_all_gears
total_time = (Time.now.to_f * 1000).to_i - nodes_query_started_ms
puts "Time to get all gears from nodes: #{total_time / 1000.0}s"
puts "Total gears found on the nodes: #{node_hash.length}"

# Forward check: every gear recorded in mongo should be present on some node.
#
# Gears whose application was created within the last 600 seconds are not
# compared against the node results and always pass. For older gears that no
# node reported:
#   * if the gear's node is absent from sender_hash, the node itself is
#     reported as not having been returned from mcollective;
#   * otherwise the gear is reported only when datastore_has_gear? still finds
#     it in mongo (presumably to filter out gears removed since the initial
#     mongo snapshot — confirm).
puts "Checking application gears on corresponding nodes:" if verbose
datastore_hash.each do |gear_uuid, gear_info|
  login = gear_info[0]
  creation_time = gear_info[1]
  server_identity = gear_info[3]
  print "#{gear_uuid}...\t" if verbose

  failure = nil
  if (Time.now - creation_time) > 600 && !node_hash.has_key?(gear_uuid)
    if !sender_hash.has_key?(server_identity)
      failure = "The node #{server_identity} with gear #{gear_uuid} wasn't returned from mcollective"
    elsif datastore_has_gear?(gear_uuid, login)
      failure = "Gear #{gear_uuid} does not exist on any node"
    end
  end

  if failure
    no_error = false
    summary << failure
  end

  # A single status line for every outcome. The original called the
  # non-existent method `put` for recently created gears, which raised
  # NoMethodError in verbose mode.
  puts(failure ? "FAIL" : "OK") if verbose
end

# Reverse check: every gear reported by a node should be recorded in mongo,
# and the uid in use on the node should equal the uid stored in mongo.
# For node_hash values, index 0 is the node and index 1 is the uid in use.
puts "Checking node gears in application database:" if verbose
node_hash.each do |gear_uuid, gear_info|
  print "#{gear_uuid}...\t" if verbose
  datastore_gear_info = datastore_hash[gear_uuid]
  if !datastore_gear_info
    # Not in the initial mongo snapshot: query mongo again before reporting.
    if !datastore_has_gear?(gear_uuid)
      no_error = false
      puts "FAIL" if verbose
      summary << "Gear #{gear_uuid} exists on node #{gear_info[0]} (uid: #{gear_info[1]}) but does not exist in mongo database"
    elsif verbose
      puts "OK"
    end
  else
    # "OK" here only confirms the gear is known to mongo; a uid mismatch found
    # below is reported through the summary, not through this status line.
    puts "OK" if verbose
    reserved_uid = datastore_gear_info[2]
    if !reserved_uid.nil?
      begin
        uid = gear_info[1]
        if uid != reserved_uid.to_i
          summary << "Gear #{gear_uuid} is using uid: '#{uid}' but has reserved uid: '#{reserved_uid.to_i}'"
          no_error = false
        end
      rescue StandardError => e
        # Rescue StandardError rather than Exception so that Interrupt and
        # SystemExit still terminate the script instead of being recorded as
        # a per-gear failure.
        summary << "Failed to check gear: '#{gear_uuid}'s uid because of exception: #{e.message}"
        no_error = false
      end
    end
  end
end

# Print the overall verdict (with all collected failure messages when the
# check failed), warn if the false-positive re-check limit was hit, report
# timing, and exit with 0 on success or 1 on failure.
if no_error
  puts "Success"
else
  puts "Check failed.\n #{summary.join("\n")}"
end

unless $false_positive_check_cnt < FALSE_POSITIVE_CHECK_LIMIT
  puts "WARNING: Only checked the first #{FALSE_POSITIVE_CHECK_LIMIT} errors for false positives."
end

# Total elapsed time since the mongo fetch started, in whole milliseconds.
total_time = (Time.now.to_f * 1000).to_i - start_time
puts "Total time: #{total_time / 1000.0}s"
puts "Finished at: #{Time.now}"
exit(no_error ? 0 : 1)
