head	1.3;
access;
symbols;
locks; strict;
comment	@# @;


1.3
date	99.08.26.04.06.57;	author jheiss;	state Exp;
branches;
next	1.2;

1.2
date	99.08.11.21.24.19;	author jheiss;	state Exp;
branches;
next	1.1;

1.1
date	99.08.09.03.20.18;	author jheiss;	state Exp;
branches;
next	;


desc
@Remotely 'boot net - install' Sun workstations to Jumpstart them.
@


1.3
log
@Added copyright and GNU license message.
@
text
@#!/usr/bin/perl -w
##############################################################################
# $Id: start,v 1.2 1999/08/11 21:24:19 jheiss Exp jheiss $
##############################################################################
# Remotely 'boot net - install' Sun workstations to Jumpstart them.
##############################################################################
# Copyright (C) 1999  Jason Heiss (jheiss@@ofb.net)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
##############################################################################
# Pre-RCS version control:
# created 07/21/97
# last modified 05/07/98
##############################################################################
# $Log: start,v $
# Revision 1.2  1999/08/11 21:24:19  jheiss
# Removed code which gave back the superlock while start was running.
# This fixes some problems but largely breaks the status CGI script.
# Fixing that will require rewriting the status-tracking code to use
# one status file per machine instead of the current monolithic status
# file.
#
# Revision 1.1  1999/08/09 03:20:18  jheiss
# Initial revision
#
##############################################################################

require 'basedir.pl';
require 'can.pl';
require 'facts.pl';
require 'passwd.pl';
require 'process.pl';
require 'profiles.pl';
require 'status.pl';
use Getopt::Std;
#use POSIX "wait_h";

#############
# Constants #
#############

# Minimum time that one loop should take
my $MINLOOPTIME = 60;  # Measured in seconds

# Minimum time to pause at the bottom of the loop.  We pause to
# allow other programs and users to get a lock on the statusfile
# if they need to access it.  Measured in seconds (it is handed to
# sleep() at the bottom of the main loop).
my $MINSLEEPTIME = 30;

# Number of forks to spawn to work on starting machines.
my $NUMFORKS = 50;

# The output of `hostname` must start with this string for the script
# to agree to run (see the "Make sure we're being run correctly" check).
my $NIS_MASTER = 'hscw045';

###################
# Other variables #
###################
# Flag to indicate that we should exit even if not all of the machines
# have finished jumpstarting.  This would be used primarily during a
# rollout.  It is very rare for all of the machines to successfully
# jumpstart.  There are usually a few with unrecoverable errors.  It
# would be cleaner to have some way to indicate to start that you want
# to exit instead of ^C'ing the program.  There is no method implemented
# at this point to set this variable.
my $exit = 0;

# NOTE(review): nothing in this file ever stores anything in @@hosts, and
# because it is a file-scoped lexical the required libraries cannot see
# it either.  The main loop tests scalar(@@hosts) == 0, which therefore
# looks like it is always true, so the loop would only ever end through
# quit() or ^C -- confirm the intent before relying on finished().
my @@hosts;

##########################################
# Change to the proper working directory #
##########################################
set_basedir();

##############################
# Parse command line options #
##############################
$rvalue = getopts('hD:HI:V:S:m:f:F:');

# A bad option or an explicit -h gets the full usage text (which exits)
if (!$rvalue || $opt_h)
{
	usage_message();
}

# General options
# -D is optional.  Default the level to 0 (normal output) so the numeric
# comparisons in DSWITCH below never operate on undef, which would draw
# "uninitialized value" warnings because this script runs under -w.
my $debug_level = defined($opt_D) ? $opt_D : 0;
$harmless = $opt_H;

# Image and version
my $image = $opt_I;
my $image_version = $opt_V;
my $subimage = $opt_S;

# -m: For starting a single machine that is up and on the network
my $machine = $opt_m;    # Hostname

# -f: Specify a file which contains a list of hostnames to start
my $hostname_file = $opt_f;
# -F: Same as -f but with a little different behaviour.  When we start
#     a machine a record of that is stored in the status file.  With -m,
#     even if a machine has already been started we re-start it.
#     The assumption is that if the user is specifically asking us to
#     start some host they probably have a reason for re-starting it.
#     -f acts the same way except that it takes a list of hosts.  But with
#     -F we skip the hosts that have already been started.  If the user
#     has a large list of hosts to start there will probably be at least
#     a few that fail for whatever reason.  The user can fix the
#     problems with those few and re-run start with -F and only have to
#     wait for those few to be started.
my $hostname_file_norestart = $opt_F;

# Map the numeric debug level onto the four output flags.  Level 0 (or
# any other value) sets none of them.
DSWITCH:
{
	if ($debug_level == -2) { $supersilent = $TRUE; last DSWITCH; }
	if ($debug_level == -1) { $silent = $TRUE; last DSWITCH; }
	if ($debug_level == 1) { $verbose = $TRUE; last DSWITCH; }
	if ($debug_level == 2) { $superverbose = $TRUE; last DSWITCH; }
}

# Supersilent implies silent
$silent = 1 if ($supersilent);
# Superverbose implies verbose
$verbose = 1 if ($superverbose);

# For the time being we'll just kick the default up to verbose
unless ($supersilent || $silent || $verbose || $superverbose)
{
	$verbose = 1;
}

# Both an image and an image version have to be supplied
if (!defined($image) || !defined($image_version))
{
    print "\nYou must specify an image and image version\n";
    usage_message(1);
}

# The subimage may be omitted, in which case 'default' is used
if (!defined($subimage))
{
    $subimage = 'default';
}

# Validate the image / version / subimage combination.  Each failure
# prints its own message and then usage_message() exits, so at most one
# of these branches ever runs.
my $profcheck = check_profile($image, $image_version, $subimage);
if ($profcheck == $BADIMAGE)
{
    print "\n$image is not a defined image\n";
    usage_message(1);
}
elsif ($profcheck == $BADVERSION)
{
    print "\nInvalid image version for $image image\n";
    usage_message(1);
}
elsif ($profcheck == $BADSUBIMAGE)
{
    print "\nInvalid subimage for $image image version $image_version\n";
    usage_message(1);
}

# At least one of -m, -f or -F must have been given
if (!defined($machine) &&
    !defined($hostname_file) &&
    !defined($hostname_file_norestart))
{
    print "\nYou must specify one or more hosts to configure\n";
    usage_message(1);
}

#######################################
# Make sure we're being run correctly #
#######################################

# Refuse to run unless the real uid is 0 and the hostname starts with
# $NIS_MASTER.  The hostname command is only run when the uid test passes.
if ($< != 0 || `hostname` !~ /^$NIS_MASTER/)
{
    print "\nYou need to be root on the NIS master ($NIS_MASTER) " .
        "for this script to work.\n";

    usage_message(1);
}

#########################
# Get the root password #
#########################
# Kept in a global; the per-host loop compares its crypt() against the
# password entry fetched from each host.
$rootpass = getpass("current root password for the host(s)");

####################################
# Build list of hosts to configure #
####################################
# -m wins over -f, which wins over -F
my @@hosts_to_start;
if ($machine)
{
    @@hosts_to_start = ($machine);
}
elsif ($hostname_file)
{
    @@hosts_to_start = readinputfile($hostname_file);
}
elsif ($hostname_file_norestart)
{
    @@hosts_to_start = readinputfile($hostname_file_norestart);
}
else
{
    die "What happened??";
}

# An empty list (e.g. an empty host file) is a usage error
if (!@@hosts_to_start)
{
    print "\nYou didn't specify any hosts to configure\n";
    usage_message(1);
}

#####################
# Let the fun begin #
#####################

while ((scalar(@@hosts) == 0 || !finished(@@hosts_to_start)) && $exit == $FALSE)
{
	# Random variables
	my @@childpids;

	# Get a lock on the status system
	print "Locking status system\n" if ($verbose);
	superlock();

	# Handle the user hitting ^C gracefully
	$SIG{INT} = 'quit';

	# Read in the status of everyone
	print "Reading status file\n" if ($verbose);
	readstat();

	# Record the time the loop starts so that we can sleep the
	# appropriate amount of time at the end
	$starttime = time;
	print "Loop starts at $starttime\n" if ($superverbose);

	# At this point we can check if they gave us any hosts that
	# are ready to be started.  There is no point in continuing
	# if they didn't.
	#
	# The flag is a fresh lexical on every pass through the main loop.
	# (It used to be set and tested under a misspelt name, which made it
	# an undeclared global that stayed true forever once any pass had
	# found a ready host.)
	my $need_to_continue = 0;
	foreach $host (@@hosts_to_start)
	{
		# A host counts as ready once its status has reached
		# $CONFIGSUCCESS or anything numerically beyond it.
		if ($status{$host}[0] >= $CONFIGSUCCESS)
		{
			$need_to_continue = 1;
		}
	}
	unless ($need_to_continue)
	{
		print "None of the hosts you specified are ready to start\n";
		quit();
	}

	print "Dividing " . scalar(@@hosts_to_start) .
		" hosts amongst $NUMFORKS forks\n" if ($superverbose);

	# Because handling each host usually requires a number of rsh
	# connections to that host and each rsh connection takes a second or
	# two, if we were to iterate through each host sequentially while doing
	# a large rollout it would literally take hours to get them all started.
	# As such, this script forks into $NUMFORKS forks.  Each of these forks
	# is assigned a set of hosts to work on.  Depending on the memory and
	# process number limitations of the machine this is run on, you might
	# adjust $NUMFORKS to optimize performance.

	# Initialize some variables for our host division algorithm
	my $numperfork = scalar(@@hosts_to_start) / $NUMFORKS;
	my $npfl = int($numperfork);
	my $npfh = $npfl + 1;
	my @@hostdivs = ();
	push(@@hostdivs, 0);
	my $numremain = scalar(@@hosts_to_start);

	print "Number of hosts per fork is $numperfork\n" if ($superverbose);

	# If the number of hosts is evenly divisible by the number of forks
	if ($numperfork == $npfl)
	{
		print "Number of hosts evenly divisible by number of forks\n"
			if ($superverbose);

		# This is the easy case.  We just evenly divide the hosts
		# up amongst the available forks.
		#for($i=$numperfork-1 ; $i<scalar(@@hosts_to_start) ; $i += $numperfork)
		for($i = $numperfork ; $i<=scalar(@@hosts_to_start) ; $i += $numperfork)
		{
			push(@@hostdivs, $i);
		}
	}
	else
	{
		print "Number of hosts NOT evenly divisible by number of forks\n"
			if ($superverbose);

		# This is the more complicated case.  The number of hosts is
		# not evenly divisible by the number of forks.  Thus we
		# want to split them up as evenly as possible.  Imagine, if you
		# will, that you could divide the hosts up in non-integer
		# amounts.  If you divided the number of hosts by the number
		# of forks you'd get some non-integer real number in this
		# case.  Let's call this number $numperfork.  Since we can't
		# divide them up in non-integer amounts what we do is start by
		# rounding $numperfork down (called $npfl) and assigning that
		# many hosts to each fork.  We do this until the number of
		# hosts remaining is an even multiple of $numperfork rounded up
		# (called $npfh) and the number of hosts remaining is equal
		# to the number of forks remaining times $npfh.  Then we assign
		# $npfh hosts to each remaining fork.  Confused yet?  :)
		my $counter = $NUMFORKS;
		while ( (int($numremain/$npfh) != $numremain/$npfh) ||
                	($counter*$npfh != $numremain) )
		{
			push(@@hostdivs, $hostdivs[$#hostdivs] + $npfl);
			$numremain -= $npfl;
			$counter--;
		}
		while ($numremain != 0)
		{
			push(@@hostdivs, $hostdivs[$#hostdivs] + $npfh);
			$numremain -= $npfh;
		}
	}

	if ($superverbose)
	{
		print "Divisions: ";
		foreach $div (@@hostdivs)
		{
			print "$div ";
		}
		print "\n";
	}

	# And now we perform the actual forks.  This bit is stolen pretty
	# much straight from the Camel book.
	my $forknum = 0;
	my $master = 1;
	my $i;
	# Spawn up to $NUMFORKS children.  Only the master keeps looping; a
	# child clears $master, which ends the loop for it, and carries on
	# below with the slice of hosts selected by its $forknum.
	for ($i=0 ; $i<$NUMFORKS && $master ; $i++)
	{
		FORK:
		{
			my $pid;
			if ($pid = fork)
			{
				# Parent here

				# Keep track of our child pids' so that we can
				# waitpid() on them.
				push(@@childpids, $pid);

				# Make sure we know we are the master.  The master
				# never works on hosts itself.
				$master = 1;
				@@ourhosts = ();

				# Increment fork number and fork again....
				$forknum++;
			}
			elsif (defined $pid)
			{
				# Child here
				$master = 0;

				print "Fork $forknum here!\n" if ($superverbose);
	
				# Make sure we were actually assigned some
				# machines.  Equal neighbouring divisions mean an
				# empty slice.
				if ($hostdivs[$forknum] == $hostdivs[$forknum+1])
				{
					print "Fork $forknum was not given any hosts to work " .
						"on, exiting now.\n" if ($superverbose);
					exit;
				}

				# Our slice is [hostdivs[n], hostdivs[n+1]) of the
				# host list.
				@@ourhosts = ();
				for ($j=$hostdivs[$forknum] ;
						$j<$hostdivs[$forknum+1] ;
						$j++)
				{
					push(@@ourhosts, $hosts_to_start[$j]);
				}

				print "Fork $forknum assigned: @@ourhosts\n" if ($superverbose);
			}
			elsif ($! =~ /No more process/ || $!{EAGAIN})
			{
				# EAGAIN, supposedly recoverable fork error.  The text
				# of the message differs between systems, so besides
				# the historical string match we also ask %! whether
				# errno really is EAGAIN.
				print "No more processes, trying again\n";
				sleep 5;
				redo FORK;
			}
			else
			{
				# Acck, wacky fork error
				die "Can't fork start script: $!\n";
			}
		}
	}

	# Now start working on our assigned set of hosts
	HOST: foreach $host (@@ourhosts)
	{
		# Machines that need to have the Jumpstart started
		if ($status{$host}[0] >= $CONFIGSUCCESS && $status{$host}[0] < $STARTED)
		{
			print "$host ($forknum):  Needs to be started, attempting to " .
				"do so\n";

			# Things to check for:
			# - Can ping machine
			# - Can rsh to machine
			# - Root password is standard one
			# - No users logged in
			# - No /usr/local/do_not_jumpstart file
			# - No ODB_SERVER process

			unless (can_ping($host))
			{
				print "$host ($forknum):  Can't ping\n" if ($verbose);

				setstat($host, $STARTNOPING);
				next HOST;
			}

			unless (can_rsh($host))
			{
				print "$host ($forknum):  Can't rsh\n" if ($verbose);

				setstat($host, $STARTNORSH);
				next HOST;
			}

			# Collect the OS version
			chop($osver = rsh($host, 'uname -r'));
			print "$host ($forknum):  OS version is $osver\n"
				if ($superverbose);

			# Check the root password
			if ($osver =~ /^4/)
			{
				chop($cryptpass = rsh($host,
                                        'head -1 /etc/passwd | cut -d : -f 2'));
			}
			else
			{
				chop($cryptpass = rsh($host,
                                        'head -1 /etc/shadow | cut -d : -f 2'));
			}
			$salt = substr($cryptpass, 0, 2);
			print "$host ($forknum):  Crypted password is $cryptpass\n"
				if ($superverbose);
			print "$host ($forknum):  You entered " .
				crypt($rootpass, $salt) . "\n" if ($superverbose);
			if (crypt($rootpass, $salt) ne $cryptpass)
			{
				print "$host ($forknum):  Root password does not match the " .
					"password you entered\n" if ($verbose);

				setstat($host, $STARTDIFFRP);
				next HOST;
			}

			# If nobody is logged in we'll get back nada from who
			chop($who_output = rsh($host, 'who'));
			if ($who_output)
			{
				print "$host ($forknum):  Someone is logged in\n" if ($verbose);

				# Play an audio file on the machine that asks the user to
				# log out.
				if (-r '/home/isdg/sounds/rob.au')
				{
					print "$host ($forknum):  Playing audio file\n"
						if ($verbose);
					rsh($host, 'audioplay /home/isdg/sounds/rob.au');
				}

				# And send a wall for remote users
				if (-r '/home/isdg/walls/wallout')
				{
					print "$host ($forknum):  Sending wall\n" if ($verbose);
					rsh($host, 'wall /home/isdg/walls/wallout -a');
				}

				setstat($host, $USERLO);
				next HOST;
			}

			# Check for a do_not_jumpstart file
			if (rsh_file_exists($host, '/usr/local/do_not_jumpstart'))
			{
				print "$host ($forknum):  do_not_jumpstart file\n"
					if ($verbose);

				setstat($host, $STARTDNJ);
				next HOST;
			}

			# Check if an ODB_SERVER process is running
			if (process_check($host, 'ODB_SERVER'))
			{
				print "$host ($forknum):  ODB_SERVER is running\n"
					if ($verbose);

				setstat($host, $ODBSERVER);
				next HOST;
			}
			
			# Well, we got this far...  Guess we'll go ahead and
			# fire it off.
			print "$host ($forknum):  Starting jumpstart\n" if ($verbose);
			unless ($harmless)
			{
				# Pick the reboot command from $osver, the 'uname -r'
				# result collected from this host earlier in this pass
				# (the same variable the root password check uses).
				# This test used to read $OSVER, which this script
				# never assigns, so the /etc/reboot branch could not be
				# reached from here.
				if ($osver =~ /^4/)
				{
					rsh($host, '/etc/reboot \'net - install\'');
				}
				else
				{
					rsh($host, 'reboot \'net - install\'');
				}
			}
			setstat($host, $STARTED);

			print "$host ($forknum):  Jumpstart started\n";
		} # End of machines which need to have Jumpstart started

		# Machines which have had Jumpstart started but which haven't
		# finished.
		elsif ($status{$host}[0] >= $STARTED && $status{$host}[0] < $FINALBOOT)
		{
			print "$host ($forknum):  Jumpstart started, checking status\n";

			unless (can_ping($host))
			{
				print "$host ($forknum):  Can't ping (started)\n" if ($verbose);
				setstat($host, $JSNOPING);
				next HOST;
			}

			unless (can_rsh($host))
			{
				print "$host ($forknum):  Can't rsh (started)\n" if ($verbose);
				setstat($host, $JSNORSH);
				next HOST;
			}

			# !!!!  This is prone to problems  !!!!
			# I.e. you have to be very careful when defining things in
			# profiles.pl such that the image name matches the file that
			# is created by the jumpstart.  The new filename that we've
			# standardized on (profile_<image>_<image version>) should
			# help this.
			if (rsh_file_exists($host,
					"/usr/local/${image}_version_${image_version}") ||
				rsh_file_exists($host,
					"/usr/local/profile_${image}_${image_version}"))
			{
				print "$host ($forknum):  Install finished, machine is " .
					"rebooting\n" if ($verbose);
				setstat($host, $FINALBOOT);
				next HOST;
			}
			elsif (process_check($host, 'pkginstall'))
			{
				print "$host ($forknum):  Installing packages\n" if ($verbose);
				setstat($host, $PACKAGEINSTALL);
				next HOST;
			}
			elsif (process_check($host, 'pfinstall'))
			{
				print "$host ($forknum):  Configuring disk drives\n"
					if ($verbose);
				setstat($host, $PFINSTALL);
				next HOST;
			}
			elsif (process_check($host, 'suninstall'))
			{
				print "$host ($forknum):  NIS configured, starting install\n"
					if ($verbose);
				setstat($host, $SUNINSTALL);
				next HOST;
			}
			elsif (process_check($host, 'openwin'))
			{
				print "$host ($forknum):  Openwindows is running, install " .
					"will probably start shortly\n" if ($verbose);
				next HOST;
			}
			elsif (process_check($host, 'sendmail'))
			{
				# The idea of this is to check for machines that rebooted
				# without actually doing the jumpstart.  I.e. if sendmail
				# is running that means the jumpstart should have completed
				# already and we should have seen the
				# /usr/local/image_version_blah file above.
				# Usually this seems to get triggered in error...
				# It seems to be some sort of timing issue where the
				# check above doesn't see the /usr/local/image_version_blah
				# file.  So we check for that again here before
				# restarting the machine.
				
				if (rsh_file_exists($host,
							"/usr/local/${image}_version_${image_version}") ||
					rsh_file_exists($host,
						"/usr/local/profile_${image}_${image_version}"))
				{
					print "$host ($forknum):  Sendmail check triggered in " .
						"error\n" if ($superverbose);
				}
				else
				{
					print "$host ($forknum):  Machine seems to have been " .
						"rebooted without finishing jumpstart, restarting\n"
						if ($verbose);

					# Maybe some of this will help me figure out why.
					print "ls /usr/local:\n";
					system("rsh $host ls /usr/local");
					print "ps -ef:\n";
					system("rsh $host ps -ef");
					sleep(300);

					setstat($host, $CONFIGSUCCESS);
					next HOST;
				}
			}

			print "Hmm, what is the status of $host??\n" if ($verbose);

		} # End of machines which have had Jumpstart started

		# Machines which have finished jumpstarting
		elsif ($status{$host}[0] == $FINALBOOT)
		{
			print "$host ($forknum):  Checking on final reboot\n";

			# See if machine has finished rebooting
			if (process_check($host, 'ttymon'))
			{
				print "$host ($forknum):  Finished rebooting\n" if ($verbose);
				setstat($host, $JUMPFIN);
			}
			else
			{
				print "$host ($forknum):  Hasn't finished rebooting\n"
					if ($verbose);
				next HOST;
			}
		}
		elsif ($status{$host}[0] == $JUMPFIN)
		{
			print "$host ($forknum):  Jumpstart finished, testing\n";

			# Run test programs
			$testnet = rsh($host, '/usr/local/bin/testnet');
			if ($testnet =~ /ERROR/)
			{
				print "$host ($forknum):  testnet failed\n" if ($verbose);
				setstat($host, $TNFAIL);
				next HOST;
			}
			$testjump = rsh($host, '/usr/local/bin/testjump');
			if ($testjump =~ /ERROR/)
			{
				print "$host ($forknum):  testjump failed\n" if ($verbose);
				setstat($host, $TJFAIL);
				next HOST;
			}

			print "$host ($forknum):  Machine ready to use\n";
			setstat($host, $READY);

		} # End of machines which have finished jumpstarting

	} # End of foreach loop

	unless ($master)
	{
		# Write out our changes
		print "Fork $forknum updating status file\n" if ($verbose);
		partialwritestat(@@ourhosts) unless ($harmless);
	}
	
	if ($master)
	{
		# Harvest our dead children ;->
		print "The master is harvesting dead children\n" if ($superverbose);

		# waitpid() is called with flags of 0, so it blocks until the
		# given child has exited.  One pass over the list therefore
		# reaps every fork; there is nothing to poll for and no need
		# to sleep between passes while still holding the status lock.
		my @@donepids;

		foreach $cpid (@@childpids)
		{
			waitpid($cpid, 0);
			push(@@donepids, $cpid);
			print "There are " . scalar(@@donepids) .
				" forks done out of " . scalar(@@childpids) . "\n";
		}
	}
	else
	{
		# Children exit here...
		print "Fork $forknum exiting\n" if ($superverbose);
		exit;
	}

	# Unhandle ^C while we don't have a lock on the status system
	$SIG{INT} = '';

	# Give back lock on status system
	print "Giving back lock on status system\n" if ($verbose);
	unsuperlock();

	# Pad the pass out to $MINLOOPTIME seconds, but never pause for less
	# than $MINSLEEPTIME seconds.
	my $endtime = time;
	print "Loop ends at $endtime\n" if ($superverbose);
	my $difftime = $endtime - $starttime;

	# Start from the minimum pause and only lengthen it when the time
	# still owed to $MINLOOPTIME is at least that long.
	my $sleeptime = $MINSLEEPTIME;
	if ($difftime < $MINLOOPTIME &&
		$MINLOOPTIME - $difftime >= $MINSLEEPTIME)
	{
		$sleeptime = $MINLOOPTIME - $difftime;
	}

	print "Loop took " . $difftime . " seconds.  Pausing $sleeptime seconds\n"
		if ($superverbose);
	sleep($sleeptime);

} # End of while loop

print "Looks like everyone is done, exiting\n" if ($verbose);
exit;

###############
# Subroutines #
###############

sub quit
{
	# Release the status lock, clean up and exit.  Installed as the
	# SIGINT handler by the main loop and also called directly when none
	# of the requested hosts are ready.
	#
	# TODO: this should wait on statlock first, since that would
	# indicate we're in the middle of writing the status file.
	unsuperlock();

	# Remove the per-process marker files in /tmp
	my $tmpfile;
	foreach $tmpfile ("/tmp/masterready.$$", "/tmp/childready.$$")
	{
		unlink($tmpfile);
	}

	exit;
}

sub usage_message
{
	# Print the usage synopsis and exit.
	#
	# Takes one optional argument.  When it is true only the synopsis
	# and a pointer to -h are printed; when it is false or missing every
	# option is described.  This sub never returns.
	my $short = shift;

	print <<SYNOPSIS;

Usage: $0 [-h] [-D level] [-H] \\
          -I image -V version [-S subimage] \\
          -m host | -f file | -F file
SYNOPSIS

	if ($short)
	{
		print <<HINT;

-h: Describe all options
HINT
	}
	else
	{
		print <<DETAILS;
-h: Print this message
-D: Debug level, -1 and -2 reduce output below normal, 1 and 2 increase it
-H: Harmless, don't actually make any changes to anything

-I: Image, examples are client, netsvr, appserver  (this option is required)
-V: Image version, examples are 1.7, 2.0, 3.0beta  (this option is required)
-S: Subimage, examples from the 2.0 standard client are js, qj or qjd
    If the image doesn't have subimages just omit this option

-m: Machine, used to start a single machine.  Requires the machine to be
    running and on the net.  If that is not the case you will need to go
    to the machine, halt it if necessary and run 'boot net - install' at
    the ok prompt.  -m requires an argument which is the hostname of the
    machine to start.

-f: File containing a list of hostnames to start.  They are iterated over
    as if you'd specified them individually with -m.
-F: Similar to -f but skips hosts that have been previously started.  The
    idea here is that if you have a list of hosts to jumpstart you run through
    them once with -f.  That forces all of them to be started.  Then if there
    are errors with a few you can correct those errors and then re-run the
    list with -F.  Then only the ones that weren't already successfully
    jumpstarted will be started.
DETAILS
	}

	exit;
}

sub finished
{
	# Takes a list of hostnames.  Returns 1 when the first status field
	# of every one of them equals $READY, and 0 as soon as one is found
	# that does not (the remaining hosts are then not looked at).
	foreach my $host (@@_)
	{
		return 0 unless ($status{$host}[0] == $READY);
	}

	return 1;
}

@


1.2
log
@Removed code which gave back the superlock while start was running.
This fixes some problems but largely breaks the status CGI script.
Fixing that will require rewriting the status-tracking code to use
one status file per machine instead of the current monolithic status
file.
@
text
@d3 1
a3 1
# $Id: start,v 1.1 1999/08/09 03:20:18 jheiss Exp jheiss $
d7 16
d28 7
@


1.1
log
@Initial revision
@
text
@d3 1
a3 1
# $Id$
d11 4
a14 1
# $Log$
a221 10
	# Unhandle ^C while we don't have a lock on the status system
	$SIG{INT} = '';

	# Give back lock on status system.  We do this to give the status
	# CGI script as much time as possible to read the status file and
	# get the web pages built.  We relock the status file once we are
	# ready to write changes to it.
	print "Giving back lock on status system\n" if ($verbose);
	unsuperlock();

a226 5
	# Clear up temp files used below
	unlink("/tmp/masterready.$$");
	unlink("/tmp/childready.$$");
	my $masterpid = $$;

d668 1
a668 14
	# The master needs to now re-superlock the status file.  The
	# child forks need to wait for this to happen and then each
	# issue a partialwritestat to write out their updates.  However,
	# we don't want the master to issue the superlock until there
	# is a child ready to write changes.  Otherwise the master gets
	# here quite quickly, does the superlock and we haven't gained
	# anything.  So, the children try to create /tmp/childready.<pid>
	# were <pid> is the pid of the master.  We use the master's pid
	# because that is easier then the master keeping track of all of
	# the childrens' pids.  The way the master indicates to the children
	# that he has the superlock is by creating a file in /tmp called
	# masterready.<pid> where <pid> is the master's process id.  The
	# children wait until that file exists and then call partialwritestat.
	if ($master)
a669 34
		# Wait until a child process says it is ready to do a write
		while (! -e "/tmp/childready.$masterpid")
		{
			sleep 1;
		}

		# Get a lock on the status system
		print "Locking status system\n" if ($verbose);
		superlock();
		
		# Handle the user hitting ^C gracefully
		$SIG{INT} = 'quit';

		# Indicate to the children that we have the lock
		open(MR, ">/tmp/masterready.$masterpid");
		print MR "$$";
		close(MR);
	}
	else
	{
		# Tell the master we're ready
		if (! -e "/tmp/childready.$masterpid")
		{
			open(CR, ">/tmp/childready.$masterpid");
			print CR "$$";
			close(CR);
		}

		# Wait for the master to indicate that he has the lock
		while (! -e "/tmp/masterready.$masterpid")
		{
			sleep 1;
		}

a713 4

	# Clear up temp files used above
	unlink("/tmp/masterready.$masterpid");
	unlink("/tmp/childready.$masterpid");
@
