Index: trunk/server/common/oursrc/hacron/hacron
===================================================================
--- trunk/server/common/oursrc/hacron/hacron	(revision 1465)
+++ trunk/server/common/oursrc/hacron/hacron	(revision 1466)
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
-from __future__ import with_statement
-
 import glob
 import logging.handlers
+import fcntl
 import optparse
 import os
@@ -24,9 +23,11 @@
 logger = logging.getLogger('cron')
 
-import os
-import subprocess
-
-HA_LOGD = os.environ.get('HA_LOGD') == 'xyes'
-
+HA_LOGD = os.environ.get('HA_LOGD') == 'yes'
+
+class HacronError(Exception):
+    def __init__(self, errno, msg='Something went wrong'):
+        self.errno = errno
+        self.msg = msg
+    
 class HaLogHandler(logging.Handler):
     """
@@ -55,25 +56,16 @@
 
 class lock(object):
-    def __init__(self, name):
-        self.name = name
+    def __init__(self, filename):
+        self.filename = filename
+        if not _touch(filename):
+            raise
 
     def __enter__(self):
-        tries = 0
-        while True:
-            try:
-                self.lock = os.open(self.name, os.O_RDWR | os.O_CREAT | os.O_EXCL)
-            except OSError:
-                logger.error('Could not acquire lock %s.  Sleeping...' % self.name)
-                time.sleep(0.5)
-                tries += 1
-                if not tries % 60:
-                    logger.error("Waited too long; got bored.  Clearing lock %s." % self.name)
-                    _remove(self.name)
-            else:
-                break
+        f = open(self.filename)
+        fcntl.flock(f, fcntl.LOCK_EX)
             
     def __exit__(self, type, value, traceback):
-        os.close(self.lock)
-        _remove(self.name)
+        f = open(self.filename)
+        fcntl.flock(f, fcntl.LOCK_UN)
         
 def _touch(path):
@@ -118,7 +110,4 @@
     return path.join(CRONSPOOL_DIR, _suffix(server, 'cronspool'))
 
-def _server_exists(server):
-    return path.exists(path.join(SERVER_DIR, server))
-
 def _serverfile(server):
     return path.join(SERVER_DIR, server)
@@ -132,15 +121,28 @@
     return path.islink(crondir)
 
+def _restart_crond(args, options):
+    # TODO: insert correct cmd here.  Also, should we capture and log
+    # stdout?
+    if options.development:
+        cmd = ['echo', 'called crond reset']
+    else:
+        cmd = ['service', 'crond', 'reload']
+    subprocess.check_call(cmd)
+
 def start_cron(args, options):
     if not _touch(_serverfile(HOSTNAME)):
         return OCF_ERR_CONFIGURED
+    elif _is_master(HOSTNAME):
+        logger.error('%s is already the master!' % HOSTNAME)
+        return OCF_SUCCESS
 
     logger.info('Starting %s' % HOSTNAME)
-    if _is_master(HOSTNAME):
-        logger.error('%s is already the master!' % HOSTNAME)
     for server in _servers():
         crondir = _crondir(server)
         if server == HOSTNAME:
-            _remove(crondir)
+            # Get rid of current crondir, and leave if that fails.
+            if not _remove(crondir):
+                logger.error("Could not remove dummy cronspool dir %s" % crondir)
+                return OCF_ERR_GENERIC
             os.symlink('../cronspool', crondir)
             logger.info('Created master symlink %s' % crondir)
@@ -152,17 +154,18 @@
                 _mkdir(crondir)
                 logger.info('Created slave dummy directory %s' % crondir)
-
-    if CRON_RESTART_COMMAND:
-        ret = subprocess.call(CRON_RESTART_COMMAND)
-        if ret:
-            logger.error('Cron restart exited with return code %d' % ret)
-            return OCF_ERR_GENERIC
-        else:
-            logger.info('Restarted crond')
+    try:
+        _restart_crond()
+    except OSError, e:
+        logger.error('Cron restart exited with return code %d' % e.errno)
+        return OCF_ERR_GENERIC
+    else:
+        logger.info('Restarted crond')
     return OCF_SUCCESS
 
 def stop_cron(args, options):
+    """Stop cron."""
     if not _is_master(HOSTNAME):
         logger.error('I am not the master!')
+        return OCF_NOT_RUNNING
     else:
         crondir = _crondir(HOSTNAME)
@@ -170,7 +173,17 @@
         _remove(crondir)
         _mkdir(crondir)
-    return OCF_SUCCESS
+        # TODO: should we do something else here?
+        try:
+            _restart_crond()
+        except OSError, e:
+            logger.error('Cron restart exited with return code %d' % e.errno)
+            return OCF_ERR_GENERIC
+        else:
+            logger.info('Restarted crond')
+        return OCF_SUCCESS
 
 def monitor_cron(args, options):
+    """Check whether cron is running.  For now just makes sure that the
+    current machine is the master, although this should likely be fixed."""
     if _is_master(HOSTNAME):
         return OCF_SUCCESS
@@ -182,6 +195,8 @@
         logger.error('Could not touch %s' % _serverfile(HOSTNAME))
         return OCF_GENERIC_ERR
-    if not path.exists(CRONSPOOL_DIR):
+    elif not path.exists(CRONSPOOL_DIR):
         return OCF_GENERIC_ERR
+    else:
+        return OCF_SUCCESS
 
 def setup(args, options):
@@ -193,9 +208,6 @@
             logger.info('Already exists: %s' % d)
 
-def add_servers(servers, options):
-    for server in servers:
-        _touch(_serverfile(server))
-
 def remove_servers(servers, options):
+    """Remove servers from the list of available ones."""
     for server in servers:
         os.unlink(_serverfile(server))
@@ -219,12 +231,4 @@
 </longdesc>
 <shortdesc lang="en">Cron base directory</shortdesc>
-<content type="string" />
-</parameter>
-
-<parameter name="cron_restart_cmd">
-<longdesc lang="en">
-Command to restart cron.
-</longdesc>
-<shortdesc lang="en">Restart cron cmd</shortdesc>
 <content type="string" />
 </parameter>
@@ -248,5 +252,5 @@
 
 def _set_globals(args, options):
-    global HOSTNAME, CRONROOT, CRONSPOOL_DIR, SERVER_DIR, CRON_RESTART_COMMAND, \
+    global HOSTNAME, CRONROOT, CRONSPOOL_DIR, SERVER_DIR, \
         HA_RSCTMP, OCF_RESOURCE_INSTANCE
     if options.development:
@@ -264,10 +268,7 @@
     CRONROOT = options.cronroot or os.environ.get('OCF_RESKEY_cron_root')
     if not CRONROOT:
-        logging.error('No cron_root specified.')
-        return OCF_ERR_CONFIGURED
+        raise HacronError(OCF_ERR_CONFIGURED, 'No cron_root specified.')
     CRONSPOOL_DIR = path.join(CRONROOT, 'server-cronspools')
     SERVER_DIR = path.join(CRONROOT, 'servers')
-    CRON_RESTART_COMMAND = options.cron_restart or os.environ.get('OCF_RESKEY_cron_restart_cmd')
-
     HA_RSCTMP = os.environ.get('HA_RSCTMP', '/tmp')
     OCF_RESOURCE_INSTANCE = os.environ.get('OCF_RESOURCE_INSTANCE', 'default')
@@ -275,7 +276,23 @@
 
 def main():
-    cmds = ['start', 'reload', 'stop', 'monitor', 'validate-all', 'setup',
-            'remove-servers', 'meta-data']
-    usage_str = "usage: %%prog [%s]" % '|'.join(cmds)
+    usage_str = """usage: %%prog [-s server] [-c cronroot] [-d] cmd
+
+Script for starting and stopping cron in a multiserver environment.
+One server is designated the master.
+
+== HA available commands: ==
+start: Make this server into the master and reload crond.
+reload: Same as start.
+stop: Demote this server to a spare and reload crond.
+monitor: Indicate whether this server is successfully the master.
+validate-all: Make sure that things look right and this server is
+  ready to be promoted to master.
+meta-data: Print out the XML meta data for this service
+
+== User-only commands: ==
+setup: Create the folders, etc. necessary for running hacron.
+remove-servers server1 server2 ...: Take a list of servers out of the
+  list of available ones.
+    """
     parser = optparse.OptionParser(usage=usage_str)
     parser.add_option("-s", "--server",
@@ -290,9 +307,5 @@
                       action="store_true", dest="development",
                       default=False,
-                      help="run in production")
-    parser.add_option("-r", "--cron-restart",
-                      action="store", dest="cron_restart",
-                      default=None,
-                      help="run in production")
+                      help="run in development mode")
     (options, args) = parser.parse_args()
     if len(args) < 1:
@@ -303,8 +316,12 @@
     if command == 'meta-data':
         return meta_data_cron(args, options)
-    globals_status = _set_globals(args, options)
+
+    try:
+        _set_globals(args, options)
+    except HacronError, e:
+        logger.error(e.msg)
+        return e.errno
+
     with lock('%s/hacron-%s.lock' % (HA_RSCTMP, OCF_RESOURCE_INSTANCE)):
-        if globals_status:
-            return globals_status
         if command == 'start':
             return start_cron(args, options)
@@ -319,5 +336,5 @@
         elif command == 'setup':
             return setup(args, options)
-        elif command == 'add-servers':
+        elif command == 'remove-servers':
             return remove_servers(args, options)
         else:
