[ALL] Add Gearman KEEPALIVE options

This requires the latest development version of python-gearman
to support the new keepalive options. Using these options (off
by default) solves the problem where the Gearman job server
gets paused/unpaused in a cloud environment but the clients and
workers never recognize that they have been disconnected.

WARNING: Not backwards compatible with older versions of the
python-gearman driver, even if the SSL and keepalive options
are not specified. This is a change in behavior.

Change-Id: Ic8db6676f7408364b6fe9a8690deb72bb6e2772c
This commit is contained in:
David Shrewsbury 2013-09-16 11:23:35 -04:00
parent 905976485d
commit 4d3fd33614
12 changed files with 198 additions and 53 deletions

View File

@ -55,6 +55,25 @@ Command Line Options
The path for the SSL key file to be used for the frontend of the API
server
.. option:: --gearman_keepalive
Use TCP KEEPALIVE to the Gearman job server. Not supported on all
systems.
.. option:: --gearman_keepcnt <COUNT>
Maximum number of TCP KEEPALIVE probes to send before killing the
connection to the Gearman job server.
.. option:: --gearman_keepidle <SECONDS>
Seconds of idle time on the Gearman job server connection before
sending TCP KEEPALIVE probes.
.. option:: --gearman_keepintvl <SECONDS>
Seconds between TCP KEEPALIVE probes.
.. option:: --gearman_ssl_ca <PATH>
The path for the Gearman SSL Certificate Authority.

View File

@ -60,6 +60,25 @@ Command Line Options
Used to specify the Gearman job server hostname and port. This option
can be used multiple times to specify multiple job servers.
.. option:: --gearman_keepalive
Use TCP KEEPALIVE to the Gearman job server. Not supported on all
systems.
.. option:: --gearman_keepcnt <COUNT>
Maximum number of TCP KEEPALIVE probes to send before killing the
connection to the Gearman job server.
.. option:: --gearman_keepidle <SECONDS>
Seconds of idle time on the Gearman job server connection before
sending TCP KEEPALIVE probes.
.. option:: --gearman_keepintvl <SECONDS>
Seconds between TCP KEEPALIVE probes.
.. option:: --gearman_ssl_ca <PATH>
The path for the Gearman SSL Certificate Authority.
@ -79,7 +98,7 @@ Command Line Options
class.
.. option:: --swift_basepath <CONTAINER>
The default container to be used for customer log uploads.
.. option:: --swift_endpoint <URL>

View File

@ -83,6 +83,25 @@ Command Line Options
The flavor ID (image size ID) or name to use for new nodes spun up in
the Nova API
.. option:: --gearman_keepalive
Use TCP KEEPALIVE to the Gearman job server. Not supported on all
systems.
.. option:: --gearman_keepcnt <COUNT>
Maximum number of TCP KEEPALIVE probes to send before killing the
connection to the Gearman job server.
.. option:: --gearman_keepidle <SECONDS>
Seconds of idle time on the Gearman job server connection before
sending TCP KEEPALIVE probes.
.. option:: --gearman_keepintvl <SECONDS>
Seconds between TCP KEEPALIVE probes.
.. option:: --gearman_ssl_ca <PATH>
The path for the Gearman SSL Certificate Authority.

View File

@ -28,6 +28,25 @@ Command Line Options
* *haproxy* - `HAProxy <http://haproxy.1wt.eu>`_ software load balancer.
This is the default driver.
.. option:: --gearman_keepalive
Use TCP KEEPALIVE to the Gearman job server. Not supported on all
systems.
.. option:: --gearman_keepcnt <COUNT>
Maximum number of TCP KEEPALIVE probes to send before killing the
connection to the Gearman job server.
.. option:: --gearman_keepidle <SECONDS>
Seconds of idle time on the Gearman job server connection before
sending TCP KEEPALIVE probes.
.. option:: --gearman_keepintvl <SECONDS>
Seconds between TCP KEEPALIVE probes.
.. option:: --gearman_ssl_ca <FILE>
Full path to the file with the CA public key to use when
@ -61,9 +80,9 @@ Command Line Options
.. option:: --gearman-poll <SECONDS>
The number of seconds gearman will poll before re-shuffling its
The number of seconds gearman will poll before re-shuffling its
connections. Default is 1 second.
.. option:: --syslog
Send log events to syslog.

View File

@ -28,9 +28,8 @@ user = libra
group = libra
driver = haproxy
reconnect_sleep = 60
stats_poll = 300
gearman_poll = 60
server = 10.0.0.1:8080 10.0.0.2:8080
server = 10.0.0.1:4730 10.0.0.2:4730
pid = /var/run/libra/libra_worker.pid
logfile = /var/log/libra/libra_worker.log

View File

@ -51,7 +51,11 @@ def setup_app(pecan_config, args):
'server': args.gearman,
'ssl_key': args.gearman_ssl_key,
'ssl_cert': args.gearman_ssl_cert,
'ssl_ca': args.gearman_ssl_ca
'ssl_ca': args.gearman_ssl_ca,
'keepalive': args.gearman_keepalive,
'keepcnt': args.gearman_keepcnt,
'keepidle': args.gearman_keepidle,
'keepintvl': args.gearman_keepintvl
}
config['conffile'] = args.config
if args.debug:
@ -136,6 +140,22 @@ def main():
'--gearman', action='append', metavar='HOST:PORT', default=[],
help='Gearman job servers'
)
options.parser.add_argument(
'--gearman_keepalive', action="store_true",
help='use KEEPALIVE to Gearman server'
)
options.parser.add_argument(
'--gearman_keepcnt', type=int, metavar='COUNT',
help='max keepalive probes to send before killing connection'
)
options.parser.add_argument(
'--gearman_keepidle', type=int, metavar='SECONDS',
help='seconds of idle time before sending keepalive probes'
)
options.parser.add_argument(
'--gearman_keepintvl', type=int, metavar='SECONDS',
help='seconds between TCP keepalive probes'
)
options.parser.add_argument(
'--gearman_ssl_ca', metavar='FILE',
help='Gearman SSL certificate authority'

View File

@ -56,7 +56,11 @@ def setup_app(pecan_config, args):
'server': args.gearman,
'ssl_key': args.gearman_ssl_key,
'ssl_cert': args.gearman_ssl_cert,
'ssl_ca': args.gearman_ssl_ca
'ssl_ca': args.gearman_ssl_ca,
'keepalive': args.gearman_keepalive,
'keepcnt': args.gearman_keepcnt,
'keepidle': args.gearman_keepidle,
'keepintvl': args.gearman_keepintvl
}
config['ip_filters'] = args.ip_filters
if args.debug:
@ -115,6 +119,22 @@ def main():
'--gearman', action='append', metavar='HOST:PORT', default=[],
help='Gearman job servers'
)
options.parser.add_argument(
'--gearman_keepalive', action="store_true",
help='use KEEPALIVE to Gearman server'
)
options.parser.add_argument(
'--gearman_keepcnt', type=int, metavar='COUNT',
help='max keepalive probes to send before killing connection'
)
options.parser.add_argument(
'--gearman_keepidle', type=int, metavar='SECONDS',
help='seconds of idle time before sending keepalive probes'
)
options.parser.add_argument(
'--gearman_keepintvl', type=int, metavar='SECONDS',
help='seconds between TCP keepalive probes'
)
options.parser.add_argument(
'--gearman_ssl_ca', metavar='FILE',
help='Gearman SSL certificate authority'

View File

@ -84,20 +84,19 @@ class GearmanClientThread(object):
self.host = host
self.lbid = lbid
if all([conf.gearman.ssl_key, conf.gearman.ssl_cert,
conf.gearman.ssl_ca]):
# Use SSL connections to each Gearman job server.
ssl_server_list = []
for server in conf.gearman.server:
ghost, gport = server.split(':')
ssl_server_list.append({'host': ghost,
'port': int(gport),
'keyfile': conf.gearman.ssl_key,
'certfile': conf.gearman.ssl_cert,
'ca_certs': conf.gearman.ssl_ca})
self.gearman_client = JSONGearmanClient(ssl_server_list)
else:
self.gearman_client = JSONGearmanClient(conf.gearman.server)
server_list = []
for server in conf.gearman.server:
ghost, gport = server.split(':')
server_list.append({'host': ghost,
'port': int(gport),
'keyfile': conf.gearman.ssl_key,
'certfile': conf.gearman.ssl_cert,
'ca_certs': conf.gearman.ssl_ca,
'keepalive': conf.gearman.keepalive,
'keepcnt': conf.gearman.keepcnt,
'keepidle': conf.gearman.keepidle,
'keepintvl': conf.gearman.keepintvl})
self.gearman_client = JSONGearmanClient(server_list)
def send_assign(self, data):
job_data = {

View File

@ -34,18 +34,19 @@ def worker_thread(logger, args):
logger.info("Registering task libra_pool_mgm")
hostname = socket.gethostname()
if all([args.gearman_ssl_key, args.gearman_ssl_cert, args.gearman_ssl_ca]):
ssl_server_list = []
for host_port in args.gearman:
host, port = host_port.split(':')
ssl_server_list.append({'host': host,
'port': int(port),
'keyfile': args.gearman_ssl_key,
'certfile': args.gearman_ssl_cert,
'ca_certs': args.gearman_ssl_ca})
worker = JSONGearmanWorker(ssl_server_list)
else:
worker = JSONGearmanWorker(args.gearman)
server_list = []
for host_port in args.gearman:
host, port = host_port.split(':')
server_list.append({'host': host,
'port': int(port),
'keyfile': args.gearman_ssl_key,
'certfile': args.gearman_ssl_cert,
'ca_certs': args.gearman_ssl_ca,
'keepalive': args.gearman_keepalive,
'keepcnt': args.gearman_keepcnt,
'keepidle': args.gearman_keepidle,
'keepintvl': args.gearman_keepintvl})
worker = JSONGearmanWorker(server_list)
worker.set_client_id(hostname)
worker.register_task('libra_pool_mgm', handler)

View File

@ -118,6 +118,22 @@ def main():
'--gearman', action='append', metavar='HOST:PORT', default=[],
help='Gearman job servers'
)
options.parser.add_argument(
'--gearman_keepalive', action="store_true",
help='use KEEPALIVE to Gearman server'
)
options.parser.add_argument(
'--gearman_keepcnt', type=int, metavar='COUNT',
help='max keepalive probes to send before killing connection'
)
options.parser.add_argument(
'--gearman_keepidle', type=int, metavar='SECONDS',
help='seconds of idle time before sending keepalive probes'
)
options.parser.add_argument(
'--gearman_keepintvl', type=int, metavar='SECONDS',
help='seconds between TCP keepalive probes'
)
options.parser.add_argument(
'--gearman_ssl_ca', metavar='FILE',
help='Gearman SSL certificate authority'
@ -131,8 +147,7 @@ def main():
help='Gearman SSL key'
)
options.parser.add_argument(
'--gearman-poll',
dest='gearman_poll', type=int, metavar='TIME',
'--gearman_poll', type=int, metavar='TIME',
default=1, help='Gearman worker polling timeout'
)
options.parser.add_argument(

View File

@ -70,6 +70,22 @@ def main():
choices=known_drivers.keys(), default='haproxy',
help='type of device to use'
)
options.parser.add_argument(
'--gearman_keepalive', action="store_true",
help='use KEEPALIVE to Gearman server'
)
options.parser.add_argument(
'--gearman_keepcnt', type=int, metavar='COUNT',
help='max keepalive probes to send before killing connection'
)
options.parser.add_argument(
'--gearman_keepidle', type=int, metavar='SECONDS',
help='seconds of idle time before sending keepalive probes'
)
options.parser.add_argument(
'--gearman_keepintvl', type=int, metavar='SECONDS',
help='seconds between TCP keepalive probes'
)
options.parser.add_argument(
'--gearman_ssl_ca', dest='gearman_ssl_ca', metavar='FILE',
help='Gearman SSL certificate authority'
@ -83,13 +99,12 @@ def main():
help='Gearman SSL key'
)
options.parser.add_argument(
'--haproxy-service', dest='haproxy_service',
'--haproxy_service',
choices=haproxy_services.keys(), default='ubuntu',
help='os services to use with HAProxy driver (when used)'
)
options.parser.add_argument(
'-s', '--reconnect_sleep',
dest='reconnect_sleep', type=int, metavar='TIME',
'-s', '--reconnect_sleep', type=int, metavar='TIME',
default=60, help='seconds to sleep between job server reconnects'
)
options.parser.add_argument(
@ -98,12 +113,11 @@ def main():
help='add a Gearman job server to the connection list'
)
options.parser.add_argument(
'--stats-poll', dest='stats_poll', type=int, metavar='TIME',
'--stats_poll', type=int, metavar='TIME',
default=300, help='statistics polling interval in seconds'
)
options.parser.add_argument(
'--gearman-poll',
dest='gearman_poll', type=int, metavar='TIME',
'--gearman_poll', type=int, metavar='TIME',
default=1, help='Gearman worker polling timeout'
)
args = options.run()

View File

@ -63,19 +63,20 @@ def config_thread(logger, driver, args):
hostname = socket.gethostname()
logger.info("[worker] Registering task %s" % hostname)
if all([args.gearman_ssl_key, args.gearman_ssl_cert, args.gearman_ssl_ca]):
ssl_server_list = []
for host_port in args.server:
host, port = host_port.split(':')
ssl_server_list.append({'host': host,
'port': int(port),
'keyfile': args.gearman_ssl_key,
'certfile': args.gearman_ssl_cert,
'ca_certs': args.gearman_ssl_ca})
worker = CustomJSONGearmanWorker(ssl_server_list)
else:
worker = CustomJSONGearmanWorker(args.server)
server_list = []
for host_port in args.server:
host, port = host_port.split(':')
server_list.append({'host': host,
'port': int(port),
'keyfile': args.gearman_ssl_key,
'certfile': args.gearman_ssl_cert,
'ca_certs': args.gearman_ssl_ca,
'keepalive': args.gearman_keepalive,
'keepcnt': args.gearman_keepcnt,
'keepidle': args.gearman_keepidle,
'keepintvl': args.gearman_keepintvl})
worker = CustomJSONGearmanWorker(server_list)
worker.set_client_id(hostname)
worker.register_task(hostname, handler)
worker.logger = logger