Merging in newer version of worker code and small changes for the django app
This commit is contained in:
parent
fcd9a0172d
commit
5ca2df150c
65
README
65
README
@ -1,3 +1,66 @@
|
|||||||
StackTach is a debugging tool for OpenStack Nova.
|
StackTach is a debugging tool for OpenStack Nova.
|
||||||
|
|
||||||
It takes events from AMQP and sends them to the StackTach server for web display.
|
--------------------
|
||||||
|
|
||||||
|
It takes events from AMQP and inserts them in a database for the StackTach django project server for web display.
|
||||||
|
stacktach workers consume the monitor.info and monitor.error rabbit messape queues.
|
||||||
|
|
||||||
|
It's important that they keep running otherwise the queues fill up and slow down the
|
||||||
|
whole nova environment.
|
||||||
|
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
There's a django app in the 'stacktach' directory that lets you see what's
|
||||||
|
going on in your nova install, then there's two flavors of worker processes
|
||||||
|
that collect the amqp messages for displaying in the stacktach django apps.
|
||||||
|
|
||||||
|
The app was originally designed as a service so that different customers could
|
||||||
|
monitor their nova deployments, so it has 'tenants'.
|
||||||
|
|
||||||
|
After installing the django app like most django apps, if you navigate to it,
|
||||||
|
it'll prompt you to make a new tenant. It'll be given the number 1. You can
|
||||||
|
view it in the browser by appending /1 onto the end of the url.
|
||||||
|
|
||||||
|
That tenant ID then needs to be passed to one of the worker setups.
|
||||||
|
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
The original worker setup uses the single script 'worker.py'. It depends on
|
||||||
|
the 'kombu' lib to talk to your amqp server and uses eventlet green threads,
|
||||||
|
one for each nova deploy.
|
||||||
|
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
The newer version consists of the scripts:
|
||||||
|
|
||||||
|
* start_workers.py - Starts up sub-processes of worker_new.py's
|
||||||
|
* worker_conf.py - Holds the configs for your nova deploys
|
||||||
|
* worker_new.py - The actual worker code
|
||||||
|
|
||||||
|
This version was written to address stability issues in the original.
|
||||||
|
|
||||||
|
It depends on 'amqplib' for talking to the amqp broker.
|
||||||
|
|
||||||
|
It uses subprocess instead of threading to fire up the workers, one sub-proc
|
||||||
|
for each nova deploy.
|
||||||
|
|
||||||
|
The newer version seems to have a memory leak, so needs restarting
|
||||||
|
occasionally, but seems to be overall more stabler than the original.
|
||||||
|
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Before use, put your nova install detail into 'worker_conf.py'
|
||||||
|
|
||||||
|
If you need to start them manually:
|
||||||
|
|
||||||
|
sudo -i
|
||||||
|
cd /path/to/stacktach/install
|
||||||
|
export DJANGO_SETTINGS_MODULE=stacktach.settings
|
||||||
|
python start_workers.py
|
||||||
|
|
||||||
|
The start_workers.py imports the list of deployments to consume from worker_conf.py,
|
||||||
|
then it starts a sub process of worker.py for each deploy.
|
||||||
|
|
||||||
|
Nice way to see the logs in a screen session:
|
||||||
|
|
||||||
|
ls *.log | grep "global\|cell" | xargs watch --differences tail -n2
|
||||||
|
@ -10,12 +10,10 @@ from stacktach import models
|
|||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
import logging
|
|
||||||
import pprint
|
import pprint
|
||||||
import random
|
import random
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
VERSION = 4
|
VERSION = 4
|
||||||
|
|
||||||
@ -33,7 +31,11 @@ def _monitor_message(routing_key, body):
|
|||||||
publisher = body['publisher_id']
|
publisher = body['publisher_id']
|
||||||
parts = publisher.split('.')
|
parts = publisher.split('.')
|
||||||
service = parts[0]
|
service = parts[0]
|
||||||
host = parts[1]
|
if len(parts) > 1:
|
||||||
|
host = ".".join(parts[1:])
|
||||||
|
else:
|
||||||
|
host = None
|
||||||
|
#logging.error("publisher=%s, host=%s" % (publisher, host))
|
||||||
payload = body['payload']
|
payload = body['payload']
|
||||||
request_spec = payload.get('request_spec', None)
|
request_spec = payload.get('request_spec', None)
|
||||||
instance = payload.get('instance_id', None)
|
instance = payload.get('instance_id', None)
|
||||||
@ -80,9 +82,15 @@ def _parse(tenant, args, json_args):
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
values['tenant'] = tenant
|
values['tenant'] = tenant
|
||||||
when = body['_context_timestamp']
|
|
||||||
try:
|
try:
|
||||||
when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f")
|
when = body['timestamp']
|
||||||
|
except KeyError:
|
||||||
|
when = body['_context_timestamp'] # Old way of doing it
|
||||||
|
try:
|
||||||
|
try:
|
||||||
|
when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S.%f")
|
||||||
|
except ValueError:
|
||||||
|
when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f") # Old way of doing it
|
||||||
values['microseconds'] = when.microsecond
|
values['microseconds'] = when.microsecond
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
pass
|
pass
|
||||||
@ -169,21 +177,21 @@ def _default_context(state):
|
|||||||
context = dict(utc=datetime.datetime.utcnow(), state=state)
|
context = dict(utc=datetime.datetime.utcnow(), state=state)
|
||||||
return context
|
return context
|
||||||
|
|
||||||
|
|
||||||
def welcome(request):
|
def welcome(request):
|
||||||
state = _reset_state(request)
|
state = _reset_state(request)
|
||||||
return render_to_response('stacktach/welcome.html', _default_context(state))
|
return render_to_response('welcome.html', _default_context(state))
|
||||||
|
|
||||||
|
|
||||||
@tenant_check
|
@tenant_check
|
||||||
def home(request, tenant_id):
|
def home(request, tenant_id):
|
||||||
state = _get_state(request, tenant_id)
|
state = _get_state(request, tenant_id)
|
||||||
return render_to_response('stacktach/index.html', _default_context(state))
|
return render_to_response('index.html', _default_context(state))
|
||||||
|
|
||||||
|
|
||||||
def logout(request):
|
def logout(request):
|
||||||
del request.session['state']
|
del request.session['state']
|
||||||
return render_to_response('stacktach/welcome.html', _default_context(None))
|
return render_to_response('welcome.html', _default_context(None))
|
||||||
|
|
||||||
|
|
||||||
@csrf_protect
|
@csrf_protect
|
||||||
@ -200,7 +208,7 @@ def new_tenant(request):
|
|||||||
else:
|
else:
|
||||||
form = models.TenantForm()
|
form = models.TenantForm()
|
||||||
context['form'] = form
|
context['form'] = form
|
||||||
return render_to_response('stacktach/new_tenant.html', context,
|
return render_to_response('new_tenant.html', context,
|
||||||
context_instance=template.RequestContext(request))
|
context_instance=template.RequestContext(request))
|
||||||
|
|
||||||
|
|
||||||
@ -212,7 +220,7 @@ def data(request, tenant_id):
|
|||||||
c = _default_context(state)
|
c = _default_context(state)
|
||||||
fields = _parse(state.tenant, args, raw_args)
|
fields = _parse(state.tenant, args, raw_args)
|
||||||
c['cooked_args'] = fields
|
c['cooked_args'] = fields
|
||||||
return render_to_response('stacktach/data.html', c)
|
return render_to_response('data.html', c)
|
||||||
|
|
||||||
|
|
||||||
@tenant_check
|
@tenant_check
|
||||||
@ -229,13 +237,13 @@ def details(request, tenant_id, column, row_id):
|
|||||||
from_time = value - datetime.timedelta(minutes=1)
|
from_time = value - datetime.timedelta(minutes=1)
|
||||||
to_time = value + datetime.timedelta(minutes=1)
|
to_time = value + datetime.timedelta(minutes=1)
|
||||||
rows = rows.filter(when__range=(from_time, to_time))
|
rows = rows.filter(when__range=(from_time, to_time))
|
||||||
|
|
||||||
rows = rows.order_by('-when', '-microseconds')[:200]
|
rows = rows.order_by('-when', '-microseconds')[:200]
|
||||||
_post_process_raw_data(rows, state, highlight=row_id)
|
_post_process_raw_data(rows, state, highlight=row_id)
|
||||||
c['rows'] = rows
|
c['rows'] = rows
|
||||||
c['allow_expansion'] = True
|
c['allow_expansion'] = True
|
||||||
c['show_absolute_time'] = True
|
c['show_absolute_time'] = True
|
||||||
return render_to_response('stacktach/rows.html', c)
|
return render_to_response('rows.html', c)
|
||||||
|
|
||||||
|
|
||||||
@tenant_check
|
@tenant_check
|
||||||
@ -246,7 +254,7 @@ def expand(request, tenant_id, row_id):
|
|||||||
payload = json.loads(row.json)
|
payload = json.loads(row.json)
|
||||||
pp = pprint.PrettyPrinter()
|
pp = pprint.PrettyPrinter()
|
||||||
c['payload'] = pp.pformat(payload)
|
c['payload'] = pp.pformat(payload)
|
||||||
return render_to_response('stacktach/expand.html', c)
|
return render_to_response('expand.html', c)
|
||||||
|
|
||||||
|
|
||||||
@tenant_check
|
@tenant_check
|
||||||
@ -257,7 +265,7 @@ def host_status(request, tenant_id):
|
|||||||
order_by('-when', '-microseconds')[:20]
|
order_by('-when', '-microseconds')[:20]
|
||||||
_post_process_raw_data(hosts, state)
|
_post_process_raw_data(hosts, state)
|
||||||
c['rows'] = hosts
|
c['rows'] = hosts
|
||||||
return render_to_response('stacktach/host_status.html', c)
|
return render_to_response('host_status.html', c)
|
||||||
|
|
||||||
|
|
||||||
@tenant_check
|
@tenant_check
|
||||||
@ -270,9 +278,9 @@ def search(request, tenant_id):
|
|||||||
if column != None and value != None:
|
if column != None and value != None:
|
||||||
rows = models.RawData.objects.filter(tenant=tenant_id).\
|
rows = models.RawData.objects.filter(tenant=tenant_id).\
|
||||||
filter(**{column:value}).\
|
filter(**{column:value}).\
|
||||||
order_by('-when', '-microseconds')
|
order_by('-when', '-microseconds')[:22]
|
||||||
_post_process_raw_data(rows, state)
|
_post_process_raw_data(rows, state)
|
||||||
c['rows'] = rows
|
c['rows'] = rows
|
||||||
c['allow_expansion'] = True
|
c['allow_expansion'] = True
|
||||||
c['show_absolute_time'] = True
|
c['show_absolute_time'] = True
|
||||||
return render_to_response('stacktach/rows.html', c)
|
return render_to_response('rows.html', c)
|
||||||
|
30
start_workers.py
Normal file
30
start_workers.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Copyright 2012 Openstack LLC.
|
||||||
|
# All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
from multiprocessing import Process
|
||||||
|
from worker import run
|
||||||
|
from worker_conf import DEPLOYMENTS
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
processes = []
|
||||||
|
for deployment in DEPLOYMENTS:
|
||||||
|
if deployment.get('enabled', True):
|
||||||
|
process = Process(target=run, args=(deployment,))
|
||||||
|
process.daemon = True
|
||||||
|
process.start()
|
||||||
|
processes.append(process)
|
||||||
|
for process in processes:
|
||||||
|
process.join()
|
29
worker_conf.py
Normal file
29
worker_conf.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
# Copyright 2012 Openstack LLC.
|
||||||
|
# All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
# This is a sample conf file. Use it as a guide to make your own
|
||||||
|
|
||||||
|
DEPLOYMENTS = [
|
||||||
|
# My fun conf
|
||||||
|
dict(
|
||||||
|
tenant_id=1, # This is the stacktach tenant, not an openstack tenant
|
||||||
|
name="my-fun-nova-deploy",
|
||||||
|
url='http://stacktach.my-fun-nova-deploy.com', # The url for the base of the django app
|
||||||
|
rabbit_host="1.2.3.4", # ip/host name of the amqp server to listen to
|
||||||
|
rabbit_port=5672,
|
||||||
|
rabbit_userid="amqp-user-1",
|
||||||
|
rabbit_password="some secret password",
|
||||||
|
rabbit_virtual_host="amqp-vhost"),
|
||||||
|
]
|
82
worker_new.py
Executable file
82
worker_new.py
Executable file
@ -0,0 +1,82 @@
|
|||||||
|
# Copyright 2012 Openstack LLC.
|
||||||
|
# All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
import amqplib.client_0_8 as amqp
|
||||||
|
import json
|
||||||
|
import socket
|
||||||
|
import time
|
||||||
|
|
||||||
|
from stacktach import models, views
|
||||||
|
|
||||||
|
|
||||||
|
class NovaConsumer(object):
|
||||||
|
def __init__(self, channel, tenant_id, logger):
|
||||||
|
self.channel = channel
|
||||||
|
self.tenant = models.Tenant.objects.get(tenant_id=tenant_id)
|
||||||
|
self.logger = logger
|
||||||
|
channel.basic_consume('monitor.info', callback=self.onMessage)
|
||||||
|
channel.basic_consume('monitor.error', callback=self.onMessage)
|
||||||
|
|
||||||
|
def onMessage(self, message):
|
||||||
|
routing_key = message.delivery_info['routing_key']
|
||||||
|
args = (routing_key, json.loads(message.body))
|
||||||
|
asJson = json.dumps(args)
|
||||||
|
|
||||||
|
#from pprint import pformat
|
||||||
|
#self.logger.debug("Saving %s", pformat(args))
|
||||||
|
views._parse(self.tenant, args, asJson)
|
||||||
|
self.logger.debug("Recorded %s ", routing_key)
|
||||||
|
self.channel.basic_ack(message.delivery_tag)
|
||||||
|
|
||||||
|
|
||||||
|
def run(deployment, logger):
|
||||||
|
tenant_id = deployment.get('tenant_id', 1)
|
||||||
|
host = deployment.get('rabbit_host', 'localhost')
|
||||||
|
port = deployment.get('rabbit_port', 5672)
|
||||||
|
user_id = deployment.get('rabbit_userid', 'rabbit')
|
||||||
|
password = deployment.get('rabbit_password', 'rabbit')
|
||||||
|
virtual_host = deployment.get('rabbit_virtual_host', '/')
|
||||||
|
|
||||||
|
logger.info("Rabbit: %s %s %s %s" %
|
||||||
|
(host, port, user_id, virtual_host))
|
||||||
|
|
||||||
|
|
||||||
|
while 1:
|
||||||
|
try:
|
||||||
|
conn = amqp.Connection(host, userid=user_id, password=password, virtual_host=virtual_host)
|
||||||
|
|
||||||
|
ch = conn.channel()
|
||||||
|
ch.access_request(virtual_host, active=True, read=True)
|
||||||
|
|
||||||
|
ch.exchange_declare('nova', type='topic', durable=True, auto_delete=False)
|
||||||
|
ch.queue_declare('monitor.info', durable=True, auto_delete=False, exclusive=False)
|
||||||
|
ch.queue_declare('monitor.error', durable=True, auto_delete=False, exclusive=False)
|
||||||
|
ch.queue_bind('monitor.info', 'nova')
|
||||||
|
ch.queue_bind('monitor.error', 'nova')
|
||||||
|
consumer = NovaConsumer(ch, tenant_id, logger)
|
||||||
|
#
|
||||||
|
# Loop as long as the channel has callbacks registered
|
||||||
|
#
|
||||||
|
while ch.callbacks:
|
||||||
|
ch.wait()
|
||||||
|
break
|
||||||
|
except socket.error, e:
|
||||||
|
logger.warn("Socket error: %s" % e)
|
||||||
|
time.sleep(5)
|
||||||
|
continue
|
||||||
|
|
||||||
|
ch.close()
|
||||||
|
conn.close()
|
||||||
|
logger.info("Finished")
|
Loading…
x
Reference in New Issue
Block a user