Merging in newer version of worker code and small changes for the django app

2012-09-06 05:20:39 +10:00 · 2012-09-06 05:20:39 +10:00 · 5ca2df150c
commit 5ca2df150c
parent fcd9a0172d
14 changed files with 230 additions and 18 deletions
--- a/65
+++ b/65
@ -1,3 +1,66 @@
 StackTach is a debugging tool for OpenStack Nova.

-It takes events from AMQP and sends them to the StackTach server for web display.
+--------------------
+
+It takes events from AMQP and inserts them in a database for the StackTach django project server for web display.
+stacktach workers consume the monitor.info and monitor.error rabbit messape queues.
+
+It's important that they keep running otherwise the queues fill up and slow down the
+whole nova environment.
+
+--------------------
+
+There's a django app in the 'stacktach' directory that lets you see what's
+going on in your nova install, then there's two flavors of worker processes
+that collect the amqp messages for displaying in the stacktach django apps.
+
+The app was originally designed as a service so that different customers could
+monitor their nova deployments, so it has 'tenants'.
+
+After installing the django app like most django apps, if you navigate to it,
+it'll prompt you to make a new tenant. It'll be given the number 1. You can
+view it in the browser by appending /1 onto the end of the url.
+
+That tenant ID then needs to be passed to one of the worker setups.
+
+--------------------
+
+The original worker setup uses the single script 'worker.py'. It depends on
+the 'kombu' lib to talk to your amqp server and uses eventlet green threads,
+one for each nova deploy.
+
+--------------------
+
+The newer version consists of the scripts:
+
+ * start_workers.py - Starts up sub-processes of worker_new.py's
+ * worker_conf.py - Holds the configs for your nova deploys
+ * worker_new.py - The actual worker code
+
+This version was written to address stability issues in the original.
+
+It depends on 'amqplib' for talking to the amqp broker.
+
+It uses subprocess instead of threading to fire up the workers, one sub-proc
+for each nova deploy.
+
+The newer version seems to have a memory leak, so needs restarting
+occasionally, but seems to be overall more stabler than the original.
+
+--------------------
+
+Before use, put your nova install detail into 'worker_conf.py'
+
+If you need to start them manually: 
+ 
+sudo -i
+cd /path/to/stacktach/install
+export DJANGO_SETTINGS_MODULE=stacktach.settings 
+python start_workers.py 
+ 
+The start_workers.py imports the list of deployments to consume from worker_conf.py, 
+then it starts a sub process of worker.py for each deploy. 
+ 
+Nice way to see the logs in a screen session: 
+ 
+        ls *.log | grep "global\|cell" | xargs watch --differences tail -n2 
--- a/stacktach/templates/stacktach/base.html
+++ b/stacktach/templates/stacktach/base.html
--- a/stacktach/templates/stacktach/data.html
+++ b/stacktach/templates/stacktach/data.html
--- a/stacktach/templates/stacktach/expand.html
+++ b/stacktach/templates/stacktach/expand.html
--- a/stacktach/templates/stacktach/host_status.html
+++ b/stacktach/templates/stacktach/host_status.html
--- a/stacktach/templates/stacktach/index.html
+++ b/stacktach/templates/stacktach/index.html
--- a/stacktach/templates/stacktach/instance_status.html
+++ b/stacktach/templates/stacktach/instance_status.html
--- a/stacktach/templates/stacktach/new_tenant.html
+++ b/stacktach/templates/stacktach/new_tenant.html
--- a/stacktach/templates/stacktach/rows.html
+++ b/stacktach/templates/stacktach/rows.html
--- a/stacktach/templates/stacktach/welcome.html
+++ b/stacktach/templates/stacktach/welcome.html
--- a/stacktach/views.py
+++ b/stacktach/views.py
@ -10,12 +10,10 @@ from stacktach import models

 import datetime
 import json
-import logging
 import pprint
 import random
 import sys

-logger = logging.getLogger(__name__)

 VERSION = 4

@ -33,7 +31,11 @@ def _monitor_message(routing_key, body):
    publisher = body['publisher_id']
    parts = publisher.split('.')   
    service = parts[0]
-    host = parts[1]
+    if len(parts) > 1:
+        host = ".".join(parts[1:])
+    else:
+        host = None
+    #logging.error("publisher=%s, host=%s" % (publisher, host))
    payload = body['payload']
    request_spec = payload.get('request_spec', None)
    instance = payload.get('instance_id', None)
@ -80,9 +82,15 @@ def _parse(tenant, args, json_args):
            return {}

        values['tenant'] = tenant
-        when = body['_context_timestamp']
        try:
-            when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f")
+            when = body['timestamp']
+        except KeyError:
+            when = body['_context_timestamp'] # Old way of doing it
+        try:
+            try:
+                when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S.%f")
+            except ValueError:
+                when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f") # Old way of doing it
            values['microseconds'] = when.microsecond
        except Exception, e:
            pass
@ -169,21 +177,21 @@ def _default_context(state):
    context = dict(utc=datetime.datetime.utcnow(), state=state)
    return context

-    
+
 def welcome(request):
    state = _reset_state(request)
-    return render_to_response('stacktach/welcome.html', _default_context(state))
+    return render_to_response('welcome.html', _default_context(state))


@tenant_check
 def home(request, tenant_id):
    state = _get_state(request, tenant_id)
-    return render_to_response('stacktach/index.html', _default_context(state)) 
+    return render_to_response('index.html', _default_context(state)) 


 def logout(request):
    del request.session['state']
-    return render_to_response('stacktach/welcome.html', _default_context(None)) 
+    return render_to_response('welcome.html', _default_context(None)) 


@csrf_protect
@ -200,7 +208,7 @@ def new_tenant(request):
    else:
        form = models.TenantForm()
        context['form'] = form
-    return render_to_response('stacktach/new_tenant.html', context,
+    return render_to_response('new_tenant.html', context,
                              context_instance=template.RequestContext(request))


@ -212,7 +220,7 @@ def data(request, tenant_id):
    c = _default_context(state)
    fields = _parse(state.tenant, args, raw_args)
    c['cooked_args'] = fields
-    return render_to_response('stacktach/data.html', c)
+    return render_to_response('data.html', c)


@tenant_check
@ -229,13 +237,13 @@ def details(request, tenant_id, column, row_id):
        from_time = value - datetime.timedelta(minutes=1)
        to_time = value + datetime.timedelta(minutes=1)
        rows = rows.filter(when__range=(from_time, to_time))
-                                  
+
    rows = rows.order_by('-when', '-microseconds')[:200]
    _post_process_raw_data(rows, state, highlight=row_id)
    c['rows'] = rows
    c['allow_expansion'] = True
    c['show_absolute_time'] = True
-    return render_to_response('stacktach/rows.html', c)
+    return render_to_response('rows.html', c)


@tenant_check
@ -246,7 +254,7 @@ def expand(request, tenant_id, row_id):
    payload = json.loads(row.json)
    pp = pprint.PrettyPrinter()
    c['payload'] = pp.pformat(payload)
-    return render_to_response('stacktach/expand.html', c)
+    return render_to_response('expand.html', c)


@tenant_check
@ -257,7 +265,7 @@ def host_status(request, tenant_id):
                                   order_by('-when', '-microseconds')[:20]
    _post_process_raw_data(hosts, state)
    c['rows'] = hosts
-    return render_to_response('stacktach/host_status.html', c)
+    return render_to_response('host_status.html', c)


@tenant_check
@ -270,9 +278,9 @@ def search(request, tenant_id):
    if column != None and value != None:
        rows = models.RawData.objects.filter(tenant=tenant_id).\
               filter(**{column:value}).\
-               order_by('-when', '-microseconds')
+               order_by('-when', '-microseconds')[:22]
        _post_process_raw_data(rows, state)
    c['rows'] = rows
    c['allow_expansion'] = True
    c['show_absolute_time'] = True
-    return render_to_response('stacktach/rows.html', c)
+    return render_to_response('rows.html', c)
--- a/start_workers.py
+++ b/start_workers.py
@ -0,0 +1,30 @@
+# Copyright 2012 Openstack LLC.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from multiprocessing import Process
+from worker import run
+from worker_conf import DEPLOYMENTS
+
+
+if __name__ == '__main__':
+    processes = []
+    for deployment in DEPLOYMENTS:
+        if deployment.get('enabled', True):
+            process = Process(target=run, args=(deployment,))
+            process.daemon = True
+            process.start()
+            processes.append(process)
+    for process in processes:
+        process.join()
--- a/worker_conf.py
+++ b/worker_conf.py
@ -0,0 +1,29 @@
+# Copyright 2012 Openstack LLC.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# This is a sample conf file. Use it as a guide to make your own
+
+DEPLOYMENTS = [
+    # My fun conf
+    dict(
+        tenant_id=1,  # This is the stacktach tenant, not an openstack tenant
+        name="my-fun-nova-deploy",
+        url='http://stacktach.my-fun-nova-deploy.com', # The url for the base of the django app
+        rabbit_host="1.2.3.4",  # ip/host name of the amqp server to listen to
+        rabbit_port=5672,
+        rabbit_userid="amqp-user-1",
+        rabbit_password="some secret password",
+        rabbit_virtual_host="amqp-vhost"),
+]
--- a/worker_new.py
+++ b/worker_new.py
@ -0,0 +1,82 @@
+# Copyright 2012 Openstack LLC.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import amqplib.client_0_8 as amqp
+import json
+import socket
+import time
+
+from stacktach import models, views
+
+
+class NovaConsumer(object):
+    def __init__(self, channel, tenant_id, logger):
+        self.channel = channel
+        self.tenant = models.Tenant.objects.get(tenant_id=tenant_id)
+        self.logger = logger
+        channel.basic_consume('monitor.info', callback=self.onMessage)
+        channel.basic_consume('monitor.error', callback=self.onMessage)
+
+    def onMessage(self, message):
+        routing_key = message.delivery_info['routing_key']
+        args = (routing_key, json.loads(message.body))
+        asJson = json.dumps(args)
+
+        #from pprint import pformat
+        #self.logger.debug("Saving %s", pformat(args))
+        views._parse(self.tenant, args, asJson)
+        self.logger.debug("Recorded %s ", routing_key)
+        self.channel.basic_ack(message.delivery_tag)
+
+
+def run(deployment, logger):
+    tenant_id = deployment.get('tenant_id', 1)
+    host = deployment.get('rabbit_host', 'localhost')
+    port = deployment.get('rabbit_port', 5672)
+    user_id = deployment.get('rabbit_userid', 'rabbit')
+    password = deployment.get('rabbit_password', 'rabbit')
+    virtual_host = deployment.get('rabbit_virtual_host', '/')
+
+    logger.info("Rabbit: %s %s %s %s" % 
+                            (host, port, user_id, virtual_host))
+
+
+    while 1:
+        try:
+            conn = amqp.Connection(host, userid=user_id, password=password, virtual_host=virtual_host)
+
+            ch = conn.channel()
+            ch.access_request(virtual_host, active=True, read=True)
+
+            ch.exchange_declare('nova', type='topic', durable=True, auto_delete=False)
+            ch.queue_declare('monitor.info', durable=True, auto_delete=False, exclusive=False)
+            ch.queue_declare('monitor.error', durable=True, auto_delete=False, exclusive=False)
+            ch.queue_bind('monitor.info', 'nova')
+            ch.queue_bind('monitor.error', 'nova')
+            consumer = NovaConsumer(ch, tenant_id, logger)
+            #
+            # Loop as long as the channel has callbacks registered
+            #
+            while ch.callbacks:
+                ch.wait()
+            break
+        except socket.error, e:
+            logger.warn("Socket error: %s" % e)
+            time.sleep(5)
+            continue
+
+    ch.close()
+    conn.close()
+    logger.info("Finished")