diff --git a/requirements.txt b/requirements.txt
index 07e1603..97ec161 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,4 @@ PyYAML>=3.1.0,<4.0.0
jenkins-job-builder
xmltodict
+python-magic
diff --git a/turbo_hipster/lib/models.py b/turbo_hipster/lib/models.py
index 726a64d..5e8abc3 100644
--- a/turbo_hipster/lib/models.py
+++ b/turbo_hipster/lib/models.py
@@ -224,9 +224,12 @@ class Task(object):
self.log.debug("Process the resulting files (upload/push)")
+ dir_list = os.listdir(self.job_results_dir)
+ path_list = [os.path.join(self.job_results_dir, i) for i in dir_list]
+
if 'publish_logs' in self.worker_server.config:
- index_url = utils.push_file(
- self.job_arguments['LOG_PATH'], self.job_results_dir,
+ index_url = utils.push_files(
+ self.job_arguments['LOG_PATH'], path_list,
self.worker_server.config['publish_logs'])
self.log.debug("Index URL found at %s" % index_url)
self.work_data['url'] = index_url
diff --git a/turbo_hipster/lib/utils.py b/turbo_hipster/lib/utils.py
index 3a8fc2c..0673869 100644
--- a/turbo_hipster/lib/utils.py
+++ b/turbo_hipster/lib/utils.py
@@ -15,12 +15,15 @@
import git
import logging
+import magic
import os
import requests
import select
import shutil
import subprocess
import swiftclient
+import sys
+import tempfile
import time
@@ -197,74 +200,10 @@ def execute_to_log(cmd, logfile, timeout=-1, watch_logs=[], heartbeat=30,
return p.returncode
-def push_file(results_set_name, file_path, publish_config):
- """ Push a log file to a server. Returns the public URL """
- method = publish_config['type'] + '_push_file'
- if method in globals() and hasattr(globals()[method], '__call__'):
- return globals()[method](results_set_name, file_path, publish_config)
-
-
-def swift_push_file(results_set_name, file_path, swift_config):
- """ Push a log file to a swift server. """
- def _push_individual_file(results_set_name, file_path, swift_config):
- with open(file_path, 'r') as fd:
- name = os.path.join(results_set_name, os.path.basename(file_path))
- con = swiftclient.client.Connection(
- authurl=swift_config['authurl'],
- user=swift_config['user'],
- key=swift_config['password'],
- os_options={'region_name': swift_config['region']},
- tenant_name=swift_config['tenant'],
- auth_version=2.0)
- con.put_object(swift_config['container'], name, fd)
-
- if os.path.isfile(file_path):
- _push_individual_file(results_set_name, file_path, swift_config)
- elif os.path.isdir(file_path):
- for path, folders, files in os.walk(file_path):
- for f in files:
- f_path = os.path.join(path, f)
- _push_individual_file(results_set_name, f_path, swift_config)
-
- return (swift_config['prepend_url'] +
- os.path.join(results_set_name, os.path.basename(file_path)))
-
-
-def local_push_file(results_set_name, file_path, local_config):
- """ Copy the file locally somewhere sensible """
- def _push_file_or_dir(results_set_name, file_path, local_config):
- dest_dir = os.path.join(local_config['path'], results_set_name)
- dest_filename = os.path.basename(file_path)
- if not os.path.isdir(dest_dir):
- os.makedirs(dest_dir)
-
- dest_file = os.path.join(dest_dir, dest_filename)
-
- if os.path.isfile(file_path):
- shutil.copyfile(file_path, dest_file)
- elif os.path.isdir(file_path):
- shutil.copytree(file_path, dest_file)
-
- if os.path.isfile(file_path):
- _push_file_or_dir(results_set_name, file_path, local_config)
- elif os.path.isdir(file_path):
- for f in os.listdir(file_path):
- f_path = os.path.join(file_path, f)
- _push_file_or_dir(results_set_name, f_path, local_config)
-
- dest_filename = os.path.basename(file_path)
- return local_config['prepend_url'] + os.path.join(results_set_name,
- dest_filename)
-
-
-def scp_push_file(results_set_name, file_path, local_config):
- """ Copy the file remotely over ssh """
- # TODO!
- pass
-
-
def zuul_swift_upload(file_path, job_arguments):
"""Upload working_dir to swift as per zuul's instructions"""
+ # TODO(jhesketh): replace with swift_form_post_submit from below
+
# NOTE(jhesketh): Zuul specifies an object prefix in the destination so
# we don't need to be concerned with results_set_name
@@ -299,3 +238,201 @@ def zuul_swift_upload(file_path, job_arguments):
return (logserver_prefix +
job_arguments['ZUUL_EXTRA_SWIFT_DESTINATION_PREFIX'])
+
+
+def generate_log_index(file_list, logserver_prefix, results_set_name):
+ """Create an index of logfiles and links to them"""
+
+    output = '<html><head><title>Index of results</title></head><body>'
+    output += '<ul>'
+ for f in file_list:
+ file_url = os.path.join(logserver_prefix, results_set_name, f)
+ # Because file_list is simply a list to create an index for and it
+ # isn't necessarily on disk we can't check if a file is a folder or
+ # not. As such we normalise the name to get the folder/filename but
+ # then need to check if the last character was a trailing slash so to
+ # re-append it to make it obvious that it links to a folder
+ filename_postfix = '/' if f[-1] == '/' else ''
+ filename = os.path.basename(os.path.normpath(f)) + filename_postfix
+        output += '<li>'
+        output += '<a href="%s">%s</a>' % (file_url, filename)
+        output += '</li>'
+
+    output += '</ul>'
+    output += '</body></html>'
+ return output
+
+
+def make_index_file(file_list, logserver_prefix, results_set_name,
+ index_filename='index.html'):
+ """Writes an index into a file for pushing"""
+
+ index_content = generate_log_index(file_list, logserver_prefix,
+ results_set_name)
+ tempdir = tempfile.mkdtemp()
+ fd = open(os.path.join(tempdir, index_filename), 'w')
+ fd.write(index_content)
+ return os.path.join(tempdir, index_filename)
+
+
+def get_file_mime(file_path):
+ """Get the file mime using libmagic"""
+
+ if not os.path.isfile(file_path):
+ return None
+
+ if hasattr(magic, 'from_file'):
+ return magic.from_file(file_path, mime=True)
+ else:
+ # no magic.from_file, we might be using the libmagic bindings
+ m = magic.open(magic.MAGIC_MIME)
+ m.load()
+ return m.file(file_path).split(';')[0]
+
+
+def swift_form_post_submit(file_list, url, hmac_body, signature):
+ """Send the files to swift via the FormPost middleware"""
+
+ # We are uploading the file_list as an HTTP POST multipart encoded.
+ # First grab out the information we need to send back from the hmac_body
+ payload = {}
+
+ (object_prefix,
+ payload['redirect'],
+ payload['max_file_size'],
+ payload['max_file_count'],
+ payload['expires']) = hmac_body.split('\n')
+ payload['signature'] = signature
+
+ # Loop over the file list in chunks of max_file_count
+ for sub_file_list in (file_list[pos:pos + int(payload['max_file_count'])]
+ for pos in xrange(0, len(file_list),
+ int(payload['max_file_count']))):
+ if payload['expires'] < time.time():
+ raise Exception("Ran out of time uploading files!")
+ files = {}
+        # Zuul's log path is generated without a trailing slash. As such the
+ # object prefix does not contain a slash and the files would be
+ # uploaded as 'prefix' + 'filename'. Assume we want the destination
+ # url to look like a folder and make sure there's a slash between.
+ filename_prefix = '/' if url[-1] != '/' else ''
+ for i, f in enumerate(sub_file_list):
+ if os.path.getsize(f['path']) > int(payload['max_file_size']):
+ sys.stderr.write('Warning: %s exceeds %d bytes. Skipping...\n'
+ % (f['path'], int(payload['max_file_size'])))
+ continue
+ files['file%d' % (i + 1)] = (filename_prefix + f['filename'],
+ open(f['path'], 'rb'),
+ get_file_mime(f['path']))
+ requests.post(url, data=payload, files=files)
+
+
+def build_file_list(file_path, logserver_prefix, results_set_name,
+ create_dir_indexes=True):
+ """Generate a list of files to upload to zuul. Recurses through directories
+ and generates index.html files if requested."""
+
+ # file_list: a list of dicts with {path=..., filename=...} where filename
+ # is appended to the end of the object (paths can be used)
+ file_list = []
+ if os.path.isfile(file_path):
+ file_list.append({'filename': os.path.basename(file_path),
+ 'path': file_path})
+ elif os.path.isdir(file_path):
+ if file_path[-1] == os.sep:
+ file_path = file_path[:-1]
+ parent_dir = os.path.dirname(file_path)
+ for path, folders, files in os.walk(file_path):
+ folder_contents = []
+ for f in files:
+ full_path = os.path.join(path, f)
+ relative_name = os.path.relpath(full_path, parent_dir)
+ push_file = {'filename': relative_name,
+ 'path': full_path}
+ file_list.append(push_file)
+ folder_contents.append(relative_name)
+
+ for f in folders:
+ full_path = os.path.join(path, f)
+ relative_name = os.path.relpath(full_path, parent_dir)
+ folder_contents.append(relative_name + '/')
+
+ if create_dir_indexes:
+ index_file = make_index_file(folder_contents, logserver_prefix,
+ results_set_name)
+ relative_name = os.path.relpath(path, parent_dir)
+ file_list.append({
+ 'filename': os.path.join(relative_name,
+ os.path.basename(index_file)),
+ 'path': index_file})
+
+ return file_list
+
+
+def push_files(results_set_name, path_list, publish_config,
+ generate_indexes=True):
+    """ Push a log file/folder to a server. Returns the public URL """
+
+ file_list = []
+ root_list = []
+
+ for file_path in path_list:
+ file_path = os.path.normpath(file_path)
+ if os.path.isfile(file_path):
+ root_list.append(os.path.basename(file_path))
+ else:
+ root_list.append(os.path.basename(file_path) + '/')
+
+ file_list += build_file_list(
+ file_path, publish_config['prepend_url'], results_set_name,
+ generate_indexes
+ )
+
+ index_file = ''
+ if generate_indexes:
+ index_file = make_index_file(root_list, publish_config['prepend_url'],
+ results_set_name)
+ file_list.append({
+ 'filename': os.path.basename(index_file),
+ 'path': index_file})
+
+ method = publish_config['type'] + '_push_files'
+ if method in globals() and hasattr(globals()[method], '__call__'):
+ globals()[method](results_set_name, file_list, publish_config)
+
+ return os.path.join(publish_config['prepend_url'], results_set_name,
+ os.path.basename(index_file))
+
+
+def swift_push_files(results_set_name, file_list, swift_config):
+ """ Push a log file to a swift server. """
+ for file_item in file_list:
+ with open(file_item['path'], 'r') as fd:
+ con = swiftclient.client.Connection(
+ authurl=swift_config['authurl'],
+ user=swift_config['user'],
+ key=swift_config['password'],
+ os_options={'region_name': swift_config['region']},
+ tenant_name=swift_config['tenant'],
+ auth_version=2.0)
+ filename = os.path.join(results_set_name, file_item['filename'])
+ con.put_object(swift_config['container'], filename, fd)
+
+
+def local_push_files(results_set_name, file_list, local_config):
+ """ Copy the file locally somewhere sensible """
+ for file_item in file_list:
+ dest_dir = os.path.join(local_config['path'], results_set_name,
+ os.path.dirname(file_item['filename']))
+ dest_filename = os.path.basename(file_item['filename'])
+ if not os.path.isdir(dest_dir):
+ os.makedirs(dest_dir)
+
+ dest_file = os.path.join(dest_dir, dest_filename)
+ shutil.copyfile(file_item['path'], dest_file)
+
+
+def scp_push_files(results_set_name, file_path, local_config):
+ """ Copy the file remotely over ssh """
+ # TODO!
+ pass
diff --git a/turbo_hipster/task_plugins/real_db_upgrade/handle_results.py b/turbo_hipster/task_plugins/real_db_upgrade/handle_results.py
index cf8c6df..561898f 100644
--- a/turbo_hipster/task_plugins/real_db_upgrade/handle_results.py
+++ b/turbo_hipster/task_plugins/real_db_upgrade/handle_results.py
@@ -19,65 +19,11 @@ Primarily place the log files somewhere useful and optionally email
somebody """
import calendar
-import tempfile
import time
import os
import re
-from turbo_hipster.lib.utils import push_file
-
-
-def generate_log_index(datasets):
- """ Create an index of logfiles and links to them """
- # Loop over logfile URLs
- # Create summary and links
-    output = '<html><head><title>Index of results</title></head><body>'
-    output += '<ul>'
- for dataset in datasets:
-        output += '<li>'
-        output += '<a href="%s">%s</a>' % (dataset['result_uri'],
-                                           dataset['name'])
-        output += ' <span class="%s">%s</span>' % (dataset['result'],
-                                                   dataset['result'])
-        output += '</li>'
-
-    output += '</ul>'
-    output += '</body></html>'
- return output
-
-
-def make_index_file(datasets, index_filename):
- """ Writes an index into a file for pushing """
- index_content = generate_log_index(datasets)
- tempdir = tempfile.mkdtemp()
- fd = open(os.path.join(tempdir, index_filename), 'w')
- fd.write(index_content)
- return os.path.join(tempdir, index_filename)
-
-
-def generate_push_results(datasets, publish_config):
- """ Generates and pushes results """
-
- last_link_uri = None
- for i, dataset in enumerate(datasets):
- result_uri = push_file(dataset['determined_path'],
- dataset['job_log_file_path'],
- publish_config)
- datasets[i]['result_uri'] = result_uri
- last_link_uri = result_uri
-
- if len(datasets) > 1:
- index_file = make_index_file(datasets, 'index.html')
- # FIXME: the determined path here is just copied from the last dataset.
- # Probably should be stored elsewhere...
- index_file_url = push_file(dataset['determined_path'], index_file,
- publish_config)
- return index_file_url
- else:
- return last_link_uri
-
-
MIGRATION_NUMBER_RE = re.compile('^([0-9]+).*\.py$')
MIGRATION_START_RE = re.compile('.* ([0-9]+) -\> ([0-9]+)\.\.\..*$')
MIGRATION_END_RE = re.compile('done$')