Add bulk processing and csv output

Running this command for each different project and metric gets old,
particularly when you have to input the password each time. Update the
script to collect all metrics for a list of projects. Then, to make that
more useful, add support for CSV output.

Change-Id: Id5ee94e046e11813387ad0d3ae4a9a2e8490062d
Clark Boylan 2025-04-01 09:34:41 -07:00
parent 991941dec4
commit e6dedce81c
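
For orientation, a hedged invocation sketch of the updated script: the script
file name, the host, and the exact username flag spelling are not visible in
this excerpt, so they are placeholders inferred from the argparse help text in
the diff below.

    python engagement_stats.py --host opensearch.example.org \
        --username myuser --csv \
        openstack/nova,openstack/neutron \
        2025-01-01T00:00:00.000Z 2025-04-01T00:00:00.000Z

With --query left at its new default of ALL, one run (and one password prompt)
covers every metric in QUERIES for each comma separated project and prints a
single CSV table on stdout.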


@@ -114,12 +114,11 @@ def calculate_review_efficiency(client, project_name, start_date, end_date):
     r = client.search(index='openstack_gerrit', body=open_query)
     open_total = r['hits']['total']['value']
-    print('%s review efficiency index %s to %s: %s' %
-          (project_name, start_date, end_date,
-           float(closed_total / open_total)))
+    rei = float(closed_total / open_total)
+    return rei, None
 
 
-def calculate_median_merge_time(client, project_name, start_date, end_date):
+def calculate_merge_time(client, project_name, start_date, end_date):
     date_range = set_date_range(start_date, end_date)
     project_term = {
         "term": {
@@ -153,14 +152,10 @@ def calculate_median_merge_time(client, project_name, start_date, end_date):
     average_seconds = \
         sum(map(lambda x: x.total_seconds(), times_sorted)) / len(times_sorted)
     average = datetime.timedelta(seconds=average_seconds)
-    print("%s median time to merge %s to %s: %s" %
-          (project_name, start_date, end_date, median))
-    print("%s average time to merge %s to %s: %s" %
-          (project_name, start_date, end_date, average))
+    return median, average
 
 
-def calculate_median_time_to_review(client, project_name,
-                                    start_date, end_date):
+def calculate_time_to_review(client, project_name, start_date, end_date):
     date_range = set_date_range(start_date, end_date)
     project_term = {
         "term": {
@@ -194,10 +189,7 @@ def calculate_median_time_to_review(client, project_name,
     middle = math.floor(len(times_to_review) / 2)
     median = datetime.timedelta(days=times_sorted[middle])
     average = datetime.timedelta(days=sum(times_sorted) / len(times_sorted))
-    print("%s median time to first review %s to %s: %sseconds" %
-          (project_name, start_date, end_date, median))
-    print("%s average time to first review %s to %s: %sseconds" %
-          (project_name, start_date, end_date, average))
+    return median, average
 
 
 def calculate_patchset_per_review(client, project_name, start_date, end_date):
@@ -229,20 +221,61 @@ def calculate_patchset_per_review(client, project_name, start_date, end_date):
     middle = math.floor(len(patchsets_list) / 2)
     median = patchsets_list[middle]
     average = sum(patchsets_sorted) / len(patchsets_sorted)
-    print("%s median patchsets per review %s to %s: %s" %
-          (project_name, start_date, end_date, median))
-    print("%s average patchsets per review %s to %s: %s" %
-          (project_name, start_date, end_date, average))
+    return median, average
 
 
 QUERIES = {
     "rei": calculate_review_efficiency,
-    "median-merge": calculate_median_merge_time,
-    "median-ttr": calculate_median_time_to_review,
+    "time-to-merge": calculate_merge_time,
+    "time-to-review": calculate_time_to_review,
     "patchset-per-review": calculate_patchset_per_review,
 }
 
 
+def gather_metrics(client, args):
+    if args.csv:
+        print("metric,project,starttime,endtime,value")
+    projects = [p for p in args.project.split(',') if p]
+    if args.query == "ALL":
+        queries = QUERIES.items()
+    else:
+        queries = [(args.query, QUERIES[args.query])]
+    for query, func in queries:
+        for project in projects:
+            median, average = func(
+                client, project, args.start_date, args.end_date)
+            if args.csv:
+                if isinstance(median, datetime.timedelta):
+                    median = median.total_seconds()
+                if isinstance(average, datetime.timedelta):
+                    average = average.total_seconds()
+                if not average:
+                    # Some metrics return a single value; reuse the median slot
+                    print("%s,%s,%s,%s,%s" %
+                          (query, project,
+                           args.start_date, args.end_date, median))
+                else:
+                    print("median-%s,%s,%s,%s,%s" %
+                          (query, project,
+                           args.start_date, args.end_date, median))
+                    print("average-%s,%s,%s,%s,%s" %
+                          (query, project,
+                           args.start_date, args.end_date, average))
+            else:
+                if not average:
+                    # Some metrics return a single value; reuse the median slot
+                    print("%s %s %s to %s: %s" %
+                          (project, query,
+                           args.start_date, args.end_date, median))
+                else:
+                    print("%s median %s %s to %s: %s" %
+                          (project, query,
+                           args.start_date, args.end_date, median))
+                    print("%s average %s %s to %s: %s" %
+                          (project, query,
+                           args.start_date, args.end_date, average))
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--host",
@@ -258,10 +291,12 @@ def main():
                         help="Bitergia opensearch username", required=True)
     parser.add_argument("--query",
                         help="Metric to query",
-                        default='rei')
+                        default='ALL')
+    parser.add_argument("--csv", help="Emit csv output", action="store_true")
     parser.add_argument("project",
                         help="Project to filter results for. This must "
-                             "match bitergia's idea of a project name")
+                             "match bitergia's idea of a project name. May "
+                             "be a comma separated list.")
     parser.add_argument("start_date",
                         help="Start date for results. "
                              "eg 2025-01-01T00:00:00.000Z")
@@ -285,7 +320,7 @@ def main():
         use_ssl=True,
         verify_certs=True,
     )
-    QUERIES[args.query](client, args.project, args.start_date, args.end_date)
+    gather_metrics(client, args)
 
 
 if __name__ == "__main__":
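
Because the --csv mode writes a plain "metric,project,starttime,endtime,value"
table to stdout, the results are easy to post-process. A minimal consumer
sketch, assuming the output was redirected to a file named metrics.csv (the
file name is hypothetical):

    import csv

    # Rows follow the header emitted by gather_metrics():
    # metric,project,starttime,endtime,value
    with open('metrics.csv', newline='') as f:
        for row in csv.DictReader(f):
            # Duration metrics are flattened to total seconds via
            # timedelta.total_seconds() in gather_metrics(); rei and
            # patchset counts are already plain numbers.
            print(row['metric'], row['project'], float(row['value']))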