
This change introduces a large section of the API for the next major version of Shipyard - the action api. By interfacing with Airflow, Shipyard will invoke workflows and allow for controlling and querying status of those workflows. Foundationally, this patchset introduces a lot of framework code for other apis, including error handling to a common output format, database interaction for persistence of action information, and use of oslo_config for configuration support.

Add GET all actions primary code - db connection not yet impl
Update base classes to have more structure
Add POST actions framework
Add GET action by id
Add GET of validations and steps
Add control api
Add unit tests of action api methods
Re-Removed duplicate deps from test reqs
Add routes for API
Removed a lot of code better handled by falcon directly
Cleaned up error flows - handlers and defaults
Refactored existing airflow tests to match standard output format
Updated json validation to be more specific
Added basic start for alembic
Added alembic upgrade at startup
Added table creation definitions
Added base revision for alembic upgrade
Bug fixes - DB queries, airflow comm, logic issues, logging issues
Bug fixes - date formats and alignment of keys between systems
Exclusions to bandit / tox.ini
Resolved merge conflicts with integration of auth
Update to use oslo config and PBR
Update the context middleware to check uuid in a less contentious way
Removed routes and resources for regions endpoint - not used
Add auth policies for action api
Restructure exceptions to be consistent class hierarchy and common handler
Add generation of config and policy examples
Update tests to init configs
Update database configs to not use env. vars
Removed examples directory, it was no longer accurate
Addressed/removed several TODOs - left some behind as well
Aligned input to DAGs with action: header
Retrieved all sub-steps for dags
Expanded step information
Refactored auth handling for better logging
rename create_actions policy to create_action
removed some templated file comments in env.py generated by alembic
updated inconsistent exception parameters
updated to use ulid instead of uuid for action ids
added action control audit code per review suggestion
Fixed correlation date between dags/actions by more string parsing

Change-Id: I2f9ea5250923f45456aa86826e344fc055bba762
235 lines
6.6 KiB
Python
235 lines
6.6 KiB
Python
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
Airflow database access - see db.py for instances to use
|
|
"""
|
|
import sqlalchemy
|
|
from oslo_config import cfg
|
|
|
|
from shipyard_airflow.db.common_db import DbAccess
|
|
from shipyard_airflow.db.errors import AirflowStateError
|
|
|
|
|
|
# Module-level handle on the oslo_config global configuration object; it is
# read below to obtain the airflow database connection string
# (CONF.base.postgresql_airflow_db).
CONF = cfg.CONF
|
|
|
|
|
|
class AirflowDbAccess(DbAccess):
    """
    Airflow database access

    WARNING: This is a large set of assumptions based on the way airflow
    arranges its database and are subject to change with airflow future
    releases - i.e. we're leveraging undocumented/non-exposed interfaces
    for airflow to work around lack of API and feature functionality.

    The statements below are bound-parameter SQL texts; values are always
    supplied as named parameters (``:dag_id``, ``:execution_date``,
    ``:state``) rather than being formatted into the SQL string.
    """

    # All rows of airflow's dag_run table.
    SELECT_ALL_DAG_RUNS = sqlalchemy.sql.text('''
    SELECT
        "dag_id",
        "execution_date",
        "state",
        "run_id",
        "external_trigger",
        "start_date",
        "end_date"
    FROM
        dag_run
    ''')

    # dag_run rows matching an exact dag_id and execution_date.
    SELECT_DAG_RUNS_BY_ID = sqlalchemy.sql.text('''
    SELECT
        "dag_id",
        "execution_date",
        "state",
        "run_id",
        "external_trigger",
        "start_date",
        "end_date"
    FROM
        dag_run
    WHERE
        dag_id = :dag_id
    AND
        execution_date = :execution_date
    ''')

    # All rows of airflow's task_instance table, highest priority first.
    SELECT_ALL_TASKS = sqlalchemy.sql.text('''
    SELECT
        "task_id",
        "dag_id",
        "execution_date",
        "start_date",
        "end_date",
        "duration",
        "state",
        "try_number",
        "operator",
        "queued_dttm"
    FROM
        task_instance
    ORDER BY
        priority_weight desc,
        start_date
    ''')

    # task_instance rows for an execution_date whose dag_id matches a LIKE
    # pattern (the caller supplies the pattern, see get_tasks_by_id).
    SELECT_TASKS_BY_ID = sqlalchemy.sql.text('''
    SELECT
        "task_id",
        "dag_id",
        "execution_date",
        "start_date",
        "end_date",
        "duration",
        "state",
        "try_number",
        "operator",
        "queued_dttm"
    FROM
        task_instance
    WHERE
        dag_id LIKE :dag_id
    AND
        execution_date = :execution_date
    ORDER BY
        priority_weight desc,
        start_date
    ''')

    # Overwrites the state column of the dag_run identified by dag_id and
    # execution_date.
    UPDATE_DAG_RUN_STATUS = sqlalchemy.sql.text('''
    UPDATE
        dag_run
    SET
        state = :state
    WHERE
        dag_id = :dag_id
    AND
        execution_date = :execution_date
    ''')

    def __init__(self):
        """
        Delegates all initialization to the DbAccess base class
        """
        DbAccess.__init__(self)

    def get_connection_string(self):
        """
        Returns the connection string for this db connection, as read from
        the postgresql_airflow_db option of the [base] configuration group
        """
        return CONF.base.postgresql_airflow_db

    def get_all_dag_runs(self):
        """
        Retrieves all dag runs.

        :returns: the result of the base class get_as_dict_array for the
            SELECT_ALL_DAG_RUNS query (presumably a list of dicts keyed by
            column name - confirm against DbAccess)
        """
        return self.get_as_dict_array(AirflowDbAccess.SELECT_ALL_DAG_RUNS)

    def get_dag_runs_by_id(self, dag_id, execution_date):
        """
        Retrieves dag runs by dag id and execution date

        :param dag_id: exact dag_id value to match
        :param execution_date: exact execution_date value to match
        :returns: the result of the base class get_as_dict_array for the
            SELECT_DAG_RUNS_BY_ID query
        """
        return self.get_as_dict_array(
            AirflowDbAccess.SELECT_DAG_RUNS_BY_ID,
            dag_id=dag_id,
            execution_date=execution_date)

    def get_all_tasks(self):
        """
        Retrieves all tasks.

        :returns: the result of the base class get_as_dict_array for the
            SELECT_ALL_TASKS query
        """
        return self.get_as_dict_array(AirflowDbAccess.SELECT_ALL_TASKS)

    def get_tasks_by_id(self, dag_id, execution_date):
        """
        Retrieves tasks by dag id and execution date

        The dag id is used as a prefix: a trailing '%' wildcard is appended
        and the query uses LIKE, so tasks of any dag whose id starts with
        dag_id are also returned for the same execution date.

        :param dag_id: dag_id prefix to match. Any LIKE wildcard characters
            ('%' or '_') already present in the value are interpreted as
            wildcards by the database.
        :param execution_date: exact execution_date value to match
        :returns: the result of the base class get_as_dict_array for the
            SELECT_TASKS_BY_ID query
        """
        return self.get_as_dict_array(
            AirflowDbAccess.SELECT_TASKS_BY_ID,
            dag_id=dag_id + '%',
            execution_date=execution_date)

    def stop_dag_run(self, dag_id, execution_date):
        """
        Triggers an update to set a dag_run to failed state
        causing dag_run to be stopped
        running -> failed

        :raises AirflowStateError: if the dag_run does not exist or is not
            currently in the running state
        """
        self._control_dag_run(
            dag_id=dag_id,
            execution_date=execution_date,
            expected_state='running',
            desired_state='failed')

    def pause_dag_run(self, dag_id, execution_date):
        """
        Triggers an update to set a dag_run to paused state
        causing dag_run to be paused
        running -> paused

        :raises AirflowStateError: if the dag_run does not exist or is not
            currently in the running state
        """
        self._control_dag_run(
            dag_id=dag_id,
            execution_date=execution_date,
            expected_state='running',
            desired_state='paused')

    def unpause_dag_run(self, dag_id, execution_date):
        """
        Triggers an update to set a dag_run to running state
        causing dag_run to be unpaused
        paused -> running

        :raises AirflowStateError: if the dag_run does not exist or is not
            currently in the paused state
        """
        self._control_dag_run(
            dag_id=dag_id,
            execution_date=execution_date,
            expected_state='paused',
            desired_state='running')

    def check_dag_run_state(self, dag_id, execution_date, expected_state):
        """
        Examines a dag_run for state. Throws exception if it's not right

        :param dag_id: dag_id of the dag_run to examine
        :param execution_date: execution_date of the dag_run to examine
        :param expected_state: the state value the dag_run must currently
            have
        :raises AirflowStateError: if no matching dag_run is found, or if
            the first matching dag_run is in a state other than
            expected_state
        """
        dag_run_list = self.get_dag_runs_by_id(
            dag_id=dag_id, execution_date=execution_date)

        if not dag_run_list:
            # not found
            raise AirflowStateError(message='dag_run does not exist')

        # Only the first matching row is examined.
        dag_run = dag_run_list[0]
        if dag_run['state'] != expected_state:
            # Report the state that was actually required; this is not
            # always 'running' (e.g. unpause requires 'paused').
            raise AirflowStateError(
                message='dag_run state must be {}, but is {}'.format(
                    expected_state, dag_run['state']))

    def _control_dag_run(self, dag_id, execution_date, expected_state,
                         desired_state):
        """
        checks a dag_run's state for expected state, and sets it to the
        desired state

        The check and the update are two separate database operations.

        :raises AirflowStateError: propagated from check_dag_run_state when
            the dag_run is missing or not in expected_state; in that case
            no update is attempted
        """
        self.check_dag_run_state(
            dag_id=dag_id,
            execution_date=execution_date,
            expected_state=expected_state)
        self._set_dag_run_state(
            state=desired_state, dag_id=dag_id, execution_date=execution_date)

    def _set_dag_run_state(self, state, dag_id, execution_date):
        """
        Sets a dag run to the specified state.

        WARNING: this assumes that airflow works by reading state from the
        dag_run table dynamically, is not caching results, and doesn't
        start to use the states we're using in a new way.
        """
        # The UPDATE statement is executed through the base class
        # perform_insert helper.
        # NOTE(review): presumably perform_insert runs any write statement
        # and commits it - confirm against DbAccess.
        self.perform_insert(
            AirflowDbAccess.UPDATE_DAG_RUN_STATUS,
            state=state,
            dag_id=dag_id,
            execution_date=execution_date)
|