Airflow 气流式换档变速器
我需要使用copy命令将s3文件复制到redshift。我对气流有点陌生,有点问题。有人能纠正下面的代码吗。我可以这样调用rs.execute()吗 代码:Airflow 气流式换档变速器,airflow,airflow-operator,Airflow,Airflow Operator,我需要使用copy命令将s3文件复制到redshift。我对气流有点陌生,有点问题。有人能纠正下面的代码吗。我可以这样调用rs.execute()吗 代码: 您没有定义任何DAG,也没有使用类似的运算符。我建议你多读一点关于如何使用气流的知识。无论如何,代码应该是: import os from airflow import DAG from airflow.operators.python_operator import PythonOperator from airflow.utils.da
您没有定义任何DAG,也没有使用类似的运算符。我建议你多读一点关于如何使用气流的知识。无论如何,代码应该是:
import os
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from airflow.operators.s3_to_redshift_operator import S3ToRedshiftTransfer
default_args = {
'owner': 'gra',
'depends_on_past': False,
'start_date': datetime(2020, 12, 13),
'email': ['ss.com'],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(seconds=5),
}
with DAG('dag_name', schedule_interval='@daily', default_args=default_args) as dag:
rs= S3ToRedshiftTransfer(redshift_conn_id ='12as',
aws_conn_id='gt_read',
schema='test',
table='dept',
s3_bucket="gng-test",
s3_key="copt.csv",
task_id="copy_redshift"
)
app_start >> rs
我能够使用boto3执行从s3到红移的复制。S3toredShift Transfer可用于执行相同的操作
# airflow related
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.bash_operator import BashOperator
# other packages
from datetime import datetime
from datetime import timedelta
# from airflow.hooks import PostgresHook
from airflow.operators.s3_to_redshift_operator import S3ToRedshiftTransfer
#from airflow.providers.amazon.aws.transfers.s3_to_redshift import S3ToRedshiftOperator
from airflow.contrib.operators.aws_athena_operator import AWSAthenaOperator
from airflow.operators import SimpleHttpOperator, HttpSensor, BashOperator, EmailOperator, S3KeySensor
import boto3
default_args = {
'owner': 'grit_delta',
'depends_on_past': False,
'start_date': datetime(2020, 12, 13),
'email': ['sa.com'],
'email_on_failure': False,
'email_on_retry': False,
'schedule_interval': '@daily',
'retries': 1,
'retry_delay': timedelta(seconds=5),
}
dag=DAG(dag_id='veritas_test',default_args=default_args,schedule_interval=timedelta(1))
def job1():
print('First Job to start')
file_sensor = S3KeySensor(task_id = 's3_key_sensor_task',
s3_conn_id='_read',
poke_interval=120,
timeout=18*60*60,
bucket_key = "data/test.*",
bucket_name = "g-test",
wildcard_match = True,
dag = dag
)
app_start=PythonOperator(task_id='app_start', python_callable=job1, dag=dag)
def s3_redshift(**kwargs):
rsd = boto3.client('redshift-data')
deptKey='s3://airflow-dev/code/gta/dag/dept.csv'
sqlQuery="copy test.dept from 's3://airflow-grole' CSV ;"
#sqlQuery="insert into test.dept values('d1221',100)"
print(sqlQuery)
resp = rsd.execute_statement(
ClusterIdentifier="opes",
Database="ee",
DbUser="aa",
Sql=sqlQuery
#Sql="CREATE TABLE IF NOT EXISTS test.dept (title varchar(10), rating int);"
)
print(resp)
print(" completed")
return "OK"
copy_redshift=PythonOperator(task_id='copy_redshift', python_callable=s3_redshift,provide_context=True, dag=dag)
file_sensor >>app_start >> copy_redshift
import os
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from airflow.operators.s3_to_redshift_operator import S3ToRedshiftTransfer
default_args = {
'owner': 'gra',
'depends_on_past': False,
'start_date': datetime(2020, 12, 13),
'email': ['ss.com'],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(seconds=5),
}
with DAG('dag_name', schedule_interval='@daily', default_args=default_args) as dag:
rs= S3ToRedshiftTransfer(redshift_conn_id ='12as',
aws_conn_id='gt_read',
schema='test',
table='dept',
s3_bucket="gng-test",
s3_key="copt.csv",
task_id="copy_redshift"
)
app_start >> rs
# airflow related
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.bash_operator import BashOperator
# other packages
from datetime import datetime
from datetime import timedelta
# from airflow.hooks import PostgresHook
from airflow.operators.s3_to_redshift_operator import S3ToRedshiftTransfer
#from airflow.providers.amazon.aws.transfers.s3_to_redshift import S3ToRedshiftOperator
from airflow.contrib.operators.aws_athena_operator import AWSAthenaOperator
from airflow.operators import SimpleHttpOperator, HttpSensor, BashOperator, EmailOperator, S3KeySensor
import boto3
default_args = {
'owner': 'grit_delta',
'depends_on_past': False,
'start_date': datetime(2020, 12, 13),
'email': ['sa.com'],
'email_on_failure': False,
'email_on_retry': False,
'schedule_interval': '@daily',
'retries': 1,
'retry_delay': timedelta(seconds=5),
}
dag=DAG(dag_id='veritas_test',default_args=default_args,schedule_interval=timedelta(1))
def job1():
print('First Job to start')
file_sensor = S3KeySensor(task_id = 's3_key_sensor_task',
s3_conn_id='_read',
poke_interval=120,
timeout=18*60*60,
bucket_key = "data/test.*",
bucket_name = "g-test",
wildcard_match = True,
dag = dag
)
app_start=PythonOperator(task_id='app_start', python_callable=job1, dag=dag)
def s3_redshift(**kwargs):
rsd = boto3.client('redshift-data')
deptKey='s3://airflow-dev/code/gta/dag/dept.csv'
sqlQuery="copy test.dept from 's3://airflow-grole' CSV ;"
#sqlQuery="insert into test.dept values('d1221',100)"
print(sqlQuery)
resp = rsd.execute_statement(
ClusterIdentifier="opes",
Database="ee",
DbUser="aa",
Sql=sqlQuery
#Sql="CREATE TABLE IF NOT EXISTS test.dept (title varchar(10), rating int);"
)
print(resp)
print(" completed")
return "OK"
copy_redshift=PythonOperator(task_id='copy_redshift', python_callable=s3_redshift,provide_context=True, dag=dag)
file_sensor >>app_start >> copy_redshift