Google Cloud Platform: How to use GCSToSFTPOperator in a GCP Composer environment?
Tags: google-cloud-platform, airflow, google-cloud-composer

I want to use GCSToSFTPOperator in my GCP Composer environment. The environment runs Airflow 1.10.3 (composer-1.8.3-airflow-1.10.3; I upgraded it from 1.10.2 to 1.10.3), but GCSToSFTPOperator only appears in the latest version of Airflow. See the references below.

I also tried the plugin approach: I copied the GCSToSFTPOperator class source code into a file in the plugins folder and imported it from my Python DAG, but now I get an error about airflow.gcp. After that I tried installing the gcp 0.2.1 PyPI package in the Composer environment, but that did not help either.
Step 1 - DAG code, placed in the DAG folder:
import os

from airflow import DAG
from airflow import models
from airflow.utils.dates import days_ago

from PluginGCSToSFTPOperator import GCSToSFTPOperator

default_args = {"start_date": days_ago(1)}

BUCKET_SRC = "bucket-name"
OBJECT_SRC = "parent-1.bin"
DESTINATION_PATH = "/tmp/single-file/"

with models.DAG(
    "example_gcs_to_sftp",
    default_args=default_args,
    schedule_interval=None,
    tags=['example'],
) as dag:
    copy_file_from_gcs_to_sftp = GCSToSFTPOperator(
        task_id="file-copy-gsc-to-sftp",
        source_bucket=BUCKET_SRC,
        source_object=OBJECT_SRC,
        destination_path=DESTINATION_PATH,
    )

    copy_file_from_gcs_to_sftp
Step 2 - I copied the GCSToSFTPOperator class code into a Python file and placed that file in the plugins folder:
import os
from tempfile import NamedTemporaryFile
from typing import Optional

# from airflow.plugins_manager import AirflowPlugin
from airflow import AirflowException
from airflow.gcp.hooks.gcs import GCSHook
from airflow.models import BaseOperator
from airflow.providers.sftp.hooks.sftp_hook import SFTPHook
from airflow.utils.decorators import apply_defaults

WILDCARD = "*"


class GCSToSFTPOperator(BaseOperator):
    template_fields = ("source_bucket", "source_object", "destination_path")
    ui_color = "#f0eee4"

    # pylint: disable=too-many-arguments
    @apply_defaults
    def __init__(
        self,
        source_bucket: str,
        source_object: str,
        destination_path: str,
        move_object: bool = False,
        gcp_conn_id: str = "google_cloud_default",
        sftp_conn_id: str = "ssh_default",
        delegate_to: Optional[str] = None,
        *args,
        **kwargs
    ) -> None:
        super().__init__(*args, **kwargs)
        self.source_bucket = source_bucket
        self.source_object = source_object
        self.destination_path = destination_path
        self.move_object = move_object
        self.gcp_conn_id = gcp_conn_id
        self.sftp_conn_id = sftp_conn_id
        self.delegate_to = delegate_to
        self.sftp_dirs = None

    def execute(self, context):
        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id, delegate_to=self.delegate_to
        )
        sftp_hook = SFTPHook(self.sftp_conn_id)

        if WILDCARD in self.source_object:
            total_wildcards = self.source_object.count(WILDCARD)
            if total_wildcards > 1:
                raise AirflowException(
                    "Only one wildcard '*' is allowed in source_object parameter. "
                    "Found {} in {}.".format(total_wildcards, self.source_object)
                )

            prefix, delimiter = self.source_object.split(WILDCARD, 1)
            objects = gcs_hook.list(
                self.source_bucket, prefix=prefix, delimiter=delimiter
            )

            for source_object in objects:
                destination_path = os.path.join(self.destination_path, source_object)
                self._copy_single_object(
                    gcs_hook, sftp_hook, source_object, destination_path
                )

            self.log.info(
                "Done. Uploaded '%d' files to %s", len(objects), self.destination_path
            )
        else:
            destination_path = os.path.join(self.destination_path, self.source_object)
            self._copy_single_object(
                gcs_hook, sftp_hook, self.source_object, destination_path
            )
            self.log.info(
                "Done. Uploaded '%s' file to %s", self.source_object, destination_path
            )

    def _copy_single_object(
        self,
        gcs_hook: GCSHook,
        sftp_hook: SFTPHook,
        source_object: str,
        destination_path: str,
    ) -> None:
        """Helper function to copy a single object."""
        self.log.info(
            "Executing copy of gs://%s/%s to %s",
            self.source_bucket,
            source_object,
            destination_path,
        )

        dir_path = os.path.dirname(destination_path)
        sftp_hook.create_directory(dir_path)

        with NamedTemporaryFile("w") as tmp:
            gcs_hook.download(
                bucket_name=self.source_bucket,
                object_name=source_object,
                filename=tmp.name,
            )
            sftp_hook.store_file(destination_path, tmp.name)

        if self.move_object:
            self.log.info(
                "Executing delete of gs://%s/%s", self.source_bucket, source_object
            )
            gcs_hook.delete(self.source_bucket, source_object)
Step 3 - I also tried putting the same file in the DAG folder, and I get the same error:

"No module named 'airflow.gcp'"

What should I do now? Is there another operator available in Airflow 1.10.3, or is there some other way to use GCSToSFTPOperator?
The documentation you are looking at is for Airflow 1.10.7, the latest version. If you check the Airflow 1.10.2 documentation, you will see that the gcs_to_sftp operator does not exist in that version. The copied code also imports airflow.gcp and airflow.providers, packages that are not part of the 1.10.3 release, which is why you get "No module named 'airflow.gcp'".

What you can try is to copy the operator code, turn it into a plugin, and put the code in the plugins directory of the Composer instance bucket. If you still have problems, please describe all the steps you have taken and I will try to help.
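For illustration, here is a minimal sketch of what such a plugin file could look like on Airflow 1.10.3, with the airflow.gcp / airflow.providers imports swapped for the contrib hooks that ship with 1.10.x (GoogleCloudStorageHook and SFTPHook). It is simplified to the single-object case (no wildcard handling), and the file name PluginGCSToSFTPOperator.py and the connection ids simply mirror the ones used in the question:

# PluginGCSToSFTPOperator.py - place in the Composer bucket's plugins/ folder.
# Minimal sketch for Airflow 1.10.3: uses the contrib hooks instead of the
# airflow.gcp / airflow.providers packages, which do not exist in 1.10.x.
import os
from tempfile import NamedTemporaryFile

from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook
from airflow.contrib.hooks.sftp_hook import SFTPHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class GCSToSFTPOperator(BaseOperator):
    template_fields = ("source_bucket", "source_object", "destination_path")

    @apply_defaults
    def __init__(self,
                 source_bucket,
                 source_object,
                 destination_path,
                 move_object=False,
                 gcp_conn_id="google_cloud_default",
                 sftp_conn_id="ssh_default",
                 delegate_to=None,
                 *args, **kwargs):
        super(GCSToSFTPOperator, self).__init__(*args, **kwargs)
        self.source_bucket = source_bucket
        self.source_object = source_object
        self.destination_path = destination_path
        self.move_object = move_object
        self.gcp_conn_id = gcp_conn_id
        self.sftp_conn_id = sftp_conn_id
        self.delegate_to = delegate_to

    def execute(self, context):
        # The contrib GCS hook uses a different connection-id argument name
        # than the newer GCSHook.
        gcs_hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
        )
        sftp_hook = SFTPHook(self.sftp_conn_id)

        destination_path = os.path.join(self.destination_path, self.source_object)
        sftp_hook.create_directory(os.path.dirname(destination_path))

        with NamedTemporaryFile("w") as tmp:
            # The contrib hook takes positional (bucket, object) arguments
            # rather than bucket_name= / object_name= keywords.
            gcs_hook.download(self.source_bucket, self.source_object, tmp.name)
            sftp_hook.store_file(destination_path, tmp.name)

        if self.move_object:
            gcs_hook.delete(self.source_bucket, self.source_object)

        self.log.info("Copied gs://%s/%s to %s",
                      self.source_bucket, self.source_object, destination_path)

With a file like this in the Composer bucket's plugins/ folder, the DAG from step 1 should be able to import it as shown; note that the tags= argument to DAG in that example comes from newer Airflow versions and may need to be dropped on 1.10.3.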
You can also read more about the Airflow versions available in Composer.

Comments:

I copied the code and put it in the plugins folder; after that I get the error "No module named 'airflow.gcp'". Hello @muscat, I have added all the steps I followed and the code is attached as well. I upgraded the version from 1.10.2 to 1.10.3 in the GCP environment; the operator is available in version 1.10.7, but that version is not available for the upgrade.

@Bhagesh thanks for providing the required information! I will look into it today/tomorrow.

Hello @muscat, please also take a look at the other question and try to provide your input.

See the other answer. Enjoy!