使用Python或Java将数据从本地上传到Azure ADLS Gen2

使用Python或Java将数据从本地上传到Azure ADLS Gen2,java,python,azure,azure-storage,azure-data-lake,Java,Python,Azure,Azure Storage,Azure Data Lake,我拥有Data Lake Gen2的Azure存储帐户。我想使用Python(或Java)将数据从本地上传到Lake Gen2文件系统 我已经找到了如何与存储帐户中的文件共享交互的方法,但是我还无法找到如何上传到Lake(而不是文件共享)。我还发现了如何为Gen1 Lakes执行此操作,但除了关闭Gen2之外,没有其他操作 我的问题是,到今天为止,Python是否可以做到这一点;或者,如何使用Java将文件上载到Gen2 Lake?非常感谢演示上载API调用的代码片段。根据官方教程,如下所示,如

我拥有Data Lake Gen2的Azure存储帐户。我想使用Python(或Java)将数据从本地上传到Lake Gen2文件系统

我已经找到了如何与存储帐户中的文件共享交互的方法,但是我还没有找到如何上传到Data Lake(而不是文件共享)。我也找到了针对Gen1 Lake的做法,但针对Gen2,除了一些已关闭的请求之外什么也没有找到


我的问题是,到今天为止,Python是否可以做到这一点;或者,如何使用Java将文件上载到Gen2 Lake?非常感谢演示上载API调用的代码片段。

根据官方教程,如下所示,如果您尚未注册Azure Data Lake Store Gen 2的公开预览,则无法直接使用Azure Storage SDK for Python在Azure Data Lake Store Gen 2中执行任何操作

注意

只有注册了相应公共预览的、具有分层命名空间的帐户,才能使用本文中描述的功能。要查看限制,请参阅“已知问题”一文

因此,将数据上传到ADLS Gen2的唯一解决方案是使用ADLS Gen2的REST API,请参阅其参考文档

下面是我用Python将数据上传到ADLS Gen2的示例代码,它运行良好

import requests
import json

def auth(tenant_id, client_id, client_secret):
    """Request an OAuth2 token using the client-credentials grant.

    Posts the client id and secret as form fields to the tenant's
    ``oauth2/v2.0/token`` endpoint on ``login.microsoftonline.com`` with the
    ``https://storage.azure.com/.default`` scope.

    Args:
        tenant_id: Tenant identifier interpolated into the token URL.
        client_id: Value sent as the ``client_id`` form field.
        client_secret: Value sent as the ``client_secret`` form field.

    Returns:
        A ``(status_code, payload)`` tuple, where ``payload`` is the response
        body parsed as JSON.

    Raises:
        ValueError: If the response body is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    print('auth')
    token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    form_fields = dict(
        client_id=client_id,
        client_secret=client_secret,
        scope="https://storage.azure.com/.default",
        grant_type="client_credentials",
    )
    response = requests.post(
        token_url,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=form_fields,
    )
    payload = json.loads(response.text)
    return response.status_code, payload

def mkfs(account_name, fs_name, access_token):
    """Send the ``resource=filesystem`` PUT request for ``fs_name``.

    The request goes to ``https://{account_name}.dfs.core.windows.net/{fs_name}``
    and carries ``access_token`` as a bearer token.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('mkfs')
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}?resource=filesystem"
    response = requests.put(
        endpoint,
        headers={"Authorization": f"Bearer {access_token}"},
    )
    return response.status_code, response.text

def mkdir(account_name, fs_name, dir_name, access_token):
    """Send the ``resource=directory`` PUT request for ``dir_name``.

    The directory path is addressed as ``{fs_name}/{dir_name}`` on the
    account's ``dfs.core.windows.net`` endpoint; ``access_token`` is sent as a
    bearer token.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('mkdir')
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}/{dir_name}?resource=directory"
    response = requests.put(
        endpoint,
        headers={"Authorization": f"Bearer {access_token}"},
    )
    return response.status_code, response.text
    
def touch_file(account_name, fs_name, dir_name, file_name, access_token):
    """Send the ``resource=file`` PUT request for ``{dir_name}/{file_name}``.

    The path lives under filesystem ``fs_name`` on the account's
    ``dfs.core.windows.net`` endpoint; ``access_token`` is sent as a bearer
    token.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('touch_file')
    bearer = {"Authorization": f"Bearer {access_token}"}
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}/{dir_name}/{file_name}?resource=file"
    response = requests.put(endpoint, headers=bearer)
    return response.status_code, response.text

def append_file(account_name, fs_name, path, content, position, access_token):
    """Send an ``action=append`` PATCH carrying ``content`` for ``path``.

    Args:
        account_name: Storage account name used in the host name.
        fs_name: Filesystem that contains ``path``.
        path: Path of the target file inside the filesystem.
        content: Request body; its ``len()`` is sent as ``Content-Length``.
        position: Value of the ``position`` query parameter.
        access_token: Token sent in the ``Authorization: Bearer`` header.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('append_file')
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}/{path}?action=append&position={position}"
    request_headers = {}
    request_headers["Authorization"] = f"Bearer {access_token}"
    request_headers["Content-Type"] = "text/plain"
    request_headers["Content-Length"] = f"{len(content)}"
    response = requests.patch(endpoint, headers=request_headers, data=content)
    return response.status_code, response.text
    
def flush_file(account_name, fs_name, path, position, access_token):
    """Send an ``action=flush`` PATCH for ``path`` at ``position``.

    The request targets ``{fs_name}/{path}`` on the account's
    ``dfs.core.windows.net`` endpoint and carries ``access_token`` as a
    bearer token. No request body is sent.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('flush_file')
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}/{path}?action=flush&position={position}"
    response = requests.patch(
        endpoint,
        headers={"Authorization": f"Bearer {access_token}"},
    )
    return response.status_code, response.text

def mkfile(account_name, fs_name, dir_name, file_name, local_file_name, access_token):
    """Upload a local file to ``{dir_name}/{file_name}`` in filesystem ``fs_name``.

    Steps, in order: create the remote file with ``touch_file``, append the
    whole local file at position 0 with ``append_file``, then flush at the
    final length with ``flush_file``. The status of every step is checked; on
    the first failing step the response body is printed and the remaining
    steps are skipped, so a failed append or flush is no longer silent.

    Args:
        account_name: Storage account name.
        fs_name: Target filesystem name.
        dir_name: Directory (inside the filesystem) that receives the file.
        file_name: Name of the remote file.
        local_file_name: Path of the local file, read in binary mode.
        access_token: Bearer token passed to every request.

    Returns:
        None.

    Raises:
        OSError: If the local file cannot be opened or read.
    """
    print('mkfile')
    status_code, result = touch_file(account_name, fs_name, dir_name, file_name, access_token)
    if status_code != 201:
        print(result)
        return

    path = f"{dir_name}/{file_name}"
    # Read everything up front so the local file handle is released before
    # the network calls are made.
    with open(local_file_name, 'rb') as local_file:
        content = local_file.read()

    status_code, result = append_file(account_name, fs_name, path, content, 0, access_token)
    if not 200 <= status_code < 300:
        # Flushing after a rejected append would commit a file without its data.
        print(result)
        return

    status_code, result = flush_file(account_name, fs_name, path, len(content), access_token)
    if not 200 <= status_code < 300:
        print(result)
        
    
if __name__ == '__main__':
    # Placeholders: replace with the values of your own tenant, app
    # registration and storage account before running.
    tenant_id = '<your tenant id>'
    client_id = '<your client id>'
    client_secret = '<your client secret>'

    account_name = '<your adls account name>'
    fs_name = '<your filesystem name>'
    dir_name = '<your directory name>'
    file_name = '<your file name>'
    local_file_name = '<your local file name>'

    # Acquire an Access token
    auth_status_code, auth_result = auth(tenant_id, client_id, client_secret)
    if auth_status_code != 200:
        # Without a token every following request can only fail with an
        # authorization error, so report the real cause and stop here.
        raise SystemExit(f"auth failed ({auth_status_code}): {auth_result}")
    access_token = auth_result['access_token']
    # NOTE(review): this prints a live bearer token; drop it outside of debugging.
    print(access_token)

    # Create a filesystem
    mkfs_status_code, mkfs_result = mkfs(account_name, fs_name, access_token)
    print(mkfs_status_code, mkfs_result)

    # Create a directory
    mkdir_status_code, mkdir_result = mkdir(account_name, fs_name, dir_name, access_token)
    print(mkdir_status_code, mkdir_result)

    # Create a file from local file
    mkfile(account_name, fs_name, dir_name, file_name, local_file_name, access_token)
导入请求
导入json
def auth(租户id、客户id、客户机密):
打印('auth')
验证头={
“内容类型”:“应用程序/x-www-form-urlencoded”
}
认证机构={
“客户id”:客户id,
“客户机密”:客户机密,
“范围”:https://storage.azure.com/.default",
“授予类型”:“客户端凭据”
}
resp=请求。post(f)https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token”,headers=auth_headers,data=auth_body)
返回(resp.status_代码,json.load(resp.text))
def mkfs(帐户名称、fs名称、访问令牌):
打印('mkfs')
fs_头={
“授权”:f“承载{access_token}”
}
resp=requests.put(f“https://{account\u name}.dfs.core.windows.net/{fs\u name}?resource=filesystem”,headers=fs\u headers)
返回(响应状态代码、响应文本)
def mkdir(帐户名称、fs名称、目录名称、访问令牌):
打印('mkdir')
目录头={
“授权”:f“承载{access_token}”
}
resp=requests.put(f“https://{account\u name}.dfs.core.windows.net/{fs\u name}/{dir\u name}?resource=directory”,headers=dir\u headers)
返回(响应状态代码、响应文本)
def touch_文件(帐户名称、fs_名称、目录名称、文件名称、访问令牌):
打印('touch_file')
触摸文件标题={
“授权”:f“承载{access_token}”
}
resp=requests.put(f“https://{account\u name}.dfs.core.windows.net/{fs\u name}/{dir\u name}/{file\u name}?resource=file”,headers=touch\u file\u headers)
返回(响应状态代码、响应文本)
def append_文件(帐户名称、fs_名称、路径、内容、位置、访问令牌):
打印('附加文件')
附加\u文件\u头={
“授权”:f“承载{access_token}”,
“内容类型”:“文本/普通”,
“内容长度”:f“{len(Content)}”
}
resp=requests.patch(f“https://{account\u name}.dfs.core.windows.net/{fs\u name}/{path}?action=append&position={position}”,headers=append\u file\u headers,data=content)
返回(响应状态代码、响应文本)
def flush_文件(帐户名称、fs_名称、路径、位置、访问令牌):
打印(“刷新文件”)
刷新\u文件\u头={
“授权”:f“承载{access_token}”
}
resp=requests.patch(f“https://{account\u name}.dfs.core.windows.net/{fs\u name}/{path}?action=flush&position={position}”,headers=flush\u file\u headers)
返回(响应状态代码、响应文本)
def mkfile(帐户名称、fs名称、目录名称、文件名称、本地文件名称、访问令牌):
打印('mkfile')
状态代码,结果=触摸文件(帐户名称、fs名称、目录名称、文件名称、访问令牌)
如果状态_代码==201:
打开(本地\u文件名称,'rb')作为本地\u文件:
path=f“{dir\u name}/{file\u name}”
content=local_file.read()
位置=0
追加文件(帐户名称、fs名称、路径、内容、位置、访问令牌)
位置=长度(内容)
刷新文件(帐户名称、fs名称、路径、位置、访问令牌)
其他:
打印(结果)
如果uuuu name uuuuuu='\uuuuuuu main\uuuuuuu':
租户id=“”
客户端id=“”
客户_机密=“”
帐户名称=“”
fs_名称=“”
目录名称=“”
文件名=“”
本地文件名=“”
#获取访问令牌
身份验证状态代码,身份验证结果=身份验证(租户id、客户id、客户机密)
access\u token=auth\u status\u code==200和auth\u result['access\u token']或''
打印(访问令牌)
#创建文件系统
mkfs_状态代码,mkfs_结果=mkfs(帐户名称、fs名称、访问令牌)
打印(mkfs_状态代码、mkfs_结果)
#创建目录
mkdir_状态_代码,mkdir_结果=mkdir(帐户名称、fs_名称、目录名称、访问令牌)
打印(mkdir\u状态代码、mkdir\u结果)
#从本地文件创建一个文件
mkfile(帐户名称、fs名称、目录名称、文件名称、本地文件名称、访问令牌)

希望有帮助。

根据官方教程,如下所示,如果您尚未注册Azure Data Lake Store Gen 2的公开预览,则无法直接使用Azure Storage SDK for Python在Azure Data Lake Store Gen 2中执行任何操作

注意

只有注册了相应公共预览的、具有分层命名空间的帐户,才能使用本文中描述的功能。要查看限制,请参阅“已知问题”一文

因此,将数据上传到ADLS Gen2的唯一解决方案是使用ADLS Gen2的REST API,请参阅其参考文档

下面是我用Python将数据上传到ADLS Gen2的示例代码,它运行良好

import requests
import json

def auth(tenant_id, client_id, client_secret):
    """Request an OAuth2 token using the client-credentials grant.

    Posts the client id and secret as form fields to the tenant's
    ``oauth2/v2.0/token`` endpoint on ``login.microsoftonline.com`` with the
    ``https://storage.azure.com/.default`` scope.

    Args:
        tenant_id: Tenant identifier interpolated into the token URL.
        client_id: Value sent as the ``client_id`` form field.
        client_secret: Value sent as the ``client_secret`` form field.

    Returns:
        A ``(status_code, payload)`` tuple, where ``payload`` is the response
        body parsed as JSON.

    Raises:
        ValueError: If the response body is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    print('auth')
    token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    form_fields = dict(
        client_id=client_id,
        client_secret=client_secret,
        scope="https://storage.azure.com/.default",
        grant_type="client_credentials",
    )
    response = requests.post(
        token_url,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=form_fields,
    )
    payload = json.loads(response.text)
    return response.status_code, payload

def mkfs(account_name, fs_name, access_token):
    """Send the ``resource=filesystem`` PUT request for ``fs_name``.

    The request goes to ``https://{account_name}.dfs.core.windows.net/{fs_name}``
    and carries ``access_token`` as a bearer token.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('mkfs')
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}?resource=filesystem"
    response = requests.put(
        endpoint,
        headers={"Authorization": f"Bearer {access_token}"},
    )
    return response.status_code, response.text

def mkdir(account_name, fs_name, dir_name, access_token):
    """Send the ``resource=directory`` PUT request for ``dir_name``.

    The directory path is addressed as ``{fs_name}/{dir_name}`` on the
    account's ``dfs.core.windows.net`` endpoint; ``access_token`` is sent as a
    bearer token.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('mkdir')
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}/{dir_name}?resource=directory"
    response = requests.put(
        endpoint,
        headers={"Authorization": f"Bearer {access_token}"},
    )
    return response.status_code, response.text
    
def touch_file(account_name, fs_name, dir_name, file_name, access_token):
    """Send the ``resource=file`` PUT request for ``{dir_name}/{file_name}``.

    The path lives under filesystem ``fs_name`` on the account's
    ``dfs.core.windows.net`` endpoint; ``access_token`` is sent as a bearer
    token.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('touch_file')
    bearer = {"Authorization": f"Bearer {access_token}"}
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}/{dir_name}/{file_name}?resource=file"
    response = requests.put(endpoint, headers=bearer)
    return response.status_code, response.text

def append_file(account_name, fs_name, path, content, position, access_token):
    """Send an ``action=append`` PATCH carrying ``content`` for ``path``.

    Args:
        account_name: Storage account name used in the host name.
        fs_name: Filesystem that contains ``path``.
        path: Path of the target file inside the filesystem.
        content: Request body; its ``len()`` is sent as ``Content-Length``.
        position: Value of the ``position`` query parameter.
        access_token: Token sent in the ``Authorization: Bearer`` header.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('append_file')
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}/{path}?action=append&position={position}"
    request_headers = {}
    request_headers["Authorization"] = f"Bearer {access_token}"
    request_headers["Content-Type"] = "text/plain"
    request_headers["Content-Length"] = f"{len(content)}"
    response = requests.patch(endpoint, headers=request_headers, data=content)
    return response.status_code, response.text
    
def flush_file(account_name, fs_name, path, position, access_token):
    """Send an ``action=flush`` PATCH for ``path`` at ``position``.

    The request targets ``{fs_name}/{path}`` on the account's
    ``dfs.core.windows.net`` endpoint and carries ``access_token`` as a
    bearer token. No request body is sent.

    Returns:
        A ``(status_code, response_text)`` tuple taken from the HTTP response.
    """
    print('flush_file')
    endpoint = f"https://{account_name}.dfs.core.windows.net/{fs_name}/{path}?action=flush&position={position}"
    response = requests.patch(
        endpoint,
        headers={"Authorization": f"Bearer {access_token}"},
    )
    return response.status_code, response.text

def mkfile(account_name, fs_name, dir_name, file_name, local_file_name, access_token):
    """Upload a local file to ``{dir_name}/{file_name}`` in filesystem ``fs_name``.

    Steps, in order: create the remote file with ``touch_file``, append the
    whole local file at position 0 with ``append_file``, then flush at the
    final length with ``flush_file``. The status of every step is checked; on
    the first failing step the response body is printed and the remaining
    steps are skipped, so a failed append or flush is no longer silent.

    Args:
        account_name: Storage account name.
        fs_name: Target filesystem name.
        dir_name: Directory (inside the filesystem) that receives the file.
        file_name: Name of the remote file.
        local_file_name: Path of the local file, read in binary mode.
        access_token: Bearer token passed to every request.

    Returns:
        None.

    Raises:
        OSError: If the local file cannot be opened or read.
    """
    print('mkfile')
    status_code, result = touch_file(account_name, fs_name, dir_name, file_name, access_token)
    if status_code != 201:
        print(result)
        return

    path = f"{dir_name}/{file_name}"
    # Read everything up front so the local file handle is released before
    # the network calls are made.
    with open(local_file_name, 'rb') as local_file:
        content = local_file.read()

    status_code, result = append_file(account_name, fs_name, path, content, 0, access_token)
    if not 200 <= status_code < 300:
        # Flushing after a rejected append would commit a file without its data.
        print(result)
        return

    status_code, result = flush_file(account_name, fs_name, path, len(content), access_token)
    if not 200 <= status_code < 300:
        print(result)
        
    
if __name__ == '__main__':
    # Placeholders: replace with the values of your own tenant, app
    # registration and storage account before running.
    tenant_id = '<your tenant id>'
    client_id = '<your client id>'
    client_secret = '<your client secret>'

    account_name = '<your adls account name>'
    fs_name = '<your filesystem name>'
    dir_name = '<your directory name>'
    file_name = '<your file name>'
    local_file_name = '<your local file name>'

    # Acquire an Access token
    auth_status_code, auth_result = auth(tenant_id, client_id, client_secret)
    if auth_status_code != 200:
        # Without a token every following request can only fail with an
        # authorization error, so report the real cause and stop here.
        raise SystemExit(f"auth failed ({auth_status_code}): {auth_result}")
    access_token = auth_result['access_token']
    # NOTE(review): this prints a live bearer token; drop it outside of debugging.
    print(access_token)

    # Create a filesystem
    mkfs_status_code, mkfs_result = mkfs(account_name, fs_name, access_token)
    print(mkfs_status_code, mkfs_result)

    # Create a directory
    mkdir_status_code, mkdir_result = mkdir(account_name, fs_name, dir_name, access_token)
    print(mkdir_status_code, mkdir_result)

    # Create a file from local file
    mkfile(account_name, fs_name, dir_name, file_name, local_file_name, access_token)
导入请求