如何使用SAS从web下载文件并分配到特定文件夹
早上好。我试着用SAS从网站下载zip文件,并把它保存到指定的位置。我想保存到的位置是
S:\Projects\
方法1
第一次尝试如下
/* Method 1 from the question: pass the URL to the operating system START command. */
/* Per the question text, the zip file then lands in the Downloads folder rather than S:\Projects\. */
DATA _null_ ;
/* NOTE(review): X is a global statement, so the surrounding DATA _null_ step is presumably not required - confirm. */
x 'start https://yehonal.github.io/DownGit/#/home?url=https:%2F%2Fgithub.com%2FCSSEGISandData%2FCOVID-19%2Ftree%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_daily_reports';
RUN ;
方法1,我可以下载文件,但这会自动下载到我的下载文件夹
方法2
所以我是这样发现的
/* Method 2 from the question: fileref naming the local zip file that should receive the response. */
filename out "S:\Projects\csse_covid_19_daily_reports.zip";
/* Issue a GET request for the DownGit page URL and write the response to the OUT fileref. */
/* Per the question text, this attempt does not download anything. */
proc http
url='https://yehonal.github.io/DownGit/#/home?url=https:%2F%2Fgithub.com%2FCSSEGISandData%2FCOVID-19%2Ftree%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_daily_reports'
method="get" out=out;
run;
但是代码不起作用,没有下载任何东西
如何从web下载文件并保存到特定位置?
在这种情况下,我建议使用宏或CALL EXECUTE;我更倾向于先编写宏,然后通过CALL EXECUTE调用该宏。在SAS OnDemand for Academics(免费云服务)上运行大约需要一分钟。
在浏览器中打开该url时,zip文件是由浏览器中运行的javascript构建并自动下载的。
PROC HTTP 不会运行javascript,因此无法下载最终的响应(即构造出来的zip文件),您只会得到404消息。
存储库中的文件列表可以从url获取为json
https://api.github.com/repos/CSSEGISandData/COVID-19/contents/csse_covid_19_data/csse_covid_19_daily_reports
列表数据包含每个csv文件的下载url。
下载url如下所示
https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv
您可以按照 @Reeza 的建议使用SAS逐个下载文件,或者
- 使用 git 命令或SAS git*函数下载整个存储库
  - 据我所知(AFAIK),github服务器不提供可仅下载存储库特定子文件夹的 git archive
- 使用 svn 命令从git存储库下载特定文件夹
  - 需要安装 svn(我使用了SlikSvn)
/* System options for running operating-system commands (NOXWAIT, XSYNC, XMIN) and for echoing source to the log (SOURCE). */
options noxwait xsync xmin source;

/* Repository subfolder to fetch: the daily reports of the COVID-19 Data Repository */
/* by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. */
%let repo_subdir_url = https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports;

/* The svn form of the URL is the same address with tree/master replaced by trunk. */
/* NOTE(review): this relies on GitHub serving the repository over Subversion - confirm that is still available before enabling the download. */
%let svn_url = %sysfunc(tranwrd(&repo_subdir_url, tree/master, trunk));

/* Local storage: create the covid subfolder under the data root (return value kept in RC, not checked). */
%let covid_data_root = c:\temp\csse;
%let rc = %sysfunc(dcreate(covid,&covid_data_root));
%let download_path = &covid_data_root\covid;

/* Operating-system command that checks the subfolder out into the local folder. */
%let os_command = svn checkout &svn_url "&download_path";

/*
* uncomment this block to download the (data) files from the repository subfolder;
%sysexec %superq(os_command);
*/
/* Generate (and, once enabled, execute) one PROC IMPORT step for each csv file downloaded. */
libname covid "&covid_data_root.\covid";

/* Bare listing (names only) of the contents of the download folder. */
filename csvlist pipe "dir /b ""&download_path""";

data _null_;
  length filename $200;
  /* TRANWRD gives an undeclared result variable a length of 200 bytes, which would
     silently truncate the generated code for long paths; declare the length explicitly. */
  length source_code $2000;

  infile csvlist length=l;
  input filename $varying. l;

  /* Subsetting IF: only csv files are processed. */
  if lowcase(scan(filename,-1,'.')) = 'csv';

  /* Target table name: the file name without extension, hyphens turned into underscores, prefixed with covid.day_ */
  out = 'covid.day_'||translate(scan(filename,1,'.'),'_','-');

  /*
   * NOTE: Starting 08/11/2020 FIPS data first starts appearing after a few hundred rows.
   * Thus the high GuessingRows
   */
  template =
    'PROC IMPORT file="#path#\#filename#" replace out=#out# dbms=csv; ' ||
    'GuessingRows = 5000;' ||
    'run;';

  /* Fill the placeholders of the template with the values for this file. */
  source_code = tranwrd (template, "#path#", "&download_path");
  source_code = tranwrd (source_code, "#filename#", trim(filename));
  source_code = tranwrd (source_code, "#out#", trim(out));

  /* uncomment this line to import each data file */
  *call execute ('%nrstr(' || trim (source_code) || ')');
run;
/* Collect variable metadata for the imported daily tables only (memname is always uppercase). */
/* A prefix comparison (=:) is used instead of LIKE because an underscore in a LIKE pattern */
/* matches any single character, so the pattern DAY_% would also select names such as DAYS_X. */
proc contents noprint data=covid._all_ out=meta(where=(memname =: 'DAY_'));
run;
/* Build macro variable LENGTHS: one entry per variable name found in META, */
/* consisting of the name, a $ when the type code is 2, and the largest length seen, */
/* ready to be used as the argument of a LENGTH statement. */
proc sql noprint;
  select catx(' ', name, case vartype when 2 then '$' else '' end, widest)
    into :lengths separated by ' '
    from (select name,
                 min(type)   as vartype,
                 max(length) as widest,
                 min(varnum) as first_pos,
                 max(varnum) as last_pos
            from meta
           group by name)
   order by first_pos, last_pos;
quit;
/* Concatenate every daily table into COVID.ALL_DAYS and derive the report date from the table name. */
data covid.all_days;
  /* DATE is declared first so that it leads the variable order. */
  length date 8;
  format date mmddyy10.;

  /* Lengths taken from macro variable LENGTHS, declared before SET reads the tables. */
  length &lengths;

  set covid.day_: indsname=dsname;

  /* The date text is read from position 11 of the contributing table name */
  /* (presumably libref.DAY_mm_dd_yyyy, given how the tables are named at import). */
  date = input(substr(dsname,11),mmddyy10.);

  /* Reset length based informats and formats of the character variables. */
  informat _character_;
  format _character_;
run ;
/* Keep the rows whose country_region is US and order them by state, county and date. */
proc sort data=covid.all_days(where=(country_region = 'US')) out=us_days;
  by province_state admin2 date;
run;
/* Write the report and its graphs to the current folder as covid.html. */
ods html path='.' gpath='.' file='covid.html';

/* Suppress the default BY line; the title carries the BY values instead. */
options nobyline;

/* One series plot of confirmed counts over time per state/county BY group, */
/* limited to states whose name starts with Cali. */
proc sgplot data=us_days;
  by province_state admin2;
  where province_state =: 'Cali';
  *where also admin2=: 'O';
  title "#byval2 County, #byval1";
  label province_state='State'
        admin2='County'
        confirmed='Confirmed (cumulative?)';
  xaxis valuesformat=monname3.;
  series x=date y=confirmed;
run;

/* Close the HTML destination and restore BY lines. */
ods html close;
options byline;
图表(Plot)
这可能与您所使用的下载服务的工作方式有关。你有实际源文件的链接吗?
@reeza 我尝试用它下载所有文件。
你想一次下载所有文件,还是每天只下载最新的每日文件?你想下载文件,还是导入数据?您可以直接把文件导入SAS数据集,即直接从Github上的文件导入。
我现在在SAS上的逻辑是,不加载所有文件。
仅供参考——问题出在下载服务上,文件可能尚未准备好。准备/运行代码需要一些时间,因此您也可以尝试加入一个睡眠步骤,等文件完全准备好之后再下载。
我不知道该怎么做,所以我自己现在解决这个问题可能会更快。
这太棒了。我已经编写SAS两年了,我应该多学习,这样才能像你一样。我真的很感谢你的帮助。非常感谢。
options noxwait xsync xmin source;
* use svn to download all files in a subfolder of a git repository;
* local folder for storing data from
* COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University;
%let covid_data_root = c:\temp\csse;
%let rc = %sysfunc(dcreate(covid,&covid_data_root));
%let download_path = &covid_data_root\covid;
%let repo_subdir_url = https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports;
%let svn_url = %sysfunc(tranwrd(&repo_subdir_url, tree/master, trunk));
%let os_command = svn checkout &svn_url "&download_path";
/*
* uncomment this block to download the (data) files from the repository subfolder;
%sysexec %superq(os_command);
*/
* codegen and execute the PROC IMPORT steps needed to read each csv file downloaded;
libname covid "&covid_data_root.\covid";
filename csvlist pipe "dir /b ""&download_path""";
data _null_;
infile csvlist length=l;
length filename $200;
input filename $varying. l;
if lowcase(scan(filename,-1,'.')) = 'csv';
out = 'covid.day_'||translate(scan(filename,1,'.'),'_','-');
/*
* NOTE: Starting 08/11/2020 FIPS data first starts appearing after a few hundred rows.
* Thus the high GuessingRows
*/
template =
'PROC IMPORT file="#path#\#filename#" replace out=#out# dbms=csv; ' ||
'GuessingRows = 5000;' ||
'run;';
source_code = tranwrd (template, "#path#", "&download_path");
source_code = tranwrd (source_code, "#filename#", trim(filename));
source_code = tranwrd (source_code, "#out#", trim(out));
/* uncomment this line to import each data file */
*call execute ('%nrstr(' || trim (source_code) || ')');
run;
* memname is always uppercase;
proc contents noprint data=covid._all_ out=meta(where=(memname like 'DAY_%'));
run;
* compute variable lengths for LENGTH statement;
proc sql noprint;
select
catx(' ', name, case when type=2 then '$' else '' end, maxlen)
into
:lengths separated by ' '
from
( select name, min(type) as type, max(length) as maxlen, min(varnum) as minvarnum, max(varnum) as maxvarnum
from meta
group by name
)
order by minvarnum, maxvarnum
;
quit;
* stack all the individual daily data;
data covid.all_days;
attrib date length=8 format=mmddyy10.;
length &lengths;
set covid.day_: indsname=dsname;
date = input(substr(dsname,11),mmddyy10.);
format _character_; * reset length based formats;
informat _character_; * reset length based informats;
run ;
proc sort data=covid.all_days out=us_days;
where country_region = 'US';
by province_state admin2 date;
run;
ods html gpath='.' path='.' file='covid.html';
options nobyline;
proc sgplot data=us_days;
where province_state =: 'Cali';
*where also admin2=: 'O';
by province_state admin2;
title "#byval2 County, #byval1";
series x=date y=confirmed;
xaxis valuesformat=monname3.;
label province_state='State' admin2='County';
label confirmed='Confirmed (cumulative?)';
run;
ods html close;
options byline;