How to download a file from the web with SAS and save it to a specific folder


Good morning,

I am trying to download a zip file from a website and control the location it is saved to.

The location I want to save it to is

S:\Projects\
Method 1

My first attempt was the following:

DATA _null_ ;
x 'start https://yehonal.github.io/DownGit/#/home?url=https:%2F%2Fgithub.com%2FCSSEGISandData%2FCOVID-19%2Ftree%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_daily_reports';
RUN ;
With Method 1 I can download the file, but it goes automatically to my Downloads folder.

Method 2

So then I found this approach:

filename out "S:\Projects\csse_covid_19_daily_reports.zip";

proc http
 url='https://yehonal.github.io/DownGit/#/home?url=https:%2F%2Fgithub.com%2FCSSEGISandData%2FCOVID-19%2Ftree%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_daily_reports'
 method="get" out=out;
run;
But this code does not work; nothing is downloaded.


How can I download a file from the web and save it to a specific location?

In this case I would suggest a macro (or CALL EXECUTE); my preference is to write a macro and then invoke that macro via CALL EXECUTE. The full example further down runs in about a minute on SAS Academics on Demand (free cloud service).
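As a rough illustration of that pattern (not code from the original answer), the sketch below defines a hypothetical %download_one macro that fetches a single file with PROC HTTP, then queues one call per url with CALL EXECUTE; the example url and the S:\Projects\ target folder are taken from this question.

%macro download_one(url=, outfile=);
  /* fetch one url to a local file with PROC HTTP */
  filename _dl "&outfile";
  proc http url="&url" method="GET" out=_dl;
  run;
  filename _dl clear;
%mend download_one;

/* queue one macro call per file; in practice the urls would come from
   the GitHub file listing described in the answer below */
data _null_;
  length code $ 500;
  input url :$300.;
  code = cats('%nrstr(%download_one(url=', url,
              ', outfile=S:\Projects\', scan(url, -1, '/'), '))');
  call execute(code);
datalines;
https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv
;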


When that url is viewed in a browser, JavaScript running in the browser builds the zip file that is downloaded automatically. PROC HTTP does not run JavaScript, so it cannot retrieve that final response (the constructed zip file); instead you get a 404 message.

The list of files in the repository subfolder can be retrieved as JSON from the url

https://api.github.com/repos/CSSEGISandData/COVID-19/contents/csse_covid_19_data/csse_covid_19_daily_reports
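As a rough sketch (not part of the original answer), you could pull that listing with PROC HTTP and read it with the JSON libname engine; the exact table and column names the engine produces depend on the response, so inspect the LIST library after running.

filename resp temp;

proc http
  url="https://api.github.com/repos/CSSEGISandData/COVID-19/contents/csse_covid_19_data/csse_covid_19_daily_reports"
  method="GET"
  out=resp;
run;

/* read the JSON response; the listing is an array of objects, which the
   JSON engine surfaces as datasets such as LIST.ROOT.
   Note: unauthenticated GitHub API calls are rate limited. */
libname list json fileref=resp;

proc print data=list.root (obs=5);
  var name download_url;   /* assumed column names, based on the GitHub response fields */
run;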
The listing contains a download url for each csv file. A download url looks like this:

https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv
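For instance, a minimal sketch (assuming S:\Projects\ exists and is writable, and a SAS release recent enough that PROC HTTP sets SYS_PROCHTTP_STATUS_CODE) that fetches one daily csv straight to the folder the question asks about:

filename csvout "S:\Projects\01-22-2020.csv";

proc http
  url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv"
  method="GET"
  out=csvout;
run;

/* quick check of the response status in the log */
%put NOTE: PROC HTTP status was &SYS_PROCHTTP_STATUS_CODE;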
You can download the individual files with SAS per @Reeza, or:

  • Use the git command or the SAS git* functions to download the repository
    • AFAIK the GitHub servers do not offer git archive, which would be the way to download only a specific subfolder of a repository
    • A minimal sketch using the SAS git functions follows this list
  • Use the svn command to download the specific folder from the git repository
    • Requires installing svn; I used SlikSvn
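A minimal sketch of the git-function route, assuming SAS 9.4M6 or later where GIT_CLONE is available (earlier maintenance releases expose GITFN_* equivalents). Note this clones the entire repository, not just the subfolder, and the full CSSE repository can be large.

data _null_;
  /* clone the whole COVID-19 repository into a local folder */
  rc = git_clone(
    "https://github.com/CSSEGISandData/COVID-19.git",
    "c:\temp\csse\COVID-19");
  put rc=;   /* 0 is expected on success */
run;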
For example, using the svn approach:

Download the data, import each csv, stack the imported data sets, and produce a series plot of the response by date.

options noxwait xsync xmin source;

* use svn to download all files in a subfolder of a git repository;

* local folder for storing data from 
* COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University;

%let covid_data_root = c:\temp\csse;

%let rc = %sysfunc(dcreate(covid,&covid_data_root));
%let download_path = &covid_data_root\covid;

%let repo_subdir_url = https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports;
%let svn_url = %sysfunc(tranwrd(&repo_subdir_url, tree/master, trunk));

%let os_command = svn checkout &svn_url "&download_path";

/*
* uncomment this block to download the (data) files from the repository subfolder;
%sysexec %superq(os_command);
*/

* codegen and execute the PROC IMPORT steps needed to read each csv file downloaded;

libname covid "&covid_data_root.\covid";

filename csvlist pipe "dir /b ""&download_path""";

data _null_;
  infile csvlist length=l;
  length filename $200;
  input filename $varying. l;

  if lowcase(scan(filename,-1,'.')) = 'csv';

  out = 'covid.day_'||translate(scan(filename,1,'.'),'_','-');

  /* 
   * NOTE: Starting 08/11/2020 FIPS data first starts appearing after a few hundred rows.
   * Thus the high GuessingRows 
   */
  template =
  'PROC IMPORT file="#path#\#filename#" replace out=#out# dbms=csv; ' ||
  'GuessingRows = 5000;' ||
  'run;';

  source_code = tranwrd (template, "#path#", "&download_path");
  source_code = tranwrd (source_code, "#filename#", trim(filename));
  source_code = tranwrd (source_code, "#out#", trim(out));

  /* uncomment this line to import each data file */
  *call execute ('%nrstr(' || trim (source_code) || ')');
run;

* memname is always uppercase;
proc contents noprint data=covid._all_ out=meta(where=(memname like 'DAY_%'));
run;

* compute variable lengths for LENGTH statement;
proc sql noprint;
  select 
    catx(' ', name, case when type=2 then '$' else '' end, maxlen)
  into
    :lengths separated by ' '
  from 
  ( select name, min(type) as type, max(length) as maxlen, min(varnum) as minvarnum, max(varnum) as maxvarnum
    from meta
    group by name
  )
  order by minvarnum, maxvarnum
  ;
quit;

* stack all the individual daily data;
data covid.all_days;
  attrib date length=8 format=mmddyy10.;
  length &lengths;
  set covid.day_: indsname=dsname;
  date = input(substr(dsname,11),mmddyy10.);
  format _character_;     * reset length based formats;
  informat _character_;   * reset length based informats; 
run ;

proc sort data=covid.all_days out=us_days;
  where country_region = 'US';
  by province_state admin2 date;
run;

ods html gpath='.' path='.' file='covid.html';

options nobyline;
proc sgplot data=us_days;
  where province_state =: 'Cali';
  *where also admin2=: 'O';

  by province_state admin2;
  title "#byval2 County, #byval1";

  series x=date y=confirmed;
  xaxis valuesformat=monname3.;

  label province_state='State' admin2='County';
  label confirmed='Confirmed (cumulative?)';
run;
  
ods html close;

options byline;
[plot: series plot of confirmed cases by date for the selected counties]

This probably comes down to how the service you are using works. Do you have a link to the actual source files?
@Reeza yes, and I am trying to download all the files.
Do you want to download all of the files at once, or fetch the newest daily file each day?
Do you want to download the files or import the data? You can import the files directly into SAS data sets, i.e. straight from the GitHub files.
FYI, my current SAS logic does not load all of the files. The problem is that the service builds the download, so the file may not be ready immediately; since preparing and running the code takes some time, you could also try a sleep step so the download can finish completely before you use it.
I don't know how to do that, so it may be faster for me to solve it this way for now.
This is awesome. I have been writing SAS for two years and clearly should study more so I can get to your level. I really appreciate your help, thank you very much.