Sas 使用通配符重命名语句
我有一个数据集,其中的变量名称中带有时间戳。因此,导入数据时,我得到如下输出:
NAME |Quantity Value at 31/12/2019|Value at 31/12/2019|Yield Exp 31/12/2019
FIDO |12 |F |1
ALFA |20 |2 |4
BETA |3 |5 |2
ETA |2 |B65 |0
THETA|14 |A40 |10
最终的输出不需要这些时间戳,所以我想重命名所有变量,删除带有日期的最后一部分。我希望结果是这样的:
NAME |Quantity Value|Value|Yield Exp
FIDO |12 |F |1
ALFA |20 |2 |4
BETA |3 |5 |2
ETA |2 |B65 |0
THETA|14 |A40 |10
我想使用 rename 语句,但我必须使用通配符,因为每次提供输入数据时,时间戳都是不同的。是否可以在 rename 语句中使用通配符?
此外,有些变量名包含空格和特殊字符,因此在引用它们时,我必须使用名称字面量写法('变量名'n)。
我已经编写了一段代码来获取包含所有变量名称的数据集:
/* Sample data: the three measure columns carry the extract date in their names. */
/* NOTE(review): name literals containing blanks and slashes need
   VALIDVARNAME=ANY to be in effect - confirm the session setting. */
data base;
    infile cards missover;
    input
        NAME $
        'Quantity Value at 31/12/2019'n $
        'Value at 31/12/2019'n $
        'Yield Exp 31/12/2019'n $
    ;
    datalines;
FIDO 12 F 1
ALFA 20 2 4
BETA 3 5 2
ETA 2 B65 0
THETA 14 A40 10
;
run;

/* Collect the column names of WORK.BASE.
   Filtering on libname as well as memname keeps a table called BASE that
   lives in some other assigned library out of the result. */
proc sql;
    create table BASE_COLUMN as
    select name
    from sashelp.vcolumn
    where libname = "WORK"
      and memname = "BASE"
    ;
quit;

/* Replace each timestamped name by its stable base name.
   The update deliberately has no WHERE clause: every row is visited and
   names that match none of the patterns are kept through ELSE. */
proc sql;
    update BASE_COLUMN
    set name =
        case
            when name like "Quantity Value%" then "Quantity Value"
            when name like "Value at%" then "Value"
            when name like "Yield Exp%" then "Yield Exp"
            else name
        end
    ;
quit;
但我仍然不知道如何使用 BASE_COLUMN 数据集来重命名原始的 BASE 数据集中的变量。
对于具有正确名称的数据集,重命名所有列名的最佳方法是什么?假设您的数据是以某个现有数据集的形式传入的,您可以使用
dictionary.columns
和宏生成代码,在数据步骤结束时重命名所有内容。在您的案例中,有两种情况:
/* Build two parallel, pipe-delimited macro lists for WORK.HAVE:
     &names     - current variable names that need renaming
     &new_names - the same names with the trailing date part removed
   A name is cut just before the word "at" when it has one, otherwise just
   before its first digit. The explicit "> 1" / "= 0" guards keep SUBSTR from
   being called with a zero or negative length (names such as NAME that have
   neither an "at" word nor a digit); such names are left unchanged and are
   then dropped by the final NE filter. */
proc sql noprint;
    select name
         , case
               when findw(name, 'at') > 1
                   then substr(name, 1, findw(name, 'at') - 1)
               when findw(name, 'at') = 0 and anydigit(name) > 1
                   then substr(name, 1, anydigit(name) - 1)
               else name
           end
           as new_name
    into :names separated by '|'
       , :new_names separated by '|'
    from dictionary.columns
    where libname = 'WORK'
      and memname = 'HAVE'
      and name ne calculated new_name
    ;
quit;
/* Generate code to rename the variables */
%macro rename;
    /* Emits one RENAME statement per old/new pair. The pairs are read from
       the pipe-delimited macro variables &names and &new_names, matched by
       position. Intended to be called inside a DATA step or after a
       PROC DATASETS MODIFY statement. */

    /* Keep the loop and work variables out of the caller's symbol tables. */
    %local i name new_name;

    /* Nothing to do when the lists were never created or are empty. */
    %if not %symexist(names) %then %return;
    %if not %symexist(new_names) %then %return;
    %if %length(&names) = 0 %then %return;

    %do i = 1 %to %sysfunc(countw(&names, |));
        %let name = %scan(&names, &i, |);
        %let new_name = %scan(&new_names, &i, |);
        rename "&name"n = "&new_name"n;
    %end;
%mend rename;
/* Copy HAVE to WANT; %rename expands to the generated RENAME statements. */
data want;
    set have;
    %rename
run;
如果数据很大,可以使用proc datasets
就地重命名,而不是创建新的数据集来重命名变量,从而节省时间
/* Rename the variables of HAVE in place instead of writing a new copy;
   %rename expands to the generated RENAME statements. */
proc datasets nolist;
    modify have;
        %rename
quit;
感谢Stu Sztukowski,我有了一个具有以下代码的解决方案:
/* Sample data: the three measure columns carry the extract date in their names. */
/* NOTE(review): name literals containing blanks and slashes need
   VALIDVARNAME=ANY to be in effect - confirm the session setting. */
data base;
    infile cards missover;
    input
        NAME $
        'Quantity Value at 31/12/2019'n $
        'Value at 31/12/2019'n $
        'Yield Exp 31/12/2019'n $
    ;
    datalines;
FIDO 12 F 1
ALFA 20 2 4
BETA 3 5 2
ETA 2 B65 0
THETA 14 A40 10
;
run;

/* Collect the column names of WORK.BASE.
   Filtering on libname as well as memname keeps a table called BASE that
   lives in some other assigned library out of the result. */
proc sql;
    create table BASE_COLUMN as
    select name
    from sashelp.vcolumn
    where libname = "WORK"
      and memname = "BASE"
    ;
quit;

/* Replace each timestamped name by its stable base name.
   The update deliberately has no WHERE clause: every row is visited and
   names that match none of the patterns are kept through ELSE. */
proc sql;
    update BASE_COLUMN
    set name =
        case
            when name like "Quantity Value%" then "Quantity Value"
            when name like "Value at%" then "Value"
            when name like "Yield Exp%" then "Yield Exp"
            else name
        end
    ;
quit;
/* Build two parallel, pipe-delimited macro lists for WORK.BASE:
     &names     - current names of the columns that need renaming
     &new_names - the replacement name for each of them, in the same order
   Columns whose name matches none of the patterns map to themselves and are
   removed by the final NE condition. */
proc sql noprint;
    select
        name,
        case
            when name like "Quantity Value%" then "Quantity Value"
            when name like "Value at%"       then "Value"
            when name like "Yield Exp%"      then "Yield Exp"
            else name
        end as new_name
    into
        :names     separated by '|',
        :new_names separated by '|'
    from dictionary.columns
    where libname eq 'WORK'
      and memname eq 'BASE'
      and name ne calculated new_name
    ;
quit;
%macro rename;
    /* Emits one RENAME statement per old/new pair. The pairs are read from
       the pipe-delimited macro variables &names and &new_names, matched by
       position. Intended to be called inside a DATA step or after a
       PROC DATASETS MODIFY statement. */

    /* Keep the loop and work variables out of the caller's symbol tables. */
    %local i name new_name;

    /* Nothing to do when the lists were never created or are empty. */
    %if not %symexist(names) %then %return;
    %if not %symexist(new_names) %then %return;
    %if %length(&names) = 0 %then %return;

    %do i = 1 %to %sysfunc(countw(&names, |));
        %let name = %scan(&names, &i, |);
        %let new_name = %scan(&new_names, &i, |);
        rename "&name"n = "&new_name"n;
    %end;
%mend rename;
/* Copy BASE to BASE_CORRECT; %rename expands to the generated RENAME statements. */
data BASE_CORRECT;
    set BASE;
    %rename
run;
再次感谢。
您的治理和编码实践可能建议使用控制数据来驱动重命名过程,而不是把映射关系硬编码在一系列 CASE 分支中。控制数据可以保存在永久库中,并仅在需要时进行修改。这里的示例使用 WORK 库以方便演示。
重命名控件数据的一些示例
示例1
数据集:like_name_map
LIKE NEW_NAME
---------------------------------
Quantity Value% Quantity Value
Value at% Value
Yield Exp% Yield Exp
PATTERN NEW_NAME
-----------------------------------------------
^(.+?)( at)? *\d{1,2}\/\d{1,2}\/\d{4}$ $1
示例2
数据集:regex_name_map
LIKE NEW_NAME
---------------------------------
Quantity Value% Quantity Value
Value at% Value
Yield Exp% Yield Exp
PATTERN NEW_NAME
-----------------------------------------------
^(.+?)( at)? *\d{1,2}\/\d{1,2}\/\d{4}$ $1
为了只重命名列而重写整个数据集可能会占用大量资源,特别是当表很大或带有约束时。可以使用 proc datasets 执行重命名,它只修改现有数据集的头部(元数据)部分。
示例1控制数据是类似模式的映射
/* Two identical sample tables (one per example macro). The character columns
   carry the extract date in their names; z is a plain numeric column. */
data base1 base2;
    infile cards missover;
    input
        NAME $
        'Quantity Value at 31/12/2019'n $
        'Value at 31/12/2019'n $
        'Yield Exp 31/12/2019'n $
        z
    ;
    datalines;
FIDO 12 F 1 3
ALFA 20 2 4 1
BETA 3 5 2 4
ETA 2 B65 0 1
THETA 14 A40 10 5
;
run;

/* Control data: one LIKE pattern per row and the name to rename matches to.
   Both columns are read with the & modifier so values may contain single
   blanks; the two fields on a data line must therefore be separated by
   AT LEAST TWO blanks, otherwise the whole line is read into LIKE. */
data work.like_name_map;
    length like $32 new_name $32;
    input LIKE & NEW_NAME &;
    datalines;
Quantity Value%   Quantity Value
Value at%         Value
Yield Exp%        Yield Exp
;
run;
%macro rename_by_like(data, control=work.like_name_map);
    /* Renames, in place, the variables of &data whose names match a LIKE
       pattern in the control table.
         data    - data set to modify (one- or two-level name)
         control - table with columns LIKE (pattern) and NEW_NAME (target name)
       Uses WORK.__CONTENTS as a scratch table. */
    %local lib mem index;

    /* The value assigned to SYSLAST is read back and split into its first and
       second word to obtain libref and member name.
       NOTE(review): relies on SAS storing SYSLAST as a two-level
       libref.member name even when &data is a one-level name - confirm. */
    %let syslast = &data;
    %let lib = %scan(&syslast,1);
    %let mem = %scan(&syslast,2);

    /* Remove a scratch table left over from an earlier call. */
    %if %sysfunc(exist(work.__contents)) %then %do;
        proc sql; drop table __contents; quit;
    %end;

    proc contents noprint data=&data out=__contents(keep=name);
    run;

    /* utilize the control data for locating matches and applying naming map */
    /* One "old"n="new"n pair per matching variable goes into __rename1..N.
       Pairs whose old and new names are already identical are skipped: a
       pattern such as "Quantity Value%" also matches the already-renamed
       "Quantity Value", so a second call would otherwise try to rename a
       variable to itself. NOPRINT keeps the generated pairs out of the
       output listing. */
    proc sql noprint;
        select quote(trim(__contents.name))||'n='||quote(trim(control.new_name))||'n' as rename
        into :__rename1-
        from __contents
        join &control as control
          on upcase(__contents.name) like upcase(trim(control.like))
         and __contents.name ne control.new_name
        ;
    quit;

    /* SQLOBS holds the number of pairs selected above; skip PROC DATASETS
       entirely when there is nothing to rename. */
    %if &sqlobs %then %do;
        proc datasets nolist lib=&lib;
            modify &mem;
            rename
            %do index = 1 %to &sqlobs;
                &&__rename&index
            %end;
            ;
        quit;
    %end;
%mend rename_by_like;

%rename_by_like(base1)
将记录
NOTE: The execution of this query involves performing one or more Cartesian product joins that
can not be optimized.
NOTE: PROCEDURE SQL used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
NOTE: Renaming variable 'Quantity Value at 31/12/2019'n to 'Quantity Value'n.
NOTE: Renaming variable 'Value at 31/12/2019'n to Value.
NOTE: Renaming variable 'Yield Exp 31/12/2019'n to 'Yield Exp'n.
NOTE: MODIFY was successful for WORK.BASE1.DATA.
NOTE: PROCEDURE DATASETS used (Total process time):
示例2控制数据是Perl正则表达式模式的映射
/* Control data: a Perl regular expression and the replacement text for the
   names it matches ($1 = first capture group).
   Both columns are read with the & modifier, and the pattern itself contains
   single blanks, so the pattern and the replacement must be separated by
   AT LEAST TWO blanks on the data line. */
data work.regex_name_map;
    length pattern $100 new_name $32;
    input pattern & new_name &;
    datalines;
^(.+?)( at)? *\d{1,2}\/\d{1,2}\/\d{4}$  $1
;
run;
%macro rename_by_regex(data, control=work.regex_name_map);
    /* Renames, in place, the variables of &data whose names match a Perl
       regular expression in the control table.
         data    - data set to modify (one- or two-level name)
         control - table with columns PATTERN (regex) and NEW_NAME
                   (replacement text, may reference capture groups)
       Uses WORK.__CONTENTS as a scratch table. */
    %local lib mem index;

    /* The value assigned to SYSLAST is read back and split into its first and
       second word to obtain libref and member name.
       NOTE(review): relies on SAS storing SYSLAST as a two-level
       libref.member name even when &data is a one-level name - confirm. */
    %let syslast = &data;
    %let lib = %scan(&syslast,1);
    %let mem = %scan(&syslast,2);

    /* Remove a scratch table left over from an earlier call. */
    %if %sysfunc(exist(work.__contents)) %then %do;
        proc sql; drop table __contents; quit;
    %end;

    proc contents noprint data=&data out=__contents(keep=name);
    run;

    /* One "old"n="new"n pair per matching variable goes into __rename1..N.
       The substitution carries the same /i flag as the match in the ON
       clause, so a name selected case-insensitively is also rewritten
       case-insensitively. NOPRINT keeps the generated pairs out of the
       output listing. */
    proc sql noprint;
        select
            quote(trim(__contents.name))||'n' ||
            '=' ||
            quote(trim(prxchange('s/'||trim(control.pattern)||'/'||trim(control.new_name)||'/i',
                                 1,
                                 trim(__contents.name)
            )))||'n'
        into :__rename1-
        from __contents
        join &control as control
          on prxmatch('/'||trim(control.pattern)||'/i', trim(__contents.name));
    quit;

    /* SQLOBS holds the number of pairs selected above; skip PROC DATASETS
       entirely when there is nothing to rename. */
    %if &sqlobs %then %do;
        proc datasets nolist lib=&lib;
            modify &mem;
            rename
            %do index = 1 %to &sqlobs;
                &&__rename&index
            %end;
            ;
        quit;
    %end;
%mend rename_by_regex;

%rename_by_regex(base2)
也许会有帮助。这会在列中添加前缀。您可以更改它以满足您的需要。我不确定您是否可以在SAS中使用通配符重命名。但是更好的方法是使用SAS字典表来重命名变量。你可以看这篇文章来了解更多。你真的是从数据集开始的吗?还是从读入数据集的文本文件开始?如果是后者,那么为什么不在读取文件时就直接使用您想要的变量名呢?鉴于此,在几行 proc sql update 之后,我有了一个具有正确列名的数据集(与原始数据集的顺序相同),我如何使用它来重命名所有变量?非常感谢,但是时间戳和我必须做的转换确实不一致。我宁愿在代码中手工进行每次更正(使用适当的“case-when”),然后用正确的名称批量重命名所有变量。把 old=new 对直接生成到单个宏变量中要容易得多:... catx('=', nliteral(name), nliteral(new_name)) into :rename separated by ' ' ... 这样就不需要宏,只需在 RENAME 语句中使用该列表:
rename &rename;