在SAS中創建新的變量

2014-10-16 31 views
0

在SAS中創建新的變量:我在SAS中有一個數據集,看起來是這樣的:

/* Sample panel data: one row per person (id) and year (time),
   holding that person's income for the year. */
data have;
    input id time income;
    datalines;
1 2008 1000
1 2009 900
1 2010 1100
2 2008 600
2 2009 500
2 2010 400
3 2008 300
3 2009 350
3 2010 250
;
run;

對於每一個人(id),我希望創建一個新的列(名爲income_<id>):該列在此人的所有時間段內等於其收入,而在其他所有人的行中爲零。所以基本上我想要的結果是這樣的:

/* Desired result: for each person k there is a column income_k that
   repeats income on that person's rows and is 0 on everyone else's. */
data want;
    input id time income income_1 income_2 income_3;
    datalines;
1 2008 1000 1000 0 0
1 2009 900 900 0 0
1 2010 1100 1100 0 0
2 2008 600 0 600 0
2 2009 500 0 500 0
2 2010 400 0 400 0
3 2008 300 0 0 300
3 2009 350 0 0 350
3 2010 250 0 0 250
;
run;

感謝

回答

1

一個直觀的方法是使用宏。

Yunchao Tian有一篇很好的SUGI論文,解釋瞭如何執行這個任務(見此處)。

我改編了這裏的代碼給你。我測試了它,它似乎工作正常。

/* Reduce HAVE to a single row per id, sorted ascending by id, in UNIQUE. */
PROC SORT DATA=have OUT=unique NODUPKEY;
    BY id;
RUN;

/* Assign the largest value of id to the macro variable NMAX.
   UNIQUE is sorted by id, so its last row carries the maximum.
   CALL SYMPUTX strips surrounding blanks and, unlike PUT(id, 3.),
   is not limited to three digits, so ids of 1000 and above are kept intact. */
data _null_;
    set unique end=last;
    if last then call symputx('NMAX', id);
run;

/* Create the macro variables M1 .. M&NMAX and set each one to 0.
   CATS builds the name without the three-digit limit of PUT(i, 3.),
   so more than 999 variables can be created; for i <= 999 the names
   are the same as before (M1, M2, ...). */
data _null_;
    do i = 1 to &NMAX;
        call symputx(cats('M', i), '0');
    end;
run;

/* Assign the value of id to the corresponding macro variable M<id>.
   Reads the de-duplicated UNIQUE dataset instead of every row of HAVE:
   one assignment per distinct id is enough. CATS / CALL SYMPUTX avoid the
   three-digit limit of PUT(id, 3.) and store the value without blanks.
   NOTE: assumes id is a positive integer, since it becomes part of a
   macro variable name. */
data _null_;
    set unique;
    call symputx(cats('M', id), id);
run;

/* Macro that generates, for every I from 1 to &NMAX whose macro variable
   M<I> is not 0, the DATA step statements

       IF ID = <id> THEN income_<I> = income;
       ELSE income_<I> = 0;

   It must be invoked inside a DATA step that has ID and INCOME available.
   Reads the global macro variables NMAX and M1 .. M&NMAX. */
%MACRO GETID;
    /* keep the loop index private so a caller's own I is not overwritten */
    %LOCAL I;
    %DO I = 1 %TO &NMAX;
        /* M<I> = 0 means no such id: emit nothing for it */
        %IF &&M&I NE 0 %THEN %DO;
            IF ID = &&M&I THEN income_&I = income;
            ELSE income_&I = 0;
        %END;
    %END;
%MEND GETID;

/* Build WANT from HAVE; %GETID expands into the IF/ELSE statements
   that fill the income_<n> columns. Then list the result. */
data want;
    set have;
    %GETID
run;

proc print data=want;
run;
0
/* Capture the smallest and largest id (id must be numeric) as left-aligned
   text in the macro variables minId and maxId; they serve as array bounds. */
PROC SQL NOPRINT;
    SELECT PUT(MIN(id), 16. -L),
           PUT(MAX(id), 16. -L)
        INTO :minId, :maxId
        FROM have;
QUIT;

/* Variant that zeroes the other variables; could be slow if there are
   lots of distinct IDs.
   The array is declared with explicit bounds &minId:&maxId so that it can be
   indexed directly by id. With the default bounds (1..n) the subscript is out
   of range whenever the smallest id is not 1.
   NOTE: assumes ids are non-negative integers, as required by the numbered
   variable list income_&minId - income_&maxId. */
data want1;
    set have;
    array arr_income[&minId:&maxId] income_&minId - income_&maxId;
    do i = &minId to &maxId;
        if i = id then arr_income[i] = income;
        else arr_income[i] = 0;
    end;
    /* the loop counter is a work variable, not part of the result */
    drop i;
run;

/* Variant without zeroing: columns of other ids stay missing.
   Explicit bounds &minId:&maxId let the array be indexed directly by id;
   with the default bounds (1..n) the subscript is out of range whenever
   the smallest id is not 1. */
data want2;
    set have;
    array arr_income[&minId:&maxId] income_&minId - income_&maxId;
    arr_income[id] = income;
run;

注:聲明 array arr_income income_&minId - income_&maxId; 會爲最小 id 與最大 id 之間的所有整數創建變量 income_<i>,包括數據中不存在的 id。

0
/* Sample panel data: one row per person (id) and year (time),
   holding that person's income for the year. */
data have;
    input id time income;
    datalines;
1 2008 1000
1 2009 900
1 2010 1100
2 2008 600
2 2009 500
2 2010 400
3 2008 300
3 2009 350
3 2010 250
;
run;

/* Collect the number of distinct ids in COUNT and the ids themselves in
   the macro variables id1 .. id&count. */
proc sql;
    select count(distinct(id)) into :count from have;
    /* re-assigning strips the leading blanks left by INTO */
    %let count = &count;
    select distinct(id) into :id1 - :id&count from have;
quit;

options mprint;

/* Builds HAVE2 from HAVE with one extra column per distinct id:
   income_<id> equals income on that id's rows and 0 on all other rows.
   - Columns are named after the id value itself (not its ordinal position),
     so ids such as 1, 2, 5 give income_1, income_2, income_5.
   - No BY statement: nothing here uses BY-group processing, and a BY
     statement would make the step fail on unsorted input.
   NOTE: assumes ids are non-negative integers so that income_<id> is a
   valid variable name. */
%macro test;
    %local i;

    /* log every captured id, however many there are */
    %do i = 1 %to &count;
        %put id&i = &&id&i;
    %end;

    data have2;
        set have;
        %do i = 1 %to &count;
            if id = &&id&i then income_&&id&i = income;
            else income_&&id&i = 0;
        %end;
    run;
%mend test;

%test;

proc print data=have2;
run;