/*------------------------------------------------------------------* | MACRO NAME : symmchk | SHORT DESC : Determine whether the data of a variable are symmetric. *------------------------------------------------------------------* | CREATED BY : Cha, Stephen (03/26/2007 14:30) *------------------------------------------------------------------* | PURPOSE | | This macro checks for symmetry and suggests the best power | transformation, if one exists, to make asymmetric data | symmetric. | | *------------------------------------------------------------------* | MODIFIED BY : Cha, Stephen (07/16/2009 14:45) | | Modify it so that it meets the BSI SAS macro requirement standard. | *------------------------------------------------------------------* | OPERATING SYSTEM COMPATIBILITY | | UNIX SAS v8 : YES | UNIX SAS v9 : YES | MVS SAS v8 : YES | MVS SAS v9 : YES | PC SAS v8 : YES | PC SAS v9 : YES *------------------------------------------------------------------* | MACRO CALL | | %symmchk ( | data= , | var= , | outdata=_dumpout | ); *------------------------------------------------------------------* | REQUIRED PARAMETERS | | Name : data | Default : | Type : Dataset Name | Purpose : dataset name | | Name : var | Default : | Type : Variable Name (Single) | Purpose : The name of the variable to be checked. | *------------------------------------------------------------------* | OPTIONAL PARAMETERS | | Name : outdata | Default : _dumpout | Type : Dataset Name | Purpose : Output dataset name | *------------------------------------------------------------------* | RETURNED INFORMATION | | Word of caution: Never run it in interactive mode; run this SAS | job using the "nice +10 sas8 -work /localwork XXX.sas" command. It | might take 3 hours for N>400. | | | *------------------------------------------------------------------* | ADDITIONAL NOTES | | The symm_out macro is built in (it is defined inside symmchk). | | Modification: 10/16/2008 | Dr.
Andrew Kramer from Cerner.com has suggested to make the following | modifications: | 1. When only sums are required, replacing Proc Univariate with Proc Means. | 2. Using Proc Datasets to delete certain temporary files. These files | accumulate rather quickly and take up disk space. | *------------------------------------------------------------------* | EXAMPLES | | %symmchk(data=use,var=age,outdata=_dumpout); | | *------------------------------------------------------------------* | REFERENCES | | Mandrekar JN, Mandrekar SJ, Cha SS. Evaluating Methods of Symmetry, | Technical Report #73, Division of Biostatistics, Mayo Clinic, 2005. | | Mandrekar JN, Mandrekar SJ, Cha SS. An Intuitive Simulation Based | Approach for Assessing Symmetry. Journal of Statistics and Applications, | 1(1): 113-120, 2006. | | | *------------------------------------------------------------------* | Copyright 2009 Mayo Clinic College of Medicine. | | This program is free software; you can redistribute it and/or | modify it under the terms of the GNU General Public License as | published by the Free Software Foundation; either version 2 of | the License, or (at your option) any later version. | | This program is distributed in the hope that it will be useful, | but WITHOUT ANY WARRANTY; without even the implied warranty of | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | General Public License for more details. 
*------------------------------------------------------------------*/

%macro symmchk(data=,var=,out=_dumpout,outdata=);

  /*------------------------------------------------------------------
   | symmchk: entry point of the symmetry check.
   |
   | Parameters (all keyword):
   |   data    = input dataset name (required)
   |   var     = name of the single variable to check (required)
   |   out     = output dataset name, default _dumpout
   |   outdata = alias of out=. The file header documents the output
   |             parameter as outdata=, while the signature historically
   |             declared only out=, so the documented call was rejected
   |             as an undefined keyword parameter. Both spellings are
   |             accepted; a non-blank outdata= takes precedence.
   |
   | Visible steps: validate the required parameters, snapshot the
   | active TITLE lines into macro variables TITLE1..TITLEn, then define
   | the nested macro symm_out.
   *-----------------------------------------------------------------*/

  /* check Input parameters */
  %local errorflg;
  %LET errorflg = 0;

  /* Keep out and outdata in sync so that whichever name the rest of the
     macro references resolves to the same dataset name. */
  %if %length(&outdata) > 0 %then %LET out = &outdata;
  %else %LET outdata = &out;

  %if &data= %then %do;
    %put ERROR - dataset not defined;
    %LET errorflg = 1;
  %end;

  %if &var= %then %do;
    %put ERROR - Variable not defined;
    %LET errorflg = 1;
  %end;

  %if &errorflg = 1 %then %do;
    /* The step opened here is closed by the RUN that follows the
       exit label at the bottom of the macro. */
    data _null_;
      error 'ERROR - detected in the input data to the macro .';
    %go to exit;
  %end;

  /* assign title part */

  /* Copy the titles currently in effect into WORK._T. */
  proc sql ;
    create table work._t as
      select * from dictionary.titles where type='T';
    reset noprint;
  quit;

  /* T = number of rows in WORK._T, i.e. number of active titles. */
  proc sql;
    reset noprint;
    select nobs into :T
      from dictionary.tables
      where libname="WORK" & memname="_T";
  quit;

  %LET TITLE1= ; /* Initialize at least one title */

  /* Store each title text in macro variable TITLE<number>. */
  data _null_;
    set _t;
    %IF (&T>=1) %THEN %DO I=1 %TO &T;
      if number=&I then call symput("TITLE&I", trim(left(text)));
    %END;
  run;

  /* One extra title line is reserved (TNEW); SAS allows at most 10
     title lines, so TSHOW is capped to leave room for it. */
  %LET TNEW = 1;
  %LET TOTALT = %EVAL(&T + &TNEW);
  %IF &TOTALT<=10 %THEN %LET TSHOW=&T;
  %ELSE %LET TSHOW = %EVAL(10 - &TOTALT + &T);
  %LET NEXTT1=%EVAL(&TSHOW+1);

  /* symm_out macro is within symmchk */

  /********************************************************************
   *                                                                  *
   * Macro name: SYMM_out                                             *
   * Investigator: Jay & S Mandrekar                                  *
   * Programmer: Stephen Cha                                          *
   * Date Created: 01/05/2004                                         *
   * Date last modified: 01/03/2005                                   *
   *                                                                  *
   * Function:                                                        *
   *   This macro is designed to check whether the data is symmetric. *
   *                                                                  *
   * Parameters:                                                      *
   *   datatemp = name of the dataset                                 *
   *   chkvar   = variable to be checked for symmetry                 *
   * Optional parameters:                                             *
   *   cutp = P-value from the non parametric symmetry test           *
   *          statistics (an output variable)                         *
   *   outp = output flag (1=print output 0=Save output)              *
   *   outd = output data printout                                    *
   *                                                                  *
   * Example:                                                         *
   *   %symm_out(datatemp=use, chkvar=age, cutp=cut,                  *
   *             outd=datout,outp=0);                                 *
   *                                                                  *
   * Reference: "Are our data symmetric?" by Sumithra Mandrekar and   *
   *   Jayawant Mandrekar. Statistical Methods in Medical Research    *
   *   2003: Volume 12 pp 505-513                                     *
   *                                                                  *
   * Word of caution: Never run it in interactive mode; run this SAS  *
   *   job using "nice +10 sas8 -work /localwork XXX.sas". It might   *
   *   take 3 hours for N=400.                                        *
   *                                                                  *
   ********************************************************************/
  %macro symm_out(datatemp=,chkvar=,cutp=, outd=, outp=1);

    /* Keep non-missing values of the analysis variable as varuse. */
    data _tmp0;
      set &datatemp;
      varuse=&chkvar;
      if varuse^=.;
    proc sort;
      by varuse;

    /* newidx = 1-based rank in ascending order of varuse;
       dummy = constant BY key used to merge summary rows back. */
    data _tmp1;
      set _tmp0;
      by varuse;
      if _N_=1 then do;
        newidx=0;
        retain newidx;
      end;
      newidx=newidx+1;
      dummy=1;
      keep newidx varuse dummy;
    proc sort;
      by dummy;

    /* Summary statistics of varuse. */
    proc means noprint data=_tmp1;
      var varuse;
      by dummy;
      output out=_ddd mean=varmean median=varmed skewness=varskew
                      std=varstd Q1=q1 Q3=q3 N=totaln;

    /* 12.5th and 87.5th percentiles: new12_5, new87_5. */
    proc univariate noprint data=_tmp1;
      var varuse;
      by dummy;
      output out=_ddd2 pctlpts=12.5 87.5 pctlpre=new;

    /* Lower percentiles dum0..dum49. */
    proc univariate noprint data=_tmp1;
      var varuse;
      by dummy;
      output out=_ddd3
        pctlpts=0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
                25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
                47 48 49
        pctlpre=dum;

    /* Upper percentiles dum100..dum51. */
    proc univariate noprint data=_tmp1;
      var varuse;
      by dummy;
      output out=_ddd4
        pctlpts=100 99 98 97 96 95 94 93 92 91 90 89 88 87 86 85 84 83 82 81 80
                79 78 77 76 75 74 73 72 71 70 69 68 67 66 65 64 63 62 61 60 59 58
                57 56 55 54 53 52 51
        pctlpre=dum;

    /* One row per mirrored percentile pair (p, 100-p), p = 0..49:
       x is the midpoint of the pair. */
    data _ddd5;
      merge _ddd3 _ddd4 _ddd;
      by dummy;
      if first.dummy then do;
        id=0;
        retain id;
      end;
      keep id x dummy varmed varstd totaln;
      id=id+1; x=(dum100+dum0)/2; output;
      id=id+1; x=(dum99+dum1)/2; output;
      id=id+1; x=(dum98+dum2)/2; output;
      id=id+1; x=(dum97+dum3)/2; output;
      id=id+1; x=(dum96+dum4)/2; output;
      id=id+1; x=(dum95+dum5)/2; output;
      id=id+1; x=(dum94+dum6)/2; output;
      id=id+1; x=(dum93+dum7)/2; output;
      id=id+1; x=(dum92+dum8)/2; output;
      id=id+1; x=(dum91+dum9)/2; output;
      id=id+1; x=(dum90+dum10)/2; output;
      id=id+1; x=(dum89+dum11)/2; output;
      id=id+1; x=(dum88+dum12)/2; output;
      id=id+1; x=(dum87+dum13)/2; output;
      id=id+1; x=(dum86+dum14)/2; output;
      id=id+1; x=(dum85+dum15)/2; output;
      id=id+1; x=(dum84+dum16)/2; output;
      id=id+1; x=(dum83+dum17)/2; output;
      id=id+1; x=(dum82+dum18)/2; output;
      id=id+1; x=(dum81+dum19)/2; output;
      id=id+1; x=(dum80+dum20)/2; output;
      id=id+1; x=(dum79+dum21)/2; output;
      id=id+1; x=(dum78+dum22)/2; output;
      id=id+1; x=(dum77+dum23)/2; output;
      id=id+1; x=(dum76+dum24)/2; output;
      id=id+1; x=(dum75+dum25)/2; output;
      id=id+1; x=(dum74+dum26)/2; output;
      id=id+1; x=(dum73+dum27)/2; output;
      id=id+1; x=(dum72+dum28)/2; output;
      id=id+1; x=(dum71+dum29)/2; output;
      id=id+1; x=(dum70+dum30)/2; output;
      id=id+1; x=(dum69+dum31)/2; output;
      id=id+1; x=(dum68+dum32)/2; output;
      id=id+1; x=(dum67+dum33)/2; output;
      id=id+1; x=(dum66+dum34)/2; output;
      id=id+1; x=(dum65+dum35)/2; output;
      id=id+1; x=(dum64+dum36)/2; output;
      id=id+1; x=(dum63+dum37)/2; output;
      id=id+1; x=(dum62+dum38)/2; output;
      id=id+1; x=(dum61+dum39)/2; output;
      id=id+1; x=(dum60+dum40)/2; output;
      id=id+1; x=(dum59+dum41)/2; output;
      id=id+1; x=(dum58+dum42)/2; output;
      id=id+1; x=(dum57+dum43)/2; output;
      id=id+1; x=(dum56+dum44)/2; output;
      id=id+1; x=(dum55+dum45)/2; output;
      id=id+1; x=(dum54+dum46)/2; output;
      id=id+1; x=(dum53+dum47)/2; output;
      id=id+1; x=(dum52+dum48)/2; output;
      id=id+1; x=(dum51+dum49)/2; output;
    proc sort;
      by dummy id;

    /* s5 = fraction of the 50 midpoints that lie within
       1.96*varstd/sqrt(totaln) of the median. */
    data _ddd6;
      set _ddd5;
      by dummy id;
      keep s5 overtm dummy;
      if first.dummy then do;
        overtm=0;
        retain overtm;
      end;
      if abs(x-varmed)<=1.96*varstd/sqrt(totaln) then overtm=overtm+1;
      if last.dummy then do;
        s5=overtm/50;
        output;
      end;

    /* Attach the summaries to every observation. pick flags the lower
       (1) or upper (2) half by rank; varnew is the distance from the
       median. s3 and s3p are the quartile-based and 12.5/87.5
       percentile-based skewness ratios. */
    data _tmp2;
      merge _tmp1(in=in1) _ddd _ddd2 _ddd6;
      by dummy;
      if newidx<=totaln/2 then do;
        pick=1;
        varnew=varmed-varuse;
      end;
      if newidx> totaln/2 then do;
        pick=2;
        varnew=varuse-varmed;
      end;
      s3=(q1-varmed*2+q3)/(q3-q1);
      s3p=(new12_5-varmed*2+new87_5)/(new87_5-new12_5);
    proc sort;
      by newidx;

    /* Running sums over the ordered data; a single row is written at
       end of file with the normalised weighted sums w1..w4 and the
       moment-based skewness skew1. */
    data _tmp3;
      set _tmp2 end=eof;
      by newidx;
      keep newidx w1 w2 w3 w4 totaln varmean varskew skew1 dblsum trisum s3 s3p;
      label s3="S4" s3p="S4'";
      if newidx=1 then do;
        w2=0; w3=0; w4=0; w1=0;
        trisum=0; dblsum=0;
        retain w1 w2 w3 w4 trisum dblsum;
      end;
      w1=w1+varuse;
      trisum=trisum+(varuse-varmean)**3;
      dblsum=dblsum+(varuse-varmean)**2;
      if newidx>=2 then do;
        w2=w2+(newidx-1)*varuse;
      end;
      if newidx>=3 then do;
        w3=w3+(newidx-1)*(newidx-2)*varuse;
      end;
      if newidx>=4 then do;
        w4=w4+(newidx-1)*(newidx-2)*(newidx-3)*varuse;
      end;
      if eof then do;
        w1=w1/totaln;
        w2=w2/(totaln*(totaln-1));
        w3=w3/(totaln*(totaln-1)*(totaln-2));
        w4=w4/(totaln*(totaln-1)*(totaln-2)*(totaln-3));
        skew1=(trisum/totaln)/((dblsum/totaln)**1.5);
        output;
      end;
      /* Keyword was misspelled as "lable" in the original. */
      label skew1="S1";

    /* Combine w1..w4 into w5..w7 (labelled l2, l3, l4) and the ratio
       statistics skew2, skew3, skew4. */
    data _tmp4;
      set _tmp3;
      w5=2*w2-w1;
      w6=6*w3-6*w2+w1;
      w7=20*w4-30*w3+12*w2-w1;
      skew2=w6/w5;
      skew3=(1+skew2)/(1-skew2);
      skew4=w7/w5;
      label w5="l2" w6="l3" w7="l4" skew2="S2" skew3="S2'" varmean="Mean";

    data _tmp6;
      set _tmp2;
      drop dummy varmean varskew;

    data _tmp7;
      set _tmp6(keep=newidx varuse totaln s5);
      i1=newidx;
      x1=varuse;
      /* NOTE(review): the source text is truncated at this point. The
         fragment below starts inside a data step statement and ends
         inside a macro %IF, which indicates that everything between a
         less-than sign and a later greater-than sign was lost. The
         rest of data _tmp7, the remainder of symm_out including its
         %mend, and all code up to the title-restoring loop are missing
         from this copy and must be restored from the original
         distribution before the macro can compile. The fragment is
         kept verbatim and has not been reconstructed. */
      if 0=1) %THEN %DO I=1 %TO &T;
        title&I "&&TITLE&I";
      %END;

  %exit:
  run;

%mend symmchk;