******************************************************************************;
* Program: C:\NHANES\Linearregression_sas_update_9.2.sas                      ;
*                                                                             ;
* Purpose: Linear regression analysis using SAS Survey Procedures             ;
******************************************************************************;

LIBNAME NH "C:\NHANES\DATA";

OPTIONS NODATE NOCENTER;
options ls=72;

/* Value labels for the raw and recoded variables. The formats are only
   defined here; no FORMAT statement in this program attaches them. */
proc format;
  VALUE sexfmt
    1 = 'Male'
    2 = 'Female'
  ;
  VALUE sex2fmt
    1 = 'Female'
    2 = 'Male'
  ;
  VALUE race2fmt
    1='Mexican Americans'
    3='Non-Hispanic white'
    4='Non-Hispanic black'
  ;
  VALUE race3fmt
    1='Mexican American'
    2='Non-Hispanic black'
    5='Non-Hispanic white'
  ;
  VALUE smkfmt
    1='Never smoker'
    2='Past smoker'
    3='Current smoker'
  ;
  VALUE educ
    1="< HS"
    2="HS/GED"
    3="> HS"
  ;
  VALUE bmicat
    1="underweight"
    2="normal weight"
    3="overweight"
    4="obese";
  VALUE bmicatf
    1="under weight"
    2="overweight"
    3="obese"
    4="normal weight";
run;

/* Build the analysis file: derive the recoded covariates and the
   "eligible" domain indicator used by every PROC SURVEYREG step below. */
data analysis_data;
  set nh.analysis_data;

  /* Subsetting IF: keep all MEC exam data (ridstatr = 2). */
  if ridstatr=2;

  /* Set don't know and refused (7,9) to missing. */
  if dmdeduc>3 then dmdeduc=.;

  /* Define smokers; the codes 1/2/3 line up with the smkfmt labels
     (never / past / current). Rows matching none of the branches keep
     smoker missing. */
  if smq020 eq 2 then smoker=1;
  else if smq020 eq 1 and smq040 eq 3 then smoker=2;
  else if smq020 eq 1 and smq040 in(1,2) then smoker=3;

  /* For input to SAS PROC SURVEYREG - recode gender so that men is the
     reference group (men get the highest code). */
  if riagendr eq 1 then sex=2;
  else if riagendr eq 2 then sex=1;

  /* For input to SAS PROC SURVEYREG - recode race/ethnicity so that
     non-Hispanic white is the reference group (it gets the highest code, 5).
     ridreth1 = 1 is carried over unchanged by the initial assignment. */
  ethn=ridreth1;
  if ridreth1 eq 3 then ethn=5;
  else if ridreth1 eq 4 then ethn=2;
  else if ridreth1 eq 2 then ethn=3;
  else if ridreth1 eq 5 then ethn=4;

  /* BMI category with normal weight coded last (4), see format bmicatf.
     The lower bound of 0 keeps missing bmxbmi out of the first category. */
  if 0 le bmxbmi lt 18.5 then bmicatf=1;
  else if 18.5 le bmxbmi lt 25 then bmicatf=4;
  else if 25 le bmxbmi lt 30 then bmicatf=2;
  else if bmxbmi ge 30 then bmicatf=3;

  /* BMI category in natural order, see format bmicat. */
  if 0 le bmxbmi lt 18.5 then bmicat=1;
  else if 18.5 le bmxbmi lt 25 then bmicat=2;
  else if 25 le bmxbmi lt 30 then bmicat=3;
  else if bmxbmi ge 30 then bmicat=4;

  /* Domain flag: complete data on outcome and covariates, positive MEC
     weight, and age 20 or older. Rows failing the test keep eligible
     missing because the ELSE branch below is disabled. */
  if (lbdhdl^=. and riagendr^=. and ridreth1^=. and smoker^=.
      and dmdeduc^=. and bmxbmi^=.)
     and wtmec4yr>0 and (ridageyr>=20) then eligible=1;
  /* else eligible=2; */

  label
    riagendr='Gender'
    sex = 'Gender - recode'
    ridreth1='Race/ethnicity'
    ridageyr='Age in years'
    ethn = 'Race/ethnicity - recode'
    dmdeduc='Education'
    bmicatf='BMI category';
run;

/* Optional data checks, disabled. Written as a single block comment because
   SAS block comments do not nest.

proc freq data=analysis_data;
  tables eligible;
run;

proc means data=analysis_data;
  where wtmec4yr>0;
  var wtmec4yr lbdhdl bmxbmi;
run;
*/

/* ---------------- Simple regression models ---------------- */

/* HDL on continuous BMI. */
PROC SURVEYREG data=analysis_data nomcar;
  STRATA sdmvstra;
  CLUSTER sdmvpsu;
  WEIGHT wtmec4yr;
  MODEL lbdhdl= bmxbmi/CLPARM vadjust=none;
  DOMAIN eligible;
  /* ods select ParameterEstimates; */
  TITLE 'Linear regression model for high density lipoprotein and body mass index: NHANES 1999-2002';
run;

/* HDL on bmicat.
   NOTE(review): bmicat is not named in a CLASS statement, so it enters the
   model as a single numeric (linear) term rather than as category
   indicators. Confirm this is intended; if category contrasts are wanted,
   add "CLASS bmicat;" and the SOLUTION option. */
PROC SURVEYREG data=analysis_data nomcar;
  STRATA sdmvstra;
  CLUSTER sdmvpsu;
  WEIGHT wtmec4yr;
  DOMAIN eligible;
  MODEL lbdhdl= bmicat/CLPARM vadjust=none;
  TITLE 'Linear regression model for high density lipoprotein and body mass index: NHANES 1999-2002';
run;

/* HDL on categorical BMI (bmicatf as a CLASS variable). */
PROC SURVEYREG data=analysis_data nomcar;
  STRATA sdmvstra;
  CLUSTER sdmvpsu;
  WEIGHT wtmec4yr;
  CLASS bmicatf;
  DOMAIN eligible;
  MODEL lbdhdl= bmicatf/CLPARM solution vadjust=none;
  TITLE 'Linear regression model for high density lipoprotein and body mass index: NHANES 1999-2002';
run;

/* ---------------- Multiple regression model ---------------- */

/* Each effect is listed once: the MODEL statement previously named smoker
   twice, and the redundant second occurrence has been removed. */
PROC SURVEYREG data=analysis_data nomcar;
  STRATA sdmvstra;
  CLUSTER sdmvpsu;
  WEIGHT wtmec4yr;
  CLASS sex ethn smoker dmdeduc bmicatf;
  DOMAIN eligible;
  MODEL lbdhdl= sex ethn ridageyr smoker dmdeduc bmicatf/CLPARM solution vadjust=none;
  /* Contrast of smoker level 1 against level 2. */
  ESTIMATE 'Never vs past smoker' smoker 1 -1 0;
  TITLE 'Linear regression model for high density lipoprotein and selected covariates: NHANES 1999-2002';
run;

/* ------ Example of multiple regression model with interaction term ------ */

PROC SURVEYREG data=analysis_data nomcar;
  STRATA sdmvstra;
  CLUSTER sdmvpsu;
  WEIGHT wtmec4yr;
  CLASS sex ethn smoker dmdeduc;
  DOMAIN eligible;
  MODEL lbdhdl= sex ethn ridageyr bmxbmi smoker dmdeduc smoker*bmxbmi/CLPARM solution vadjust=none;
  ESTIMATE 'Never vs past smoker' smoker 1 -1 0;
  TITLE 'Linear regression model for high density lipoprotein and selected covariates: NHANES 1999-2002';
run;

/*
SOURCE: SAS 9.2 Documentation, SAS/STAT(R) 9.2 User's Guide

Note: NOMCAR requests that the procedure treat missing values in the
variance computation as not missing completely at random (NOMCAR) for
Taylor series variance estimation. When you specify the NOMCAR option,
PROC SURVEYREG computes variance estimates by analyzing the nonmissing
values as a domain or subpopulation, where the entire population includes
both nonmissing and missing domains. See the section Missing Values for
more details.

By default, PROC SURVEYREG completely excludes an observation from analysis
if that observation has a missing value, unless you specify the MISSING
option. Note that the NOMCAR option has no effect on a classification
variable when you specify the MISSING option, which treats missing values
as a valid nonmissing level.

The NOMCAR option applies only to Taylor series variance estimation. The
replication methods, which you request with the VARMETHOD=BRR and
VARMETHOD=JACKKNIFE options, do not use the NOMCAR option.

Note that when there is a CLASS statement, you need to use the SOLUTION
option with the CLPARM option to obtain the parameter estimates and their
confidence limits.

Note: VADJUST=DF | NONE specifies whether to use degrees of freedom
adjustment in the computation of the matrix for the variance estimation.
If you do not specify the VADJUST= option, by default, PROC SURVEYREG uses
the degrees-of-freedom adjustment that is equivalent to the VADJUST=DF
option. If you do not want to use this variance adjustment, you can specify
the VADJUST=NONE option.
*/