/************************************************************************
 * Program: C:\NHANES\linearregression_suddan_update.sas
 *
 * Purpose: Linear regression using SUDAAN
 *
 * Steps:
 *   1. Define display formats for the categorical variables.
 *   2. Build WORK.analysis_data from NH.analysis_data: recode education,
 *      derive smoker / sex / ethn / bmicat, and flag eligible records.
 *   3. Sort by the design variables used on the NEST statements.
 *   4. Fit simple and multiple linear regressions of lbdhdl (PROC REGRESS),
 *      print crude means (PROC DESCRIPT), and fit an interaction model.
 ************************************************************************/

LIBNAME NH "C:\NHANES\DATA";

/* NONUMBER is spelled out in full: the abbreviated form "nonum" is not a
   documented SAS system option name. */
OPTIONS NODATE NOCENTER NONUMBER;
options ls=72;

/* Display formats applied through RFORMAT in the SUDAAN steps below. */
proc format;
    VALUE sexfmt
        1 = 'Male'
        2 = 'Female'
    ;
    VALUE race2fmt
        1='Mexican Americans'
        2='Other Hispanic'
        3='Non-Hispanic white'
        4='Non-Hispanic black'
        5='Other multi/racial'
    ;
    VALUE smkfmt
        1='Never smoker'
        2='Past smoker'
        3='Current smoker'
    ;
    VALUE educ
        1="< HS"
        2="HS/GED"
        3="> HS"
    ;
    VALUE bmicat
        1="underweight"
        2="normal weight"
        3="overweight"
        4="obese"
    ;
run;

data analysis_data;
    set nh.analysis_data;

    /*set don't know and refused (7,9) to missing*/
    /* Note: the test below blanks every education code above 3, not only 7 and 9. */
    if dmdeduc>3 then dmdeduc=.;

    /*define smokers*/
    /* 1 = never, 2 = past, 3 = current (see smkfmt). Any other combination of
       smq020 / smq040 leaves smoker missing, which later excludes the record
       from the eligible subpopulation. */
    if smq020 eq 2 then smoker=1;
    else if smq020 eq 1 and smq040 eq 3 then smoker=2;
    else if smq020 eq 1 and smq040 in(1,2) then smoker=3;

    /*for input to SAS PROC SURVEYREG - recode gender so that men is the reference group*/
    /* sex is not referenced by any SUDAAN step in this program. */
    if riagendr eq 1 then sex=2;
    else if riagendr eq 2 then sex=1;

    /*for input to SAS PROC SURVEYREG - recode race/ethnicity so that non-Hispanic white is the reference group*/
    /* ethn starts as a copy of ridreth1 (so code 1 stays 1) and the remaining
       codes are remapped: 3->5, 4->2, 2->3, 5->4. ethn is not referenced by
       any SUDAAN step in this program. */
    ethn=ridreth1;
    if ridreth1 eq 3 then ethn=5;
    else if ridreth1 eq 4 then ethn=2;
    else if ridreth1 eq 2 then ethn=3;
    else if ridreth1 eq 5 then ethn=4;

    /* BMI category (see bmicat format). A missing or negative bmxbmi matches
       none of the branches, so bmicat stays missing. */
    if 0 le bmxbmi lt 18.5 then bmicat=1;
    else if 18.5 le bmxbmi lt 25 then bmicat=2;
    else if 25 le bmxbmi lt 30 then bmicat=3;
    else if bmxbmi ge 30 then bmicat=4;

    /* eligible=1 requires: no missing outcome or covariate, a positive
       wtmec4yr weight, and age 20 or older. All other records keep eligible
       missing and fall outside SUBPOPN eligible=1. */
    if (lbdhdl^=. and riagendr^=. and ridreth1^=. and ridageyr^=. and smoker^=. and dmdeduc^=. and bmxbmi^=.)
        and wtmec4yr>0 and (ridageyr>=20) then eligible=1;

    label
        riagendr='Gender'
        ridreth1='Race/ethnicity'
        ethn = 'Race/ethnicity - recode'
        ridageyr='Age in years'
        dmdeduc='Education'
        bmicat='BMI category'
    ;
run;

/* Order the data by the variables named on the NEST statements below. */
proc sort data=analysis_data;
    by sdmvstra sdmvpsu;
run;

**Simple Regression models**;

/* Model 1: HDL on continuous BMI. */
proc regress data=analysis_data;
    subpopn eligible=1;
    nest sdmvstra sdmvpsu;
    weight wtmec4yr;
    model lbdhdl= bmxbmi;
    rtitle "Linear regression model for high density lipoprotein and body mass index: NHANES 1999-2002";
run;

/* Model 2: HDL on bmicat with no CLASS statement, so the 1-4 category code
   enters as a single numeric term.
   NOTE(review): presumably kept on purpose for contrast with Model 3 - confirm. */
proc regress data=analysis_data;
    subpopn eligible=1;
    nest sdmvstra sdmvpsu;
    weight wtmec4yr;
    model lbdhdl= bmicat;
    rtitle "Linear regression model for high density lipoprotein and body mass index: NHANES 1999-2002";
run;

/* Model 3: HDL on bmicat as a categorical effect, reference level 2
   ("normal weight"). */
proc regress data=analysis_data;
    subpopn eligible=1;
    nest sdmvstra sdmvpsu;
    weight wtmec4yr;
    class bmicat/nofreq;
    reflevel bmicat=2;
    model lbdhdl=bmicat;
    rformat bmicat bmicat.;
    rtitle "Linear regression model for high density lipoprotein and body mass index: NHANES 1999-2002";
run;

*run multiple regression model;
/* smoker has no REFLEVEL entry, so its reference level is whatever SUDAAN
   uses by default. */
proc regress data=analysis_data;
    subpopn eligible=1;
    nest sdmvstra sdmvpsu;
    weight wtmec4yr;
    class riagendr ridreth1 smoker dmdeduc bmicat/nofreq;
    reflevel riagendr=1 ridreth1=3 dmdeduc=3 bmicat=2;
    model lbdhdl= riagendr ridreth1 ridageyr smoker dmdeduc bmicat;
    /* Contrast of smoker level 1 against level 2. */
    effects smoker=(1 -1 0)/ name="Never smoker vs. past smoker";
    lsmeans bmicat riagendr ridreth1 smoker dmdeduc;
    test waldf satadjf satadjchi;
    rformat riagendr sexfmt.;
    rformat ridreth1 race2fmt.;
    rformat smoker smkfmt.;
    rformat dmdeduc educ.;
    rformat bmicat bmicat.;
    rtitle "Linear regression model for high density lipoprotein and selected covariates: NHANES 1999-2002";
run;

*run crude mean HDL for each independent variable;
proc descript data=analysis_data design=wr ;
    subpopn eligible=1;
    NEST sdmvstra sdmvpsu;
    weight wtmec4yr;
    class riagendr ridreth1 smoker dmdeduc bmicat/nofreq;
    var lbdhdl;
    table bmicat riagendr ridreth1 smoker dmdeduc ;
    PRINT nsum="Sample Size" mean="Crude Mean" semean="Standard Error"
        / nohead notime style=nchs nsumfmt=F7.0 meanfmt=F9.2 semeanfmt=F9.3 ;
    rformat riagendr sexfmt.;
    rformat ridreth1 race2fmt.;
    rformat smoker smkfmt.;
    rformat dmdeduc educ.;
    rformat bmicat bmicat.;
run;

******Example of multiple regression model with interaction term***;
/* This model uses continuous bmxbmi (through smoker*bmxbmi) instead of the
   categorical bmicat. bmicat is neither on CLASS nor in MODEL here, so it
   has been removed from REFLEVEL, LSMEANS and RFORMAT, where it was left
   over from the previous model.
   NOTE(review): MODEL has no bmxbmi main effect, so the smoker*bmxbmi terms
   presumably act as one BMI slope per smoker level - confirm this
   parameterization is intended. */
proc regress data=analysis_data;
    subpopn eligible=1;
    nest sdmvstra sdmvpsu;
    weight wtmec4yr;
    class riagendr ridreth1 smoker dmdeduc/nofreq;
    reflevel riagendr=1 ridreth1=3 dmdeduc=3;
    model lbdhdl= riagendr ridreth1 ridageyr smoker dmdeduc smoker*bmxbmi;
    effects smoker=(1 -1 0)/ name="Never smoker vs. past smoker";
    lsmeans riagendr ridreth1 smoker dmdeduc;
    test waldf satadjf satadjchi;
    rformat riagendr sexfmt.;
    rformat ridreth1 race2fmt.;
    rformat smoker smkfmt.;
    rformat dmdeduc educ.;
    rtitle "Linear regression model for high density lipoprotein and selected covariates: NHANES 1999-2002";
run;