*-------------------------------------------------------------------------; * This program is used to estimate mean intakes of calcium from ; * supplements and prevalence of supplemental calcium use among women 20 ; * years and older in the United States. Supplemental calcium intake is ; * also analyzed with respect to treatment for osteoporosis. ; *-------------------------------------------------------------------------; *-------------------------------------------------------------------------; * The LIBNAME statement is used to denote the location where the data will; * be stored. ; *-------------------------------------------------------------------------; libname NH "C:\NHANES\DATA"; *-------------------------------------------------------------------------; * Use the PROC FORMAT procedure to assign text labels to the numeric ; * values of user-defined formats ; *-------------------------------------------------------------------------; proc format; value AGEGRP 1='20-39' 2='40-59' 3='>= 60'; value YESNO 1='Yes' 2='No' 3='Missing/Unk'; value YESNOS 1='Used any calcium supp' 2='No supp use'; value GENDER 1='Male' 2='Female'; run; *-------------------------------------------------------------------------; * Use the PROC SORT procedure and BY statement to sort the 2003-2004 ; * demographic data (DEMO_C) and the osteoporosis data (OSQ_C) by SEQN. ; *-------------------------------------------------------------------------; proc sort data=NH.DEMO_C out=DEMO_C; by SEQN; run; proc sort data=NH.OSQ_C out=OSQ_C; by SEQN; run; *-------------------------------------------------------------------------; * Merge the DEMO_C and OSQ_C datasets by their linking variable, SEQN. ; * Use the KEEP statement to retain selected variables of interest in the ; * dataset. ; * ; * Use the FORMAT statement to apply the GENDER format to the RIAGENDR ; * variable ; *-------------------------------------------------------------------------; data DEMOOST (keep=SEQN RIAGENDR RIDAGEYR OSQ060 SDMVSTRA SDMVPSU WTINT2YR OSQ070); merge DEMO_C OSQ_C; by SEQN; format RIAGENDR GENDER.; run; *-------------------------------------------------------------------------; * Use the PROC CONTENTS procedure to list the contents of the DEMOOST ; * dataset - the new merged dataset. Use the VARNUM option to list the ; * variables according to their position in the dataset. ; * ; * Use the PROC MEANS procedure to determine the number of non-missing ; * values (N), number of missing values (Nmiss), minimum (min) and maximum ; *(max) values, to two decimal places (maxdec=2), for the variables in the ; * merged dataset. ; *-------------------------------------------------------------------------; proc contents data=DEMOOST varnum; run; proc means data= DEMOOST N Nmiss min max maxdec=2; run; *-------------------------------------------------------------------------; * Use the PROC MEANS procedure to determine the number of observations ; * (N), the number of missing observations (Nmiss), minimum values (min), ; * and maximum values (max) for the selected variables. Use the WHERE ; * statement to select the INT sample weight and to select females who are ; * 20 years of age and older. Use the VAR statement to list the variables ; * of interest. ; *-------------------------------------------------------------------------; proc means data=DEMOOST N Nmiss min max; where WTINT2YR > 0 and RIAGENDR=2 and RIDAGEYR >= 20; var OSQ060 OSQ070; run; *-------------------------------------------------------------------------; * Use the PROC FREQ procedure to determine the frequency of each value of ; * the variables listed. Use the WHERE statement to select the INT sample ; * weight and to select females who are 20 years of age and older. Use ; * the TABLES statement to list the variables of interest. Use the list ; * missing statement option to display the missing values. ; *-------------------------------------------------------------------------; proc freq data=DEMOOST; where WTINT2YR > 0 and RIAGENDR=2 and RIDAGEYR >= 20; tables OSQ060 OSQ070/list missing; format OSQ060 OSQ070 YESNO.; run; *-------------------------------------------------------------------------; * Use the PROC FREQ procedure to determine the frequency of each value of ; * the variables listed. Use the TABLES statement to list the variables ; * to be included in the output frequency table and the cross tabulation ; * frequency table for the skip patterns. Note that an asterisk indicates ; * that a crosstab will be constructed. ; *-------------------------------------------------------------------------; proc freq data=DEMOOST; where WTINT2YR > 0 and RIAGENDR=2 and RIDAGEYR >= 20; tables OSQ060*OSQ070/list missing; format OSQ060 OSQ070 YESNO.; title 'Check skip pattern for osteoporosis questionnaire'; run; *-------------------------------------------------------------------------; * Create a new variable in which age is categorized into groups (AGEGP). ; * ; * Apply a label and format to the new variable, AGEGRP ; *-------------------------------------------------------------------------; data DEMOOST; set DEMOOST; by SEQN; if (20 <= RIDAGEYR <= 39) then AGEGRP=1; else if (40 <= RIDAGEYR <= 59) then AGEGRP=2; else if (RIDAGEYR >= 60) then AGEGRP=3; label AGEGRP='Age of subject'; *-------------------------------------------------------------------------; * Create a new variable called TREATOSTEO based on responses to the ; * variables OSQ060 and OSQ070 ; * ; * Recode DONT KNOW responses to missing for OS1060 and OSQ070 ; * ; * Apply a label and format to the new variable, TREATOSTEO ; *-------------------------------------------------------------------------; if OSQ060=9 then OSQ060=.; if OSQ070=9 then OSQ070=.; if OSQ070=1 then treatOSTEO=1; else if OSQ070=2 or OSQ060=2 then treatOSTEO=2; Label treatOSTEO='Subject is being treated for osteoporosis'; format treatOSTEO YESNO.; run; *-------------------------------------------------------------------------; * Use the PROC FREQ procedure to determine the frequency of each value of ; * the variables listed. Use the WHERE statement to select the INT sample ; * weight and to select females who are 20 years of age and older. Use ; * the TABLES statement to list the variables of interest. Use the list ; * missing statement option to display the missing values. ; *-------------------------------------------------------------------------; proc freq data=DEMOOST; where WTINT2YR > 0 and RIAGENDR=2 and RIDAGEYR >= 20; tables OSQ060 OSQ070/list missing; format OSQ060 OSQ070 YESNO.; run; *-------------------------------------------------------------------------; * Use the PROC FREQ and TABLE statements to check the derived variable ; * (TREATOSTEO) against the original variables (OSQ060 and OSQ070). ; *-------------------------------------------------------------------------; Proc freq data=DEMOOST; where WTINT2YR > 0 and RIAGENDR=2 and RIDAGEYR >= 20; table TREATOSTEO*OSQ060*OSQ070/list missing; format TREATOSTEO OSQ060 OSQ070 YESNO.; title 'Check derived variable TREATOSTEO'; run; *-------------------------------------------------------------------------; * Use the PROC SORT procedure and BY statement to sort supplement files 1 ; * (NH.DSQ1_C) and 2 (NH.DSQ2_C) by SEQN. ; * ; * Use the DATA statement to name the new dataset, SUP12, and the MERGE ; * statement to merge supplement files 1 and 2 by SEQN. ; *-------------------------------------------------------------------------; proc sort data=NH.DSQ1_C out=DSQ1_C; by SEQN; run; proc sort data=NH.DSQ2_C out=DSQ2_C; BY SEQN; run; data SUP12; merge DSQ1_C DSQ2_C; by SEQN; run; *-------------------------------------------------------------------------; * Use the PROC SORT procedure and BY statement to sort SUP12, which was ; * created in the previous step, and supplement file 3 (NH.DSQ3_C) by ; * DSDSUPID. ; * ; * Use the DATA statement to name the new dataset, SUP123, and use the ; * MERGE statement to merge SUP12 and NH.DSQ3_C by DSDSUPID. Note that ; * these files must be merged by DSDSUPID because this is the unique ; * identifier common to these two datasets. ; *-------------------------------------------------------------------------; proc sort data=SUP12; by DSDSUPID; run; proc sort data=NH.DSQ3_C out=DSQ3_C; by DSDSUPID; run; data SUP123; merge SUP12 DSQ3_C; by DSDSUPID; run; *-------------------------------------------------------------------------; * Use the PROC CONTENTS procedure to list the contents of the SUP123 ; * dataset - the new merged dataset. Use the VARNUM option to list the ; * variables according to their position in the dataset. ; * ; * Use the PROC MEANS procedure to determine the number of non-missing ; * values (N), number of missing values (Nmiss), minimum (min) and maximum ; *(max) values, to two decimal places (maxdec=2), for the variables in the ; * merged dataset. ; *-------------------------------------------------------------------------; proc contents data= SUP123 varnum; run; proc means data= SUP123 N Nmiss min max maxdec=2; run; *-------------------------------------------------------------------------; * Use the PROC SORT procedure and BY statement to sort the supplement 4 ; * file (DSQ4_c). Merge this file with the SUP123 file from the previous ; * step by using the PROC SQL procedure and the unique identifier, ; * DSDSUPID. This merge will create a dataset called ALLSUP by adjoining ; * values from the supplement 4 file to the supplement 123 file, regardless; * of the number of observations for DSDQUPID in each file. ; * ; * For more information on PROC SQL you may want to consult the SAS manual ; *-------------------------------------------------------------------------; proc sort data=NH.DSQ4_C out=DSQ4_C; by DSDSUPID; run; proc sql; create table ALLSUP as select * from DSQ4_C as H, SUP123 as M where H.DSDSUPID=M.DSDSUPID; run; *-------------------------------------------------------------------------; * Use the PROC CONTENTS procedure to list the contents of the SUP123 ; * dataset - the new merged dataset. Use the VARNUM option to list the ; * variables according to their position in the dataset. ; * ; * Use the PROC MEANS procedure to determine the number of non-missing ; * values (N), number of missing values (Nmiss), minimum (min) and maximum ; *(max) values, to two decimal places (maxdec=2), for the variables in the ; * merged dataset. ; *-------------------------------------------------------------------------; proc contents data= ALLSUP varnum; run; proc means data= ALLSUP N Nmiss min max maxdec=2; run; *-------------------------------------------------------------------------; * Sort the new dataset, ALLSUP, by DSDINGID. This is the end of the merge; * procedure. ; *-------------------------------------------------------------------------; proc sort data=ALLSUP; by DSDINGID; run; *-------------------------------------------------------------------------; * Once the supplement files are merged, the code below can be used to ; * output only supplements with calcium as an ingredient. The following ; * list shows the name of the supplement as well as the corresponding ; * ingredient code (DSDINGID) ; * ; * Calcium: DSDINGID = 10000070 ; * Elemental Calcium: DSDINGID = 10000507 ; * Oyster Shell Calcium: DSDINGID = 10001483 ; * Calcium carbonate (40% calcium): DSDINGID = 10000611 ; * Calcium citrate (21% calcium): DSDINGID = 10001394 ; * Calcium d-glucarate (12.5% calcium): DSDINGID = 10001144 ; * Calcium gluconate (9% calcium): DSDINGID = 10000584 ; * Calcium hypophosphite (23.6% calcium): DSDINGID = 10002193 ; * Calcium pantothenate (8.4% calcium): DSDINGID = 10000437 ; * D-calcium pantothenate (8.4% calcium): DSDINGID = 10000521 ; * Calcium phosphate (30% calcium): DSDINGID = 10000795 ; * Calcium pyruvate (18.7% calcium): DSDINGID = 10000920 ; * Calcium HMB (14% calcium): DSDINGID = 10002190 ; *-------------------------------------------------------------------------; data CALCIUM; set ALLSUP; if DSDINGID in (10000070) or DSDINGID in (10000507) or DSDINGID in (10001483) or DSDINGID in (10000611) or DSDINGID in (10001394) or DSDINGID in (10001144) or DSDINGID in (10000584) or DSDINGID in (10002193) or DSDINGID in (10000437) or DSDINGID in (10000521) or DSDINGID in (10000795) or DSDINGID in (10000920) or DSDINGID in (10002190) then output; *-------------------------------------------------------------------------; * Convert the calcium compounds to their elemental amount - use the ; * percentage of calcium listed next to each of the compounds above as the ; * conversion factor. Note that this information can be found in the ; * documentation for the supplement data. ; *-------------------------------------------------------------------------; data dosage; set calcium; if DSDINGID =10000611 then PROP_SUPP_CALC = DSDQTY/100*40; if DSDINGID =10001394 then PROP_SUPP_CALC = DSDQTY/100*21; if DSDINGID =10001144 then PROP_SUPP_CALC = DSDQTY/100*12.5; if DSDINGID =10000584 then PROP_SUPP_CALC = DSDQTY/100*9; if DSDINGID =10002193 then PROP_SUPP_CALC = DSDQTY/100*23.6; if DSDINGID =10000437 then PROP_SUPP_CALC = DSDQTY/100*8.4; if DSDINGID =10000521 then PROP_SUPP_CALC = DSDQTY/100*8.4; if DSDINGID =10000795 then PROP_SUPP_CALC = DSDQTY/100*30; if DSDINGID =10000920 then PROP_SUPP_CALC = DSDQTY/100*18.7; if DSDINGID =10002190 then PROP_SUPP_CALC = DSDQTY/100*14; run; *-------------------------------------------------------------------------; * Use serving amount and intake information to create new variables that ; * will be used to calculate the amount of calcium intake per day ; * ; * For purposes of this analysis, data is imputed with the assumption that ; * when the reported amount of a supplement taken is missing (DSD122Q), ; * then the label serving size (DSDSERVQ) is equal to the serving size ; * that was consumed (SUPP_CALC). ; * ; * Determine the total amount of calcium per supplement (THIS_SUPP_CALC) ; * by multiplying the serving size by the quantity of the supplement taken ; * (DSDQTY) ; * ; * Convert units of total calcium per supplement from grams to milligrams. ; * It is important to check the units to make sure they are consistent ; * throughout the data. ; * ; * Determine the average amount of calcium consumed per supplement day ; * (DAILYAVG) by dividing the total amount of calcium per supplement by 30.; *-------------------------------------------------------------------------; data CALCIUM1; set DOSAGE; if DSD122Q=999999 then DSD122Q=.; if DSD103=9999 then DSD103=.; SUPP_CALC = DSD122Q / DSDSERVQ; if DSD122Q = . then SUPP_CALC = DSDSERVQ; if DSDINGID in (10000070 10000507 10001483) then THIS_SUPP_CALC = SUPP_CALC*DSDQTY; if DSDINGID in (10000611 10001394 10001144 10000584 10002193 10000437 10000521 10000795 10000920 10002190) then THIS_SUPP_CALC = SUPP_CALC*PROP_SUPP_CALC; if (DSDUNIT=5) then THIS_SUPP_CALC_MG = THIS_SUPP_CALC * 1000; else if (DSDUNIT=1) then THIS_SUPP_CALC_MG = THIS_SUPP_CALC; DAILYAVG = THIS_SUPP_CALC_MG * DSD103/30; run; *-------------------------------------------------------------------------; * Sort the data set , CALCIUM1, by sequence number (SEQN) ; *-------------------------------------------------------------------------; proc sort data=CALCIUM1; by SEQN; run; *-------------------------------------------------------------------------; * Use the MEANS procedure to sum the values of the amount of calcium ; * consumed per supplement per day (DAILYAVG). The data will be output in ; * a file called FINALCA. ; *-------------------------------------------------------------------------; proc means data=CALCIUM1 NOPRINT; by SEQN; var DAILYAVG; output out=FINALCA sum=DAILYAVG; run; *-------------------------------------------------------------------------; * Merge the new calcium file (FINALCA) that was output in the previous ; * step with the analysis file created towards the beginning of the ; * program. Also merge in the first supplement file to have access to ; * the variable DSD010 (use any supplement) ; *-------------------------------------------------------------------------; data DEMOOSTS; merge DEMOOST (in=D) FINALCA DSQ1_C; by SEQN; if D; run; *-------------------------------------------------------------------------; * Create additional variables of interest ; *-------------------------------------------------------------------------; data DEMOOSTS; set DEMOOSTS; by SEQN; *-------------------------------------------------------------------------; * Create a variable called INCOHF20 that defines who is in the cohort of ; * interest - females age >= 20 ; *-------------------------------------------------------------------------; if (RIAGENDR=2 and RIDAGEYR >= 20) then INCOHF20=1; else INCOHF20=0; *-------------------------------------------------------------------------; * Set calcium to zero if the respondent answers yes or no to supplement ; * intake but has no calcium intake ; * ; * Leave supplement use as missing if it is unknown ; *-------------------------------------------------------------------------; if dsd010 in (1,2) and DAILYAVG=. then DAILYAVG=0; *-------------------------------------------------------------------------; * Create a new variable called ANYCALSUP that represents yes or no use of ; * calcium supplement, regardless of amount. ; * ; * Apply a label to ANYCALSUP. ; *-------------------------------------------------------------------------; if DAILYAVG > 0 then ANYCALSUP=1; else if DAILYAVG=0 then ANYCALSUP=2; label ANYCALSUP='Used any supplement calcium'; run; *-------------------------------------------------------------------------; * Use the PROC SORT procedure to sort the data by strata and PSU. ; * ; * Use the PROC DESCRIPT procedure in SUDAAN to estimate mean intakes of ; * calcium from supplements using complex survey design factors (strata ; * and PSU). ; *-------------------------------------------------------------------------; proc sort data=DEMOOSTS; by SDMVSTRA SDMVPSU; run; proc descript data=DEMOOSTS; nest SDMVSTRA SDMVPSU; weight WTINT2YR; subgroup AGEGRP; levels 3; var DAILYAVG; tables AGEGRP; subpopn INCOHF20=1; rformat AGEGRP AGEGRP.; run; *-------------------------------------------------------------------------; * Use the PROC SORT procedure to sort the data by gender and age for the ; * analysis using PROC FREQ. It is necessary to sort the data in SAS when ; * using a BY statement within a procedure. ; * ; * Use the PROC FREQ procedure to analyze supplemental calcium intake with ; * respect to treatment for osteoporosis. ; * ; * Notice that this code does not include strata and PSU information. ; *-------------------------------------------------------------------------; proc sort data=DEMOOSTS; by RIAGENDR AGEGRP; run; proc freq data=DEMOOSTS; where INCOHF20=1; weight WTINT2YR; title2 'Weighted'; by RIAGENDR AGEGRP; tables treatOSTEO*ANYCALSUP; format RIAGENDR GENDER. AGEGRP AGEGRP. TREATOSTEO YESNO. ANYCALSUP YESNOS.; run; data NH.DEMOOSTS; set DEMOOSTS; run; **************************************************************************; *-------------------------------------------------------------------------; * Use the PROC SORT procedure to sort the data files by strata and PSU. ; * Data must always be sorted before running a SUDAAN procedure. ; * ; * Use the PROC CROSSTAB procedure to perform a chi-square test in SUDAAN. ; * This test will be used to determine whether age group and treatment for ; * osteoporosis are independent of each other in respondents aged 20 and ; * over. ; *-------------------------------------------------------------------------; proc sort data=DEMOOSTS; by SDMVSTRA SDMVPSU; run; proc crosstab data=DEMOOSTS design=wr; nest SDMVSTRA SDMVPSU; weight WTINT2YR; subpopn RIDAGEYR >= 20; class AGEGRP TREATOSTEO/nofreq; table AGEGRP*TREATOSTEO; print nsum rowper colper/tests=all; rformat AGEGRP AGEGRP.; rformat TREATOSTEO YESNO.; rtitle "Chi-square test for osteoporosis treatment by age group" "among people >= 20 years of age"; run;