*-------------------------------------------------------------------------;
* This program is used to analyze food sources of calcium intake among;
* respondents ages 2 and older. The analysis combines data from;
* NHANES 2001-2002 and NHANES 2003-2004.;
*-------------------------------------------------------------------------;

*-------------------------------------------------------------------------;
* Assign the libname NH to the C:\NHANES\DATA folder. This is where the;
* input data files are read from. Remember to surround the pathname with;
* quotation marks.;
*;
* Include SAS system options for output line length and input error;
* handling (NOFMTERR lets steps run when a stored format is missing).;
*-------------------------------------------------------------------------;
libname NH "C:\NHANES\DATA";
options linesize=78 nofmterr;

*-------------------------------------------------------------------------;
* Use the PROC CONTENTS procedure to list the contents of each dataset:;
*   2001-2002 Dietary Interview (Individual Foods File) Examination File;
*   2003-2004 Dietary Interview (Individual Foods File) Examination File;
*   2001-2002 Demographic File;
*   2003-2004 Demographic File;
*;
* Use the VARNUM option to list the variables according to their position;
* in the dataset.;
*-------------------------------------------------------------------------;
title2 'review data using PROC CONTENTS';

proc contents data=NH.DRXIFF_B varnum;
run;

proc contents data=NH.DR1IFF_C varnum;
run;

proc contents data=NH.DEMO_B varnum;
run;

proc contents data=NH.DEMO_C varnum;
run;

*-------------------------------------------------------------------------;
* The DATA step creates a dataset for the 4 years of demographic data;
* (DEMO_4YR).;
*;
* The SET statement appends the 2003-2004 demographic data file;
* (NH.DEMO_C) to the 2001-2002 demographic data file (NH.DEMO_B).;
*;
* The KEEP= dataset option selects the variables of interest. The sequence;
* number (SEQN) is kept because it is the key used for the later merge.;
*;
* The SDMVPSU and SDMVSTRA variables are kept in order to incorporate;
* survey design information in later analyses.;
*-------------------------------------------------------------------------;
data DEMO_4YR;
  set NH.DEMO_B (keep=SEQN RIDAGEYR SDMVPSU SDMVSTRA)
      NH.DEMO_C (keep=SEQN RIDAGEYR SDMVPSU SDMVSTRA);
run;

* Name the dataset explicitly instead of relying on the last one created;
proc contents data=DEMO_4YR;
run;

*-------------------------------------------------------------------------;
* The DATA step creates the dataset for the 4 years of dietary data;
* (IFF_4YR).;
*;
* The KEEP= dataset option on the output dataset retains only the;
* variables of interest.;
*;
* The SET statement appends the 2003-2004 individual foods file;
* (NH.DR1IFF_C) to the 2001-2002 individual foods file (NH.DRXIFF_B).;
*;
* The RENAME= option renames the variables DRDDRSTZ, DRXICALC, and;
* DRDIFDCD in the 2001-2002 file to DR1DRSTZ, DR1ICALC, and DR1IFDCD,;
* which are the names given to the same variables in the 2003-2004 file.;
*;
* Note that WTDRD1 is the weight variable for all persons in dietary;
* data and is appropriate for use with dietary recall data. Weights;
* must be used in order for the analysis to be generalizable to the;
* total population.;
*-------------------------------------------------------------------------;
data IFF_4YR (keep=DR1IFDCD WTDRD1 DR1ICALC SEQN DR1DRSTZ);
  set NH.DRXIFF_B (rename=(DRDDRSTZ=DR1DRSTZ
                           DRXICALC=DR1ICALC
                           DRDIFDCD=DR1IFDCD))
      NH.DR1IFF_C;
run;

proc contents data=IFF_4YR;
run;

*-------------------------------------------------------------------------;
* Sort the new demographic dataset (DEMO_4YR) by SEQN. Data must be;
* sorted before merging.;
*-------------------------------------------------------------------------;
proc sort data=DEMO_4YR;
  by SEQN;
run;

*-------------------------------------------------------------------------;
* Sort the new individual food intake dataset (IFF_4YR) by SEQN.;
*-------------------------------------------------------------------------;
proc sort data=IFF_4YR;
  by SEQN;
run;

*-------------------------------------------------------------------------;
* Merge the demographic and individual food intake datasets by SEQN.;
* The IN= flags restrict the result to SEQN values present in both inputs.;
*-------------------------------------------------------------------------;
data FDSRC;
  merge DEMO_4YR (IN=d)
        IFF_4YR  (IN=i);
  by SEQN;
  if (d and i);
run;

*-------------------------------------------------------------------------;
* The PROC FORMAT procedure assigns text names to the numeric values of;
* the FOODGRPF format.;
*-------------------------------------------------------------------------;
proc format;
  value FOODGRPF
    1 = "Milk & Milk Products"
    2 = "Meat, Poultry, Fish & Mixtures"
    3 = "Eggs"
    4 = "Legumes, Nuts and Seeds"
    5 = "Grain Products"
    6 = "Fruits"
    7 = "Vegetables"
    8 = "Fats, Oils & Salad Dressings"
    9 = "Sugar, Sweeteners & Beverages";
run;

*-------------------------------------------------------------------------;
* Create new variables that will be used in later analyses.;
*-------------------------------------------------------------------------;
data FDSRC;
  set FDSRC;

  *---------------------------------------------------------------------;
  * Create a new variable called FOODGRP to group the data into nine;
  * major food categories. This classification is based on the first;
  * digit of the actual food code (DR1IFDCD). By dividing the 8-digit;
  * food codes by 10000000 and using the INT function, the first digit;
  * of the food code is assigned to the variable FOODGRP. This is only;
  * one of the ways to group foods using SAS code.;
  *---------------------------------------------------------------------;
  FOODGRP = int(DR1IFDCD/10000000);

  *---------------------------------------------------------------------;
  * Create a new variable called WTD_CALC, which is the amount of;
  * calcium in a particular food multiplied by the sample weight of the;
  * individual who consumed that food. This is one of the variables;
  * used to determine food sources of calcium.;
  *;
  * The WTDRD1 variable must be divided by 2 since two waves of the;
  * survey are being combined.;
  *---------------------------------------------------------------------;
  WTD_CALC = DR1ICALC*(WTDRD1/2);

  *---------------------------------------------------------------------;
  * Create a new variable called INCOH that defines who is in the cohort;
  * of interest (age >= 2 and satisfactory dietary recall data).;
  *---------------------------------------------------------------------;
  if (RIDAGEYR >= 2 and DR1DRSTZ=1) then INCOH=1;
  else INCOH=0;

  *---------------------------------------------------------------------;
  * The LABEL statement applies a text description to the variable;
  * FOODGRP.;
  *;
  * The FORMAT statement applies the format FOODGRPF to FOODGRP.;
  *---------------------------------------------------------------------;
  label FOODGRP = "Broad food grp based on 1st digit of USDA food code";
  format FOODGRP FOODGRPF.;
run;

*-------------------------------------------------------------------------;
* Use the PROC SORT procedure to sort the dataset by FOODGRP. Use the;
* PROC MEANS procedure to calculate the number of observations, minimum;
* values, and maximum values for the original food code variable;
* (DR1IFDCD) by the new FOODGRP variable.;
*;
* The unnumbered TITLE statement sets title line 1 and cancels the;
* TITLE2 that was set for the PROC CONTENTS review above.;
*-------------------------------------------------------------------------;
proc sort data=FDSRC;
  by FOODGRP;
run;

proc means data=FDSRC N min max;
  by FOODGRP;
  var DR1IFDCD;
  title 'Check if FOODGRP is defined correctly';
run;

*-------------------------------------------------------------------------;
* Title lines 1 and 2 describe the analysis population for the weighted;
* output that follows.;
*-------------------------------------------------------------------------;
title1 "Contribution to total calcium intake by food groups - weighted";
title2 "People age >= 2 with reliable recall status, WWEIA," "NHANES 2001-2004, First day of intake";

*-------------------------------------------------------------------------;
* The SURVEYFREQ procedure in SAS calculates the weighted contribution of;
* calcium from each food group.;
*;
* For this analysis only the records with INCOH=1 are of interest.;
* Records with INCOH=0 are not deleted. Instead their weight is set to a;
* negligible positive value (.00000001) so that they remain in the input;
* but contribute essentially nothing to the weighted percentages.;
* NOTE(review): presumably this keeps every stratum and PSU available for;
* variance estimation - confirm against the survey analysis guidance.;
*-------------------------------------------------------------------------;
data FDSRC;
  set FDSRC;
  if INCOH=0 then WTD_CALC=.00000001;
run;

*-------------------------------------------------------------------------;
* TITLE3 is used here on purpose. An unnumbered TITLE statement is the;
* same as TITLE1 and would replace title line 1 and cancel title line 2,;
* so the population description set above would never be printed.;
*-------------------------------------------------------------------------;
proc surveyfreq data=FDSRC;
  strata SDMVSTRA;
  cluster SDMVPSU;
  weight WTD_CALC;
  tables FOODGRP;
  title3 "Percent calcium by food group, using PROC SURVEYFREQ";
run;