Chapter 10: Multifactorial analyses using R
Figure 10.1 Output for one- way analysis of variance
Box 10.3 Presenting the results for one-way analysis of variance
Figure 10.1 and Box 10.3 Code
Click here to show code with comments# Tell R where you will be working. R will look here for your data: setwd("~/Documents/Books/Presenting/data/rData/") # See chapter 2 for more details on the setwd() function. # Use the load() function to load the dataset smokingbwt.RData out of the # current folder/directory into R's interactive working memory/environment. See # chapter 2 for more details. load("smokingbwt.RData") # Use the library() package to load the psych package for the describe() # function. See chapter 2 for more details on library(). library(psych) # The describe() function in the psych package does descriptive statistics # including a standard deviation. See chapter 2 for more details. describe(smokingbwt$birthwt) # Look for bad/impossible data. # The table() function does simple cross tabulation tables. table(smokingbwt$smokegroup) # Check for empty factor levels. # Make a dataset that has the two variables and drops the record # with the factor level of "". "" is a legal value so the complete.cases() function # will not drop the blank. The syntax is: # dataframe[ what records , what columns ] completeSmokebwt <- smokingbwt[smokingbwt$smokegroup!="", c("birthwt", "smokegroup") ] library(gmodels) # Load the gmodels package for the ci() function. # See chapter 2 for details on the with() function. with(completeSmokebwt, # First, group by smokinggroup levels, then calculate confidence limits on birthwt. by(birthwt, smokegroup, ci) ) # Load the gdata package for the reorder() function. library(gdata) # Use the reorder() function to set the levels of a categorical factor so it prints # in logical order. completeSmokebwt$smokegroup <-reorder(completeSmokebwt$smokegroup, new.order = c("never smoked", "ex pre-pregnancy", "ex in pregnancy", "1-14 cigs/day", "15+ cigs/day") ) # The lm() function can be used to build linear models. Here the code makes a # lm object, named modelBirthwt, holding the details of the predictive model. # See chapter 9 for more details on lm(). modelBirthwt<- with(completeSmokebwt, # linear model predicting birth weight with smoking group lm(birthwt ~ smokegroup) ) # The summary() function pays attention to the type of object it is summarizing. # See chapter 9 for more details. summary(modelBirthwt) # Display the model with estimates relative to nonsmokers. # The anova() function, when used with a lm object, can be used to show the # results of a linear model as an ANOVA table. Here the anova() function is used to show # the overall p-value for smoking. anova(modelBirthwt) with(completeSmokebwt, # The pairwise.t.test() function can create p-values for all pairwise # levels of the predictor while adjusting the p-values. pairwise.t.test(birthwt, # outcome smokegroup, # predictor p.adj="holm" # Adjust with the Holm method. ) ) library(agricolae) # Load the agricolae package for the scheffe.test() function. # The scheffe.test() function does not give pairwise p-values but shows which groups/levels # are different from others. scheffe.test(modelBirthwt, # the name of the model trt = "smokegroup", # the variable with levels to be compared console = TRUE # Show results. )
Figure 10.2 Output for multiple regression
Box 10.5 Presenting the results of multiple regression
Figure 10.2 and Box 10.5 Code
Click here to show code with commentssetwd("~/Documents/Books/Presenting/data/rData/") load("pefr.RData") # Make a linear model object from a multiple regression with 4 predictors UseAllPredictors <- with(pefr, lm(pefr ~ age + gas + smokers + wheeze) ) summary(UseAllPredictors) # Show parameter estimates, p-values and model stats. # The confint() function can be use to print 95% CLs on the model coefficients # from a lm object. confint(UseAllPredictors, level = .95) # Show confidence intervals. # Instead of typing the model again you can update it. The update() function # expects a model for the first argument and a formula for the second. The # formula typically includes . ~ . to indicate the original outcome and the # original predictors model and then a + or a - to indicate the addition or # removal of predictors. # Use3Predictors <- update(UseAllPredictors, # the model to update # . ~ . - smokers # remove smokers # # Make a linear model object from a multiple regression with 3 predictors. Use3Predictors <- with(pefr, lm(pefr ~ age + gas + wheeze) ) summary(Use3Predictors) confint(Use3Predictors, level = .95) # Make a linear model object from a multiple regression with 2 predictors. Use2Predictors <- with(pefr, lm(pefr ~ age + gas) ) summary(Use2Predictors) confint(Use2Predictors, level = .95)
Figure 10.3 Output from multiple regression with categorical predictor variables
Box 10.8 Presentation of multiple regression with categorical predictor variable
Box 10.8 and Figure 10.3 Code
Click here to show code with commentssetwd("~/Documents/Books/Presenting/data/rData/") load("smokingbwt.RData") # Make a data frame with no missing data on 3 analysis variables. useful <- smokingbwt[! is.na(smokingbwt$alcgroup) & # not missing ! is.na(smokingbwt$cigsgp) & ! is.na(smokingbwt$adjbw), # notice the , c("adjbw", "alcgroup", "cigsgp")] # keep 3 variables # Use the factor() function to make a variable in the useful data frame holding # a categorical factor version of the alcohol groups. See chapters 2 and 5 for more # details on factor(). useful$alcgroupFactor <- factor(useful$alcgroup, levels = 0:4, labels = c("0 g/week", "1-19 g/week", "20-49 g/week", "50-99 g/week", "100+ g/week") ) # Make a factor version of cigarette use. useful$cigsgpFactor <- factor(useful$cigsgp, levels = 1:2, labels = c("Light", "Heavy") ) # Make a linear model using the useful dataset where adjbw is predicted with # the two factors. modelDrugs <- with(useful, lm(adjbw ~ cigsgpFactor + alcgroupFactor) ) summary(modelDrugs) # Show the parameter estimates, p-values and model stats. confint(modelDrugs, # Show confidence intervals. level = .95) anova(modelDrugs) # Show overall p-values for factors.
Figure 10.5 Output for logistic regression
Box 10.12 Presenting the results for logistic regression
Figure 10.5 and Box 10.12 Code
Click here to show code with commentssetwd("~/Documents/Books/Presenting/data/rData/") load("bpd.RData") # Fit a logistic regression model. Save it as the object called modelChestWith4. modelChestWith4 <- with(bpd, # The glm() function fits generalized linear models. # Here it is used with the binomial family for logistic # regression. glm(chest ~ sex + multiple + o2dep36w + o2depdis, binomial # The default link will be logit. ) ) summary(modelChestWith4) # Show betas with P-values. # The exp() function calculates the exponential function. exp(coef(modelChestWith4)) # This exponentiates the coefficents to # get odds ratios. exp(confint.default(modelChestWith4)) # Get wald CLs on the ORs. modelChestWith3 <- with(bpd, glm(chest ~ sex + multiple + o2dep36w, binomial ) ) summary(modelChestWith3) exp(coef(modelChestWith3)) exp(confint.default(modelChestWith3))
Figure 10.6 Output for logistic regression with categorical predictor variables
Box 10.14 Presenting logistic regression with categorical predictor variables
Figure 10.6 and Box 10.14 Code
Click here to show code with commentssetwd("~/Documents/Books/Presenting/data/rData/") load("smokingbwt.RData") # The str() function shows the structure of an object. It is useful to quickly # check to see if R will treat a variable as a numeric or categorical variable # when it makes models. str(smokingbwt) # Fit a logistic regression model and save it as the object called nausea. nausea <- with(smokingbwt, # This is a generalized linear model using the binomial family # and the logit link. Predict nausea with smoker1 and # socialclass as categorical factors. glm(nausea1 ~ as.factor(smoker1) + as.factor(socialclass), binomial) ) summary(nausea) # Show betas, p-values and model summary. exp(coef(nausea)) # Odds ratios are betas exponentiated. exp(confint.default(nausea)) # Make Wald CLs on the odds ratios.