Best Linear Unbiased Predictors (BLUP) is used in plant breeding experiments for the selection of the best lines in multi-environmental trials (MET).
In this section, a comparison will be made between the
H2Cal()
function of the inti package (Lozano-Isla, 2020) and the code using
the asrml
package published by Buntaran et al. (2020).
For the comparison we will use the two methods described by Buntaran et al. (2020). (i) The “two-stage” analysis, using two stages, the first to calculate the BLUEs and the second the BLUPs. (ii) The “one stage” analysis using only one model to calculate the BLUPs.
Load data
library(inti)
library(purrr)
library(dplyr)
<- inti::met fb
Two-stage analysis
asreml
library(asreml)
library(data.table)
library(plyr)
library(stringr)
asreml.options(maxit=100) # Set asreml iteration
############################
##### Stage I LSMEANS #####
##### per location #####
<- data.table(fb)
ww
##### Make column Zone_Loc #####
<- nlevels(ww$env)
trials <- levels(ww$env)
envs
##### Make data list for Stage I #####
<- matrix(data=list(), nrow=length(envs), ncol=1,
data_list dimnames=list(envs, c("data_Set")))
##### Make a list of Trials #####
for(i in 1:trials){
print(i)
<- levels(ww$env)
b <- b[i]
c <- as.factor(c)
env <- data.table(env)
env <- merge(ww,env,by="env")
f assign(paste0("data_", b[i]), f)
"data_Set" ]] <- f
data_list[[i,
rm(b, c, f, env)
}
<- data.table(ldply(data_list[, "data_Set"], data.frame, .id="env"))
data_list
<- matrix(data=list(), nrow=length(envs), ncol=1,
stgI_list dimnames=list(envs, c("lsmeans")))
asreml.options(maxit=100) # Set asreml iteration
############################
##### Stage I LSMEANS #####
##### per location #####
for (i in envs){
<- droplevels(subset(ww, env == i))
edat
print(i)
.1 <- asreml(fixed = yield ~ cultivar,
modrandom = ~ rep + rep:alpha,
data = edat,
predict = predict.asreml(classify = "cultivar"))
update.asreml(mod.1)
print(summary.asreml(mod.1)$varcomp)
<- predict(mod.1, classify="cultivar", levels=levels(edat$cultivar), vcov=TRUE,aliased = T) # get the lsmeans
blue .1 <- data.table(blue$pvals)[, c(1:3)]
bluenames(blue.1) <- c("cultivar", "yield_lsm", "se")
.1[ , ':='(var=se^2, smith.w=diag(solve(blue$vcov)))] # calculate the Smith's weight
blue
"lsmeans" ]] <- blue.1 # put all the results of Stage 1 in the list
stgI_list[[i,
rm(Edat,mod.1, blue, blue.1)
}
#######################################################
##### Preparing dataset of Stage I for Stage II ######
##### Unlist the results of Stage I and format as data.table #####
<- data.table(plyr::ldply(stgI_list[, "lsmeans"], data.frame, .id="env"))
stgII_list
$zone<- factor(str_split_fixed(stgII_list$env, "_", 2)[,1]) # Make Zone column by split the record in Zone_Loc column
stgII_list$location <- factor(str_split_fixed(stgII_list$env, "_", 3)[,2]) # Make Location by split the record in Zone_Loc column
stgII_list$year <- factor(str_split_fixed(stgII_list$env, "_", 3)[,3]) # Make Year by split the record in Zone_Loc column
stgII_list
<- stgII_list
blues.asreml
############################
##### Stage II BLUPs ######
##### Zone analysis #####
<- asreml(yield_lsm ~ zone,
model random = ~cultivar + zone:location + zone:cultivar + cultivar:zone:location,
weights = smith.w,
family = asr_gaussian(dispersion=1.0), # fix residual variance to 1
data = blues.asreml,
predict = predict.asreml(classify = "cultivar")
)
update.asreml(model)
# print(summary.asreml(model)$varcomp) # print the variance components
<- data.frame((model$predictions$pvals[1:4])) blups.asrml
H2cal
library(inti)
library(purrr)
#> First stage
<- levels(fb$env)
envs
<- 1:length(envs) %>% map(function(x) {
model
<- fb %>% filter(env %in% envs[x]) %>%
model
H2cal(trait = "yield"
gen.name = "cultivar"
, rep.n = 4
, fixed.model = "0 + (1|rep) + (1|rep:alpha) + cultivar"
, random.model = "1 + (1|rep) + (1|rep:alpha) + (1|cultivar)"
, # , plot_diag = T
emmeans = F
,
)
<- model$blues %>% mutate(trial = levels(fb$env)[x])
blues
})
<- bind_rows(model) %>%
blues.h2cal separate(trial, c("zone", "location", "year")) %>%
mutate(across(c(yield, smith.w), as.numeric)) %>%
mutate(across(!c(yield, smith.w), as.factor))
#> Second stage
<- blues.h2cal %>%
met mutate(across(!yield, as.factor)) %>%
H2cal(trait = "yield"
gen.name = "cultivar"
, rep.n = 4
, env.n = 18
, env.name = "location"
, fixed.model = "0 + zone + (1|zone:location) + (1|zone:cultivar) + cultivar"
, random.model = "1 + zone + (1|zone:location) + (1|zone:cultivar) + (1|cultivar)"
, # , plot_diag = T
emmeans = T
, # , weights = blues.h2cal$smith.w
)
<- met$blups blups.h2cal
- First stage
- Fixed model with
0 +
for avoid intercep and calculate all the BLUEs. emmeans = F
to calculate the Smith weitghts in the first stage
- Fixed model with
BLUEs comparison
<- merge(blues.asreml
blues.comp
, blues.h2cal by = c("cultivar", "zone", "location"))
,
# plot(blues.comp$yield, blues.comp$yield_lsm)
<- cor(blues.comp$yield, blues.comp$yield_lsm)
rs cat("r =", rs)
## r = 1
%>% web_table(digits = 4) blues.comp
The BLUEs correlation between H2Cal
and
asrml
is (r = 1)
BLUPs comparison
<- merge(blups.asrml, blups.h2cal
blups.comp by = c("cultivar"))
,
# plot(blups.comp$yield, blups.comp$predicted.value)
<- cor(blups.comp$yield, blups.comp$predicted.value)
rs cat("r =", rs)
## r = 0.9818724
%>% web_table(digits = 4) blups.comp
The BLUPs correlation between H2Cal
and
asrml
is (r = 0.9818724)
Single-stage analysis
asreml
library(asreml)
options("scipen"=100,"digits"= 4 )
asreml.options(maxit=100) # Set asreml iteration
##### Fit a single-stage model #####
## incomplete block and replicate location-specific
## location-specifice residual variance
<- asreml(fixed = yield ~ zone,
mod random = ~ rep:at(location) + rep:alpha:at(location) + zone:location +
+ cultivar:zone:location+ cultivar:zone,
cultivar residual = ~ dsum(~(units)|location),
data = fb,
predict = predict.asreml(classify = "cultivar"))
update.asreml(mod)
<- data.frame((mod$predictions$pvals[1:4])) blups.asreml
H2cal
library(inti)
<- fb %>%
model H2cal(trait = "yield"
gen.name = "cultivar"
, env.name = "location"
, rep.n = 2
, env.n = 18
, fixed.model = "0 + zone + (1|rep:location) + (1|rep:alpha:location) + (1|zone:location) + (1|cultivar:zone) + (1|cultivar:zone:location) + cultivar"
, random.model = "1 + zone + (1|rep:location) + (1|rep:alpha:location) + (1|zone:location) + (1|cultivar:zone) + (1|cultivar:zone:location) + (1|cultivar)"
, summary = T
, emmeans = T
, # , plot_diag = T
)## Linear mixed model fit by REML ['lmerMod']
## Formula: yield ~ 1 + zone + (1 | rep:location) + (1 | rep:alpha:location) +
## (1 | zone:location) + (1 | cultivar:zone) + (1 | cultivar:zone:location) +
## (1 | cultivar)
## Data: dt.rm
## Weights: weights
##
## REML criterion at convergence: 11933.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0760 -0.3308 -0.0084 0.3698 4.0576
##
## Random effects:
## Groups Name Variance Std.Dev.
## cultivar:zone:location (Intercept) 2209.5 47.00
## rep:alpha:location (Intercept) 944.6 30.73
## cultivar:zone (Intercept) 129.6 11.38
## rep:location (Intercept) 334.0 18.28
## cultivar (Intercept) 728.0 26.98
## zone:location (Intercept) 48679.8 220.64
## Residual 1396.8 37.37
## Number of obs: 1069, groups:
## cultivar:zone:location, 539; rep:alpha:location, 251; cultivar:zone, 90; rep:location, 36; cultivar, 30; zone:location, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 813.35 83.85 9.700
## zonenorth 51.57 129.67 0.398
## zonesouth 59.75 123.21 0.485
##
## Correlation of Fixed Effects:
## (Intr) znnrth
## zonenorth -0.644
## zonesouth -0.678 0.439
<- model$blups blups.h2cal
BLUPs comparison
<- merge(blups.asreml, blups.h2cal, by = c("cultivar"))
blups.comp
# plot(blups.comp$predicted.value, blups.comp$yield)
<- cor(blups.comp$predicted.value, blups.comp$yield)
rs cat("r =", rs)
## r = 0.9201732
%>% web_table(digits = 4) blups.comp
The BLUPs correlation between H2Cal
and
asrml
is (r = 0.9201732)