Skip to contents

Best Linear Unbiased Predictors (BLUP) is used in plant breeding experiments for the selection of the best lines in multi-environmental trials (MET).

In this section, a comparison will be made between the H2Cal() function of the inti package (Lozano-Isla, 2020) and the code using the asrml package published by Buntaran et al. (2020).

For the comparison we will use the two methods described by Buntaran et al. (2020). (i) The “two-stage” analysis, using two stages, the first to calculate the BLUEs and the second the BLUPs. (ii) The “one stage” analysis using only one model to calculate the BLUPs.

Load data

library(inti)
library(purrr)
library(dplyr)
fb <- inti::met

Two-stage analysis

asreml

library(asreml)
library(data.table)
library(plyr)
library(stringr)
asreml.options(maxit=100) # Set asreml iteration

############################
##### Stage I LSMEANS #####
##### per location   #####

ww <- data.table(fb)

##### Make column Zone_Loc #####

trials     <- nlevels(ww$env)
envs <- levels(ww$env)

##### Make data list for Stage I #####
data_list <- matrix(data=list(), nrow=length(envs), ncol=1, 
                    dimnames=list(envs, c("data_Set")))

##### Make a list of Trials #####
for(i in 1:trials){
  print(i)
  b <- levels(ww$env)
  c <- b[i]
  env <- as.factor(c)
  env <- data.table(env)
  f <- merge(ww,env,by="env")
  assign(paste0("data_", b[i]), f)
  data_list[[i, "data_Set" ]] <- f
  
  rm(b, c, f, env)
}

data_list <- data.table(ldply(data_list[, "data_Set"], data.frame, .id="env"))

stgI_list <- matrix(data=list(), nrow=length(envs), ncol=1, 
                    dimnames=list(envs, c("lsmeans")))

asreml.options(maxit=100) # Set asreml iteration

############################
##### Stage I LSMEANS #####
##### per location   #####

for (i in envs){
  
  edat <- droplevels(subset(ww, env == i))
  
  print(i)
  
  mod.1 <- asreml(fixed     = yield ~ cultivar,
                  random      = ~ rep + rep:alpha,
                  data        = edat,
                  predict     = predict.asreml(classify = "cultivar"))
  
  update.asreml(mod.1)
  print(summary.asreml(mod.1)$varcomp)
  
  blue <- predict(mod.1, classify="cultivar", levels=levels(edat$cultivar), vcov=TRUE,aliased = T) # get the lsmeans
  blue.1 <- data.table(blue$pvals)[, c(1:3)] 
  names(blue.1) <- c("cultivar", "yield_lsm", "se")
  blue.1[ , ':='(var=se^2, smith.w=diag(solve(blue$vcov)))] # calculate the Smith's weight
  
  stgI_list[[i, "lsmeans" ]] <- blue.1 # put all the results of Stage 1 in the list
  
  rm(Edat,mod.1, blue, blue.1)
}

#######################################################
##### Preparing dataset of Stage I for Stage II ######

##### Unlist the results of Stage I and format as data.table #####
stgII_list <- data.table(plyr::ldply(stgI_list[, "lsmeans"], data.frame, .id="env"))

stgII_list$zone<- factor(str_split_fixed(stgII_list$env, "_", 2)[,1]) # Make Zone column by split the record in Zone_Loc column
stgII_list$location <- factor(str_split_fixed(stgII_list$env, "_", 3)[,2])  # Make Location by split the record in Zone_Loc column
stgII_list$year <- factor(str_split_fixed(stgII_list$env, "_", 3)[,3]) # Make Year by split the record in Zone_Loc column

blues.asreml <- stgII_list

############################
##### Stage II BLUPs ######
##### Zone analysis #####

model <- asreml(yield_lsm  ~ zone,
                random    = ~cultivar + zone:location + zone:cultivar + cultivar:zone:location,
                weights   = smith.w,
                family    = asr_gaussian(dispersion=1.0), # fix residual variance to 1
                data      = blues.asreml,
                predict   = predict.asreml(classify = "cultivar")
                )

update.asreml(model)

# print(summary.asreml(model)$varcomp) # print the variance components

blups.asrml <- data.frame((model$predictions$pvals[1:4]))

H2cal

library(inti)
library(purrr)

#> First stage

envs <- levels(fb$env)

model <- 1:length(envs) %>% map(function(x) {
  
  model <- fb %>% filter(env %in% envs[x]) %>% 
    
    H2cal(trait = "yield"
          , gen.name = "cultivar"
          , rep.n = 4
          , fixed.model = "0 + (1|rep) + (1|rep:alpha) + cultivar"
          , random.model = "1 + (1|rep) + (1|rep:alpha) + (1|cultivar)"
          # , plot_diag = T
          , emmeans = F
          )
  
  blues <- model$blues %>% mutate(trial = levels(fb$env)[x])
  
  })

blues.h2cal <- bind_rows(model) %>% 
  separate(trial, c("zone", "location", "year")) %>% 
  mutate(across(c(yield, smith.w), as.numeric)) %>% 
  mutate(across(!c(yield, smith.w), as.factor))

#> Second stage

met <- blues.h2cal %>%
  mutate(across(!yield, as.factor)) %>%
  H2cal(trait = "yield"
        , gen.name = "cultivar"
        , rep.n = 4
        , env.n = 18
        , env.name = "location"
        , fixed.model = "0 + zone + (1|zone:location) + (1|zone:cultivar) + cultivar"
        , random.model = "1 + zone + (1|zone:location) + (1|zone:cultivar) + (1|cultivar)"
        # , plot_diag = T
        , emmeans = T
        # , weights = blues.h2cal$smith.w
        )

blups.h2cal <- met$blups
  • First stage
    • Fixed model with 0 + for avoid intercep and calculate all the BLUEs.
    • emmeans = F to calculate the Smith weitghts in the first stage

BLUEs comparison

blues.comp <- merge(blues.asreml 
                    , blues.h2cal 
                    , by = c("cultivar", "zone", "location"))

# plot(blues.comp$yield, blues.comp$yield_lsm)
rs <- cor(blues.comp$yield, blues.comp$yield_lsm) 
cat("r =", rs)
## r = 1

blues.comp %>% web_table(digits = 4)

The BLUEs correlation between H2Cal and asrml is (r = 1)

BLUPs comparison

blups.comp <- merge(blups.asrml, blups.h2cal
                    , by = c("cultivar"))

# plot(blups.comp$yield, blups.comp$predicted.value)
rs <- cor(blups.comp$yield, blups.comp$predicted.value) 
cat("r =", rs)
## r = 0.9818724

blups.comp %>% web_table(digits = 4)

The BLUPs correlation between H2Cal and asrml is (r = 0.9818724)

Single-stage analysis

asreml

library(asreml)

options("scipen"=100,"digits"= 4 )
asreml.options(maxit=100) # Set asreml iteration

##### Fit a single-stage model #####
## incomplete block and replicate location-specific
## location-specifice residual variance

mod <- asreml(fixed       = yield ~ zone,
              random      = ~ rep:at(location) + rep:alpha:at(location) + zone:location + 
                             cultivar + cultivar:zone:location+ cultivar:zone,
              residual    = ~ dsum(~(units)|location),
              data        = fb,
              predict     = predict.asreml(classify = "cultivar"))

update.asreml(mod)

blups.asreml <- data.frame((mod$predictions$pvals[1:4])) 

H2cal

library(inti)

model <- fb %>% 
  H2cal(trait = "yield"
        , gen.name = "cultivar"
        , env.name = "location"
        , rep.n = 2
        , env.n = 18
        , fixed.model = "0 + zone + (1|rep:location) + (1|rep:alpha:location) + (1|zone:location) + (1|cultivar:zone) + (1|cultivar:zone:location) + cultivar"
        , random.model = "1 + zone + (1|rep:location) + (1|rep:alpha:location) + (1|zone:location) + (1|cultivar:zone) + (1|cultivar:zone:location) + (1|cultivar)"
        , summary = T
        , emmeans = T
        # , plot_diag = T
        )
## Linear mixed model fit by REML ['lmerMod']
## Formula: yield ~ 1 + zone + (1 | rep:location) + (1 | rep:alpha:location) +  
##     (1 | zone:location) + (1 | cultivar:zone) + (1 | cultivar:zone:location) +  
##     (1 | cultivar)
##    Data: dt.rm
## Weights: weights
## 
## REML criterion at convergence: 11933.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0760 -0.3308 -0.0084  0.3698  4.0576 
## 
## Random effects:
##  Groups                 Name        Variance Std.Dev.
##  cultivar:zone:location (Intercept)  2209.5   47.00  
##  rep:alpha:location     (Intercept)   944.6   30.73  
##  cultivar:zone          (Intercept)   129.6   11.38  
##  rep:location           (Intercept)   334.0   18.28  
##  cultivar               (Intercept)   728.0   26.98  
##  zone:location          (Intercept) 48679.8  220.64  
##  Residual                            1396.8   37.37  
## Number of obs: 1069, groups:  
## cultivar:zone:location, 539; rep:alpha:location, 251; cultivar:zone, 90; rep:location, 36; cultivar, 30; zone:location, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   813.35      83.85   9.700
## zonenorth      51.57     129.67   0.398
## zonesouth      59.75     123.21   0.485
## 
## Correlation of Fixed Effects:
##           (Intr) znnrth
## zonenorth -0.644       
## zonesouth -0.678  0.439

blups.h2cal <- model$blups 

BLUPs comparison

blups.comp <- merge(blups.asreml, blups.h2cal, by = c("cultivar"))

# plot(blups.comp$predicted.value, blups.comp$yield)
rs <- cor(blups.comp$predicted.value, blups.comp$yield) 
cat("r =", rs)
## r = 0.9201732

blups.comp %>% web_table(digits = 4)

The BLUPs correlation between H2Cal and asrml is (r = 0.9201732)

References

Buntaran, H., H.-P. Piepho, P. Schmidt, J. Rydén, M. Halling, et al. 2020. Cross-validation of stagewise mixed-model analysis of Swedish variety trials with winter wheat and spring barley. Crop Science 60(5): 2221–2240. doi: 10.1002/csc2.20177.
Lozano-Isla, F. 2020. Inti: Tools and statistical procedures in plant science.