Medium rare. [R for BE/BA]

posted by Helmut Homepage – Vienna, Austria, 2020-07-13 13:52 (195 d 04:13 ago) – Posting: # 21684
Views: 10,239

Hi ElMaestro,

» Job done?

I prefer medium rare over well done (aka quick and dirty or clean and never)!
Below my more R-ish code. Changes:
TODO:

###############################
# Section 1: Household things #
###############################

GetData <- function(path, sep, dec) {
  # read data from a CSV-file
  if (missing(sep)) sep <- ","
  if (!sep %in% c(",", ";", "t"))
    stop("Column separator must be any of ',', ';', 't'.")
  if (sep == "t") sep <- "\t"
  if (missing(dec)) dec <- "."
  if (!dec %in% c(".", ","))
    stop("Decimal separator must be '.', or ','.")
  # strip quotes and skip eventual commentary lines
  D        <- read.csv(path, sep = sep, dec = dec,
                       quote = "", comment.char = "#")
  names(D) <- c("Subj", "Per", "Seq", "Trt", "Y")
  cols     <- c("Subj", "Per", "Seq", "Trt")
  D[cols]  <- lapply(D[cols], factor)
  return(invisible(D))
}

CreateX <- function(D) {
  # first the two treatments
  TrtT <- as.numeric(as.character(D$Trt) == "T")
  TrtR <- as.numeric(as.character(D$Trt) == "R")
  X <- cbind(TrtT, TrtR)
  R <- qr(X)$rank
  # now the periods
  for (p in unique(D$Per)) {
    v  <- as.numeric(D$Per == p)
    XX <- data.frame(X, v)
    names(XX)[ncol(XX)] <- paste0("P", p)
    rnk <- qr(XX)$rank
    if (rnk > R) {
      X <- XX
      R <- rnk
    }
  }
  for (q in unique(D$Seq)) {
    v  <- as.numeric(D$Seq == q)
    XX <- data.frame(X, v)
    names(XX)[ncol(XX)] <- paste0("Q", q)
    rnk <- qr(XX)$rank
    if (rnk > R) {
      X <- XX
      R <- rnk
    }
  }
  return(as.matrix(X))
}
Create.CovM <- function(Params) {
  # block diagonal covariance matrix
  varT  <- Params[1]
  varBR <- Params[2]
  varWR <- Params[3]
  varRT <- Params[4]
  Nobs  <- length(D$Y)
  V     <- matrix(0, ncol = Nobs, nrow = Nobs)
  for (iRow in 1:Nobs) {
    for (iCol in 1:Nobs) {
      if (iCol == iRow) { # diagonal elements
        if (D$Trt[iRow] == "T") {
          V[iRow, iCol] <- V[iRow, iCol] + varT
        } else {
          V[iRow, iCol] <- V[iRow, iCol] + varWR + varBR
        }
      } else {            # off diagonal elements
        if (D$Subj[iRow] == D$Subj[iCol]) {
          if (D$Trt[iCol] == D$Trt[iRow]) {
            V[iRow, iCol] <- V[iRow, iCol] + varBR
          } else {
            V[iRow, iCol] <- V[iRow, iCol] + varRT
          }
        }
      }
    }
  }
  return(V)
}

############################
# Section 2: Matrix things #
############################
Obj.F12 <- function(Pars) {
  # this is line 3 of page 10 of:
  # http://people.csail.mit.edu/xiuming/docs/tutorials/reml.pdf
  CovM  <- Create.CovM(Pars)
  A     <- -0.5*log(det(CovM))
  # TODO: NaNs produced in the next line cause t(X) is not a square matrix
  B     <- -0.5*log(det(t(X) %*% solve(CovM) %*% X))
  est.b <- solve(t(X) %*% solve(CovM) %*% X) %*% t(X) %*% solve(CovM) %*% y
  tmp   <- y - X %*% est.b
  C     <- -0.5 *(t(tmp) %*% solve(CovM) %*% tmp)
  C     <- as.numeric(C) # make a scalar for other solvers
  return(A+B+C)
}

########################
# Section 3: Execution #
########################
Some.Initial.Guesses <- function() {
  # design (all possible ones, error handling, full or partial)
  subjs <- unique(as.numeric(D$Subj))
  trts  <- sort(unique(as.character(D$Trt)), decreasing = TRUE)
  if (!length(trts) == 2) stop("Only two treatments supported.")
  if (sum(!trts %in% c("T", "R")) !=0)
    stop("treatments must be coded as 'T' and 'R'.")
  seqs  <- sort(unique(as.character(D$Seq)), decreasing = TRUE) # T first
  type  <- paste(seqs, collapse = "|") # identifier
  nseq  <- length(unique(as.character(D$Seq)))
  nper  <- length(unique(as.numeric(D$Per)))
  nsubj <- length(subjs)
  if (nchar(type) == 19) {  # 4-period 4-sequence full replicate designs
    if (nper != 4) stop("4 periods required in this full replicate design.")
    if (nseq != 4) stop("4 sequences required in this full replicate design.")
    des <- "full"
  }
  if (nchar(type) == 9) {  # 4-period full replicate designs
    if (nper != 4) stop("4 periods required in this full replicate design.")
    if (nseq != 2) stop("2 sequences required in this full replicate design.")
    des <- "full"
  }
  if (nchar(type) == 7) {  # 3-period replicates
    if (type %in% c("TRT|RTR", "TRR|RTT")) {
      if (nper != 3) stop("3 periods required in this full replicate design.")
      if (nseq != 2) stop("2 sequences required in this full replicate design.")
      des <- "full"
    }
    if (type == "TRR|RTR") {
      if (nper != 3) stop("3 periods required in the extra-reference design.")
      if (nseq != 2) stop("2 sequences required in the extra-reference design.")
      des <- "partial"
    }
  }
  if (nchar(type) == 11) { # Balaam's design or 3-sequence partial
    if (!type == "TR|RT|TT|RR") { # Balamm's
      if (nper != 3) stop("3 periods required in this partial replicate design.")
      if (nseq != 3) stop("3 sequences required in this partial replicate design.")
      des <- "partial"
    } else {                      # TRT|RTR|RRT
      if (nper != 2) stop("2 periods required in Balaam's design.")
      if (nseq != 4) stop("4 sequences required in Balaam's design.")
      des <- "full"
    }
  }
  if (type == "TR|RT") stop("TR|RT design not supported yet.")
  gm.T <- gm.R <- numeric(nsubj) # vector of geometric means
  for (subj in seq_along(subjs)) {
    gm.T[subj] <- mean(log(D$Y[which(D$Subj == subj & D$Trt == "T")]))
    gm.R[subj] <- mean(log(D$Y[which(D$Subj == subj & D$Trt == "R")]))
  }
  # get rid of missings
  gm.T <- gm.T[!is.na(gm.T)]  
  gm.R <- gm.R[!is.na(gm.R)]  
  if (des == "partial") { # guess varT (between + within)
    m    <- lm(log(Y) ~ Seq+Per, data = D[D$Trt == "T", ])
    varT <- anova(m)["Residuals", "Mean Sq"]
  } else {                # guess VarWT
    m     <- lm(log(Y) ~ Seq+Per+Subj, data = D[D$Trt == "T", ])
    varWT <- anova(m)["Residuals", "Mean Sq"]
  }
  # guess VarWR
  m     <- lm(log(Y) ~ Seq+Per+Subj, data = D[D$Trt == "R", ])
  varWR <- anova(m)["Residuals", "Mean Sq"]   # guess varBR
  varBR <- var(gm.R)-varWR
  # TODO: full replicates
  varRT <- 0.5*(varT+varBR)
  rslt  <- c(varT, varBR, varWR, varRT)
  return(rslt)
}
MyREML <- function() {
  Ini <- Some.Initial.Guesses()
  F   <- optim(par = Ini, fn = Obj.F12,
               control = list(reltol = 1e-12, trace = 0, fnscale = -1))
  Nms <- c("var_T", "var_bR", "var_wR", "covar_bTbR")
  X   <- data.frame(Nms, F$par, Ini)
  names(X) <- c("Component", "Estimate", "Initial")
  cat("Maximum         :", F$value,
    "\nEvaluations     :", F$counts[["function"]],
    "\nConvergence code:", F$convergence, "\n")
  return(X)
}

# Data has to have five columns in this order:
#   Subject (numeric), Period (numeric), Sequence (character),
#   Treatment (character), Response (numeric)
# Column separator 'sep' comma (default), semicolon, or t (tab)
# Decimal separator 'dec' period (default) or comma
# path <- "https://bebac.at/downloads/ds01.csv" # EMA full replicate
path <- "https://bebac.at/downloads/ds02.csv" # EMA partial replicate
D   <- NULL # nullify eventual previous one
D   <- GetData(path)
X   <- CreateX(D)
y   <- log(D$Y)
# the workhorse
res <- MyREML() # patience!
G.matrix <- data.frame(c(NA, res$Estimate[4]),
                       c(res$Estimate[4], res$Estimate[2]))
names(G.matrix) <- 1:2
print(res, row.names = FALSE); cat("G matrix:\n"); print(G.matrix)


Gives:
Maximum         : 75.98722
Evaluations     : 197
Convergence code: 0
Warning message:
In log(det(t(X) %*% solve(CovM) %*% X)) : NaNs produced

  Component   Estimate    Initial
      var_T 0.07049482 0.06841615
     var_bR 0.03621594 0.02776728
     var_wR 0.01324652 0.01240137
 covar_bTbR 0.04563735 0.04809171
G matrix:
           1          2
1         NA 0.04563735
2 0.04563735 0.03621594

Dif-tor heh smusma 🖖
Helmut Schütz
[image]

The quality of responses received is directly proportional to the quality of the question asked. 🚮
Science Quotes

Complete thread:

Activity
 Admin contact
21,310 posts in 4,445 threads, 1,489 registered users;
online 3 (0 registered, 3 guests [including 2 identified bots]).
Forum time: Sunday 17:05 CET (Europe/Vienna)

Every man gets a narrower and narrower field of knowledge
in which he must be an expert in order to compete with other people.
The specialist knows more and more about less and less
and finally knows everything about nothing.    Konrad Lorenz

The Bioequivalence and Bioavailability Forum is hosted by
BEBAC Ing. Helmut Schütz
HTML5