Medium rare. [R for BE/BA]

posted by Helmut Homepage – Vienna, Austria, 2020-07-13 13:52 (79 d 17:21 ago) – Posting: # 21684
Views: 9,400

Hi ElMaestro,

» Job done?

I prefer medium rare over well done (aka quick and dirty or clean and never)!
Below my more R-ish code. Changes:
TODO:

###############################
# Section 1: Household things #
###############################

GetData <- function(path, sep, dec) {
  # read data from a CSV-file
  if (missing(sep)) sep <- ","
  if (!sep %in% c(",", ";", "t"))
    stop("Column separator must be any of ',', ';', 't'.")
  if (sep == "t") sep <- "\t"
  if (missing(dec)) dec <- "."
  if (!dec %in% c(".", ","))
    stop("Decimal separator must be '.', or ','.")
  # strip quotes and skip eventual commentary lines
  D        <- read.csv(path, sep = sep, dec = dec,
                       quote = "", comment.char = "#")
  names(D) <- c("Subj", "Per", "Seq", "Trt", "Y")
  cols     <- c("Subj", "Per", "Seq", "Trt")
  D[cols]  <- lapply(D[cols], factor)
  return(invisible(D))
}

CreateX <- function(D) {
  # first the two treatments
  TrtT <- as.numeric(as.character(D$Trt) == "T")
  TrtR <- as.numeric(as.character(D$Trt) == "R")
  X <- cbind(TrtT, TrtR)
  R <- qr(X)$rank
  # now the periods
  for (p in unique(D$Per)) {
    v  <- as.numeric(D$Per == p)
    XX <- data.frame(X, v)
    names(XX)[ncol(XX)] <- paste0("P", p)
    rnk <- qr(XX)$rank
    if (rnk > R) {
      X <- XX
      R <- rnk
    }
  }
  for (q in unique(D$Seq)) {
    v  <- as.numeric(D$Seq == q)
    XX <- data.frame(X, v)
    names(XX)[ncol(XX)] <- paste0("Q", q)
    rnk <- qr(XX)$rank
    if (rnk > R) {
      X <- XX
      R <- rnk
    }
  }
  return(as.matrix(X))
}
Create.CovM <- function(Params) {
  # block diagonal covariance matrix
  varT  <- Params[1]
  varBR <- Params[2]
  varWR <- Params[3]
  varRT <- Params[4]
  Nobs  <- length(D$Y)
  V     <- matrix(0, ncol = Nobs, nrow = Nobs)
  for (iRow in 1:Nobs) {
    for (iCol in 1:Nobs) {
      if (iCol == iRow) { # diagonal elements
        if (D$Trt[iRow] == "T") {
          V[iRow, iCol] <- V[iRow, iCol] + varT
        } else {
          V[iRow, iCol] <- V[iRow, iCol] + varWR + varBR
        }
      } else {            # off diagonal elements
        if (D$Subj[iRow] == D$Subj[iCol]) {
          if (D$Trt[iCol] == D$Trt[iRow]) {
            V[iRow, iCol] <- V[iRow, iCol] + varBR
          } else {
            V[iRow, iCol] <- V[iRow, iCol] + varRT
          }
        }
      }
    }
  }
  return(V)
}

############################
# Section 2: Matrix things #
############################
Obj.F12 <- function(Pars) {
  # this is line 3 of page 10 of:
  # http://people.csail.mit.edu/xiuming/docs/tutorials/reml.pdf
  CovM  <- Create.CovM(Pars)
  A     <- -0.5*log(det(CovM))
  # TODO: NaNs produced in the next line cause t(X) is not a square matrix
  B     <- -0.5*log(det(t(X) %*% solve(CovM) %*% X))
  est.b <- solve(t(X) %*% solve(CovM) %*% X) %*% t(X) %*% solve(CovM) %*% y
  tmp   <- y - X %*% est.b
  C     <- -0.5 *(t(tmp) %*% solve(CovM) %*% tmp)
  C     <- as.numeric(C) # make a scalar for other solvers
  return(A+B+C)
}

########################
# Section 3: Execution #
########################
Some.Initial.Guesses <- function() {
  # design (all possible ones, error handling, full or partial)
  subjs <- unique(as.numeric(D$Subj))
  trts  <- sort(unique(as.character(D$Trt)), decreasing = TRUE)
  if (!length(trts) == 2) stop("Only two treatments supported.")
  if (sum(!trts %in% c("T", "R")) !=0)
    stop("treatments must be coded as 'T' and 'R'.")
  seqs  <- sort(unique(as.character(D$Seq)), decreasing = TRUE) # T first
  type  <- paste(seqs, collapse = "|") # identifier
  nseq  <- length(unique(as.character(D$Seq)))
  nper  <- length(unique(as.numeric(D$Per)))
  nsubj <- length(subjs)
  if (nchar(type) == 19) {  # 4-period 4-sequence full replicate designs
    if (nper != 4) stop("4 periods required in this full replicate design.")
    if (nseq != 4) stop("4 sequences required in this full replicate design.")
    des <- "full"
  }
  if (nchar(type) == 9) {  # 4-period full replicate designs
    if (nper != 4) stop("4 periods required in this full replicate design.")
    if (nseq != 2) stop("2 sequences required in this full replicate design.")
    des <- "full"
  }
  if (nchar(type) == 7) {  # 3-period replicates
    if (type %in% c("TRT|RTR", "TRR|RTT")) {
      if (nper != 3) stop("3 periods required in this full replicate design.")
      if (nseq != 2) stop("2 sequences required in this full replicate design.")
      des <- "full"
    }
    if (type == "TRR|RTR") {
      if (nper != 3) stop("3 periods required in the extra-reference design.")
      if (nseq != 2) stop("2 sequences required in the extra-reference design.")
      des <- "partial"
    }
  }
  if (nchar(type) == 11) { # Balaam's design or 3-sequence partial
    if (!type == "TR|RT|TT|RR") { # Balamm's
      if (nper != 3) stop("3 periods required in this partial replicate design.")
      if (nseq != 3) stop("3 sequences required in this partial replicate design.")
      des <- "partial"
    } else {                      # TRT|RTR|RRT
      if (nper != 2) stop("2 periods required in Balaam's design.")
      if (nseq != 4) stop("4 sequences required in Balaam's design.")
      des <- "full"
    }
  }
  if (type == "TR|RT") stop("TR|RT design not supported yet.")
  gm.T <- gm.R <- numeric(nsubj) # vector of geometric means
  for (subj in seq_along(subjs)) {
    gm.T[subj] <- mean(log(D$Y[which(D$Subj == subj & D$Trt == "T")]))
    gm.R[subj] <- mean(log(D$Y[which(D$Subj == subj & D$Trt == "R")]))
  }
  # get rid of missings
  gm.T <- gm.T[!is.na(gm.T)]  
  gm.R <- gm.R[!is.na(gm.R)]  
  if (des == "partial") { # guess varT (between + within)
    m    <- lm(log(Y) ~ Seq+Per, data = D[D$Trt == "T", ])
    varT <- anova(m)["Residuals", "Mean Sq"]
  } else {                # guess VarWT
    m     <- lm(log(Y) ~ Seq+Per+Subj, data = D[D$Trt == "T", ])
    varWT <- anova(m)["Residuals", "Mean Sq"]
  }
  # guess VarWR
  m     <- lm(log(Y) ~ Seq+Per+Subj, data = D[D$Trt == "R", ])
  varWR <- anova(m)["Residuals", "Mean Sq"]   # guess varBR
  varBR <- var(gm.R)-varWR
  # TODO: full replicates
  varRT <- 0.5*(varT+varBR)
  rslt  <- c(varT, varBR, varWR, varRT)
  return(rslt)
}
MyREML <- function() {
  Ini <- Some.Initial.Guesses()
  F   <- optim(par = Ini, fn = Obj.F12,
               control = list(reltol = 1e-12, trace = 0, fnscale = -1))
  Nms <- c("var_T", "var_bR", "var_wR", "covar_bTbR")
  X   <- data.frame(Nms, F$par, Ini)
  names(X) <- c("Component", "Estimate", "Initial")
  cat("Maximum         :", F$value,
    "\nEvaluations     :", F$counts[["function"]],
    "\nConvergence code:", F$convergence, "\n")
  return(X)
}

# Data has to have five columns in this order:
#   Subject (numeric), Period (numeric), Sequence (character),
#   Treatment (character), Response (numeric)
# Column separator 'sep' comma (default), semicolon, or t (tab)
# Decimal separator 'dec' period (default) or comma
# path <- "https://bebac.at/downloads/ds01.csv" # EMA full replicate
path <- "https://bebac.at/downloads/ds02.csv" # EMA partial replicate
D   <- NULL # nullify eventual previous one
D   <- GetData(path)
X   <- CreateX(D)
y   <- log(D$Y)
# the workhorse
res <- MyREML() # patience!
G.matrix <- data.frame(c(NA, res$Estimate[4]),
                       c(res$Estimate[4], res$Estimate[2]))
names(G.matrix) <- 1:2
print(res, row.names = FALSE); cat("G matrix:\n"); print(G.matrix)


Gives:
Maximum         : 75.98722
Evaluations     : 197
Convergence code: 0
Warning message:
In log(det(t(X) %*% solve(CovM) %*% X)) : NaNs produced

  Component   Estimate    Initial
      var_T 0.07049482 0.06841615
     var_bR 0.03621594 0.02776728
     var_wR 0.01324652 0.01240137
 covar_bTbR 0.04563735 0.04809171
G matrix:
           1          2
1         NA 0.04563735
2 0.04563735 0.03621594

Dif-tor heh smusma 🖖
Helmut Schütz
[image]

The quality of responses received is directly proportional to the quality of the question asked. 🚮
Science Quotes

Complete thread:

Activity
 Admin contact
21,090 posts in 4,398 threads, 1,469 registered users;
online 23 (0 registered, 23 guests [including 4 identified bots]).
Forum time: Thursday 07:14 CEST (Europe/Vienna)

In these days, a man who says a thing cannot be done
is quite apt to be interrupted by some idiot doing it.    Elbert Green Hubbard

The Bioequivalence and Bioavailability Forum is hosted by
BEBAC Ing. Helmut Schütz
HTML5