Bioequivalence and Bioavailability Forum

Main page Policy/Terms of Use Abbreviations Latest Posts

 Log-in |  Register |  Search

Back to the forum  Query: 2017-11-24 06:46 CET (UTC+1h)
 

just y=ax+b [General Sta­tis­tics]

posted by Helmut Homepage - Vienna, Austria, 2017-10-10 18:15  - Posting: # 17885
Views: 641

Hi ElMaestro,

» I would like to know how to judge if the correlation is quantitatively appearing to be best for the first line or for the second line.

Sumfink along this?

x1  <- c(1, 2, 3, 4, 5)
y1  <- c(10.5, 11.4, 12.6, 13.3, 14.6)
n1  <- length(x1)
x2  <- c(1, 2, 3, 4, 5)
y2  <- c(10.5, 11.4, NA, 13.3, 14.6)
n2  <- length(x2)
x3  <- c(1, 2, 4, 5)
y3  <- c(10.5, 11.4, 13.3, 14.6)
n3  <- length(x3)
raw <- matrix(c(rep(1, n1), rep(2, n2), rep(3, n3),
              x1, x2, x3, y1, y2, y3,
              rep(n1, each=n1), rep(n2, each=n2), rep(n3, each=n3)),
              nrow=n1+n2+n3, ncol=4, byrow=FALSE,
              dimnames=list(NULL, c("set", "x", "y", "n")))
raw <- as.data.frame(raw); attach(raw)
set <- unique(set)
res <- data.frame(set=set, n=c(n1, n2, n3), NAs=NA,
                  int=NA, slope=NA, RSE=NA, R2.adj=NA)
for (j in seq_along(set)) {
  mod              <- lm(y ~ x, data=raw, set==j)
  sum.mod          <- summary(mod)
  res[j, "NAs"]    <- sum(is.na(raw$y[raw$set==j]))
  res[j, "int"]    <- coef(mod)[[1]]
  res[j, "slope"]  <- coef(mod)[[2]]
  res[j, "RSE"]    <- sum.mod$sigma
  res[j, "R2.adj"] <- sum.mod$adj.r.squared
}
print(raw, row.names=FALSE)
print(res[with(res, order(-R2.adj, +n)), ], row.names=FALSE)
detach(raw)


Fits linear models to the sets and gives results sorted by R²adj (descending) and n (ascending). y-values of set 2 and 3 are identical. Shows that NA gives the same result as a completely missing row due to the default na.action=na.omit in lm().

 set n NAs  int slope       RSE    R2.adj
   3 4   0 9.42  1.01 0.1565248 0.9928293
   2 5   1 9.42  1.01 0.1565248 0.9928293
   1 5   0 9.45  1.01 0.1494434 0.9912998



Edit: Toys for boys:

a    <- 9.45
b    <- 1.01
sets <- 16
n    <- 5
x    <- 1:n
sd   <- 0.25
pct  <- 20 # percentage of NAs
y    <- NULL
for (j in 1:sets) {
  y <- c(y, a + b*x + rnorm(n, 0, sd))
}
raw  <- matrix(c(rep(1:sets, each=n), rep(x, sets), y,
                 rep(n, n*sets)), nrow=n*sets, ncol=4, byrow=FALSE,
               dimnames=list(NULL, c("set", "x", "y", "n")))
loc  <- floor(runif(as.integer(pct*sets*n/100), min=1, max=n*sets))
raw[loc, "y"] <- NA
raw  <- as.data.frame(raw); attach(raw)
set  <- 1:sets
res  <- data.frame(set=set, n=rep(n, sets), NAs=NA, used=NA,
                  int=NA, slope=NA, RSE=NA, R2.adj=NA)
op   <- par(no.readonly=TRUE)
split.screen(c(4, 4)) # adjust for the number of sets!
for (j in seq_along(set)) {
  mod              <- lm(y ~ x, data=raw, set==j)
  sum.mod          <- summary(mod)
  res[j, "NAs"]    <- sum(is.na(raw$y[raw$set==j]))
  res[j, "used"]   <- res[j, "n"] - res[j, "NAs"]
  res[j, "int"]    <- coef(mod)[[1]]
  res[j, "slope"]  <- coef(mod)[[2]]
  res[j, "RSE"]    <- sum.mod$sigma
  res[j, "R2.adj"] <- sum.mod$adj.r.squared
  screen(j)
  par(mar=c(1.5, 1.5, 1, 0.1), mgp=c(2, 0.5, 0), line=2, tcl=-0.25)
  if (res[j, "used"] >= 2) {
    plot(raw$x[raw$set==j], raw$y[raw$set==j], las=1, xlab="", ylab="",
         ylim=c((a+b*x[1])*0.95, (a+b*x[n])/0.95), cex.axis=0.65,
         cex.main=0.65, font.main=1,
         main=paste("Rsq.adj =", signif(res[j, "R2.adj"], 6)))
    ifelse (res[j, "used"] == 2, col <- "red", col <- "blue")
    abline(coef=coef(mod), col=col)
    legend("topleft", cex=0.65, box.lty=0, x.intersp=0,
      legend=paste0("set ", j, ": ", res[j, "used"], " used"))
  }
}
res <- res[with(res, order(-R2.adj, +n)), ]
screen(head(res$set, 1))
par(mar=c(1.5, 1.5, 1, 0.1))
legend("right", cex=0.75, box.lty=0, x.intersp=0, legend="\"best\"")
screen(tail(res$set, 1))
par(mar=c(1.5, 1.5, 1, 0.1))
legend("right", cex=0.75, box.lty=0, x.intersp=0, legend="\"worst\"")
close.screen(all=TRUE)
par(op)
detach(raw)
print(res, row.names=FALSE)


[image]Regards,
Helmut Schütz 
[image]

The quality of responses received is directly proportional to the quality of the question asked. ☼
Science Quotes

Complete thread:

Back to the forum Activity
 Mix view
Bioequivalence and Bioavailability Forum | Admin contact
17,494 Posts in 3,746 Threads, 1,087 registered users;
33 users online (0 registered, 33 guests).

The belief that there is only one truth and
that oneself is in possession of it,
seems to me the deepest root of all
that is evil in the world.    Max Born

The BIOEQUIVALENCE / BIOAVAILABILITY FORUM is hosted by
BEBAC Ing. Helmut Schütz
XHTML/CSS RSS Feed