just y=ax+b [General Statistics]
❝ I would like to know how to judge if the correlation is quantitatively appearing to be best for the first line or for the second line.
Sumfink along this?
x1 <- c(1, 2, 3, 4, 5)
y1 <- c(10.5, 11.4, 12.6, 13.3, 14.6)
n1 <- length(x1)
x2 <- c(1, 2, 3, 4, 5)
y2 <- c(10.5, 11.4, NA, 13.3, 14.6)
n2 <- length(x2)
x3 <- c(1, 2, 4, 5)
y3 <- c(10.5, 11.4, 13.3, 14.6)
n3 <- length(x3)
raw <- matrix(c(rep(1, n1), rep(2, n2), rep(3, n3),
x1, x2, x3, y1, y2, y3,
rep(n1, each=n1), rep(n2, each=n2), rep(n3, each=n3)),
nrow=n1+n2+n3, ncol=4, byrow=FALSE,
dimnames=list(NULL, c("set", "x", "y", "n")))
raw <- as.data.frame(raw); attach(raw)
set <- unique(set)
res <- data.frame(set=set, n=c(n1, n2, n3), NAs=NA,
int=NA, slope=NA, RSE=NA, R2.adj=NA)
for (j in seq_along(set)) {
mod <- lm(y ~ x, data=raw, set==j)
sum.mod <- summary(mod)
res[j, "NAs"] <- sum(is.na(raw$y[raw$set==j]))
res[j, "int"] <- coef(mod)[[1]]
res[j, "slope"] <- coef(mod)[[2]]
res[j, "RSE"] <- sum.mod$sigma
res[j, "R2.adj"] <- sum.mod$adj.r.squared
}
print(raw, row.names=FALSE)
print(res[with(res, order(-R2.adj, +n)), ], row.names=FALSE)
detach(raw)
Fits linear models to the sets and gives results sorted by R²adj (descending) and n (ascending). y-values of set 2 and 3 are identical. Shows that
NA
gives the same result as a completely missing row due to the default na.action=na.omit
in lm()
. set n NAs int slope RSE R2.adj
3 4 0 9.42 1.01 0.1565248 0.9928293
2 5 1 9.42 1.01 0.1565248 0.9928293
1 5 0 9.45 1.01 0.1494434 0.9912998
Edit: Toys for boys:
a <- 9.45
b <- 1.01
sets <- 16
n <- 5
x <- 1:n
sd <- 0.25
pct <- 20 # percentage of NAs
y <- NULL
for (j in 1:sets) {
y <- c(y, a + b*x + rnorm(n, 0, sd))
}
raw <- matrix(c(rep(1:sets, each=n), rep(x, sets), y,
rep(n, n*sets)), nrow=n*sets, ncol=4, byrow=FALSE,
dimnames=list(NULL, c("set", "x", "y", "n")))
loc <- floor(runif(as.integer(pct*sets*n/100), min=1, max=n*sets))
raw[loc, "y"] <- NA
raw <- as.data.frame(raw); attach(raw)
set <- 1:sets
res <- data.frame(set=set, n=rep(n, sets), NAs=NA, used=NA,
int=NA, slope=NA, RSE=NA, R2.adj=NA)
op <- par(no.readonly=TRUE)
split.screen(c(4, 4)) # adjust for the number of sets!
for (j in seq_along(set)) {
mod <- lm(y ~ x, data=raw, set==j)
sum.mod <- summary(mod)
res[j, "NAs"] <- sum(is.na(raw$y[raw$set==j]))
res[j, "used"] <- res[j, "n"] - res[j, "NAs"]
res[j, "int"] <- coef(mod)[[1]]
res[j, "slope"] <- coef(mod)[[2]]
res[j, "RSE"] <- sum.mod$sigma
res[j, "R2.adj"] <- sum.mod$adj.r.squared
screen(j)
par(mar=c(1.5, 1.5, 1, 0.1), mgp=c(2, 0.5, 0), line=2, tcl=-0.25)
if (res[j, "used"] >= 2) {
plot(raw$x[raw$set==j], raw$y[raw$set==j], las=1, xlab="", ylab="",
ylim=c((a+b*x[1])*0.95, (a+b*x[n])/0.95), cex.axis=0.65,
cex.main=0.65, font.main=1,
main=paste("Rsq.adj =", signif(res[j, "R2.adj"], 6)))
ifelse (res[j, "used"] == 2, col <- "red", col <- "blue")
abline(coef=coef(mod), col=col)
legend("topleft", cex=0.65, box.lty=0, x.intersp=0,
legend=paste0("set ", j, ": ", res[j, "used"], " used"))
}
}
res <- res[with(res, order(-R2.adj, +n)), ]
screen(head(res$set, 1))
par(mar=c(1.5, 1.5, 1, 0.1))
legend("right", cex=0.75, box.lty=0, x.intersp=0, legend="\"best\"")
screen(tail(res$set, 1))
par(mar=c(1.5, 1.5, 1, 0.1))
legend("right", cex=0.75, box.lty=0, x.intersp=0, legend="\"worst\"")
close.screen(all=TRUE)
par(op)
detach(raw)
print(res, row.names=FALSE)
Dif-tor heh smusma 🖖🏼 Довге життя Україна!
![[image]](https://static.bebac.at/pics/Blue_and_yellow_ribbon_UA.png)
Helmut Schütz
![[image]](https://static.bebac.at/img/CC by.png)
The quality of responses received is directly proportional to the quality of the question asked. 🚮
Science Quotes
Complete thread:
- Goodness of fits: one model, different datasets ElMaestro 2017-10-06 23:01 [General Statistics]
- Goodness of fits: one model, different datasets nobody 2017-10-07 16:03
- Experimental setup, details ElMaestro 2017-10-07 18:06
- Visualization ElMaestro 2017-10-07 19:07
- multiple regression? Helmut 2017-10-08 17:17
- just y=ax+b ElMaestro 2017-10-08 17:30
- just y=ax+b Helmut 2017-10-08 17:35
- just y=ax+b ElMaestro 2017-10-08 17:50
- just y=ax+b nobody 2017-10-08 20:26
- ANCOVA with R? yjlee168 2017-10-08 21:28
- just y=ax+b DavidManteigas 2017-10-09 10:34
- just y=ax+b nobody 2017-10-09 10:45
- just y=ax+bHelmut 2017-10-10 18:15
- just y=ax+b ElMaestro 2017-10-08 17:50
- just y=ax+b Helmut 2017-10-08 17:35
- just y=ax+b ElMaestro 2017-10-08 17:30
- Experimental setup, details ElMaestro 2017-10-07 18:06
- Goodness of fits: one model, different datasets nobody 2017-10-07 16:03