library(gdata)
?keep
data(women, cars)
keep(cars)
## To remove all objects except cars, run:
## keep(cars, sure=TRUE)
"A big computer, a complex algorithm and a long time does not equal science." -- Robert Gentleman
lunedì 26 novembre 2007
martedì 16 ottobre 2007
Duplicate a figure with R
Combining pairwise sequence similarity and support vector machines for detecting remote protein evolutionary and structural relationships. Journal of computational biology. 2003;10(6):857-68 using only the Base graphics system in R.
Data can be downloaded from here (if it doesn't work, use Google's cache).
I imported the file in a spreadsheet and then I copied & pasted it in R.
jcb.scores = read.delim("clipboard")
attach(jcb.scores)
pdf("recomb_scores.pdf")
par(las =1) # To have horizontal labels for axes 2 and 4
plot(y~sort(SVM.pairwise.ROC, decreasing = TRUE), pch = 3, cex = 0.5,
xlab = "AUC", ylab = "Number of families", axes = FALSE,
xlim = c(0,1), ylim = c(0,60))
lines(y~sort(SVM.pairwise.ROC, decreasing = TRUE), lty = 1)
points(y~sort(FPS.ROC, decreasing = TRUE), pch = 4, cex = 0.5)
lines(y~sort(FPS.ROC, decreasing = TRUE), lty = 2)
points(y~sort(SVM.Fisher.ROC, decreasing = TRUE), pch = 8, cex = 0.5)
lines(y~sort(SVM.Fisher.ROC, decreasing = TRUE), lty = 3)
points(y~sort(SAM.ROC, decreasing = TRUE), pch = 0, cex = 0.5)
lines(y~sort(SAM.ROC, decreasing = TRUE), lty = 4)
points(y~sort(PSI.BLAST.ROC, decreasing = TRUE), pch = 15, cex = 0.5)
lines(y~sort(PSI.BLAST.ROC, decreasing = TRUE), lty = 5)
axis(1, at = seq(0,1,0.2), labels = c(0,0.2,0.4,0.6,0.8,1), tcl = 0.25, pos = 0) # tcl = 0.25 small ticks toward the curve
axis(2, at = c(0,10,20,30,40,50,60), labels=c(0,10,20,30,40,50,60), tcl= 0.25 , pos = 0)
axis(2, at = c(0,10,20,30,40,60), tcl= 0.25,labels = F, pos = 0)
axis(3, tick = T, tcl= 0.25, labels = F, pos = 60)
axis(4, at = c(0,10,20,30,40,50), tcl= 0.25, labels = F, pos = 1)
axis(4, at = c(0,10,20,30,40,60), tcl= 0.25, labels = F, pos = 1)
# To locate the legend interactively
xy.legend = locator(1)
# right-justifying a set of labels: thanks to Uwe Ligges
temp <- legend(xy.legend, legend = c("SVM-pairwise", "FPS","SVM-Fisher", "SAM","PSI-BLAST"), text.width = strwidth("SVM-pairwise"), xjust = 1, yjust = 1, lty = c(1,2,3,4,5), pch = c(3,4,8,0,15), bty = "n", cex = 0.8, title = "")
dev.off()
detach(jcb.scores)
The original image:
My version of the image (not exacly identical but...):
venerdì 14 settembre 2007
Plotting two or more overlapping density plots on the same graph
See this thread from StackOverflow for other ways to solve this task.
plot.multi.dens <- function(s)
{
junk.x = NULL
junk.y = NULL
for(i in 1:length(s))
{
junk.x = c(junk.x, density(s[[i]])$x)
junk.y = c(junk.y, density(s[[i]])$y)
}
xr <- range(junk.x)
yr <- range(junk.y)
plot(density(s[[1]]), xlim = xr, ylim = yr, main = "")
for(i in 1:length(s))
{
lines(density(s[[i]]), xlim = xr, ylim = yr, col = i)
}
}
#usage:
x = rnorm(1000,0,1)
y = rnorm(1000,0,2)
z = rnorm(1000,2,1.5)
# the input of the following function MUST be a numeric list
plot.multi.dens(list(x,y,z))
library(Hmisc)
le <- largest.empty(x,y,.1,.1)
legend(le,legend=c("x","y","z"), col=(1:3), lwd=2, lty = 1)
giovedì 9 agosto 2007
R package installation and administration
the packages in R:
# install a package
install.packages("ROCR")
# visualize package version
package_version("pamr")
# update a package
update.packages("Cairo")
# remove a package
remove.packages("RGtk2")
venerdì 3 agosto 2007
Sorting/ordering a data.frame according specific columns
x = rnorm(20)
y = sample(rep(1:2, each = 10))
z = sample(rep(1:4, 5))
data.df <- data.frame(values = x, labels.1 = y, labels.2 = z)
print(data.df)
# data ordered according to "labels.1" column
# and then "labels.2" column
nams <- c("labels.1", "labels.2")
data.df.sorted = data.df[do.call(order, data.df[nams]), ]
print(data.df.sorted)
giovedì 2 agosto 2007
Receiver Operating Characteristic (ROC) Curve in ROCR and verification packages
# it allows two different plots in the same frame par(mfrow = c(1,2)) # plot a ROC curve for a single prediction run # and color the curve according to cutoff. library(ROCR) data(ROCR.simple) pred <- prediction(ROCR.simple$predictions, ROCR.simple$labels) perf <- performance(pred,"tpr", "fpr") plot(perf,colorize = TRUE) # plot a ROC curve for a single prediction run # with CI by bootstrapping and fitted curve library(verification) roc.plot(ROCR.simple$labels,ROCR.simple$predictions, xlab = "False positive rate", ylab = "True positive rate", main = NULL, CI = T, n.boot = 100, plot = "both", binormal = TRUE)
lunedì 30 luglio 2007
screen - an other VERY useful Unix tool
from R News Vol. 7/1 April 2007 (http://cran.r-project.org/doc/Rnews/Rnews_2007-1.pdf):
If you need to run R code that executes for long periods of time upon remote machines, this amazing unix tool would became your best friend!
screen is a so-called terminal multiplexor, which allows us to create, shuffle, share, and suspend command line sessions within one window. It provides protection against disconnections and the flexibility to retrieve command line sessions remotely.
Starting using this utility is easy like ABC:
- Log in to remote server
- Run screen
- Run R and the long calculation
- Detach screen (Ctrl-a, Ctrl-d)
- Logout
The R session continues working in the background, contained within the screen session. If we want to revisit the session to check its progress, then we:
- Log in remotely via secure shell
- Start screen -r, which recalls the unattached session
- Examine how your calculation/script is performing
- Detach the screen session, (Ctrl-a, Ctrl-d)
- Log out
This procedure can be used, clearly, for invoking whatever unix program/command you need to use; it is sufficient to substitute the R invoking command with your invoking command line program(for example python).
As usual in the shell-space, invoking man
lunedì 16 luglio 2007
R upgrading on Windows© revisited
When I update R the following has worked for me (Windows XP)
1. Install the new version to a new directory (say C:\Program Files\R\R-2.5.1).
2. Rename the new library subdirectory to library2.
3. Copy the entire contents of the old library subdirectory (say
C:\Program Files\R\R-2.4.0\library\ to the new R root to create
C:\Program Files\R\R-2.5.1\library\ .
4. Copy the contents of library2 to library to update your basic library.
5. Now start your new version of R and update packages from the GUI or
from the R console. (You may need to firs check Rprofile .site to
ensure that no packages have been loaded)
6. On occasion I have got warning messages when I tried to load
packages after this procedure. This has been cleared by running
update.packages(checkBuilt = TRUE)
This checks that your packages have been built with the latest
version. When I do this I agree to install all available updates.
7. You may wish to copy various autoloads etc from your old
Rprofile.site to your new Rprofile.site. I understand that there are
some compatibility problems with 2.5.1 and SciViews so be careful.
mercoledì 20 giugno 2007
String manipulation, insert delim
I want to be able to insert delimiters, say commas, into a string
of characters at uneven intervals such that:
foo<-c("haveaniceday")# my string of character
bar<-c(4,1,4,3) # my vector of uneven intervals
my.fun(foo,bar) # some function that places delimiters appropriately
have,a,nice,day # what the function would ideally return
1)
paste(read.fwf(textConnection(foo), bar, as.is = TRUE), collapse = ",")
[1] "have,a,nice,day"
2)
my.function <- function(foo, bar){
# construct a matrix with start/end character positions
start <- head(cumsum(c(1, bar)), -1) # delete last one
sel <- cbind(start=start,end=start + bar -1)
strings <- apply(sel, 1, function(x) substr(foo, x[1], x[2]))
paste(strings, collapse=',')
}
my.function(foo, bar)
[1] "have,a,nice,day"
venerdì 8 giugno 2007
Back to back historgram
library(Hmisc)
age <- rnorm(1000,50,10)
sex <- sample(c('female','male'),1000,TRUE)
out <- histbackback(split(age, sex), probability=TRUE, xlim=c(-.06,.06), main = 'Back to Back Histogram')
#! just adding color
barplot(-out$left, col="red" , horiz=TRUE, space=0, add=TRUE, axes=FALSE)
barplot(out$right, col="blue", horiz=TRUE, space=0, add=TRUE, axes=FALSE)
lunedì 4 giugno 2007
How do you get the most common row from a matrix?
array(1:3,dim=c(4,5))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 2 3 1 2
[2,] 2 3 1 2 3
[3,] 3 1 2 3 1
[4,] 1 2 3 1 2
in which rows 1 and 4 are similar, I want to find that vector c(1,2,3,1,2).
library(cluster)
x <- array(1:3,dim=c(4,5))
dissim <- as.matrix(daisy(as.data.frame(x)))
dissim[!upper.tri(dissim)] <- NA
unique(x[which(dissim == 0, arr.ind=TRUE), ])
or
count <- table(apply(x, 1, paste, collapse=" "))
count[which.max(count)]
venerdì 1 giugno 2007
R number output format
?formatC formatC(.000000012, format='fg') [1] "0.000000012"also
?sprintf sprintf("%.10f", 0.0000000012) [1] "0.0000000012"or
format(.0000012, scientific=FALSE) [1] "0.0000012"
martedì 29 maggio 2007
Detecting outliers through boxplots of the features
giovedì 24 maggio 2007
Selecting “contrasting” colors
What I want to be able to do is to place text on a background of arbitrary
(but known RGB) colour so that the text is legible.
I guess that this is better described as a "contrasting" than a "complementary" colour.
Since luminance contrasts are necessary and sufficient for readable text,
you could use white for dark colors and black for light colors.
Luminance is roughly proportional to 0.2*(R^2.4)+0.6*(G^2.4),
suggesting something like
lightdark <-function (color){
rgb <- col2rgb(color)/255
L <- c(0.2, 0.6, 0) %*% rgb
ifelse(L >= 0.2, "#000060", "#FFFFA0")
}
This uses a pale yellow for dark backgrounds and a dark blue for light
backgrounds, and it seems to work reasonably well.
mercoledì 23 maggio 2007
Scatter plot with axes drawn on the same scale
x <- sample(10:200,40)
y <- sample(20:100,40)
windows(width = max(x),height = max(y))
plot(x,y)
# try:
plot(x, y, asp = 1)
# or, better:
library(MASS)
eqscplot(x,y)
#or
library(lattice)
xyplot(y ~ x, aspect = "iso")
martedì 22 maggio 2007
How to draw a plot with two Y axises and one X axis
lunedì 21 maggio 2007
Conversion of column matrix into a vector without duplicate
A = matrix(c("a","b","c","d","e","u","v",rep("x",3)),5,2,byrow=F)
unique(A[, 2])
sabato 19 maggio 2007
Running R Programs on clusters
$ R CMD BATCH [options] my_script.R [outfile]
$ nohup nice -n 14 R CMD BATCH myRfile.R &
The output file lists the commands from the script file and their outputs. If no outfile is specified, the name used is that of 'infile' and '.Rout' is appended to outfile. To stop all the usual R command line information from being written to the outfile, add this as first line to my_script.R file: 'options(echo=FALSE)'. If the command is run like this 'R CMD BATCH --no-save my_script.R', then nothing will be saved in the .Rdata file which can get often very large. More on this can be found on the help pages: '$ R CMD BATCH --help' or '> ?BATCH'.
2. Submitting R script to Linux cluster via Torque Create the following shell script 'my_script.sh'
##################################
#!/bin/sh
cd $PBS_O_WORKDIR
R CMD BATCH --no-save my_script.R
##################################
This script doesn't need to have executable permissons. Use the following 'qsub' command to send this shell script to the Linux cluster from the directory where the R script 'my_script.R' is located. To utilize several CPUs on the Linux cluster, one can divide the input data into several smaller subsets and execute for each subset a separate process from a dedicated directory.
$ qsub my_script.sh
giovedì 17 maggio 2007
Quick and dirty function for descriptive statistics
desc <- function(mydata) {
require(e1071)
quantls <- quantile(x=mydata, probs=seq(from=0, to=1, by=0.25))
themean <- mean(mydata)
thesd <- sd(mydata)
kurt <- kurtosis(mydata)
skew <- skewness(mydata)
retlist <- list(Quantiles=quantls, Mean=themean,
StandDev=thesd,Skewness=skew, Kurtosis=kurt)
return(retlist)
}
# example
exampledata <- rnorm(10000)
summary(exampledata)
desc(exampledata)
mercoledì 16 maggio 2007
How can I turn a string into a variable?
varname <- c("a", "b", "d")
you can do
get(varname[1]) + 2
for a + 2
or
assign(varname[1], 2 + 2)
for a <- 2 + 2
or
eval(substitute(lm(y ~ x + variable), list(variable = as.name(varname[1]))
for lm(y ~ x + a)
At least in the first two cases it is often easier to just use a list, and then you can easily index it by name
vars <- list(a = 1:10, b = rnorm(100), d = LETTERS) vars[["a"]]
without any of this messing about.
martedì 15 maggio 2007
Large Matrix into a vector
mx <- matrix(rnorm(100,1:100),10,10) vec <- c(mx)or
vec <- c(as.matrix(mx))or
dim(mx) <- NULL
lunedì 14 maggio 2007
Comparing two matrices, row by row
ar1 <- array(data=c(1:16),dim=c(4,4))
ar2 <- array(data=c(1,2,3,3,5:16),dim=c(4,4))
z<-ar1==ar2
ar1
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16
ar2
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 3 8 12 16
z
[,1] [,2] [,3] [,4]
[1,] TRUE TRUE TRUE TRUE
[2,] TRUE TRUE TRUE TRUE
[3,] TRUE TRUE TRUE TRUE
[4,] FALSE TRUE TRUE TRUE
which(z==FALSE)
[1] 4
Or
apply(ar1==ar2,1,all)
venerdì 11 maggio 2007
Density curve over a histogram
x <- rnorm(1000) hist(x, freq = FALSE, col = "grey") curve(dnorm, col = 2, add = TRUE)
This thread (specifically the Ted Harding answer) from the r-help-list augments the usefulness of this simple tip.
This kind of plots cab be easily produced using the lessR package.
For example, from ?color.density:
library(lessR) # generate 100 random normal data values y <- rnorm(100) # normal curve and general density curves superimposed over histogram # all defaults color.density(y)
giovedì 10 maggio 2007
Barplots of two sets
x <- c(0.0001, 0.0059, 0.0855, 0.9082)
y <- c(0.54, 0.813, 0.379, 0.35)
# create a two row matrix with x and y
height <- rbind(x, y)
# Use height and set 'beside = TRUE' to get pairs
# save the bar midpoints in 'mp'
# Set the bar pair labels to A:D
mp <- barplot(height, beside = TRUE,
ylim = c(0, 1), names.arg = LETTERS[1:4])
# Nel caso generale, i.e., che si usa di
# solito (height MUST be a matrix)
mp <- barplot(height, beside = TRUE)
# Draw the bar values above the bars
text(mp, height, labels = format(height, 4),
pos = 3, cex = .75)
martedì 8 maggio 2007
Fill a matrix with vectors of different lengths
v <- c(1,1,2,3,4,6)
binc <- function(x){
l <- sum(x)+1
y <- c(1,rep(0,l-1))
for (i in x) y <- y+c(rep(0,i),y)[1:l]
}
out <- lapply(1:length(v), function(i) binc(v[1:i]))
nout <- max(sapply(out, length))
sapply(out, function(x) c(x, rep(0, nout - length(x))))
lunedì 7 maggio 2007
Make many barplot into one plot
# I have 4 tables like this: satu <- array(c(5,15,20,68,29,54,84,119), dim=c(2,4), dimnames=list(c("Negative", "Positive"), c("Black", "Brown", "Red", "Blond"))) dua <- array(c(50,105,30,8,29,25,84,9), dim=c(2,4), dimnames=list(c("Negative", "Positive"), c("Black", "Brown", "Red", "Blond"))) tiga <- array(c(9,16,26,68,12,4,84,12), dim=c(2,4), dimnames=list(c("Negative", "Positive"), c("Black", "Brown", "Red", "Blond"))) empat <- array(c(25,13,50,78,19,34,84,101), dim=c(2,4), dimnames=list(c("Negative", "Positive"), c("Black", "Brown", "Red", "Blond"))) # rbind() the tables together TAB <- rbind(satu, dua, tiga, empat) # Do the barplot and save the bar midpoints mp <- barplot(TAB, beside = TRUE, axisnames = FALSE) # Add the individual bar labels mtext(1, at = mp, text = c("N", "P"), line = 0, cex = 0.5) # Get the midpoints of each sequential pair of bars # within each of the four groups at <- t(sapply(seq(1, nrow(TAB), by = 2), function(x) colMeans(mp[c(x, x+1), ]))) # Add the group labels for each pair mtext(1, at = at, text = rep(c("satu", "dua", "tiga", "empat"), 4), line = 1, cex = 0.75) # Add the color labels for each group mtext(1, at = colMeans(mp), text = c("Black", "Brown", "Red", "Blond"), line = 2)
domenica 6 maggio 2007
How to clear screen in R (Windows)
cls <- function() { require(rcom) wsh <- comCreateObject("Wscript.Shell") comInvoke(wsh, "SendKeys", "\014") invisible(wsh) } cls()or
# An R function to clear the screen on RGui: cls <- function() { if (.Platform$GUI[1] != "Rgui") return(invisible(FALSE)) if (!require(rcom, quietly = TRUE)) # Not shown any way! stop("Package rcom is required for 'cls()'") wsh <- comCreateObject("Wscript.Shell") if (is.null(wsh)) { return(invisible(FALSE)) } else { comInvoke(wsh, "SendKeys", "\014") return(invisible(TRUE)) } } cls()
See this StackOverflow Question for other solutions.
venerdì 4 maggio 2007
Fill a matrix with vectors of different lengths
V <- c(1,1,2,3,4,6)
binc <- function(x){
l <- sum(x)+1
y <- c(1,rep(0,l-1))
for (i in x) y <- y+c(rep(0,i),y)[1:l]
}
out <- lapply(1:length(v), function(i) binc(v[1:i]))
nout <- max(sapply(out, length))
sapply(out, function(x) c(x, rep(0, nout - length(x))))
giovedì 3 maggio 2007
Rotating a distribution plot by 90 degrees
mercoledì 2 maggio 2007
ls() improved!
by mode, class and 'size'! Thanks to Bendix Carstensen!
lls <- function (pos = 1, pat = "")
{
dimx <- function(dd) if (is.null(dim(dd)))
length(dd)
else dim(dd)
lll <- ls(pos = pos, pat = pat)
cat(formatC("mode", 1, 15), formatC("class", 1, 18),
formatC("name",1, max(nchar(lll)) + 1), "size\n-----------------------------------------------------------------\n")
if (length(lll) > 0)
{
for (i in 1:length(lll))
{
cat(formatC(eval(parse(t = paste("mode(", lll[i],
")"))), 1, 15), formatC(paste(eval(parse(t = paste("class(",
lll[i], ")"))), collapse = " "), 1, 18), formatC(lll[i],
1, max(nchar(lll)) + 1), " ", eval(parse(t = paste("dimx(", lll[i], ")"))), "\n")
}
}
}
venerdì 27 aprile 2007
What's the best way to upgrade R on Windows©?
That's a matter of taste. For most people the best thing to do is to uninstall R (see the previous Q), install the new version, copy any installed packages to the library folder in the new installation, run update.packages()
in the new R (`Update packages...' from the Packages menu, if you prefer) and then delete anything left of the old installation. Different versions of R are quite deliberately installed in parallel folders so you can keep old versions around if you wish.
Upgrading from R 1.x.y to R 2.x.y is special as all the packages need to be reinstalled. Rather than copy them across, make a note of their names and re-install them from CRAN.
giovedì 26 aprile 2007
How to Superimpose Histograms
superhist2pdf <- function(x, filename = "super_histograms.pdf", dev = "pdf", title = "Superimposed Histograms", nbreaks ="Sturges") { junk = NULL grouping = NULL for(i in 1:length(x)) { junk = c(junk,x[[i]]) grouping <- c(grouping, rep(i,length(x[[i]]))) } grouping <- factor(grouping) n.gr <- length(table(grouping)) xr <- range(junk) histL <- tapply(junk, grouping, hist, breaks=nbreaks, plot = FALSE) maxC <- max(sapply(lapply(histL, "[[", "counts"), max)) if(dev == "pdf") { pdf(filename, version = "1.4") } else{} if((TC <- transparent.cols <- .Device %in% c("pdf", "png"))) { cols <- hcl(h = seq(30, by=360 / n.gr, length = n.gr), l = 65, alpha = 0.5) } else { h.den <- c(10, 15, 20) h.ang <- c(45, 15, -30) } if(TC) { plot(histL[[1]], xlim = xr, ylim= c(0, maxC), col = cols[1], xlab = "x", main = title) } else { plot(histL[[1]], xlim = xr, ylim= c(0, maxC), density = h.den[1], angle = h.ang[1], xlab = "x") } if(!transparent.cols) { for(j in 2:n.gr) plot(histL[[j]], add = TRUE, density = h.den[j], angle = h.ang[j]) } else { for(j in 2:n.gr) plot(histL[[j]], add = TRUE, col = cols[j]) } invisible() if( dev == "pdf") { dev.off() } }
# How to use the function: d1 = rnorm(1:100) d2 = rnorm(1:100) + 4 # the input object MUST be a list! l1 = list(d1,d2) superhist2pdf(l1, nbreaks="Sturges")
martedì 24 aprile 2007
Plotting multiple smooth lines on the same graph
d1 <- cbind(rnorm(100), rnorm(100,3,1))
d2 <- cbind(rnorm(100), rnorm(100,1,1))
plot(d1[,1], d1[,2], xlim=range(c(d1[,1], d2[,1])), ylim=range(c(d1[,2], d2[,2])), col="blue", xlab="X", ylab="Y")
points(d2[,1], d2[,2], col="red")
points(loess.smooth(d1[,1], d1[,2]), type="l", col="blue")
points(loess.smooth(d2[,1], d2[,2]), type="l", col="red")
lunedì 23 aprile 2007
venerdì 20 aprile 2007
How to join two arrays using their column names intersection
ar1 <- array(data = c(1:16), dim = c(4,4))
ar2 <- array(data = c(1:16), dim = c(4,4))
colnames(ar1) <- c("A","B","D","E")
colnames(ar2) <- c("C","A","E","B")
# get the common names between the matrices
same <- intersect(colnames(ar1), colnames(ar2))
# join them together
rbind(ar1[,same], ar2[,same])
giovedì 19 aprile 2007
Highlight overlapping area between two curves
p <- seq(0.2,1.4,0.01)
x1 <- dnorm(p, 0.70, 0.12)
x2 <- dnorm(p, 0.90, 0.12)
plot(range(p), range(x1,x2), type = "n")
lines(p, x1, col = "red", lwd = 4, lty = 2)
lines(p, x2, col = "blue", lwd = 4)
polygon(c(p,p[1]), c(pmin(x1,x2),0), col = "grey")
Below an advanced example (Thanks to Blaž):
p <- seq(54,71,0.1)
x1 <- dnorm(p, 60, 1.5)
x2 <- dnorm(p, 65, 1.5)
par(mar=c(5.5, 4, 0.5, 0.5))
plot(range(p), range(x1,x2), type = "n", xlab="", ylab="", axes=FALSE)
box()
mtext(expression(bar(y)), side=1, line=1, adj=1.0, cex=2, col="black")
mtext("f"~(bar(y)), at=0.265, side=2, line=0.5, adj=1.0, las=2.0, cex=2, col="black")
axis(1, at=60, lab=mu[H[0]]~"=60", cex.axis=1.5, pos=-0.0165, tck=0.02)
axis(1, at=65, lab=mu[H[1]]~"=65", cex.axis=1.5, pos=-0.0165, tck=0.02)
axis(1, at=63.489, lab=y[k][","][zg]~"=63,489", pos=-0.035, tck=0.2)
axis(1, at=63.489, lab="", pos=-0.035, tck=-0.02)
lines(p, x1, col = "black")
lines(p, x2, col = "black")
line3 <- 63.5
polygon(c(line3, p[p<=line3], line3), c(0,x2[p<=line3],0), col = "grey23")
lines(p, x1, col = "black")
line <- 63.489
line1 <- 60
line2 <- 65
abline(v=line, col="black", lwd=1.5, lty = "dashed")
abline(v=line1, col="black", lwd=0.3, lty = "dashed")
abline(v=line2, col="black", lwd=0.3, lty = "dashed")
xt3 <- c(line,p[p>line],line)
yt3 <- c(0,x1[p>line],0)
polygon(xt3, yt3, col="grey")
legend("topright", inset=.05, c(expression(alpha), expression(beta)), fill=c("grey", "grey23"), cex=2)
mercoledì 18 aprile 2007
It's not R but It's VERY useful
Example (you must be in a unix environment with GNU free software):
sed 's/old_word/new_word/g' text_file > new_text_file
After which, new_text_file will contain the contents of text_file, except with the new_word values in place of the old_word values. For those interested in more detailed information, as usual, man (in this case man sed) is your friend.
martedì 17 aprile 2007
From Similarity To Distance matrix
# This function returns an object of class "dist"
sim2dist <- function(mx)
as.dist(sqrt(outer(diag(mx), diag(mx), "+") - 2*mx))
# from similarity to distance matrix
d.mx = as.matrix(d.mx)
d.mx = sim2dist(d.mx)
# The distance matrix can be used to visualize
# hierarchical clustering results as dendrograms
hc = hclust(d.mx)
plot(hc)
See Multivariate Analysis (Probability and Mathematical Statistics) for the statistical theory.