I was searching for good programming languages for data analysis. Open source and easy to understand, R seems to be the language for data analysis. The following is a bit of R code for you to munch on:
## N.B. If you get an error when loading the libraries
## you will need to install them using commands like
## install.packages("gplots", repos="http://cran.uk.R-project.org/", dependencies=TRUE)
##
## You should only need to do this once for each library.
## You will then need to load them with the
## library(gplots)
## command or equivalent each time you use R.
library("party")
library(gplots)
library("vcd")
library("RODBC")
library(e1071)
library("nnet")
library("rpart")
library("mlbench")
## Simple Commands
## Scalar arithmetic; every top-level expression prints its value
1 + 1
10 * 3
## c() combines values into a vector; arithmetic applies to each element
c(1, 2, 3)
10 * c(1, 2, 3)
## Store a value under a name and reuse it
x <- 5
x * x
## exp(1) evaluates e^1
exp(1)
## ------------------------------
## Correlation + Scatterplots
## List the columns of the built-in iris data frame
names(iris)
## Scatterplot of sepal length (x) against petal length (y)
plot(iris$Sepal.Length, iris$Petal.Length)
## Correlation between the two length columns, then its square
with(iris, cor(Sepal.Length, Petal.Length))
with(iris, cor(Sepal.Length, Petal.Length)^2)
## Same correlation computed on the ranks of the values
with(iris, cor(rank(Sepal.Length), rank(Petal.Length)))
## Rank-based correlation: replaces each input by its ranks (via rank())
## and returns cor() of the two rank vectors.
##   x, y: vectors of equal length that rank() can order
cor.sp <- function(x, y) {
  ranked_x <- rank(x)
  ranked_y <- rank(y)
  cor(ranked_x, ranked_y)
}
## ------------------------------
## Plot Scatterplot of Iris data
## Scatterplot of sepal and petal dimensions for two iris species.
##
## Plots length (x) against width (y) for the "setosa" and "versicolor" rows
## of the built-in iris data frame on the current graphics device.
## Lower-case symbols ("s", "v", colour 2) mark sepals and upper-case symbols
## ("S", "V", colour 4) mark petals. Takes no arguments; the value of the
## final legend() call is returned.
plotIris1 <- function() {
  is_setosa <- iris$Species == "setosa"
  is_versicolor <- iris$Species == "versicolor"

  ## Sepal columns come first in each pair, so the first plotting symbol and
  ## colour of every series belong to the sepals, the second to the petals.
  length_cols <- c("Sepal.Length", "Petal.Length")
  width_cols <- c("Sepal.Width", "Petal.Width")
  series_col <- c(2, 4)

  ## type = "n" only sets up the axes and title; points are added below.
  matplot(c(1, 8), c(0, 4.5), type = "n", xlab = "Length", ylab = "Width",
          main = "Petal and Sepal Dimensions in Iris Blossoms")
  matpoints(iris[is_setosa, length_cols], iris[is_setosa, width_cols],
            pch = "sS", col = series_col)
  matpoints(iris[is_versicolor, length_cols], iris[is_versicolor, width_cols],
            pch = "vV", col = series_col)

  ## Legend entries must follow the plotting order (sepals, then petals);
  ## they were previously listed petals-first and so mislabelled every series.
  legend(1, 4,
         c(" Setosa Sepals", " Setosa Petals",
           "Versicolor Sepals", "Versicolor Petals"),
         pch = "sSvV", col = rep(series_col, 2))
}
## Draw the iris scatterplot defined above
plotIris1()
## ------------------------------
# Bar Chart Example with confidence intervals and grid
## Grouped bar chart of the VADeaths data with (fake) error bars and a grid.
## Takes no arguments and draws on the current graphics device using
## barplot2(); box() is the last call, so its value is what gets returned.
prettyBarChart <- function() {
  ## Source: R Graph Gallery
  ## Transpose VADeaths and reverse the order of the resulting columns
  rates <- t(VADeaths)[, 5:1]
  subtitle_colour <- "gray20"

  ## Artificial interval bounds: 15% below and above each bar height
  lower_bound <- rates * 0.85
  upper_bound <- rates * 1.15

  bar_fill <- c("lightblue", "mistyrose", "lightcyan", "lavender")
  bar_positions <- barplot2(
    rates,
    beside = TRUE,
    col = bar_fill,
    legend.text = colnames(VADeaths),
    ylim = c(0, 100),
    main = "Stairlift Usage in Virginia",
    font.main = 4,
    sub = "Faked 95 percent error bars",
    col.sub = subtitle_colour,
    cex.names = 1.5,
    plot.ci = TRUE,
    ci.l = lower_bound,
    ci.u = upper_bound,
    plot.grid = TRUE
  )

  ## Annotate each group of bars with its mean, centred under the group
  group_centres <- colMeans(bar_positions)
  mean_labels <- paste("Mean", formatC(colMeans(rates)))
  mtext(side = 1, at = group_centres, line = 2, text = mean_labels,
        col = "red")

  box()
}
## Draw the bar chart defined above (needs barplot2 to be available)
prettyBarChart()
## ------------------------------
## Mosaic Plots
## Load the HairEyeColor table into the workspace
data("HairEyeColor")
## Draw it as a shaded mosaic display
mosaic(HairEyeColor, shade = TRUE)
## ------------------------------
## Linear Models
## Scatterplot of petal length against sepal length: first with the default
## symbols, then redrawn with filled blue points.
plot(Petal.Length ~ Sepal.Length, data = iris)
plot(Petal.Length ~ Sepal.Length, data = iris, col = "blue", pch = 19)
## Make a Model of Petals in terms of Sepals.
## The columns are referenced through `data = iris` rather than `iris$...`
## inside the formula, so the coefficient is named "Sepal.Length" and
## predict(irisModel, newdata = ...) honours the new data.
irisModel <- lm(Petal.Length ~ Sepal.Length, data = iris)
## plot the corresponding line on top of the scatterplot
abline(irisModel)
## Details of the Model
summary(irisModel)
## A factor on the right-hand side makes plot() draw one box per species
plot(Petal.Length ~ Species, data = iris, col = "cyan")
## ------------------------------
## Regression Tree
## Keep only the airquality rows whose Ozone value is not missing
airq <- airquality[!is.na(airquality$Ozone), ]
## Fit a tree for Ozone using every other column as a predictor
airct <- ctree(Ozone ~ ., data = airq)
## Draw the fitted tree
plot(airct)
## Observed Ozone (x) against the tree's fitted values (y)
plot(airq$Ozone, predict(airct))
## Classification Tree: Species modelled from all other iris columns
irisct <- ctree(Species ~ ., data = iris)
plot(irisct)
## Cross-tabulate the tree's predictions (rows) with the true species (columns)
table(predict(irisct), iris$Species)
## Ctree Forest: same formula fitted with cforest()
iriscf <- cforest(Species ~ ., data = iris)
table(predict(iriscf), iris$Species)
## Random Forest
## randomForest() and varImpPlot() are provided by the randomForest package;
## load it here so this section runs on its own.
library("randomForest")
irisrf <- randomForest(Species ~ ., data = iris)
## Cross-tabulate the forest's predictions (rows) with the true species
table(predict(irisrf), iris$Species)
## Plot the variable importance measures of the fitted forest
varImpPlot(irisrf)
## Naive Bayes: Species modelled from all other iris columns
irisnb <- naiveBayes(Species ~ ., data = iris)
## Predict from the data with column 5 dropped, then compare with the truth
table(predict(irisnb, iris[, -5]), iris$Species)
## Neural Net: nnet() fit of Species on all other columns, with size = 2
irisnn <- nnet(Species ~ ., data = iris, size = 2)
## type = "class" requests class labels; cross-tabulate them with the truth
table(predict(irisnn, iris, type = "class"), iris$Species)
## ------------------------------
## SQL Interface
library("RODBC")
## Open a connection to the ODBC data source named "PostgreSQL30w"
channel <- odbcConnect("PostgreSQL30w", case = "postgresql")
## odbcConnect() reports failure with a warning and a return value of -1
## instead of an error, so stop here with a clear message in that case.
if (!inherits(channel, "RODBC")) {
  stop("Could not open ODBC connection to data source 'PostgreSQL30w'",
       call. = FALSE)
}
## Write iris to a database table and read it back. The channel is closed
## in `finally` so it is released even if one of the calls fails.
tryCatch(
  {
    sqlSave(channel, iris, tablename = "iris")
    myIris <- sqlQuery(channel, "select * from iris")
  },
  finally = odbcClose(channel)
)
summary(myIris)
## ------------------------------
## Run the demo named "graphics"
demo(graphics)