Tuesday, 24 January 2017
Monday, 23 January 2017
Thursday, 12 January 2017
Data Visualizations in R
In this article, we will be working on basic and advanced visualizations.
The data used here is "Big Mart Data". The link for the same is given below:
### show info about the data
data=Big.Mart.Dataset
train=data
library(ggplot2)
Basic Visualizations
1. Scatter Plot
ggplot(train, aes(Item_Visibility, Item_MRP)) + geom_point() + scale_x_continuous("Item Visibility", breaks = seq(0,0.35,0.05))+ scale_y_continuous("Item MRP", breaks = seq(0,270,by = 30))+ theme_bw()
ggplot(train, aes(Item_Visibility, Item_MRP)) + geom_point(aes(color = Item_Type)) +
scale_x_continuous("Item Visibility", breaks = seq(0,0.35,0.05))+
scale_y_continuous("Item MRP", breaks = seq(0,270,by = 30))+
theme_bw() + labs(title="Scatterplot")
ggplot(train, aes(Item_Visibility, Item_MRP)) + geom_point(aes(color = Item_Type)) +
scale_x_continuous("Item Visibility", breaks = seq(0,0.35,0.05))+
scale_y_continuous("Item MRP", breaks = seq(0,270,by = 30))+
theme_bw() + labs(title="Scatterplot") + facet_wrap( ~ Item_Type)
2. Histogram
library(RColorBrewer)
data(VADeaths)
par(mfrow=c(2,3))
hist(VADeaths,breaks=10, col=brewer.pal(3,"Set3"),main="Set3 3 colors")
hist(VADeaths,breaks=3 ,col=brewer.pal(3,"Set2"),main="Set2 3 colors")
hist(VADeaths,breaks=7, col=brewer.pal(3,"Set1"),main="Set1 3 colors")
hist(VADeaths,,breaks= 2, col=brewer.pal(8,"Set3"),main="Set3 8 colors")
hist(VADeaths,col=brewer.pal(8,"Greys"),main="Greys 8 colors")
hist(VADeaths,col=brewer.pal(8,"Greens"),main="Greens 8 colors")
ggplot(train, aes(Item_MRP)) + geom_histogram(binwidth = 2)+
scale_x_continuous("Item MRP", breaks = seq(0,270,by = 30))+
scale_y_continuous("Count", breaks = seq(0,200,by = 20))+
labs(title = "Histogram")
3. Bar & Stack Bar Chart
ggplot(train, aes(Outlet_Establishment_Year)) + geom_bar(fill = "red")+theme_bw()+
scale_x_continuous("Establishment Year", breaks = seq(1985,2010)) +
scale_y_continuous("Count", breaks = seq(0,1500,150)) +
coord_flip()+ labs(title = "Bar Chart") + theme_gray()
Vertical Bar Chart:
ggplot(train, aes(Item_Type, Item_Weight)) + geom_bar(stat = "identity", fill = "darkblue") + scale_x_discrete("Outlet Type")+ scale_y_continuous("Item Weight", breaks = seq(0,15000, by = 500))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + labs(title = "Bar Chart")
Stacked Bar chart:
ggplot(train, aes(Outlet_Location_Type, fill = Outlet_Type)) + geom_bar()+
labs(title = "Stacked Bar Chart", x = "Outlet Location Type", y = "Count of Outlets")
4. Box Plot
ggplot(train, aes(Outlet_Identifier, Item_Outlet_Sales)) + geom_boxplot(fill = "red")+
scale_y_continuous("Item Outlet Sales", breaks= seq(0,15000, by=500))+
labs(title = "Box Plot", x = "Outlet Identifier")
5. Area Chart
ggplot(train, aes(Item_Outlet_Sales)) + geom_area(stat = "bin", bins = 30, fill = "steelblue") + scale_x_continuous(breaks = seq(0,11000,1000))+ labs(title = "Area Chart", x = "Item Outlet Sales", y = "Count")
Advanced Visualizations
library(RColorBrewer)
rf = colorRampPalette(rev(brewer.pal(40,'Set3')))
hexbinplot(diamonds$price~diamonds$carat, data=diamonds, colramp=rf)
6. Mosaic Plot
data(HairEyeColor)
mosaicplot(HairEyeColor)
7. Heat Map
ggplot(train, aes(Outlet_Identifier, Item_Type))+
geom_raster(aes(fill = Item_MRP))+
labs(title ="Heat Map", x = "Outlet Identifier", y = "Item Type")+
scale_fill_continuous(name = "Item MRP")
8. Map Visualization
devtools::install_github("rstudio/leaflet")
library(magrittr)
library(leaflet)
m <- leaflet() %>%
addTiles() %>% # Add default OpenStreetMap map tiles
addMarkers(lng=77.2310, lat=28.6560, popup="The delicious food of chandni chowk")
m # Print the map
9. 3D Graphs
library(Rcmdr)
data(iris, package="datasets")
scatter3d(Petal.Width~Petal.Length+Sepal.Length|Species, data=iris, fit="linear",residuals=TRUE, parallel=FALSE, bg="black", axis.scales=TRUE, grid=TRUE, ellipsoid=FALSE)
attach(iris)# 3d scatterplot by factor level
library(lattice)
cloud(Sepal.Length~Sepal.Width*Petal.Length|Species, main="3D Scatterplot by Species")
xyplot(Sepal.Width ~ Sepal.Length, iris, groups = iris$Species, pch= 20)
10. Correlogram
install.packages("corrgram")
library(corrgram)
corrgram(train, order=NULL, panel=panel.shade, text.panel=panel.txt,main="Correlogram")
Tuesday, 10 January 2017
Beginner's Guide to ggplot2:Painless Data Visualization
One of the most appealing things about R is its ability to create data visualizations with just a couple of lines of code.
For example, it takes just one line of code -- and a short one at that -- to plot two variables in a scatterplot. Let's use as an example the mtcars data set installed with R by default.
For example, it takes just one line of code -- and a short one at that -- to plot two variables in a scatterplot. Let's use as an example the mtcars data set installed with R by default.
The ggplot2 package was created by Hadley Wicham. ggplot2 allows you to create graphs that represent both univariate and multivariate numerical and categorical data in a straightforward manner. Grouping can be represented by color, symbol, size, and transparency.
### install & load ggplot library
install.package("ggplot2")
library("ggplot2")
### show info about the data
qplot(Sepal.Length, Petal.Length, data = iris, color = Species)
qplot(Sepal.Length, Petal.Length, data = iris)
qplot(Sepal.Length, Petal.Length, data = iris, color = Species)
qplot(Sepal.Length, Petal.Length, data = iris, color = Species, size = Petal.Width)
qplot(Sepal.Length, Petal.Length, data = iris, geom = "line", color = Species)
qplot(age, circumference, data = Orange, geom = c("point", "line"), colour = Tree)
### show info about the data
head(iris)
qplot(Sepal.Length, Petal.Length, data = iris, color = Species)
qplot(Sepal.Length, Petal.Length, data = iris)
qplot(Sepal.Length, Petal.Length, data = iris, color = Species)
qplot(Sepal.Length, Petal.Length, data = iris, color = Species, size = Petal.Width)
qplot(Sepal.Length, Petal.Length, data = iris, geom = "line", color = Species)
qplot(age, circumference, data = Orange, geom = c("point", "line"), colour = Tree)
### show info about the data
head(diamonds)
### comparison qplot vs ggplot
##qplot histogram
qplot(clarity, data=diamonds, fill=cut, geom="bar")
# ggplot histogram -> same output
ggplot(diamonds, aes(clarity, fill=cut)) + geom_bar()
ggplot also produces the same output.
### how to use qplot
# scatterplot
qplot(wt, mpg, data=mtcars)
# continuous scale vs. discrete scale
head(mtcars)
qplot(wt, mpg, data=mtcars,colour=cyl)
levels(mtcars$cyl)
qplot(wt, mpg, data=mtcars,colour=factor(cyl))
# use different aesthetic mappings
qplot(wt, mpg, data=mtcars,shape=factor(cyl))
qplot(wt, mpg, data=mtcars,size=qsec)
#combine mappings (hint: hollow points, geom-concept, legend combination)
qplot(wt, mpg, data=mtcars, size=qsec, color=factor(carb), shape=I(1))
# bar-plot
qplot(factor(cyl), data=mtcars, geom="bar")
# use different display of bars (stacked, dodged, identity)
head(diamonds)
qplot(clarity, data=diamonds, geom="bar", fill=cut, position="stack")
qplot(wt, mpg, data=mtcars, color=factor(cyl), geom=c("point", "smooth"))
# illustrate flip versus changing of variable allocation
qplot(mpg, wt, data=mtcars, facets=cyl~., geom=c("point", "smooth"))
qplot(mpg, wt, data=mtcars, facets=cyl~., geom=c("point", "smooth")) + coord_flip()
qplot(wt, mpg, data=mtcars, facets=cyl~., geom=c("point", "smooth"))
# using ggplot-syntax with qplot (hint: qplot creates layers automatically)
qplot(mpg, wt, data=mtcars, color=factor(cyl), geom=c("point","line"))
# add an additional layer with different mapping
p.tmp + geom_point() + geom_point(aes(y=disp))
# setting aesthetics instead of mapping
p.tmp + geom_point(color="darkblue")
# dealing with overplotting (hollow points, pixel points, alpha[0-1] )
t.df <- data.frame(x=rnorm(2000), y=rnorm(2000))
p.norm <- ggplot(t.df, aes(x,y))
p.norm + geom_point()
p.norm + geom_point(shape=1)
p.tmp + geom_point(color="darkblue")
# dealing with overplotting (hollow points, pixel points, alpha[0-1] )
t.df <- data.frame(x=rnorm(2000), y=rnorm(2000))
p.norm <- ggplot(t.df, aes(x,y))
p.norm + geom_point()
p.norm + geom_point(shape=1)
p.norm + geom_point(shape=".")
p.norm + geom_point(colour=alpha("black", 1/2))
p.norm + geom_point(colour=alpha("blue", 1/10))
# using scales (color palettes, manual colors, matching of colors to values)
p.tmp <- qplot(cut, data=diamonds, geom="bar", fill=cut)
p.tmp
p.tmp + scale_fill_manual(values=c("#7fc6bc","#083642","#b1df01","#cdef9c","#466b5d"))
### create a pie-chart, radar-chart (hint: not recommended)
# map a barchart to a polar coordinate system
p.tmp <- ggplot(mtcars, aes(x=factor(1), fill=factor(cyl))) + geom_bar(width=1)
p.tmp
p.tmp + coord_polar(theta="y")
p.tmp + coord_polar()
### create survival/cumulative incidence plot
library(survival)
head(lung)
# create a kaplan-meier plot with survival package
t.Surv <- Surv(lung$time, lung$status)
t.survfit <- survfit(t.Surv~1, data=lung)
plot(t.survfit, mark.time=TRUE)
Subscribe to:
Posts (Atom)