Learning objectives
- Learn how to create a useful and attractive scatter plot using ggplot.
- Create scatter plots in which the color and size of the points vary with variables and values.
- Learn how to modify axis and plot properties.
- Export a ggplot image at the desired resolution and dimensions.
Content
This tutorial will guide you from the beginner level (level 1) to the Pro level in scatter plots. Many of the functions used in the tutorial are also useful when plotting bar plots, box plots, line plots, etc. Below is the list of topics that are covered on this page.
- Simple ggplot
- Axis limit and interval
- ggplot themes
- Point colors
- Color of points by values
- Point color/size by values
- Draw best fit line
- Pro in scatter plot
- Export ggplot image
library(ggplot2) # if you haven't installed ggplot2 yet, run install.packages('ggplot2') first
## Warning: package 'ggplot2' was built under R version 4.0.3
# List the datasets available in the currently loaded packages
data()
# This tutorial uses the built-in "trees" dataset, which holds the
# diameter (Girth), height and volume of Black Cherry Trees.
# Show its first six rows:
head(trees, n = 6L)
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
# Base R scatter plot of Volume against Height
plot(x = trees$Height, y = trees$Volume, xlab = "Height", ylab = "Volume")
Level 1: Simple ggplot
# Minimal ggplot scatter plot: Height is mapped to x and Volume to y,
# one point is drawn per row, and both axes get custom titles
ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point() +
  labs(x = "This is height", y = "This is volume")
Level 2: Axis limit and interval
- use xlim() and ylim() functions to fix the axis limit
- use scale_x_continuous() and scale_y_continuous() to add intervals to the axis labels
# Same scatter plot with the x axis fixed to 30-90 and the y axis to 0-100
ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point() +
  labs(x = "This is height", y = "This is volume") +
  xlim(30, 90) +
  ylim(0, 100)
# Alternative to xlim()/ylim(): set the limits and the tick interval together
# scale_y_continuous(breaks = seq(0, 80, by=10), limits=c(0,80))+
# scale_x_continuous(breaks = seq(0, 100, by=10), limits=c(0,100))
Level 3: ggplot themes
# Scatter plot drawn with the black-and-white theme
ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point() +
  labs(x = "This is height", y = "This is volume") +
  theme_bw()
# Other themes and theme() tweaks to experiment with:
# theme_classic()+
# theme_minimal()+
# theme_gray()+
# theme(axis.text.x = element_text(color = "grey20", size = 10),
# # axis.text.x = element_text(color = "grey20", size = 10, angle = 0, hjust = .5, vjust = .5),
# axis.title.x = element_text(color = "grey20", size = 12))+
# theme(text = element_text(size=15))+
Level 4: Point colors
# Scatter plot with every point drawn in red at size 2
ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point(colour = "red", size = 2) +
  theme_bw()
# Alternative point layer: shape 21 with a black outline and a red fill
# geom_point(color = 'black',size = 2, fill = 'red',pch=21)+
# definition of pch can be found here https://www.datanovia.com/en/blog/ggplot-point-shapes-best-tips/
Level 5: Color of points by values
# Scatter plot whose point fill follows Girth: shape 21 has a separate
# outline (black here) and fill, and the fill is coloured along a
# 10-step terrain.colors() gradient
ggplot(data = trees, mapping = aes(x = Height, y = Volume, fill = Girth)) +
  geom_point(colour = "black", size = 3, shape = 21) +
  scale_fill_gradientn(colours = terrain.colors(10)) +
  theme_bw()
# Other fill-scale, legend and title options to experiment with:
# scale_fill_gradient(name = 'aa', low = "blue", high = "red")+
# theme(legend.position = 'bottom',
# legend.direction = "horizontal")+
# theme(legend.position = c(0.1, 0.7),
# legend.background = element_rect(linetype='solid', color='black', size =0.5))
# ggtitle("Plot of height vs volume")
Level 6: Point color/size by values
# Show the first six rows of the iris dataset
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Scatter plot of the iris data with the point fill set by Species
# (shape 21 keeps a black outline around each filled point)
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, y = Petal.Length, fill = Species)) +
  geom_point(colour = "black", shape = 21) +
  theme_bw()
# To also scale the point size by Petal.Length, use this ggplot() call instead:
# ggplot(iris, aes(x= Sepal.Length, y = Petal.Length, fill = Species, size = Petal.Length))+
# Further options carried over from the trees example
# (NOTE: the gradient scales expect a continuous fill variable, and the
# title text refers to the trees data):
# scale_fill_gradientn(colours = terrain.colors(10))+
# scale_fill_gradient(name = 'aa', low = "blue", high = "red")+
# theme(legend.position = 'bottom',
# legend.direction = "horizontal")+
# ggtitle("Plot of height vs volume")+
Level 7: Draw best fit line
# Scatter plot of the trees data with one straight best-fit line.
# method = 'lm' fits a linear model; se = FALSE switches off the confidence
# band. FALSE is spelled out because `F` is an ordinary variable that can
# be reassigned, whereas FALSE is a reserved word.
ggplot(trees, aes(x = Height, y = Volume, fill = Girth)) +
  geom_point(color = 'black', size = 3, pch = 21) +
  geom_smooth(method = 'lm', se = FALSE, alpha = .5) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
# Scatter plot of the iris data with a separate straight best-fit line per
# species: mapping color to Species inside geom_smooth() colours the lines
# by species. se = FALSE switches off the confidence bands; FALSE is spelled
# out because `F` is an ordinary variable that can be reassigned.
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, fill = Species)) +
  geom_point(color = 'black', size = 3, pch = 21) +
  geom_smooth(aes(color = Species), method = 'lm', se = FALSE, alpha = .5) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
Level 8: Pro in scatter plot
# Build the finished scatter plot and keep it in `p` so it can be
# printed here and exported later.
# NOTE(review): recent ggplot2 releases deprecate `size` in element_rect()
# (in favour of `linewidth`) and a numeric `legend.position` (in favour of
# `legend.position.inside`); the older spellings are kept so the code still
# runs on the ggplot2 version this tutorial was built with -- confirm before
# upgrading.
p <- ggplot(data = iris,
            mapping = aes(x = Sepal.Length, y = Petal.Length, fill = Species)) +
  # filled circles with a black outline
  geom_point(colour = "black", size = 3, shape = 21) +
  # one fill colour per species
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  # axis ranges with a tick at every whole number
  scale_x_continuous(breaks = seq(3, 9, by = 1), limits = c(3, 9)) +
  scale_y_continuous(breaks = seq(1, 8, by = 1), limits = c(1, 8)) +
  labs(x = "Sepal length", y = "Petal length") +
  theme_bw() +
  theme(
    # axis text and titles in black
    axis.text.x = element_text(color = "black", size = 12, angle = 0,
                               hjust = .5, vjust = .5),
    axis.title.x = element_text(color = "black", size = 13),
    axis.text.y = element_text(color = "black", size = 12),
    axis.title.y = element_text(color = "black", size = 13),
    # legend placed inside the plot area, with a solid black border
    legend.position = c(0.2, 0.8),
    legend.background = element_rect(linetype = "solid", color = "black",
                                     size = 0.5)
  ) +
  # free-text label placed at x = 8, y = 2 in data coordinates
  annotate("text", x = 8, y = 2, label = "Scatter plot", size = 5)
# Display the plot
p
Export ggplot image
# Export the plot stored in `p` as a PNG file.
# Folder that receives the image -- change this to a folder that exists on
# your machine. The full path is handed to ggsave() instead of calling
# setwd(), so the working directory of the R session is left untouched.
out_dir <- 'C:/sarfaraz/Project_R_tutorials/R-tutorial/R_beginner_part3_files'
# width/height set the image dimensions (here in cm); dpi sets the
# resolution (300 is ggsave()'s default, written out so it is easy to change).
ggsave(filename = file.path(out_dir, "scatterplot.png"), plot = p,
       width = 20, height = 15, units = "cm", dpi = 300)