Learning objectives

  • Learn how to create a useful and attractive scatter plot using ggplot.
  • Create a scatter plot where the color and size of the points vary with the values of a variable.
  • Learn how to modify axis and plot properties.
  • Export a ggplot image at the desired resolution/dimensions.

Content

This tutorial will guide you from beginner level (Level 1) to pro level in scatter plotting. Many of the functions used in the tutorial are also useful when drawing bar plots, box plots, line plots, etc. Below is the list of topics that are covered on this page.

  • Simple ggplot
  • Axis limit and interval
  • ggplot themes
  • Point colors
  • Color of points by values
  • Point color/size by values
  • Draw best fit line
  • Pro in scatter plot
  • Export ggplot image
# Load ggplot2; if it is not installed yet, run install.packages("ggplot2") first
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
# List the datasets that ship with R
data()

# This tutorial uses the built-in "trees" dataset, which holds the
# diameter (Girth), height and volume of Black Cherry trees
head(trees)
##   Girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7
# Base-graphics scatter plot of volume against height, for comparison
plot(trees$Height, trees$Volume, xlab = "Height", ylab = "Volume")

Level 1: Simple ggplot

# Map Height to the x axis and Volume to the y axis, draw one point per
# tree, and give both axes custom titles
ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point() +
  labs(x = "This is height", y = "This is volume")

Level 2: Axis limit and interval

  • use xlim() and ylim() functions to fix the axis limit
  • use scale_x_continuous() and scale_y_continuous() to add intervals to the axis labels
# Base scatter plot shared by both variants below
p <- ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point() +
  labs(x = "This is height", y = "This is volume")

# Variant 1: only fix the axis ranges
p1 <- p + xlim(30, 90) + ylim(0, 100)

# Variant 2: fix the ranges and place a tick every 10 units
p2 <- p +
  scale_y_continuous(limits = c(0, 80), breaks = seq(0, 80, by = 10)) +
  scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 10))

# gridExtra provides grid.arrange(), used to place plots side by side
library(gridExtra)
grid.arrange(p1, p2, ncol = 2)

Level 3: ggplot themes

# Scatter plot drawn with the black-and-white theme
p <- ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point() +
  labs(x = "This is height", y = "This is volume") +
  theme_bw() +
  ggtitle("Classic dark-on-light theme")

# Same plot with the classic theme swapped in.
# Other themes to try in its place: theme_minimal(), theme_gray()
p2 <- p +
  theme_classic() +
  ggtitle("Classic-looking theme")

# Keep theme_bw() but restyle the x-axis tick labels.
# Further options to try:
#   theme(axis.text.x = element_text(color = "grey20", size = 10, angle = 0, hjust = .5, vjust = .5),
#         axis.title.x = element_text(color = "grey20", size = 12))
#   theme(text = element_text(size = 15))
p3 <- p +
  theme(axis.text.x = element_text(size = 10, color = "grey20")) +
  ggtitle("Modify axis text size")

# Show the three themed plots in one row
grid.arrange(p, p2, p3, ncol = 3)

Level 4: Point colors

# Every point gets the same fixed color and size (set outside aes())
p <- ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point(size = 4, color = "red") +
  theme_bw() +
  ggtitle("Points with user specified color")

# Point shape 21 takes a fill color and a separate border color.
# Shape (pch) codes are listed at
# https://www.datanovia.com/en/blog/ggplot-point-shapes-best-tips/
p1 <- ggplot(data = trees, mapping = aes(x = Height, y = Volume)) +
  geom_point(shape = 21, size = 4, fill = "red", color = "black") +
  theme_bw() +
  ggtitle("Points with color and border")

grid.arrange(p, p1, ncol = 2)

Level 5: Color of points by values

# Layers shared by both plots: points filled according to Girth, drawn
# with shape 21 so that each point has a black border
p_base <- ggplot(trees, aes(x = Height, y = Volume, fill = Girth)) +
  geom_point(color = "black", size = 3, shape = 21) +
  theme_bw()

# Fill gradient taken from a ready-made palette
p <- p_base +
  scale_fill_gradientn(colours = terrain.colors(10)) +
  ggtitle("Point color using existing package")

# Fill gradient between two user-chosen colors. This is built on p_base
# rather than on p, so only one fill scale is ever added and ggplot2 does
# not report "Scale for 'fill' is already present".
p1 <- p_base +
  scale_fill_gradient(name = "values", low = "blue", high = "red") +
  theme(
    # numeric position places the legend inside the plotting area
    legend.position = c(0.3, 0.8),
    legend.direction = "horizontal",
    legend.background = element_rect(linetype = "solid", color = "black", size = 0.5)
  ) +
  ggtitle("Point color is user specified")

grid.arrange(p, p1, ncol = 2)

Level 6: Point color/size by values

# Preview the built-in iris dataset used in the next examples
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Fill color follows the Species column
p1 <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Petal.Length, fill = Species)
) +
  geom_point(shape = 21, color = "black") +
  theme_bw() +
  ggtitle("Point color varies with value")

# Same plot, with point size additionally mapped to Petal.Length
p2 <- ggplot(
  data = iris,
  mapping = aes(
    x = Sepal.Length, y = Petal.Length,
    fill = Species, size = Petal.Length
  )
) +
  geom_point(shape = 21, color = "black") +
  theme_bw() +
  ggtitle("Point color and size varies with value")

grid.arrange(p1, p2, ncol = 2)

Level 7: Draw best fit line

# One linear fit (method = "lm") through all trees.
# se = FALSE turns off the confidence band; FALSE is spelled out because
# the shorthand F is an ordinary variable that can be reassigned.
p1 <- ggplot(trees, aes(x = Height, y = Volume, fill = Girth)) +
  geom_point(color = "black", size = 3, shape = 21) +
  geom_smooth(method = "lm", se = FALSE, alpha = 0.5) +
  theme_bw()

# One linear fit per species: mapping color to Species inside
# geom_smooth() gives each species its own colored line
p2 <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, fill = Species)) +
  geom_point(color = "black", size = 3, shape = 21) +
  geom_smooth(aes(color = Species), method = "lm", se = FALSE, alpha = 0.5) +
  theme_bw()

grid.arrange(p1, p2, ncol = 2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Level 8: Pro in scatter plot

# Styling shared by both "pro" plots: black-and-white theme, black axis
# text/titles, boxed legend placed inside the panel, centered plot title
pro_theme <- theme_bw() +
  theme(
    axis.text.y = element_text(color = "black", size = 12),
    axis.title.y = element_text(color = "black", size = 13),
    axis.text.x = element_text(color = "black", size = 12, angle = 0, hjust = 0.5, vjust = 0.5),
    axis.title.x = element_text(color = "black", size = 13),
    legend.position = c(0.2, 0.8),
    legend.background = element_rect(linetype = "solid", color = "black", size = 0.5),
    plot.title = element_text(hjust = 0.5)
  )

# Fill colors used for the three species in both plots
species_fill <- c("#999999", "#E69F00", "#56B4E9")

# Plot 1: bordered circles filled by species, fixed axis ranges with a
# tick every unit, and a text label placed at (8, 2)
p <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, fill = Species)) +
  geom_point(color = "black", size = 3, shape = 21) +
  scale_y_continuous(breaks = seq(1, 8, by = 1), limits = c(1, 8)) +
  scale_x_continuous(breaks = seq(3, 9, by = 1), limits = c(3, 9)) +
  scale_fill_manual(values = species_fill) +
  xlab("Sepal length") + ylab("Petal length") +
  annotate("text", x = 8, y = 2, label = "Scatter plot", size = 5) +
  pro_theme +
  ggtitle("Scatter plot pro")

# Plot 2: a different fillable shape (21, 22, 23) per species plus a
# dashed red 1:1 reference line (intercept 0, slope 1).
# No colour or size aesthetic is mapped in this plot, so the former
# scale_color_manual() / scale_size_manual() calls had no effect and
# were removed.
p1 <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, fill = Species, shape = Species)) +
  geom_point(size = 3) +
  scale_shape_manual(values = c(21, 22, 23)) +
  scale_fill_manual(values = species_fill) +
  scale_y_continuous(breaks = seq(0, 9, by = 1), limits = c(0, 9)) +
  scale_x_continuous(breaks = seq(0, 9, by = 1), limits = c(0, 9)) +
  geom_abline(intercept = 0, slope = 1, size = 1, linetype = "dashed", color = "red") +
  xlab("Sepal length") + ylab("Petal length") +
  pro_theme +
  ggtitle("Scatter plot pro")

grid.arrange(p, p1, ncol = 2)

Export ggplot image

# Export the plot `p` as a PNG file.
# The target folder is handed to ggsave() through `path` instead of
# calling setwd(), so the session's working directory is left untouched.
# Change output_dir to a folder that exists on your machine.
output_dir <- "C:/sarfaraz/Project_R_tutorials/R-tutorial/R_beginner_part3_files"
ggsave(
  filename = "scatterplot.png",
  plot = p,
  path = output_dir,
  # physical size of the image
  width = 20, height = 15, units = "cm",
  # resolution; 300 is ggsave()'s default, stated so it is easy to change
  dpi = 300
)