Africa population map in R
2 min read · Feb 11, 2024
This time for Africa… let’s see how different countries are doing in terms of population.
# Install only the packages that are not already available, so re-running the
# script does not reinstall everything each time.
required_pkgs <- c("rvest", "dplyr", "rio")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(rvest)
library(dplyr)
library(rio)

# Convert a scraped table cell such as "15.3%", "223,804,632" or "−0.4%" to a
# number. The decimal point and the sign are kept; only footnote markers,
# thousands separators, percent signs and other decoration are removed.
parse_wiki_number <- function(x) {
  x <- as.character(x)
  x <- gsub("\\[[^]]*\\]", "", x)           # drop footnote markers like [a]
  x <- gsub("\u2212", "-", x, fixed = TRUE) # Unicode minus -> ASCII minus
  as.numeric(gsub("[^0-9.-]", "", x))
}

# Extract data from Wikipedia and do the necessary cleaning.
# NOTE(review): the table index (1) and the column positions (2:5) are tied to
# the current layout of the Wikipedia page -- re-check them if the page changes.
aa <- read_html(
  "https://en.wikipedia.org/wiki/List_of_African_countries_by_population"
) %>%
  html_elements("table") %>%
  .[[1]] %>%
  html_table() %>%
  # Make the scraped headers syntactic and unique so select() can work on them.
  setNames(make.names(names(.), unique = TRUE)) %>%
  select(2:5) %>%
  rename(country = 1, percent = 2, pop = 3, percent.growth = 4) %>%
  mutate(across(2:4, parse_wiki_number))
Here is the cleaned data:
# A tibble: 56 × 4
country percent pop percent.growth
<chr> <dbl> <dbl> <dbl>
1 Nigeria 153 223804632 24
2 Ethiopia 87 126527060 26
3 Egypt 77 111942661 16
4 DR Congo 70 102262809 33
5 Tanzania 46 67438106 30
6 South Africa 41 59414495 9
7 Kenya 38 55100587 20
8 Uganda 33 48582334 28
9 Sudan 33 48109006 26
10 Algeria 31 45606481 16
# ℹ 46 more rows
# ℹ Use `print(n = ...)` to see more rows
And here is the population rank:
library(ggplot2)
library(dplyr)

# Lollipop chart of population per country, ordered by population so the chart
# reads as a ranking. Countries whose name starts with "A" are highlighted.
# Expects `aa` (columns `country` and `pop`) from the scraping step.
rank_data <- aa %>%
  mutate(
    # Base grepl() is used so the snippet does not depend on stringr, which is
    # never loaded in this script.
    stem_colour = ifelse(grepl("^A", country), "#6C5B7B", "#A48374"),
    # Order the axis by population instead of alphabetically.
    country = reorder(country, pop)
  )

ggplot(rank_data, aes(x = country, y = pop, color = stem_colour)) +
  geom_segment(aes(xend = country, y = 0, yend = pop)) +
  geom_point() +
  # The colour column already holds hex codes, so use them as-is.
  scale_color_identity() +
  labs(x = "", y = "population") +
  coord_flip() +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    plot.background = element_rect(fill = "white", color = NA),
    axis.text.x = element_text(size = 12)
  )
Let’s make a map as well to have a better spatial understanding of the demographics…
library(sf)
library(rnaturalearth)
library(ggplot2)
library(dplyr)
library(ggrepel) # provides geom_label_repel()

# Country polygons from Natural Earth; this object was referenced but never
# created in the original snippet.
world <- ne_countries(returnclass = "sf")

# Keep African countries and attach the scraped population figures.
# NOTE(review): the join is on the Natural Earth `name` field; any country whose
# spelling differs from the Wikipedia table gets NA for `pop` and is drawn in
# the scale's NA colour -- check the unmatched rows and recode if needed.
africa_map <- world %>%
  filter(continent == "Africa") %>%
  left_join(aa, by = c("name" = "country"))

ggplot(africa_map) +
  geom_sf(aes(fill = pop)) +
  # The data has no longitude/latitude columns, so let stat_sf_coordinates
  # derive a label position from each country's geometry.
  geom_label_repel(
    data = filter(africa_map, pop > 5e6),
    aes(label = name, fill = pop, geometry = geometry),
    stat = "sf_coordinates"
  ) +
  scale_fill_gradient(low = "#F1EDE6", high = "#FF6F00") +
  theme_void()