Quick exploratory analysis of cigarette vending machine locations in Heidelberg, uncovering insights and patterns to better understand their spatial distribution.
This section for reproducibility tbd.
# main libraries
library(tidyverse)
library(glue)
library(sf)
library(units)
library(ggplot2)
library(spatstat)
library(ggeffects)
library(tmap)
library(osmdata)
library(DT)
library(plyr)
library(tictoc)
library(openrouteservice)
library(jsonlite)
api_key <- readLines("config.txt", n = 1)
# read in localy
# cvm <- st_read("cvm_data.gpkg", layer = "cvm", quiet=T)
# schools <- st_read("cvm_data.gpkg", layer = "schools", quiet=T)
# kindergartens <- st_read("cvm_data.gpkg", layer = "kindergarten", quiet=T)
# hd_boundary <- st_read("cvm_data.gpkg", layer = "hd bound", quiet=T)
# query osm via overpass
hd_boundary <- opq(bbox = 'Heidelberg, Germany') |>
add_osm_feature(key = 'name', value = 'Heidelberg') |>
add_osm_feature(key = 'admin_level', value = '6') |>
osmdata_sf()
hd_boundary <- hd_boundary$osm_multipolygons
hd_districts <- opq(bbox = 'Heidelberg, Germany') |>
add_osm_feature(key = 'admin_level', value = '9') |>
osmdata_sf()
hd_districts <- hd_districts$osm_multipolygons
cvm <- opq(bbox = 'Heidelberg, Germany') |>
add_osm_feature(key = 'vending', value = 'cigarettes') |>
osmdata_sf()
cvm <- cvm$osm_points
osm_schools <- opq(bbox = 'Heidelberg, Germany') |>
add_osm_feature(key = 'amenity', value = 'school') |>
osmdata_sf()
osm_kindergarten <- opq(bbox = 'Heidelberg, Germany') |>
add_osm_feature(key = 'amenity', value = 'kindergarten') |>
osmdata_sf()
# Check for overlaps. Are schools/kindergartens all mapped with their building footprint or do we see some mapped as points only
# How many school points do not overlap with school polygons
# osm_schools$osm_points |>
# filter(!st_intersects(osm_schools$osm_polygons$geometry |> st_union(), sparse=F)) |>
# nrow()
#
# # How many school polygons do not overlap with school multipolygons/relations
# osm_schools$osm_polygons |>
# st_point_on_surface() |>
# filter(!st_intersects(osm_schools$osm_multipolygons$geometry |> st_union(), sparse=F)) |> nrow()
#
# # How many kindergarten points do not overlap with kindergarten polygons
# osm_kindergarten$osm_points |>
# filter(!st_intersects(osm_schools$osm_polygons$geometry |> st_union(), sparse=F)) |>
# nrow()
#
# # How many kindergarten polygons do not overlap with kindergarten multipolygons/relations
# osm_kindergarten$osm_polygons |>
# st_point_on_surface() |>
# filter(!st_intersects(osm_schools$osm_multipolygons$geometry |> st_union(), sparse=F)) |> nrow()
# overview on types and objects
datatable(data.frame(object=c(rep("kindergarten",3),rep("school",3)),type=rep(c("nodes","ways","relations"),2), n=c(osm_kindergarten$osm_lines |> nrow(), osm_kindergarten$osm_polygons |> nrow(), osm_kindergarten$osm_multipolygons |> nrow(), osm_schools$osm_points |> nrow(), osm_schools$osm_polygons |> nrow(), osm_schools$osm_multipolygons |> nrow())))
hd_districts <- hd_districts |>
filter(st_intersects(st_centroid(geometry), hd_boundary$geometry, sparse=F))
# generate the centroid/point of surface for all school and kindergarten buildings
schools <- osm_schools$osm_polygons |> st_point_on_surface() |>
filter(st_intersects(geometry, hd_boundary$geometry, sparse=F))
kindergartens <- osm_kindergarten$osm_polygons |> st_point_on_surface() |>
filter(st_intersects(geometry, hd_boundary$geometry, sparse=F))
cvm <- cvm |>
filter(st_intersects(geometry, hd_boundary$geometry, sparse=F))
OpenStreetMap (OSM) is a collaborative map of the world created by people like you and me. The data model of OpenStreetMap consists of three main elements: nodes, ways, and relations.
Nodes: Nodes are the basic building blocks of the map. Think of them as small points on the map representing specific geographic features, such as a single tree, a streetlamp, a shop, or even a significant landmark like the Eiffel Tower. Each node is defined by its unique latitude and longitude coordinates, which pinpoint its exact location on the Earth’s surface.
Ways: Ways are like paths or lines connecting multiple nodes. They represent linear features like roads, rivers, railway tracks, and footpaths. A way is created by connecting multiple nodes in sequence. For example, a road can be represented as a way by connecting a series of nodes that follow the path of the road.
Relations: Relations are a way to describe more complex and interconnected features on the map. They group multiple nodes, ways, or even other relations together to represent things like bus routes, building outlines, or multi-part structures. Relations allow us to create more detailed and organized representations of the real world.
Example of a school relation in Heidelberg is the Kurfürst-Friedrich-Gymnasium which has 5 buildings and two distinct locations (https://www.openstreetmap.org/relation/13561009)
Amount of school buildings in Heidelberg: 49
Amount of kindergarten buildings in Heidelberg: 80
Amount of cigarette vending machines: 145
Calculate distances between each school/kindergarten and CVM location. Aggregate the resulting matrix rowwise (by school/kindergarten) to get the minimum distance towards every distinct CVM.
dist_mat_schools <- st_distance(schools, cvm)
dist_mat_kindergartens <- st_distance(kindergartens, cvm)
distinct_min_schools <- apply(dist_mat_schools, 1, min)
distinct_min_kindergartens <- apply(dist_mat_kindergartens, 1, min)
Type | Extreme | Distance |
---|---|---|
kindergarte | min | 15.579 |
kindergarten | max | 2414.505 |
school | min | 39.773 |
school | min | 2330.165 |
Furthest distance towards a CVM is ~ 2600 Meter. We create a continuous dataframe with values from 0 to 2600 Meter and add cumulative counts for CVM within the respective distance to/from schools/kindergartens.
breaks <- seq(0, 2600, by = 1) # Adjust the 'by' value as per your requirement
dist_counts_schools <- table(cut(distinct_min_schools, breaks = breaks, right = FALSE))
dist_counts_kindergartens <- table(cut(distinct_min_kindergartens, breaks = breaks, right = FALSE))
result_df <- data.frame(distance = breaks[2:length(breaks)],
cvm_count_schools = as.numeric(dist_counts_schools),
cvm_count_kindergartens = as.numeric(dist_counts_kindergartens))
result_df <- result_df |>
mutate(cvm_count_schools_cum = cumsum(cvm_count_schools),
cvm_count_kindergartens_cum = cumsum(cvm_count_kindergartens))
plot(result_df$distance, result_df$cvm_count_kindergartens_cum, type = "l", col = "blue", lwd = 2,
xlab = "Distance in m", ylab = "cumulative count", main = "CVM distance to schools/kindergartens")
lines(result_df$distance, result_df$cvm_count_schools_cum, col = "green", lwd = 2)
legend("topleft", legend = c("kindergartens", "schools"), col = c("blue", "green"), lwd = 2, lty = 1)
# zoom 0 - 150
plot(result_df$distance[1:150], result_df$cvm_count_kindergartens_cum[1:150], type = "l", col = "blue", lwd = 2,
xlab = "Distance in m", ylab = "cumulative count", main = "CVM distance to schools/kindergartens")
lines(result_df$distance[1:150], result_df$cvm_count_schools_cum[1:150], col = "green", lwd = 2)
legend("topleft", legend = c("kindergartens", "schools"), col = c("blue", "green"), lwd = 2, lty = 1)
The first plot indicates a steep growth in CVM locations within 0 - 500 meters distance to/from schools & kindergartens. However when zoomed in we see minimal growth in school locations but some growth in kindergarten locations below 100m.
But how many are exactly within 100m?
result_df |>
filter(distance<=100) |>
summarize(schools=sum(cvm_count_schools),
kindergartens=sum(cvm_count_kindergartens))
## schools kindergartens
## 1 2 18
tmap_mode("view")
## tmap mode set to interactive viewing
facilities <- rbind(schools |> select(c(amenity)),
kindergartens |> select(c(amenity)))
facilities_buff100 <- facilities |> st_transform(25832) |> st_buffer(100) |> st_transform(4326)
facilities_buff200 <- facilities |> st_transform(25832) |> st_buffer(200) |> st_transform(4326)
tm_basemap("OpenStreetMap") +
tm_shape(facilities) +
tm_symbols(
col = "amenity",
palette = c("darkgreen", "darkred"),
size = 0.1,
border.lwd = 0
) +
tm_shape(cvm) +
tm_symbols(col = "black", size = 0.1) +
tm_shape(facilities_buff100) +
tm_borders(col="grey50") +
tm_shape(facilities_buff200) +
tm_borders(col="grey25")
schools_m <- glm(cvm_count_schools_cum ~ distance, data = result_df, family = poisson)
summary(schools_m)
##
## Call:
## glm(formula = cvm_count_schools_cum ~ distance, family = poisson,
## data = result_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.442e+00 6.491e-03 530.30 <2e-16 ***
## distance 2.314e-04 4.027e-06 57.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 12629.8 on 2599 degrees of freedom
## Residual deviance: 9296.5 on 2598 degrees of freedom
## AIC: 23452
##
## Number of Fisher Scoring iterations: 4
ggpredict(schools_m)
## $distance
## # Predicted counts of cvm_count_schools_cum
##
## distance | Predicted | 95% CI
## -------------------------------------
## 0 | 31.25 | [30.86, 31.66]
## 400 | 34.29 | [33.94, 34.63]
## 600 | 35.91 | [35.60, 36.23]
## 1000 | 39.39 | [39.13, 39.66]
## 1400 | 43.22 | [42.96, 43.47]
## 1600 | 45.26 | [44.99, 45.54]
## 2000 | 49.65 | [49.29, 50.02]
## 2600 | 57.05 | [56.43, 57.68]
##
## Not all rows are shown in the ouput. Use `print(..., n = Inf)` to show
## all rows.
##
## attr(,"class")
## [1] "ggalleffects" "list"
## attr(,"model.name")
## [1] "schools_m"
kindergarten_m <- glm(cvm_count_kindergartens_cum ~ distance, data = result_df, family = poisson)
summary(kindergarten_m)
##
## Call:
## glm(formula = cvm_count_kindergartens_cum ~ distance, family = poisson,
## data = result_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.947e+00 5.067e-03 778.93 <2e-16 ***
## distance 2.181e-04 3.155e-06 69.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 15303 on 2599 degrees of freedom
## Residual deviance: 10486 on 2598 degrees of freedom
## AIC: 26039
##
## Number of Fisher Scoring iterations: 4
ggpredict(kindergarten_m)
## $distance
## # Predicted counts of cvm_count_kindergartens_cum
##
## distance | Predicted | 95% CI
## -------------------------------------
## 0 | 51.77 | [51.26, 52.29]
## 400 | 56.49 | [56.05, 56.93]
## 600 | 59.01 | [58.61, 59.42]
## 1000 | 64.39 | [64.05, 64.73]
## 1400 | 70.26 | [69.94, 70.58]
## 1600 | 73.39 | [73.05, 73.74]
## 2000 | 80.08 | [79.62, 80.55]
## 2600 | 91.28 | [90.50, 92.07]
##
## Not all rows are shown in the ouput. Use `print(..., n = Inf)` to show
## all rows.
##
## attr(,"class")
## [1] "ggalleffects" "list"
## attr(,"model.name")
## [1] "kindergarten_m"
cvm_ppp <- cvm |> st_transform(25832) |> as.ppp()
## Warning in as.ppp.sf(st_transform(cvm, 25832)): only first attribute column is
## used for marks
marks(cvm_ppp) <- NULL
hd_owin <- hd_boundary |> st_transform(25832) |> as.owin()
Window(cvm_ppp) <- hd_owin
ripley_K <- Kest(cvm_ppp, correction = "Ripley")
plot(ripley_K, main = "Ripley's K Function")
The K function indicates the location of CVMs are not subject to complete spatial randomness, but are clustered.
How many CVM’s per Heidelberg district
# join the HD districts
cvm <- st_join(cvm, hd_districts |> select(name))
cvm_nogeom <- cvm
st_geometry(cvm_nogeom) <- NULL
cvm_nogeom |> dplyr::group_by(name) |>
dplyr::summarise(count=n()) |> arrange(desc(count)) |> datatable()
Next we use the POI endpoint of openrouteservice to get information of all POIs in 100m distance from each CVM in Heidelberg.
for (i in 1:nrow(cvm)){
tic(glue("run {i} / {nrow(cvm)}"))
single_cvm <- cvm[i,]
geometry <- list(
geojson = list(
type = "Point",
coordinates = single_cvm |> st_coordinates() |> c()
),
buffer = 100
)
json_data <-
ors_pois(
request = 'stats',
geometry = geometry,
limit = 2000,
sortby = "distance",
api_key = api_key,
output = "text"
)
parsed_data <- fromJSON(json_data)
result <- lapply(parsed_data$places, function(x) {
if (!(is.atomic(x))) {
names <- names(x$categories)
counts <- sapply(x$categories, function(y) y$count)
data.frame(
categories = names,
count = counts,
stringsAsFactors = FALSE
)
} else {
NULL
}
})
result_df <- do.call(rbind, result)
result_df$places <- rownames(result_df)
rownames(result_df) <- NULL
result_df$cvm_osm_id <- single_cvm$osm_id
result_df$name <- single_cvm$name
result_df$places <- gsub("\\..*", "", result_df$places)
if (i == 1) {
final <- result_df
} else {
final <- rbind.fill(final, result_df)
}
toc(quiet = T)
}
Pie charts on POIs around CVM’s in Heidelberg and by district.
#categories_agg <- final |> dplyr::group_by(categories, name) |>
# dplyr::summarise(total_count=sum(count))
places_acc <- final |> dplyr::group_by(places, name) |>
dplyr::summarise(total_count=sum(count))
## `summarise()` has grouped output by 'places'. You can override using the
## `.groups` argument.
ggplot(places_acc, aes(x="", y=total_count, fill=places)) +
geom_bar(width = 1, stat = "identity") + coord_polar("y", start=0)
ggplot(places_acc, aes(x="", y=total_count, fill=places)) +
geom_bar(width = 1, stat = "identity")+
facet_wrap(vars(name)) + coord_polar("y", start=0)