1 Background and data

All data shown is from 2024-12-05 onwards.

Since 2024-12-05 we have been analyzing the Apache log, which gives us insights into the usage of the tool based on the IP address.

Terminology:

Created Mapframe: A mapframe is considered a “created mapframe” when the user clicks the “Next” button after the selection of an area of interest.

Downloaded Mapframe: A mapframe is considered a “downloaded mapframe” when the processing of the Sketch Map Template is done and the user clicks the “Download PDF” button.

Uploaded Sketch Map: At the moment we are tracking uploads in two different ways. 1) A downloaded mapframe has been uploaded by clicking “Digitize your Sketch Maps”. Here we are not able to differentiate between single and multiple map uploads. We are working on a solution to account for this difference. 2) We save the bbox of uploaded Sketch Maps in a database and can therefore count these as real uploads.

Geodata Downloads: The user downloads Raster and/or Vector results by clicking “Download ZIP” and/or “Download GeoJSON” (currently only available for uploads tracked through Apache).

2 Creation of Map Frames

3 Use of baselayers

# Base layer chart (OSM / ESRI): share of map frames per base layer, drawn as
# a donut.

donut <- sketchmap_report_map_frames_filter |>
  count(layer, name = "count") |>
  mutate(
    fraction = count / sum(count),
    # Every ring segment spans [ymin, ymax] on the cumulative-fraction axis.
    ymax = cumsum(fraction),
    ymin = c(0, head(ymax, n = -1))
  )

chart2 <- ggplot(
  donut,
  aes(xmin = 1, xmax = 2, ymin = ymin, ymax = ymax, fill = layer)
) +
  geom_rect() +
  # Polar coordinates bend the stacked bar into a ring; the x range below
  # 1 stays empty and forms the hole of the donut.
  coord_polar(theta = "y") +
  xlim(c(0.2, 2.5)) +
  labs(fill = "Layer") +
  theme_void() +
  theme(legend.position = "right")

chart2

4 Map format used for map creation

# Share of map frames per paper format, drawn as a donut.
donut_format <- sketchmap_report_map_frames_filter |>
  group_by(format) |>
  summarise(count = n())

# Every ring segment spans [ymin, ymax] on the cumulative-fraction axis.
donut_format$fraction <- donut_format$count / sum(donut_format$count)
donut_format$ymax <- cumsum(donut_format$fraction)
donut_format$ymin <- c(0, head(donut_format$ymax, n = -1))


chart_format <- ggplot(
  donut_format,
  aes(ymax = ymax, ymin = ymin, xmax = 2, xmin = 1, fill = format)
) +
  geom_rect() +
  coord_polar(theta = "y") +
  xlim(c(0.2, 2.5)) +
  theme_void() +
  theme(legend.position = "right") +
  # The fill encodes the map format, so the legend is titled accordingly.
  labs(fill = "Format")

chart_format

5 Orientation chosen for map creation

# Open question: why is this not binary, and why are other languages included?

# Share of map frames per page orientation, drawn as a donut.
donut_orientation <- sketchmap_report_map_frames_filter |>
  group_by(orientation) |>
  summarise(count = n())

# Fractions are taken relative to this table's own total so the chunk does
# not depend on the format chart's data.
donut_orientation$fraction <-
  donut_orientation$count / sum(donut_orientation$count)
donut_orientation$ymax <- cumsum(donut_orientation$fraction)
donut_orientation$ymin <- c(0, head(donut_orientation$ymax, n = -1))


chart_orientation <- ggplot(
  donut_orientation,
  aes(ymax = ymax, ymin = ymin, xmax = 2, xmin = 1, fill = orientation)
) +
  geom_rect() +
  coord_polar(theta = "y") +
  xlim(c(0.2, 2.5)) +
  theme_void() +
  theme(legend.position = "right") +
  # The fill encodes the orientation, so the legend is titled accordingly.
  labs(fill = "Orientation")

chart_orientation

6 Language used to create Sketch Map

# Open questions: why do both "ES" and "es" occur, and why are there missing
# values although the data is already filtered to 5 Dec onwards?

# Share of map frames per interface language, drawn as a donut.
donut_lang <- sketchmap_report_map_frames_filter |>
  group_by(created_lang) |>
  summarise(count = n())

# Every ring segment spans [ymin, ymax] on the cumulative-fraction axis.
donut_lang$fraction <- donut_lang$count / sum(donut_lang$count)
donut_lang$ymax <- cumsum(donut_lang$fraction)
donut_lang$ymin <- c(0, head(donut_lang$ymax, n = -1))

# Total number of map frames; shown in the legend title.
total_count <- sum(donut_lang$count)

chart_lang <- ggplot(
  donut_lang,
  aes(ymax = ymax, ymin = ymin, xmax = 2, xmin = 1, fill = created_lang)
) +
  geom_rect() +
  coord_polar(theta = "y") +
  xlim(c(0.2, 2.5)) +
  theme_void() +
  theme(legend.position = "right") +
  # The total goes into the legend title: a text annotation at x = 2.7 lies
  # outside xlim(0.2, 2.5) and is dropped by ggplot with a warning.
  labs(fill = paste0("Language (total: ", total_count, ")"))

chart_lang

7 Countries where Sketch Maps have been created from

Information retrieved via the Apache log. The users' IP addresses give insight into where users were located when they created their mapframes.

# Number of map frames per country of origin.
plot_data_country <- sketchmap_report_map_frames_filter |>
  group_by(created_from_country) |>
  summarise(n = n())

# Open question: why are there entries since 5 Dec without
# created_from_country (and without the other Apache-based columns)?


# Download the world map
world <- ne_countries(scale = "medium", returnclass = "sf")
# world_lokal <- st_read("/Users/aschauss/Documents/SketchMapTool/Statistics/Map/world-administrative-boundaries.geojson")


# Transform to Mollweide
world_mollweide <- st_transform(world, crs = "+proj=moll")
# world_lokal <- st_transform(world_lokal, crs = "+proj=moll")

# Map frames per ISO country code; rows without a code count as 0.
sketchmap_report_map_frames_filter_iso <-
  sketchmap_report_map_frames_filter |>
  group_by(created_from_iso_code) |>
  summarise(count = sum(!is.na(created_from_iso_code)))

# Right join keeps every country of the world layer (countries without map
# frames get NA); the join result is converted back to an sf object.
# NOTE(review): Natural Earth may carry placeholder iso_a2 values for some
# countries, which would then never match — verify against the data in use.
sketchmap_report_world_iso <- sketchmap_report_map_frames_filter_iso |>
  rename(iso_a2 = created_from_iso_code) |>
  right_join(world_mollweide, by = "iso_a2") |>
  st_as_sf()

tmap_mode("view")

# Choropleth of map frame counts per country
tm_shape(sketchmap_report_world_iso) +
  tm_polygons(
    "count",
    palette = "Reds",
    title = "Count per Country",
    style = "jenks", # classification method
    n = 5 # number of classes
  ) +
  tm_borders() +
  tm_layout(
    main.title = "Created Sketch Maps",
    legend.outside = TRUE,
    legend.position = c("left", "bottom"),
    legend.bg.color = "white",
    legend.frame = FALSE,
    frame = FALSE
  )

8 Bounding Boxes from created Mapframes

Overview of the bounding boxes chosen when creating mapframes.

# Switch tmap to interactive mode
tmap_mode("view")

# Parse the WKT strings in column `bbox` into geometries (EPSG:4326).
# NOTE(review): this reads the unfiltered `sketchmap_report_map_frames`, not
# the `_filter` table used by the other sections — confirm this is intended.
mapframes_bboxes <- sketchmap_report_map_frames |>
  st_as_sf(wkt = "bbox", crs = 4326)

# Outlines of all bounding boxes on an OpenStreetMap base map
tm_shape(mapframes_bboxes) +
  tm_basemap("OpenStreetMap") +
  tm_borders() +
  tm_layout(title = "Bounding Box Localization for Generated Map Frames")

9 Bounding Boxes from created Mapframes per Country

Count of bounding boxes from mapframes per country to assess use of Sketch Map Tool per country

# Areas of bounding boxes: not sure yet if of interest
# mapframes_bboxes_proj <- st_transform(mapframes_bboxes, crs = 32633)

# Calculate areas
# mapframes_bboxes_proj$area <- st_area(mapframes_bboxes_proj)


# Centroid of every bounding box. `mapframes_bboxes` is already an sf object,
# so it is used directly; the centroids are stored as a geometry (sfc)
# column next to the bounding boxes.
mapframes_bboxes_sf <- mapframes_bboxes
mapframes_bboxes_sf$centroid <- st_centroid(st_geometry(mapframes_bboxes_sf))

# Country polygons
world <- ne_countries(scale = "medium", returnclass = "sf")

# Spatial join: attach the attributes of the country each centroid falls in.
centroids_with_country <- mapframes_bboxes_sf |>
  st_set_geometry("centroid") |>
  st_join(world)

# Number of centroids per country. Countries are keyed by `adm0_a3`;
# centroids outside every country polygon (NA key) are left out.
# NOTE(review): assumes the Natural Earth layer provides `adm0_a3` — confirm
# with names(world).
centroid_counts <- data.frame(adm0_a3 = centroids_with_country$adm0_a3) |>
  filter(!is.na(adm0_a3)) |>
  count(adm0_a3, name = "countbbox")

# Attribute join back onto the polygons: one row per country, NA where no
# map frame was created (drawn in gray below).
centroid_counts_sf <- left_join(world, centroid_counts, by = "adm0_a3")

tmap_mode("view")

tm_shape(centroid_counts_sf) +
  tm_borders() + # country borders
  tm_fill(
    col = "countbbox",
    palette = "Reds",
    title = "Centroid Count",
    style = "jenks", # classification method
    na.color = "gray",
    n = 5 # number of classes for the Jenks classification
  ) +
  tm_layout(title = "Map Frames created for Countries")

10 Created vs. Downloaded Sketch Maps

A mapframe is counted as downloaded as soon as the user clicks the “Download PDF” button.

# How many created Sketch Maps were actually downloaded?
# sketchmap_report_map_frames_filter |> filter(!is.na(downloaded_at))

# Data for the stacked bar chart: map frames with and without a download
# timestamp. `Status` duplicates `category` and drives the stacking/fill.
mapframe_counts_stacked <- local({
  n_created <- nrow(sketchmap_report_map_frames_filter)
  n_downloaded <- sum(!is.na(sketchmap_report_map_frames_filter$downloaded_at))
  status_labels <- c("Downloaded", "Not Downloaded")

  data.frame(
    category = status_labels,
    count = c(n_downloaded, n_created - n_downloaded),
    Status = status_labels
  )
})

# Single stacked bar: downloaded vs. not downloaded map frames
ggplot(
  mapframe_counts_stacked,
  aes(x = "Mapframes", y = count, fill = Status)
) +
  geom_col() +
  scale_fill_manual(values = c("lightgreen", "lightgray")) +
  labs(
    title = "Created vs. Downloaded Mapframes",
    x = "Mapframes",
    y = "Count"
  ) +
  theme_minimal()

11 Mapframes with downloads and uploads

Uploads as number of Sketch Maps (former blobs) registered in the database. This is independent from upload clicks and multi uploads.

# Restrict both tables to the reporting period before anything is derived
# from them.
# NOTE(review): if the timestamp columns are POSIXct, comparing them with a
# Date does not compare calendar dates (base R warns about incompatible
# methods) — confirm the column types.
sketchmap_report_sketches_filter <- sketchmap_report_sketches |>
  filter(sketch_uploaded > as.Date("2024-12-05"))

sketchmap_report_map_frames_filter <- sketchmap_report_map_frames |>
  filter(created_at > as.Date("2024-12-05"))

# Count the rows of `data` per `day` and accumulate them into a running
# total; `label` tags the series so it can be told apart after binding.
cumulative_per_day <- function(data, label) {
  data |>
    count(day) |>
    arrange(day) |>
    mutate(cumulative_count = cumsum(n), dataset = label)
}

# Truncate the relevant timestamp of each event type to the day.
mapframes_total <- sketchmap_report_map_frames_filter |>
  mutate(day = floor_date(created_at, "day"))

downloads <- sketchmap_report_map_frames_filter |>
  filter(!is.na(downloaded_at)) |>
  mutate(day = floor_date(downloaded_at, "day"))

# Upload timestamps are parsed explicitly as UTC date-times first.
uploads <- sketchmap_report_sketches_filter |>
  mutate(
    sketch_uploaded = as.POSIXct(
      sketch_uploaded,
      format = "%Y-%m-%d %H:%M:%S",
      tz = "UTC"
    )
  ) |>
  mutate(day = floor_date(sketch_uploaded, "day"))

# Cumulative counts per day for each dataset
mapframes_cum <- cumulative_per_day(mapframes_total, "Mapframes Created")
downloads_cum <- cumulative_per_day(downloads, "Downloads")
uploads_cum <- cumulative_per_day(uploads, "Sketches Uploaded")


# Combine datasets
final_counts_3 <- bind_rows(mapframes_cum, downloads_cum, uploads_cum)

chartupdown <- ggplot(
  final_counts_3,
  aes(x = day, y = cumulative_count, color = dataset)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Cumulative Count of Mapframes, Downloads, and Uploads Over Time",
    x = "Days",
    y = "Cumulative Count"
  ) +
  theme_minimal()


ggplotly(chartupdown)
# Plot only downloads versus uploads

filtered_counts <- final_counts_3 |>
  filter(dataset == "Sketches Uploaded" | dataset == "Downloads")

# Same cumulative line chart as above, without the created map frames
chartupdown2 <- ggplot(
  filtered_counts,
  aes(x = day, y = cumulative_count, color = dataset)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Cumulative Count of Downloads and Uploads Over Time",
    x = "Days",
    y = "Cumulative Count"
  ) +
  theme_minimal()

# Interactive version
ggplotly(chartupdown2)
# Open question: is there any possibility to track multiple uploads?

# Number of downloaded Sketch Maps vs. created Sketch Maps
# sketchmap_report_map_frames_filter |>  filter(!is.na(downloaded_at) & uploads>0) |> nrow()
# 67
# 67 downloaded map frames also show uploads, but the uploads CSV has 296
# rows. Does that mean there were several multiple uploads?
# On average 4.5 uploaded at once?

# General number of uploads
# no_uploads <- nrow(sketchmap_report_sketches)
# 296

14 Uploaded Sketch Maps and share of downloaded results

This analysis is based on the Apache log. Uploads = clicks on upload. This analysis does not consider multiple uploads. It will soon be replaced by geodata download information on sketches database uploads.

# Exploratory counts (kept for reference):
# sketchmap_report_uploads |> filter(is.na(download_raster) & is.na(download_vector)) |> nrow()
# 22

# download_raster <- sketchmap_report_uploads |> filter(!is.na(download_raster))
# no_raster_downloads <- nrow(download_raster)
# 141
# share_raster_downloads <- no_raster_downloads / no_uploads

# download_raster$uuid |> unique() |> length()
# 79
# download_vector <- sketchmap_report_uploads |> filter(!is.na(download_vector))
# no_vector_downloads <- nrow(download_vector)
# 230

# share_vector_downloads <- no_vector_downloads / no_uploads

# download_vector$uuid |> unique() |> length()
# 138

# Interesting that it is from different uuids?


# Label every upload by which result types were downloaded; a missing value
# in download_raster / download_vector is treated as "not downloaded".
sketchmap_report_uploads <- sketchmap_report_uploads |>
  mutate(
    category = case_when(
      is.na(download_raster) & is.na(download_vector) ~ "None",
      is.na(download_vector) ~ "Raster Only",
      is.na(download_raster) ~ "Vector Only",
      TRUE ~ "Both" # neither value is missing
    )
  )

# Uploads per category and overall
category_counts <- table(sketchmap_report_uploads$category)
total <- sum(category_counts)

# Data frame for ggplot, including the share of each category in percent
category_df <- setNames(
  as.data.frame(category_counts),
  c("Category", "Count")
)
category_df$Percentage <- category_df$Count / total * 100

# Bar chart of the percentage per download category
ggplot(category_df, aes(x = Category, y = Percentage, fill = Category)) +
  geom_col() +
  labs(
    title = "Distribution of Raster and Vector Geodata Downloads",
    x = "Download Type",
    y = "Percentage"
  ) +
  theme_minimal()