# User activity map

In this notebook we demonstrate how to analyze and visualize daily OSM user activity for a country.

These are the steps you see further down:

  • Set the connection parameters.

  • Prepare your input parameters, e.g. define area of interest and time interval.

  • Download data using DuckDB.

  • Create a map and an interactive slider to filter the map data.

## Getting started

Set connection params.

from dotenv import load_dotenv
load_dotenv()
True
import os

# Read the S3 credentials from environment variables; a missing variable
# raises KeyError.
s3_user, s3_password = (
    os.environ["S3_ACCESS_KEY_ID"],  # add your user here
    os.environ["S3_SECRET_ACCESS_KEY"],  # add your password here
)

Configure DuckDB.

!pip install duckdb==1.0.0
import duckdb

# Open the DuckDB connection used by all later cells. No database path is
# passed to connect(); only thread count, memory limit and object cache are set.
con = duckdb.connect(
    config=dict(
        threads=32,
        max_memory="50GB",
        enable_object_cache=True,
    )
)

# Install, then load, the spatial extension on this connection.
con.install_extension("spatial")
con.load_extension("spatial")

Set the connection parameters for the Iceberg REST catalog.

!pip install "pyiceberg[s3fs,duckdb,sql-sqlite,pyarrow]"
from pyiceberg.catalog.rest import RestCatalog

# Connect to the Iceberg REST catalog. The property names contain dots and
# hyphens, so they are passed by unpacking a dict rather than as keywords.
catalog = RestCatalog(
    name="default",
    **{
        # catalog service
        "uri": "https://sotm2024.iceberg.ohsome.org",
        # object storage holding the table files
        "s3.endpoint": "https://sotm2024.minio.heigit.org",
        "s3.region": "eu-central-1",
        "s3.access-key-id": s3_user,
        "s3.secret-access-key": s3_password,
        # file IO implementation used by PyIceberg
        "py-io-impl": "pyiceberg.io.pyarrow.PyArrowFileIO",
    }
)

Set connection to MinIO object storage.

def _sql_quote(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that a credential containing
    a quote character cannot terminate the literal early and break (or alter)
    the statement.
    """
    return "'" + str(value).replace("'", "''") + "'"


# Drop the default S3 secret if one exists (e.g. from an earlier run of this
# cell), then register a new one so DuckDB can read from the MinIO endpoint.
# For credentials without quote characters the generated SQL is unchanged.
query = f"""
DROP SECRET IF EXISTS "__default_s3";
CREATE SECRET (
      TYPE S3,
      KEY_ID {_sql_quote(s3_user)},
      SECRET {_sql_quote(s3_password)},
      REGION 'eu-central-1',
      endpoint 'sotm2024.minio.heigit.org',
      use_ssl true,
      url_style 'path'
  );
"""
con.sql(query).show()
┌─────────┐
│ Success │
│ boolean │
├─────────┤
│ true    │
└─────────┘

## Prepare the input parameters for your analysis

For this analysis mainly two parameters matter:

  • time range

  • location (approximated by country bounding boxes)

# Load the Iceberg table identified by (namespace, tablename) from the catalog.
namespace, tablename = 'geo_sort', 'contributions'
icebergtable = catalog.load_table((namespace, tablename))

# Location filter: approximate country bounding boxes, each stored as
# (xmin, ymin, xmax, ymax) with x = longitude and y = latitude.
bboxes = dict(
    kenya=(33.89, -4.68, 41.86, 5.51),
    germany=(5.99, 47.3, 15.02, 54.98),
    brazil=(-73.99, -33.77, -34.73, 5.24),
)

# Region to analyse; must be one of the keys of `bboxes`.
selected_region = 'kenya'
xmin, ymin, xmax, ymax = bboxes[selected_region]

# Time range filter: lower bound applied to `valid_from`, and the unit the
# timestamps are truncated to when counting users.
start_timestamp, time_interval = '2024-01-01T00:00:00', 'day'

## Get the Data

Here we download the H3 cell ID instead of the actual OSM geometry. This allows us to download the entire history of OSM. Furthermore, we get the OSM user ID and contribution timestamp.

import time

# perf_counter() is a monotonic clock meant for measuring elapsed intervals;
# wall-clock time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()

# Scan the Iceberg table and register the result in DuckDB as 'raw_osm_data'.
# The row filter keeps rows that
#   - have status 'latest' or 'history',
#   - have an xmin/xmax/ymin/ymax extent overlapping the selected bounding box,
#   - have valid_from on or after `start_timestamp`.
# Only the three columns needed for the analysis are fetched.
icebergtable.scan(
    row_filter=(
        "(status = 'latest' or status = 'history') "
        f"and (xmax >= {xmin} and xmin <= {xmax}) "
        f"and (ymax >= {ymin} and ymin <= {ymax}) "
        f"and valid_from >= '{start_timestamp}'"
    ),
    selected_fields=(
        "user_id",
        "valid_from",
        "h3_r5",
    ),
).to_duckdb('raw_osm_data', connection=con)

download_time = round(time.perf_counter() - start_time, 3)
print(f"download took {download_time} sec.")
download took 22.647 sec.

Count the number of distinct users per H3 cell and time interval.

Use H3 library to calculate H3 polygon geometries.

# perf_counter() is a monotonic clock meant for measuring elapsed intervals;
# wall-clock time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()

# Install and load the community h3 extension, then aggregate the raw rows:
# one output row per (H3 cell, truncated timestamp) holding the number of
# distinct users and the cell boundary as WKT. The interval is returned as
# epoch milliseconds, multiplied by 1.0 to obtain a floating point column.
# NOTE(review): the date slider compares these values with UTC-midnight
# timestamps; if valid_from is a TIMESTAMPTZ column, date_trunc presumably
# follows the DuckDB session time zone - confirm that it is UTC.
query = f"""
INSTALL h3 FROM community;
LOAD h3;

SELECT
    h3_r5,
    1.0 * epoch_ms(date_trunc('{time_interval}', valid_from)) as time_interval,
    count(distinct user_id) as n_users,
    h3_cell_to_boundary_wkt(h3_r5) as geometry
FROM raw_osm_data
GROUP BY 1, 2
ORDER BY 2, 1;
"""

df = con.sql(query).df()

processing_time = round(time.perf_counter() - start_time, 3)
print(f"processing took {processing_time} sec.")
processing took 0.112 sec.

## Display OSM user activity on map

Get data from DuckDB into a GeoPandas dataframe.

import pandas as pd
import geopandas as gpd

# Parse the WKT cell boundaries into geometries and build a GeoDataFrame from
# the query result, then declare its coordinate reference system as EPSG:4326.
gdf = gpd.GeoDataFrame(df, geometry=gpd.GeoSeries.from_wkt(df['geometry']))
gdf = gdf.set_crs('epsg:4326')

Define map parameters and style.

import numpy as np
import datetime
import lonboard
from palettable.colorbrewer.sequential import Blues_9

# Colour scale: n_users mapped linearly from [min_value, max_value] to [0, 1].
min_value = 0
max_value = gdf["n_users"].max()
user_activity_style = (gdf["n_users"] - min_value) / (max_value - min_value)

# Extrusion height of each cell is proportional to its user count; any NaN
# height is replaced by 1.
gdf["height"] = gdf["n_users"] * 2500
heights = np.nan_to_num(gdf["height"].to_numpy(), nan=1)

# Initial filter range in epoch milliseconds (UTC). These bounds are
# hard-coded; the date slider linked to the layer's filter_range later in the
# notebook writes its own range.
min_valid_from = datetime.datetime(2023, 1, 1, tzinfo=datetime.timezone.utc).timestamp() * 1000
max_valid_from = datetime.datetime(2024, 6, 1, tzinfo=datetime.timezone.utc).timestamp() * 1000

# Polygon layer: one extruded polygon per row of gdf, coloured by the
# normalised user count and filterable by the row's time interval.
layer = lonboard.PolygonLayer.from_geopandas(
    gdf,
    get_fill_color=lonboard.colormap.apply_continuous_cmap(
        user_activity_style, Blues_9, alpha=.85
    ),
    extruded=True,
    get_elevation=heights,
    extensions=[
        lonboard.layer_extension.DataFilterExtension(filter_size=1)
    ],
    get_filter_value=gdf["time_interval"],  # replace with desired column
    filter_range=[min_valid_from, max_valid_from],  # replace with desired filter range
)

# Centre the initial view on the midpoint of the selected bounding box.
view_state = dict(
    longitude=xmin + ((xmax - xmin) / 2),
    latitude=ymin + ((ymax - ymin) / 2),
    zoom=5,
    pitch=25,
)

# Map widget showing the single layer on the Positron basemap.
user_activity_map = lonboard.Map(
    layers=[layer],
    view_state=view_state,
    basemap_style=lonboard.basemap.CartoBasemap.Positron,
)

Set date slider.

from datetime import date, timedelta
import ipywidgets
from traitlets import directional_link

def _utc_epoch_ms(day):
    """Return the naive datetime *day*, read as UTC, in epoch milliseconds."""
    return 1000 * day.replace(tzinfo=datetime.timezone.utc).timestamp()


# One slider entry per day from `start` to `end`, both inclusive.
start = datetime.datetime(2024, 1, 1)
end = datetime.datetime(2024, 6, 1)
delta = end - start  # timedelta spanning the slider range
dates = [start + timedelta(days=offset) for offset in range(delta.days + 1)]

# (label, value) pairs: the label is shown on the slider, the value is the
# day's UTC midnight in epoch milliseconds.
options = [(day.strftime('%d-%b-%Y'), _utc_epoch_ms(day)) for day in dates]

# Slider with one position per day; its value is the selected day's timestamp.
date_slider = ipywidgets.SelectionSlider(
    description='Day:',
    options=options,
    disabled=False,
    layout=ipywidgets.Layout(width='1000px'),
)


def _single_day_range(value):
    """Turn one slider value into a (min, max) range covering only that value."""
    return (value, value)


# One-way link: whenever the slider value changes, the layer's filter_range is
# set to that single day.
directional_link(
    source=(date_slider, 'value'),
    target=(layer, "filter_range"),
    transform=_single_day_range,
)
<traitlets.traitlets.directional_link at 0x786d4c7375f0>

Display the map. Have fun exploring and moving around the time slider!

# Show the map widget together with the date slider. `display` is not imported
# in this notebook; presumably it is the built-in provided by IPython/Jupyter.
display(user_activity_map, date_slider)