Road Network Evolution#

In this notebook we demonstrate how to analyze and visualize the development highways in OSM over time.

These are the steps you see further down:

  • Set the connection parameters.

  • Prepare your input parameters, e.g. define area of interest and time interval.

  • Download data using DuckDB.

  • Create the mapping saturation plot.

  • Create a Map, an interactive Slider to filter the map data.

Getting started#

Set connection params.

from dotenv import load_dotenv
load_dotenv()
True
import os

s3_user = os.environ["S3_ACCESS_KEY_ID"]  # add your user here
s3_password = os.environ["S3_SECRET_ACCESS_KEY"]  # add your password here

Configure DuckDB.

!pip install duckdb==1.0.0
import duckdb

con = duckdb.connect(
    config={
        'threads': 32,
        'max_memory': '50GB'
    }
)
con.install_extension("spatial")
con.load_extension("spatial")

Set the connection params to Iceberg Rest Catalog.

!pip install "pyiceberg[s3fs,duckdb,sql-sqlite,pyarrow]"
from pyiceberg.catalog.rest import RestCatalog

catalog = RestCatalog(
    name="default",
    **{
        "uri": "https://sotm2024.iceberg.ohsome.org",
        "s3.endpoint": "https://sotm2024.minio.heigit.org",
        "py-io-impl": "pyiceberg.io.pyarrow.PyArrowFileIO",
        "s3.access-key-id": s3_user,
        "s3.secret-access-key": s3_password,
        "s3.region": "eu-central-1"
    }
)

Set connection to MinIO object storage.

query = f"""
DROP SECRET IF EXISTS "__default_s3";
CREATE SECRET (
      TYPE S3,
      KEY_ID '{s3_user}',
      SECRET '{s3_password}',
      REGION 'eu-central-1',
      endpoint 'sotm2024.minio.heigit.org',
      use_ssl true,
      url_style 'path'
  );
"""
con.sql(query).show()
┌─────────┐
│ Success │
│ boolean │
├─────────┤
│ true    │
└─────────┘

Prepare the input parameters for your analysis#

# Set iceberg table
namespace = 'geo_sort'
tablename = 'contributions'
icebergtable = catalog.load_table((namespace, tablename))


# Define location filter
bboxes = {
    'heidelberg': (8.629761, 49.379556, 8.742371, 49.437890),
    'nairobi': (36.650938, -1.444471, 37.103887, -1.163522),
    'mannheim': (8.41416, 49.410362, 8.58999, 49.590489), 
    'berlin': (13.088345, 52.338271, 13.761161, 52.675509)
}

selected_region = 'nairobi'
xmin, ymin, xmax, ymax = bboxes[selected_region]
area_of_interest_file = f"s3a://heigit-ohsome-sotm24/data/sample_data/{selected_region}.geojson"

# Define geometry type filter
geometry_type = 'LineString'

# Define time range
min_timestamp = '2008-01-01'
max_timestamp = '2024-01-01'

Get the Data#

Download historic and latest OSM data for bounding box.

import time
start_time = time.time()

icebergtable.scan(
    row_filter=(
        f"(status = 'latest' or status = 'history')"
        f"and geometry_type = '{geometry_type}' "
        f"and (xmax >= {xmin} and xmin <= {xmax}) "
        f"and (ymax >= {ymin} and ymin <= {ymax}) "
    ),
    selected_fields=(
        "user_id",
        "osm_id",
        "osm_version",
        "status",
        "valid_from",
        "valid_to",
        "tags",
        "geometry",
    ),
).to_duckdb('raw_osm_data',connection=con)

download_time = round(time.time() - start_time, 3)
print(f"download took {download_time} sec.")
download took 12.624 sec.

Clip OSM highways with to area of interest and calculate road length in kilometer.

import time
start_time = time.time()

query = f"""
DROP TABLE IF EXISTS osm_data;
CREATE TABLE osm_data AS 
(
SELECT
    a.osm_id,
    a.osm_version,
    a.status,
    a.valid_from,
    a.valid_to,
    a.tags,
    ST_Intersection(ST_GeomFromText(a.geometry), aoi.geom) as clipped_geometry,
    ST_Length_Spheroid(clipped_geometry) / 1000 as length_km
FROM
    raw_osm_data as a,
    st_read('{area_of_interest_file}') as aoi
WHERE 1=1
    and tags['highway'][1] is not null
    -- spatial filtering part
    and ST_Intersects(st_GeomFromText(a.geometry), aoi.geom)
)
;
"""
con.sql(query)

processing_time = round(time.time() - start_time, 3)
print(f"processing took {processing_time} sec.")
processing took 30.721 sec.

Plot chart#

query = f"""
DROP TABLE IF EXISTS osm_stats;
CREATE TABLE osm_stats AS
(
with 
snapshots as (
    SELECT 
    row_number() OVER () as snapshot_index,
    range AS datetime_key 
    FROM RANGE(DATE '{min_timestamp}', DATE '{max_timestamp}', INTERVAL 1 MONTH)
)
SELECT
    snapshots.snapshot_index,
    snapshots.datetime_key,
    osm_id,
    valid_from,
    valid_to,
    length_km,
    clipped_geometry
FROM snapshots
JOIN osm_data on (
    snapshots.datetime_key >= osm_data.valid_from
    and
    snapshots.datetime_key <= osm_data.valid_to
    )
ORDER BY snapshot_index
);
"""
con.sql(query)
chart_query = f"""
SELECT
  DATE '{min_timestamp}' + INTERVAL (snapshot_index-1) MONTH as month,
  --(sum(road_length)/1000)::int8 as length_km
  sum(length_km) as length_km
FROM osm_stats
GROUP BY month
ORDER BY month ASC
"""

chart_data = con.sql(chart_query).pl()
chart = chart_data.plot.line(x="month", y="length_km", xaxis='top', width=700, responsive=False)
display(chart)

Display OSM highway evolution on map#

Get data from DucDKB into GeoPandas dataframe.

import pandas as pd
import geopandas as gpd

map_query = f"""
SELECT
    osm_id,
    epoch_ms(valid_from) as valid_from,
    epoch_ms(valid_to) as valid_to,
    ST_asText(clipped_geometry) as geometry,
FROM osm_data
WHERE ST_GeometryType(clipped_geometry) in ('LINESTRING', 'MULTILINESTRING');
"""

df = con.sql(map_query).df()

# convert the data to geodata
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.GeoSeries.from_wkt(df['geometry'])
).set_crs('epsg:4326')

Define map parameters and style.

import lonboard
import numpy as np
import datetime

# compute lonboard color style for contious color map
min_valid_from = float(gdf["valid_from"].min())
max_valid_to = float(gdf["valid_to"].max())
default_snapshot = 1000 * datetime.datetime(2012,1,1).timestamp()

# lonboard gpu filtering
filter_values =  np.column_stack(
    [gdf["valid_from"], gdf["valid_to"]]
)

initial_filter_range = [
    [min_valid_from, default_snapshot],
    [default_snapshot, max_valid_to]
]


# the lonboard map definition
layer = lonboard.PathLayer.from_geopandas(
    gdf,
    extensions=[lonboard.layer_extension.DataFilterExtension(filter_size=2)],
    get_filter_value=filter_values,  # replace with desired column
    filter_range=initial_filter_range,  # replace with desired filter range
    get_color=[0,255,255,255],
    width_min_pixels=0.8,
)

view_state = {
    "longitude": xmin + ((xmax - xmin) / 2),
    "latitude": ymin + ((ymax - ymin) / 2),
    "zoom": 9,
}

osm_evolution_map = lonboard.Map(
    basemap_style=lonboard.basemap.CartoBasemap.DarkMatter,
    layers=[layer],
    view_state=view_state
)

Set date slider.

import datetime
from dateutil.relativedelta import relativedelta
import ipywidgets
from traitlets import directional_link

dates = pd.date_range(min_timestamp, max_timestamp, freq='MS').tolist()
options = [(date.strftime('%d-%b-%Y'), 1000 * date.timestamp()) for i, date in enumerate(dates)]

date_slider = ipywidgets.SelectionSlider(
    options=options,
    description='Day:',
    layout=ipywidgets.Layout(width='1000px'),
    disabled=False
)

directional_link(
    (date_slider, 'value'),
    (layer, "filter_range"),
    transform=lambda v: ((min_valid_from,v),(v,max_valid_to))
)

Display the map. Have fun exploring and moving around the time slider!

display(osm_evolution_map, date_slider)