DuckDB: Currentness of Buildings#
In this notebook we demonstrate how to analyze and visualize the up-to-date-ness or currentness of the latest OSM data.
These are the steps you see further down:
Set the connection parameters.
Prepare your input parameters, e.g. define area of interest and OSM tag filter.
Download data using DuckDB.
Plot currentness chart with Polars.
Create a Map, an interactive Slider to filter the map data.
Getting started#
Set connection params.
import os
s3_user = os.environ["S3_ACCESS_KEY_ID"] # add your user here
s3_password = os.environ["S3_SECRET_ACCESS_KEY"] # add your password here
Configure DuckDB.
import duckdb
con = duckdb.connect(
config={
'threads': 8,
'max_memory': '8GB'
}
)
con.install_extension("spatial")
con.load_extension("spatial")
Set connection to MinIO object storage.
query = f"""
DROP SECRET IF EXISTS "__default_s3";
CREATE SECRET (
TYPE S3,
KEY_ID '{s3_user}',
SECRET '{s3_password}',
REGION 'eu-central-1',
endpoint 'sotm2024.minio.heigit.org',
use_ssl true,
url_style 'path'
);
"""
con.sql(query).show()
┌─────────┐
│ Success │
│ boolean │
├─────────┤
│ true │
└─────────┘
Prepare the input parameters for your analysis#
# Set s3 path for parquet input data
parquet_data_path = "s3a://heigit-ohsome-sotm24/data/geo_sort_ext/contributions_germany/**"
#parquet_data_path = "s3a://heigit-ohsome-sotm24/data/geo_sort_ext/contributions/**"
# Define status filter
status = 'latest'
# Define location filter
bboxes = {
'heidelberg': (8.629761, 49.379556, 8.742371, 49.437890),
'nairobi': (36.650938, -1.444471, 37.103887, -1.163522),
'mannheim': (8.41416, 49.410362, 8.58999, 49.590489),
'berlin': (13.088345, 52.338271, 13.761161, 52.675509)
}
selected_region = 'heidelberg'
xmin, ymin, xmax, ymax = bboxes[selected_region]
area_of_interest_file =f"../data/{selected_region}.geojson"
# Define geometry type filter
geometry_type = 'Polygon'
Get the Data#
import time
start_time = time.time()
query = f"""
DROP TABLE IF EXISTS osm_data;
CREATE TABLE osm_data AS
(
SELECT
a.valid_from,
a.tags,
a.geometry
FROM
read_parquet('{parquet_data_path}', hive_partitioning=true) as a,
st_read('{area_of_interest_file}') as aoi
WHERE 1=1
and status = '{status}'
and geometry_type = '{geometry_type}'
and tags['building'][1] is not null
and tags['building'][1] != 'no'
-- spatial filtering part
and (a.bbox.xmax >= {xmin} AND a.bbox.xmin <= {xmax})
and (a.bbox.ymax >= {ymin} AND a.bbox.ymin <= {ymax})
and ST_Intersects(st_GeomFromText(a.geometry), aoi.geom)
)
;
"""
con.sql(query)
download_time = round(time.time() - start_time, 3)
print(f"download took {download_time} sec.")
download took 7.882 sec.
Plot currentness chart with polars and hvplot#
import polars as pl
chart_query = """
SELECT
date_trunc('month', valid_from) as month,
count(*) as n_edits
FROM osm_data
GROUP BY month
ORDER BY month
"""
df = con.sql(chart_query).pl()
df.plot.step(
x="month",
y="n_edits"
)
Display currentness of OSM features on map#
Get data from DucDKB into GeoPandas dataframe.
import geopandas as gpd
map_query = """
SELECT
epoch_ms(valid_from) as valid_from,
geometry
FROM osm_data;
"""
df = con.sql(map_query).df()
# convert the data to geodata
gdf = gpd.GeoDataFrame(
df,
geometry=gpd.GeoSeries.from_wkt(df['geometry'])
).set_crs('epsg:4326')
Define map parameters and style.
import datetime
import lonboard
from palettable.matplotlib import Viridis_20
# compute lonboard color style for contious color map
min_valid_from = 1000 * datetime.datetime(2007,1,1).timestamp()
max_valid_from = 1000 * datetime.datetime(2024,6,1).timestamp()
# normalized color values from 0 to 1
valid_from_style = gdf["valid_from"].apply(
lambda x: (x - min_valid_from) / (max_valid_from - min_valid_from))
# the lonboard map definition
layer = lonboard.SolidPolygonLayer.from_geopandas(
gdf,
extensions=[lonboard.layer_extension.DataFilterExtension(filter_size=1)],
get_filter_value=gdf["valid_from"], # replace with desired column
filter_range=[min_valid_from, max_valid_from], # replace with desired filter range
get_fill_color=lonboard.colormap.apply_continuous_cmap(valid_from_style, Viridis_20, alpha=1)
)
currentness_map = lonboard.Map(
basemap_style=lonboard.basemap.CartoBasemap.DarkMatter,
layers=[layer]
)
Define Dates Slider Selection Widget and link to map filter range.
from datetime import date, timedelta
import ipywidgets
from traitlets import directional_link
start = datetime.datetime(2007,1,1)
end = datetime.datetime(2024,6,1)
delta = end - start # returns timedelta
dates = [start + timedelta(days=i) for i in range(delta.days + 1)]
options = [(i.strftime('%d-%b-%Y'), int(1000* i.timestamp())) for i in dates]
date_slider = ipywidgets.SelectionRangeSlider(
options=options,
index=(0, len(dates)-1),
description='Last Edit:',
layout=ipywidgets.Layout(width='1000px'),
disabled=False
)
directional_link(
(date_slider, 'value'),
(layer, "filter_range"),
#(slider, 'value')
)
<traitlets.traitlets.directional_link at 0x7cdde89582c0>
Display the map. Have fun exploring and moving around the time slider!
display(currentness_map, date_slider)