HOT Tasking Manager project analysis#
In this notebook we demonstrate how to combine two datasets: HOT Tasking Manager project data and OSM contributions.
We want to investigate all contributions that have been made in OSM via HOT’s Tasking Manager. We will furthermore filter these contributions by using the OSM Changeset information.
These are the steps you see further down:
Set the connection parameters.
Prepare your input parameters, e.g. define HOT Tasking Manager project ID.
Download data using DuckDB. This time we also download HOT project data.
Filter OSM contributions using changeset attributes.
Display both datasets on a map.
Getting started#
Set connection params.
from dotenv import load_dotenv
load_dotenv()
True
import os
s3_user = os.environ["S3_ACCESS_KEY_ID"] # add your user here
s3_password = os.environ["S3_SECRET_ACCESS_KEY"] # add your password here
Configure DuckDB.
!pip install duckdb==1.0.0
import duckdb
con = duckdb.connect(
config={
'threads': 8,
'max_memory': '8GB',
# 'enable_object_cache': True
}
)
con.install_extension("spatial")
con.load_extension("spatial")
Set the connection params to Iceberg Rest Catalog. (We need this for the OSM data.)
from pyiceberg.catalog.rest import RestCatalog
catalog = RestCatalog(
name="default",
**{
"uri": "https://sotm2024.iceberg.ohsome.org",
"s3.endpoint": "https://sotm2024.minio.heigit.org",
"py-io-impl": "pyiceberg.io.pyarrow.PyArrowFileIO",
"s3.access-key-id": s3_user,
"s3.secret-access-key": s3_password,
"s3.region": "eu-central-1"
}
)
Set connection to MinIO object storage. (We need this for HOT Tasking Manager data)
query = f"""
DROP SECRET IF EXISTS "__default_s3";
CREATE SECRET (
TYPE S3,
KEY_ID '{s3_user}',
SECRET '{s3_password}',
REGION 'eu-central-1',
endpoint 'sotm2024.minio.heigit.org',
use_ssl true,
url_style 'path'
);
"""
con.sql(query).show()
┌─────────┐
│ Success │
│ boolean │
├─────────┤
│ true │
└─────────┘
Prepare the input parameters for your analysis#
hot_tm_parquet_data_path = "s3a://heigit-ohsome-sotm24/data/hot_tasking_manager/**"
# Set iceberg table
namespace = 'geo_sort'
tablename = 'contributions'
icebergtable = catalog.load_table((namespace, tablename))
hot_tm_project_id = 15996
Get Information about HOT Tasking Manager project#
import geopandas as gpd
query = f"""
SELECT *
FROM read_parquet('{hot_tm_parquet_data_path}') a
WHERE project_id = {hot_tm_project_id};
"""
df = con.sql(query).df()
bbox = df["bbox"].values[0]
# convert the data to geodata
gdf = gpd.GeoDataFrame(
df,
geometry=gpd.GeoSeries.from_wkt(df['geometry'])
).set_crs('epsg:4326')
Display where the project is located.
import lonboard
# the lonboard map definition
hot_tm_layer = lonboard.PolygonLayer.from_geopandas(
gdf,
get_fill_color=[0, 0, 0, 0],
get_line_color=[255, 0, 255],
get_line_width=10
)
view_state = {
"longitude": df["centroid"].values[0]["x"],
"latitude": df["centroid"].values[0]["y"],
"zoom": 12
}
overview_map = lonboard.Map(
basemap_style=lonboard.basemap.CartoBasemap.Positron,
layers=[hot_tm_layer],
view_state=view_state
)
display(overview_map)
Download data OSM Data#
import time
start_time = time.time()
icebergtable.scan(
row_filter=(
f"( status = 'latest' or status = 'history' )"
f"and (xmax >= {bbox['xmin']} and xmin <= {bbox['xmax']}) "
f"and (ymax >= {bbox['ymin']} and ymin <= {bbox['ymax']}) "
),
selected_fields=(
"user_id",
"osm_id",
"osm_version",
"valid_from",
"valid_to",
"tags",
"tags_before",
"changeset",
"geometry",
),
).to_duckdb('raw_osm_data',connection=con)
download_time = round(time.time() - start_time, 3)
print(f"download took {download_time} sec.")
download took 42.551 sec.
Filter data using OSM changeset information#
query = f"""
SELECT *
FROM raw_osm_data
WHERE 1=1
and list_contains(changeset.hashtags, 'hotosm-project-{hot_tm_project_id}')
"""
osm_df = con.sql(query).df()
# convert the data to geodata
osm_gdf = gpd.GeoDataFrame(
osm_df,
geometry=gpd.GeoSeries.from_wkt(osm_df['geometry'])
).set_crs('epsg:4326')
Display OSM data and HOT Tasking Manager project on map.
# the lonboard map definition
osm_polygons_layer = lonboard.PolygonLayer.from_geopandas(
osm_gdf[osm_gdf.geom_type == 'Polygon'],
get_fill_color=[0, 0, 0, 0],
get_line_width=1
)
osm_lines_layer = lonboard.PathLayer.from_geopandas(
osm_gdf[osm_gdf.geom_type == 'LineString'],
)
osm_points_layer = lonboard.ScatterplotLayer.from_geopandas(
osm_gdf[osm_gdf.geom_type == 'Point'],
)
osm_map = lonboard.Map(
basemap_style=lonboard.basemap.CartoBasemap.Positron,
layers=[
hot_tm_layer,
osm_polygons_layer,
osm_lines_layer,
osm_points_layer,
],
view_state=view_state
)
display(osm_map)
Display OSM user statistics#
Let’s analyse who mapped how much in this area.
query = f"""
SELECT
user_id,
count(*) as n_edits,
SUM(CASE
WHEN contains(changeset.editor, 'JOSM') THEN 1
ELSE 0
END) as josm_edit,
SUM(CASE
WHEN contains(changeset.editor, 'iD') THEN 1
ELSE 0
END) as iD_edit
FROM raw_osm_data
WHERE 1=1
and list_contains(changeset.hashtags, 'hotosm-project-{hot_tm_project_id}')
GROUP BY user_id
ORDER BY n_edits DESC
"""
user_stats_df = con.sql(query).df()
We are going to add OSM usernames to our dataframe.
import requests
def get_current_usernames(user_ids_list):
api_url = "http://textual.ru/whosthat/whosthat.php"
user_names_list = []
for user_id in user_ids_list:
params = {
"action": "info", # Use "info" action to get detailed information
"id": user_id,
}
response = requests.get(api_url, params=params)
if response.status_code == 200:
data = response.json()
if data and isinstance(data, list) and len(data) > 0:
# The API returns an array of hashes with user information
# We need to extract the user ID from the first entry
user_info = data[0]
user_names_list.append( user_info['names'][-1]["name"] )
return user_names_list
user_stats_df["username"] = get_current_usernames(user_stats_df["user_id"].values)
user_stats_df["link"] = "https://www.openstreetmap.org/user/" + user_stats_df["username"]
Display a user leaderboard.
display(user_stats_df)
user_id | n_edits | josm_edit | iD_edit | username | link | |
---|---|---|---|---|---|---|
0 | 563947 | 898 | 0.0 | 858.0 | notoncebut2x | https://www.openstreetmap.org/user/notoncebut2x |
1 | 11010237 | 734 | 734.0 | 0.0 | Dannyoung | https://www.openstreetmap.org/user/Dannyoung |
2 | 10570996 | 468 | 468.0 | 0.0 | Erick tamba | https://www.openstreetmap.org/user/Erick tamba |
3 | 21292317 | 439 | 0.0 | 439.0 | abhimanyubarun | https://www.openstreetmap.org/user/abhimanyubarun |
4 | 17340053 | 388 | 388.0 | 0.0 | Sally Mlingi | https://www.openstreetmap.org/user/Sally Mlingi |
5 | 13722972 | 261 | 186.0 | 75.0 | Jimerveille | https://www.openstreetmap.org/user/Jimerveille |
6 | 21288883 | 251 | 0.0 | 251.0 | BTR2 | https://www.openstreetmap.org/user/BTR2 |
7 | 19063413 | 245 | 0.0 | 245.0 | Aczel-Divin MOUNTOU | https://www.openstreetmap.org/user/Aczel-Divin... |
8 | 20450590 | 167 | 0.0 | 167.0 | KOGNOKO ABOULATSAMBO | https://www.openstreetmap.org/user/KOGNOKO ABO... |
9 | 9449463 | 150 | 150.0 | 0.0 | Mercy14846 | https://www.openstreetmap.org/user/Mercy14846 |
10 | 14962448 | 117 | 0.0 | 117.0 | Abdul-Nayar Rabiatu Sungumu | https://www.openstreetmap.org/user/Abdul-Nayar... |
11 | 8386618 | 104 | 0.0 | 104.0 | Adele Birkenes | https://www.openstreetmap.org/user/Adele Birkenes |
12 | 15320583 | 75 | 0.0 | 75.0 | Eldablessing | https://www.openstreetmap.org/user/Eldablessing |
13 | 21292063 | 60 | 0.0 | 60.0 | GlobalTurtle | https://www.openstreetmap.org/user/GlobalTurtle |
14 | 21304240 | 57 | 0.0 | 57.0 | sbribri | https://www.openstreetmap.org/user/sbribri |
15 | 20155561 | 51 | 13.0 | 38.0 | BENYE MULTI MEDIA | https://www.openstreetmap.org/user/BENYE MULTI... |
16 | 18790262 | 50 | 50.0 | 0.0 | NzahT | https://www.openstreetmap.org/user/NzahT |
17 | 20768452 | 44 | 0.0 | 44.0 | BAZONZILA MBANI Gloire | https://www.openstreetmap.org/user/BAZONZILA M... |
18 | 20982460 | 40 | 18.0 | 22.0 | Jaco452 | https://www.openstreetmap.org/user/Jaco452 |
19 | 21256164 | 33 | 17.0 | 16.0 | Perey Wisdom | https://www.openstreetmap.org/user/Perey Wisdom |
20 | 21186164 | 31 | 7.0 | 24.0 | Ziggyentumi@20 | https://www.openstreetmap.org/user/Ziggyentumi@20 |
21 | 21314713 | 29 | 0.0 | 29.0 | Maryamarab | https://www.openstreetmap.org/user/Maryamarab |
22 | 17496710 | 17 | 17.0 | 0.0 | dellington_deli | https://www.openstreetmap.org/user/dellington_... |
23 | 21230154 | 15 | 0.0 | 15.0 | VijayB2345 | https://www.openstreetmap.org/user/VijayB2345 |
24 | 21125012 | 15 | 0.0 | 15.0 | R237497 | https://www.openstreetmap.org/user/R237497 |
25 | 16690030 | 15 | 0.0 | 15.0 | Bouyaossere | https://www.openstreetmap.org/user/Bouyaossere |
26 | 13007399 | 14 | 0.0 | 14.0 | elodienix | https://www.openstreetmap.org/user/elodienix |
27 | 11500943 | 14 | 0.0 | 14.0 | Mariannamukulunga | https://www.openstreetmap.org/user/Mariannamuk... |
28 | 16739098 | 11 | 0.0 | 11.0 | TUNGBANI FRANCIS XAVIER | https://www.openstreetmap.org/user/TUNGBANI FR... |
29 | 21366183 | 9 | 0.0 | 9.0 | George Nkpagneer Gmagnancha | https://www.openstreetmap.org/user/George Nkpa... |
30 | 19297985 | 9 | 0.0 | 9.0 | susan27 | https://www.openstreetmap.org/user/susan27 |
31 | 15320557 | 8 | 0.0 | 8.0 | GDC_Christie | https://www.openstreetmap.org/user/GDC_Christie |
32 | 21363124 | 6 | 0.0 | 6.0 | Ibrahim Wunpini Sulemana | https://www.openstreetmap.org/user/Ibrahim Wun... |
33 | 10394726 | 5 | 0.0 | 5.0 | Calvin Menttor | https://www.openstreetmap.org/user/Calvin Menttor |
34 | 21363912 | 4 | 3.0 | 1.0 | Panladawuda | https://www.openstreetmap.org/user/Panladawuda |
35 | 21207980 | 2 | 0.0 | 2.0 | Kenaurd OKAMA | https://www.openstreetmap.org/user/Kenaurd OKAMA |
36 | 21331501 | 1 | 0.0 | 1.0 | DEKU PETER MEDODZI | https://www.openstreetmap.org/user/DEKU PETER ... |
Inspect the contributions made by the mapper with most edits.
top_mapper = user_stats_df.sort_values("n_edits")["user_id"][0]
# the lonboard map definition
osm_polygons_layer = lonboard.PolygonLayer.from_geopandas(
osm_gdf[(osm_gdf.geom_type == 'Polygon') & (osm_gdf["user_id"] == top_mapper)],
get_fill_color=[0, 0, 0, 0],
get_line_width=1
)
osm_lines_layer = lonboard.PathLayer.from_geopandas(
osm_gdf[(osm_gdf.geom_type == 'LineString') & (osm_gdf["user_id"] == top_mapper)],
)
osm_points_layer = lonboard.ScatterplotLayer.from_geopandas(
osm_gdf[(osm_gdf.geom_type == 'Point') & (osm_gdf["user_id"] == top_mapper)],
)
osm_map = lonboard.Map(
basemap_style=lonboard.basemap.CartoBasemap.Positron,
layers=[
hot_tm_layer,
osm_polygons_layer,
osm_lines_layer,
osm_points_layer,
],
view_state=view_state
)
display(osm_map)