Skip to content

DuckDB Examples

The Parquet files work out of the box with DuckDB, Pandas, Polars, or any Parquet-compatible tool.

Terminal window
# Install DuckDB
brew install duckdb # macOS
# or download from https://duckdb.org/docs/installation
# Start DuckDB
duckdb
SELECT m.title, m.year, l.provider_id
FROM 'US_links.parquet' l
JOIN 'core/media_details.parquet' m ON l.media_id = m.id
WHERE l.provider_id = 'netflix'
ORDER BY m.year DESC
LIMIT 20;
SELECT m.title, m.tmdb_rating, l.provider_id
FROM 'FR_links.parquet' l
JOIN 'core/media_details.parquet' m ON l.media_id = m.id
WHERE m.content_type = 'movie'
ORDER BY m.tmdb_rating DESC
LIMIT 10;
SELECT l.provider_id, p.name, COUNT(*) as title_count
FROM 'DE_links.parquet' l
JOIN 'core/providers.parquet' p ON l.provider_id = p.short_id
GROUP BY l.provider_id, p.name
ORDER BY title_count DESC;

Find titles available on multiple providers

Section titled “Find titles available on multiple providers”
SELECT m.title, m.year, COUNT(DISTINCT l.provider_id) as provider_count,
LIST(l.provider_id) as providers
FROM 'US_links.parquet' l
JOIN 'core/media_details.parquet' m ON l.media_id = m.id
GROUP BY m.title, m.year
HAVING provider_count >= 5
ORDER BY provider_count DESC
LIMIT 20;
SELECT m.title as english_title,
t.title as french_title,
m.year
FROM 'core/media_details.parquet' m
JOIN 'translations/lang_fr.parquet' t ON m.id = t.media_id
WHERE m.content_type = 'movie'
AND m.tmdb_rating > 8
ORDER BY m.tmdb_rating DESC
LIMIT 10;
COPY (
SELECT m.title, m.year, m.tmdb_rating, l.provider_id
FROM 'US_links.parquet' l
JOIN 'core/media_details.parquet' m ON l.media_id = m.id
WHERE m.content_type = 'movie'
ORDER BY m.tmdb_rating DESC
) TO 'us_movies.csv' (HEADER, DELIMITER ',');