Olympics Data Analysis with PostgreSQL
Writing Complex SQL Queries
data:image/s3,"s3://crabby-images/48cda/48cda4edae68af73627bf94457229c92c93bfb22" alt=""
Report -1 : Most decorated summer athletes
Requirements
This report should show the athletes that won at least three gold medals
- the athlete name
- the total number of gold medals
SELECT
a.name AS athlete_name,
SUM(gold) AS gold_medals
FROM summer_games AS s
JOIN athletes AS a
ON s.athlete_id = a.id
GROUP BY a.name
HAVING SUM(gold) >= 3
ORDER BY gold_medals DESC;
Output
data:image/s3,"s3://crabby-images/c31e0/c31e0270f5f3c6cdb2f9d1be2098766a0b80856a" alt=""
Report -2 : Most decorated athlete per region
Requirements
This report should show the most decorated athlete per region.
- the region
- the athlete name
- the total number of gold medals
SELECT
region,
athlete_name,
total_golds
FROM
(SELECT
region,
name AS athlete_name,
SUM(COALESCE(gold, 0)) AS total_golds,
-- Assign a regional rank to each athlete
ROW_NUMBER() OVER (PARTITION BY region
ORDER BY SUM(COALESCE(gold, 0)) DESC) AS row_num
FROM summer_games AS s
INNER JOIN athletes AS a
ON s.athlete_id = a.id
INNER JOIN countries AS c
ON s.country_id = c.id
GROUP BY region, athlete_name) AS subquery
-- Filter for only the top athlete per region
WHERE row_num = 1
AND total_golds > 0
ORDER BY total_golds DESC;
Output
data:image/s3,"s3://crabby-images/e6584/e6584a7bb95a499269fb91bcf9a23d78a82b2275" alt=""
Report-3: Athletes Representing Nobel-Prize Winning Countries
Requirements
This report should show 10 events with the most athletes who are from Nobel prize winner countries.
- the Olympic event (both summer and winter events should be included)
- the gender of athletes in the event
- the number of athletes
SELECT
event,
CASE WHEN event LIKE '%Women%' THEN 'female'
ELSE 'male' END AS gender,
COUNT(DISTINCT athlete_id) AS athletes
FROM summer_games
-- Only include countries that won a nobel prize
WHERE country_id IN
(SELECT country_id
FROM country_stats
WHERE nobel_prize_winners > 0)
GROUP BY event
UNION ALL
SELECT
event,
CASE WHEN event LIKE '%Women%' THEN 'female'
ELSE 'male' END AS gender,
COUNT(DISTINCT athlete_id) AS athletes
FROM winter_games
WHERE country_id IN
(SELECT country_id
FROM country_stats
WHERE nobel_prize_winners > 0)
GROUP BY event
ORDER BY athletes DESC
LIMIT 10;
Output
data:image/s3,"s3://crabby-images/887ba/887ba01681dd60b04e066a851dd2ee3540840b96" alt=""
Report- 4: Countries with high medal rates
Requirements
This report should show the top 10 countries by the number of medals per million population.
- the country code (3 letter country abbreviation)
- the population of the country (in millions)
- the total number of medals
- the number of medals per million population
SELECT
LEFT(REPLACE(UPPER(TRIM(c.country)),'.',''),3) AS country_code,
pop_in_millions,
SUM(COALESCE(bronze,0) + COALESCE(silver,0) + COALESCE(gold,0)) AS medals,
SUM(COALESCE(bronze,0) + COALESCE(silver,0) + COALESCE(gold,0)) / CAST(cs.pop_in_millions AS float) AS medals_per_million
FROM summer_games AS s
INNER JOIN countries AS c
ON s.country_id = c.id
INNER JOIN country_stats AS cs
ON s.country_id = cs.country_id
AND s.year = CAST(cs.year AS date)
WHERE cs.pop_in_millions IS NOT NULL
GROUP BY c.country, pop_in_millions
ORDER BY medals_per_million DESC
LIMIT 10;
Output
data:image/s3,"s3://crabby-images/c3a45/c3a453356b60993f209cd854dcf1f808d816d23a" alt=""
Report-5: Average tallest athletes and % GDP by region
Requirements
This report should show the average tallest athlete and % of the World GDP by region.
- the region
- the average height of the tallest athlete from each country within the region
- the percentage of the World GDP attributed to the region
- only winter games should be included
SELECT
region,
ROUND(AVG(height),2) AS avg_tallest,
-- Calculate region's percent of world gdp
ROUND(CAST((SUM(gdp) * 100 / SUM(SUM(gdp)) OVER()) AS numeric),2) AS perc_world_gdp
FROM countries AS c
INNER JOIN
(SELECT
country_id,
height,
ROW_NUMBER() OVER (PARTITION BY country_id ORDER BY height DESC) AS row_num
FROM winter_games AS w
INNER JOIN athletes AS a
ON w.athlete_id = a.id
GROUP BY country_id, height
ORDER BY country_id, height DESC) AS subquery
ON c.id = subquery.country_id
INNER JOIN country_stats AS cs
ON c.id = cs.country_id
-- Only include the tallest height for each country
WHERE row_num = 1
GROUP BY region
ORDER BY avg_tallest DESC;
Output
data:image/s3,"s3://crabby-images/99876/99876e608551cf106c6d059a523f5aa2e3f827c3" alt=""