This repository contains SQL queries to analyze a CSV file containing data on baby names born in the Community of Madrid in 2022. The data is sourced from the Instituto de EstadĂstica de la Comunidad de Madrid.
- baby_names_madrid_2022.csv: CSV file containing data on baby names born in the Community of Madrid in 2022.
- queries.sql: SQL queries for analyzing the baby names data.
- Get the total of unique names per year: This query calculates the total number of unique names per year.
SELECT year, COUNT(DISTINCT name) AS unique_names
FROM baby_names
GROUP BY year;
- Calculate the average popularity of names by gender: This query calculates the average popularity of names by gender.
SELECT gender, AVG(num) AS avg_popularity
FROM baby_names
GROUP BY gender;
- Select first names and the total babies with that name and order by the total number of babies with that name, descending: This query selects the first names and the total number of babies with each name, ordering them by the total number of babies in descending order.
SELECT name, SUM(num)
FROM baby_names
GROUP BY name
ORDER BY SUM(num) DESC;
- Select total babies by gender and order by the total number of babies with that gender, descending: This query selects the total number of babies by gender and orders them by the total number of babies in descending order.
SELECT gender, SUM(num)
FROM baby_names
GROUP BY gender
ORDER BY SUM(num) DESC;
- Classify names by popularity: This query classifies names by popularity into categories such as 'Very Popular', 'Popular', 'Moderately Popular', 'Not Very Popular', 'Rarely Used', and 'No Data'.
SELECT name, num,
CASE WHEN num > 600 then 'Very Popular'
WHEN num > 300 then 'Popular'
WHEN num > 100 then 'Moderately Popular'
WHEN num > 50 then 'Not Very Popular'
WHEN num > 1 then 'Rarely Used'
else 'No data' end as popularity_type
FROM baby_names;
- Select the top 10 names by gender: This query selects the top 10 names by gender from the 'baby_names' table, ordering them by their associated number in descending order.
SELECT gender, name, num
FROM baby_names
ORDER BY num DESC
LIMIT 10;
- Select the top 10 male names: This query selects the top 10 male from the 'baby_names' table, ordering them by their associated number in descending order.
SELECT name, num
FROM baby_names
WHERE gender = 'M'
ORDER BY num DESC
LIMIT 10;
- Select the top 10 female names: This query selects the top 10 female from the 'baby_names' table, ordering them by their associated number in descending order.
SELECT name, num
FROM baby_names
WHERE gender = 'F'
ORDER BY num DESC
LIMIT 10;
- Tally the total number of names composed of a single word compared to those composed of two words: This query calculates the total number of names composed of a single word and those composed of two words.
SELECT
SUM(CASE WHEN LENGTH(name) - LENGTH(REPLACE(name, ' ', '')) = 0 THEN 1 ELSE 0 END) AS single_word_names,
SUM(CASE WHEN LENGTH(name) - LENGTH(REPLACE(name, ' ', '')) > 0 THEN 1 ELSE 0 END) AS compound_names
FROM baby_names;
- Select the first letter of each name and count how many names start with that letter: This query selects the first letter of each name and counts how many names start with that letter.
SELECT
SUBSTRING(name, 1, 1) AS first_letter,
COUNT(*) AS total_names
FROM baby_names
GROUP BY SUBSTRING(name, 1, 1)
ORDER BY first_letter;
- Calculate the probability that a name starts with a certain letter: This query calculates the probability that a name starts with a certain letter.
SELECT
LEFT(name, 1) AS first_letter,
COUNT(*) AS name_count,
COUNT(*) / (SELECT COUNT(*) FROM baby_names) AS probability
FROM
baby_names
GROUP BY
first_letter
ORDER BY
first_letter;
- Select the last letter of each name and count how many names end with that letter: This query selects the last letter of each name and counts how many names end with that letter.
SELECT
RIGHT(name, 1) AS last_letter,
COUNT(*) AS name_count
FROM
baby_names
GROUP BY
last_letter
ORDER BY
last_letter;
- Calculate the probability that a name ends with a certain letter: This query calculates the probability that a name ends with a certain letter.
SELECT
last_letter,
COUNT(*) AS name_count,
COUNT(*) / (SELECT COUNT(*) FROM baby_names) AS probability
FROM
(SELECT RIGHT(name, 1) AS last_letter FROM baby_names) AS last_letters
GROUP BY
last_letter
ORDER BY
last_letter;
- Calculate which name has the most letters: This query calculates which name has the most letters.
SELECT
name,
LENGTH(name) AS letter_count
FROM
baby_names
ORDER BY
letter_count DESC
LIMIT 1;
- Calculate which name has the fewest letters: This query calculates which name has the fewest letters.
SELECT
name,
LENGTH(name) AS letter_count
FROM
baby_names
ORDER BY
letter_count ASC
LIMIT 1;
- Based on the average number of letters in each name, calculate the probability that a name has X number of letters: This query calculates the probability that a name has a certain number of letters based on the average number of letters in each name.
SELECT
letter_count,
COUNT(*) AS name_count,
COUNT(*) / (SELECT COUNT(*) FROM baby_names) AS probability
FROM
(SELECT LENGTH(name) AS letter_count FROM baby_names) AS letter_counts
GROUP BY
letter_count
ORDER BY
letter_count;
- What is the probability that being male and born in 2022, I have one name or another?: This query calculates the probability that being male and born in 2022, one has one name or another.
SELECT
name,
SUM(num) AS total_name_count,
CONCAT(FORMAT((SUM(num) / (SELECT SUM(num) FROM baby_names WHERE gender = 'M')) * 100, 2), '%') AS probability
FROM
baby_names
WHERE
gender = 'M'
GROUP BY
name;
- What is the probability that being female and born in 2022, I have one name or another?: This query calculates the probability that being female and born in 2022, one has one name or another.
SELECT
name,
SUM(num) AS total_name_count,
CONCAT(FORMAT((SUM(num) / (SELECT SUM(num) FROM baby_names WHERE gender = 'M')) * 100, 2), '%') AS probability
FROM
baby_names
WHERE
gender = 'F'
GROUP BY
name;