FIFA Ranking Analysis in SQL

Let's start by first creating tables and inserting data into them.

Create tables

CREATE TABLE country (
countryid INTEGER PRIMARY KEY,
countrycode TEXT NOT NULL,
countryname TEXT NOT NULL
);


CREATE TABLE rankcal (
dateid INTEGER PRIMARY KEY,
rankdate DATE NOT NULL,
rankyear INT NOT NULL,
rankmonth INT NOT NULL,
rankday INT NOT NULL,
rankquarter TEXT
);


CREATE TABLE fifarank (
rankid INTEGER PRIMARY KEY,
countryid INTEGER NOT NULL,
dateid INTEGER NOT NULL,
rank INTEGER NOT NULL,
FOREIGN KEY (countryid) REFERENCES country(countryid),
FOREIGN KEY (dateid) REFERENCES rankcal(dateid)
);


Now, let's insert data into the tables. For rankingcal, I used the date column from flat file available in kaggle and used python script to create insert statements.

from datetime import datetime

# List of dates (you can replace this with your full list of 286 dates)
dates = ['08/08/1993']

def get_quarter(month):
    if month in [1, 2, 3]:
        return 'Q1'
    elif month in [4, 5, 6]:
        return 'Q2'
    elif month in [7, 8, 9]:
        return 'Q3'
    else:
        return 'Q4'

# Function to generate the SQL insert statements
def generate_insert_statements(dates):
    insert_statements = []
   
    for date_str in dates:
        # Convert date to datetime object
        date_obj = datetime.strptime(date_str, '%m/%d/%Y')
       
        # Extract year, month, day
        year = date_obj.year
        month = date_obj.month
        day = date_obj.day
        quarter = get_quarter(month)
       
        # Create insert statement
        insert_statement = f"INSERT INTO rankcal(rankdate, rankyear, rankmonth, rankday, rankquarter) " \
                           f"VALUES ('{date_obj.strftime('%Y-%m-%d')}', {year}, {month}, {day}, '{quarter}');"
        insert_statements.append(insert_statement)
   
    return insert_statements

# Generate and print the insert statements
insert_statements = generate_insert_statements(dates)

# Output all insert statements
for statement in insert_statements:
    print(statement)

Now, to insert data into fifaranking table, we need to join original flat file from kaggle with country table and rankingcal table. So, we will first upload the source file fifa.csv. For that, we need to unpivot the fifa data from wide format to long format.

fifa table — wide format

rankingdate ARG BRA ESP FRA GER ITA
8/8/1993 5 8 13 12 1 2

unpivotrawranking - long format

date countrycode rank
8/8/1993 ARG 5
8/8/1993 BRA 8
8/8/1993 ESP 13
8/8/1993 FRA 12
8/8/1993 GER 1
8/8/1993 ITA 2

CREATE TABLE unpivotrawrank (
rankdate DATE,
countrycode VARCHAR,
rank INT
);

INSERT INTO unpivotrawrank (rankdate, countrycode, rank)
SELECT date, 'ARG' AS countrycode, ARG FROM fifa
UNION ALL
SELECT date, 'BRA' AS countrycode, BRA FROM fifa
UNION ALL
SELECT date, 'ESP' AS countrycode, ESP FROM fifa
UNION ALL
SELECT date, 'FRA' AS countrycode, FRA FROM fifa
UNION ALL
SELECT date, 'GER' AS countrycode, GER FROM fifa
UNION ALL
SELECT date, 'ITA' AS countrycode, ITA FROM fifa;

Now, let's insert data into fifarank table.

INSERT INTO fifarank (countryid, dateid, rank)
SELECT
c.countryid,
rc.dateid,
rr.rank
FROM unpivotrawrank rr
JOIN country c ON c.countrycode = rr.countrycode
JOIN rankcal rc ON rc.rankdate = rr.rankdate
LEFT JOIN fifarank fr -- Prevent duplicate inserts
ON fr.countryid = c.countryid AND fr.dateid = rc.dateid
WHERE fr.rankid IS NULL; -- Only insert if not already present

SQL Analysis

TOP N COUNTRIES FOR A GIVEN MONTH AND YEAR

RANK PROGRERSSION OF A SPECIFIC COUNTRY OVER TIME

COUNTRIES THAT REACHED RANK #1 ALONG WITH MONTH AND YEAR

NUMBER OF TIMES THE COUNTRY IS RANKED #1