/ Gists / Ranks - many cases
On gists

Ranks - many cases

MySql MySql tricks

ranks.sql Raw #

DROP TABLE IF EXISTS users;

CREATE TABLE users (user_id INT PRIMARY KEY AUTO_INCREMENT, name VARCHAR(20), start_date DATE, team_id INT);
INSERT INTO users (name, start_date, team_id) VALUES ('Matt', '2017-01-01', 1);
INSERT INTO users (name, start_date, team_id) VALUES ('John', '2017-01-02', 2);
INSERT INTO users (name, start_date, team_id) VALUES ('Sara', '2017-01-02', 2);
INSERT INTO users (name, start_date, team_id) VALUES ('Tim', '2017-01-02', 3);
INSERT INTO users (name, start_date, team_id) VALUES ('Bob', '2017-01-03', 3);
INSERT INTO users (name, start_date, team_id) VALUES ('Bill', '2017-01-04', 3);
INSERT INTO users (name, start_date, team_id) VALUES ('Kathy', '2017-01-04', 3);
INSERT INTO users (name, start_date, team_id) VALUES ('Anne', '2017-01-05', 3);
+---------+-------+------------+---------+
| user_id | name  | start_date | team_id |
+---------+-------+------------+---------+
|       1 | Matt  | 2017-01-01 |       1 |
|       2 | John  | 2017-01-02 |       2 |
|       3 | Sara  | 2017-01-02 |       2 |
|       4 | Tim   | 2017-01-02 |       3 |
|       5 | Bob   | 2017-01-03 |       3 |
|       6 | Bill  | 2017-01-04 |       3 |
|       7 | Kathy | 2017-01-04 |       3 |
|       8 | Anne  | 2017-01-05 |       3 |
+---------+-------+------------+---------+

# Ranked by start date
SELECT *
FROM users
ORDER by start_date ASC


# Ranked by start date with ties broken by user id
SELECT *
FROM users
ORDER by start_date ASC, user_id ASC


# First employee by start date with ties broken by user id
SELECT *
FROM users
ORDER by start_date ASC, user_id ASC
LIMIT 1



# First employee by start date with ties
SELECT *
FROM users
WHERE start_date = (SELECT MIN(start_date) FROM users);



# Second employee by start date with ties broken by user id
SELECT *
FROM users
ORDER by start_date ASC, user_id ASC
LIMIT 1
OFFSET 1



# Second employee by start date with ties
SELECT *
FROM users
WHERE start_date = (
    SELECT DISTINCT start_date
    FROM users
    ORDER BY start_date ASC
    LIMIT 1
    OFFSET 1
)



# Ranked by start date using variable
SET @rank := 0;
SELECT
    *,
    @rank := @rank + 1 AS rank
FROM users
ORDER BY start_date ASC
+---------+-------+------------+---------+------+
| user_id | name  | start_date | team_id | rank |
+---------+-------+------------+---------+------+
|       1 | Matt  | 2017-01-01 |       1 |    1 |
|       2 | John  | 2017-01-02 |       2 |    2 |
|       3 | Sara  | 2017-01-02 |       2 |    3 |
|       4 | Tim   | 2017-01-02 |       3 |    4 |
|       5 | Bob   | 2017-01-03 |       3 |    5 |
|       6 | Bill  | 2017-01-04 |       3 |    6 |
|       7 | Kathy | 2017-01-04 |       3 |    7 |
|       8 | Anne  | 2017-01-05 |       3 |    8 |
+---------+-------+------------+---------+------+



# Ranked by start date using a variable
# Based on this Stack Overflow comment:

SELECT
    *,
    @rank := @rank + 1 AS rank
FROM users, (SELECT @rank := 0) r
ORDER BY start_date ASC
+---------+-------+------------+---------+------------+------+
| user_id | name  | start_date | team_id | @rank := 0 | rank |
+---------+-------+------------+---------+------------+------+
|       1 | Matt  | 2017-01-01 |       1 |          0 |    1 |
|       2 | John  | 2017-01-02 |       2 |          0 |    2 |
|       3 | Sara  | 2017-01-02 |       2 |          0 |    3 |
|       4 | Tim   | 2017-01-02 |       3 |          0 |    4 |
|       5 | Bob   | 2017-01-03 |       3 |          0 |    5 |
|       6 | Bill  | 2017-01-04 |       3 |          0 |    6 |
|       7 | Kathy | 2017-01-04 |       3 |          0 |    7 |
|       8 | Anne  | 2017-01-05 |       3 |          0 |    8 |
+---------+-------+------------+---------+------------+------+



# First employee by start date using by setting a variable
SET @rank := 0;
SELECT *
FROM (
    SELECT
        *,
        @rank := @rank + 1 AS rank
    FROM users
    ORDER BY start_date ASC
) ranked
WHERE rank = 1



# Ranked by start date with ties
# Based on this Stack Overflow comment:

SET @prev_start_date = NULL;
SET @rank := 0;
SELECT
    *,
    CASE
        WHEN @prev_start_date = start_date THEN @rank
        -- Note that the assignment here will always be true
        WHEN @prev_start_date := start_date THEN @rank := @rank + 1
    END AS rank
FROM users
ORDER BY start_date ASC
+---------+-------+------------+---------+------+
| user_id | name  | start_date | team_id | rank |
+---------+-------+------------+---------+------+
|       1 | Matt  | 2017-01-01 |       1 |    1 |
|       2 | John  | 2017-01-02 |       2 |    2 |
|       3 | Sara  | 2017-01-02 |       2 |    2 |
|       4 | Tim   | 2017-01-02 |       3 |    2 |
|       5 | Bob   | 2017-01-03 |       3 |    3 |
|       6 | Bill  | 2017-01-04 |       3 |    4 |
|       7 | Kathy | 2017-01-04 |       3 |    4 |
|       8 | Anne  | 2017-01-05 |       3 |    5 |
+---------+-------+------------+---------+------+



# Ranked by user id within each team
SELECT
    a.*,
    COUNT(*) AS rank
FROM users a
INNER JOIN users b
ON a.team_id = b.team_id AND a.user_id >= b.user_id
GROUP BY a.team_id, a.user_id


# or, based on this Stack Overflow comment:

SELECT
    a.*,
    (
        SELECT COUNT(*)
        FROM users b
        WHERE a.team_id = b.team_id AND a.user_id >= b.user_id
    ) AS ranked
FROM users a
+---------+-------+------------+---------+--------+
| user_id | name  | start_date | team_id | ranked |
+---------+-------+------------+---------+--------+
|       1 | Matt  | 2017-01-01 |       1 |      1 |
|       2 | John  | 2017-01-02 |       2 |      1 |
|       3 | Sara  | 2017-01-02 |       2 |      2 |
|       4 | Tim   | 2017-01-02 |       3 |      1 |
|       5 | Bob   | 2017-01-03 |       3 |      2 |
|       6 | Bill  | 2017-01-04 |       3 |      3 |
|       7 | Kathy | 2017-01-04 |       3 |      4 |
|       8 | Anne  | 2017-01-05 |       3 |      5 |
+---------+-------+------------+---------+--------+
#  Note that both of these techniques require that there be a column without duplicates that we can rank on within the partition. For example, we can’t use start_date due to the duplicates within team 2 (2017-01-02) and team 3 (2017-01-14):

SELECT
    a.*,
    COUNT(*) AS rank
FROM users a
INNER JOIN users b
ON a.team_id = b.team_id AND a.start_date >= b.start_date
GROUP BY a.team_id, a.user_id
+---------+-------+------------+---------+------+
| user_id | name  | start_date | team_id | rank |
+---------+-------+------------+---------+------+
|       1 | Matt  | 2017-01-01 |       1 |    1 |
|       2 | John  | 2017-01-02 |       2 |    2 |
|       3 | Sara  | 2017-01-02 |       2 |    2 |
|       4 | Tim   | 2017-01-02 |       3 |    1 |
|       5 | Bob   | 2017-01-03 |       3 |    2 |
|       6 | Bill  | 2017-01-04 |       3 |    4 |
|       7 | Kathy | 2017-01-04 |       3 |    4 |
|       8 | Anne  | 2017-01-05 |       3 |    5 |
+---------+-------+------------+---------+------+



# Return the last person to join within each team based on user id
# Based on this Stack Overflow comment:

SELECT a.*
FROM users a
LEFT JOIN users b
ON a.team_id = b.team_id AND a.user_id < b.user_id
WHERE b.team_id IS NULL
or

SELECT a.*
FROM users a
WHERE user_id IN (
    SELECT MAX(user_id)
    FROM users
    GROUP BY team_id
)
+---------+------+------------+---------+
| user_id | name | start_date | team_id |
+---------+------+------------+---------+
|       1 | Matt | 2017-01-01 |       1 |
|       3 | Sara | 2017-01-02 |       2 |
|       8 | Anne | 2017-01-05 |       3 |
+---------+------+------------+---------+



# Return the last people to join within each team based on start date
SELECT a.*
FROM users a
LEFT JOIN users b
ON a.team_id = b.team_id AND a.start_date < b.start_date
WHERE b.team_id IS NULL

# or, based on this groupwise max post:

SELECT a.*
FROM users a
INNER JOIN (
    SELECT team_id, MAX(start_date) AS max_start_date
    FROM users b
    GROUP BY team_id
) max_start_dates
ON a.team_id = max_start_dates.team_id AND a.start_date = max_start_dates.max_start_date
+---------+------+------------+---------+
| user_id | name | start_date | team_id |
+---------+------+------------+---------+
|       1 | Matt | 2017-01-01 |       1 |
|       2 | John | 2017-01-02 |       2 |
|       3 | Sara | 2017-01-02 |       2 |
|       8 | Anne | 2017-01-05 |       3 |
+---------+------+------------+---------+



# Ranked with gaps
# Based on this Stack Overflow comment:

SELECT
    user_id,
    name,
    start_date,
    team_id,
    rank
FROM (
    SELECT
        *,
        IF(start_date = @_last_start_date, @cur_rank := @cur_rank, @cur_rank := @_sequence) AS rank,
        @_sequence := @_sequence + 1,
        @_last_start_date := start_date
    FROM users, (SELECT @cur_rank := 1, @_sequence := 1, @_last_start_date := NULL) r
    ORDER BY start_date
) ranked

# Notice that after the three tied for second earliest start date, the next one jumps to 5 (not 3):

+---------+-------+------------+---------+------+
| user_id | name  | start_date | team_id | rank |
+---------+-------+------------+---------+------+
|       1 | Matt  | 2017-01-01 |       1 |    1 |
|       2 | John  | 2017-01-02 |       2 |    2 |
|       3 | Sara  | 2017-01-02 |       2 |    2 |
|       4 | Tim   | 2017-01-02 |       3 |    2 |
|       5 | Bob   | 2017-01-03 |       3 |    5 |
|       6 | Bill  | 2017-01-04 |       3 |    6 |
|       7 | Kathy | 2017-01-04 |       3 |    6 |
|       8 | Anne  | 2017-01-05 |       3 |    8 |
+---------+-------+------------+---------+------+