feat: add sql solutions to lc problems (#1839)

yanglbme · web-flow · commit cc99c1338560 · 2023-10-18T20:34:09.000+08:00
* No.0550.Game Play Analysis IV
* No.0596.Classes More Than 5 Students
* No.0619.Biggest Single Number
* No.1045.Customers Who Bought All Products
* No.1729.Find Followers Count
* No.1731.The Number of Employees Which Report to Each Employee
* No.1789.Primary Department for Each Employee
diff --git a/solution/0500-0599/0550.Game Play Analysis IV/README.md b/solution/0500-0599/0550.Game Play Analysis IV/README.md
@@ -58,6 +58,14 @@ Activity table:
 
 <!-- 这里可写通用的实现逻辑 -->
 
+**方法一：分组取最小值 + 左连接**
+
+我们可以先找到每个玩家的首次登录日期，然后与原表进行左连接，连接条件为玩家 ID 相同且日期差为 $-1$，即该玩家在首次登录的第二天再次登录。那么，我们只需要统计所有玩家中，连接后右表记录不为空的玩家所占的比率即可。
+
+**方法二：窗口函数**
+
+我们也可以使用窗口函数 `LEAD` 获取每个玩家的下一次登录日期，如果下一次登录日期与当前登录日期相差 $1$ 天，则说明该玩家在第二天登录，我们用一个字段 $st$ 记录该信息。然后，我们用窗口函数 `RANK` 对每个玩家的登录记录按照日期升序排列，得到每条登录记录的排名。最后，我们只需要统计出排名为 $1$ 的记录（即每个玩家的首次登录）中，字段 $st$ 为 $1$ 的比率即可。
+
 <!-- tabs:start -->
 
 ### **SQL**
@@ -67,13 +75,11 @@ Activity table:
 SELECT ROUND(AVG(b.event_date IS NOT NULL), 2) AS fraction
 FROM
     (
-        SELECT
-            player_id,
-            MIN(event_date) AS event_date
-        FROM activity
-        GROUP BY player_id
+        SELECT player_id, MIN(event_date) AS event_date
+        FROM Activity
+        GROUP BY 1
     ) AS a
-    LEFT JOIN activity AS b
+    LEFT JOIN Activity AS b
         ON a.player_id = b.player_id AND DATEDIFF(a.event_date, b.event_date) = -1;
 ```
 
@@ -89,20 +95,14 @@ WITH
                     ORDER BY event_date
                 ),
                 event_date
-            ) AS diff,
-            ROW_NUMBER() OVER (
+            ) = 1 AS st,
+            RANK() OVER (
                 PARTITION BY player_id
                 ORDER BY event_date
             ) AS rk
         FROM Activity
     )
-SELECT
-    ROUND(
-        COUNT(DISTINCT IF(diff = 1, player_id, NULL)) / COUNT(
-            DISTINCT player_id
-        ),
-        2
-    ) AS fraction
+SELECT ROUND(COUNT(IF(st = 1, player_id, NULL)) / COUNT(DISTINCT player_id), 2) AS fraction
 FROM T
 WHERE rk = 1;
 ```
diff --git a/solution/0500-0599/0550.Game Play Analysis IV/README_EN.md b/solution/0500-0599/0550.Game Play Analysis IV/README_EN.md
@@ -53,6 +53,14 @@ Only the player with id 1 logged back in after the first day he had logged in so
 
 ## Solutions
 
+**Solution 1: Grouping and Minimum Value + Left Join**
+
+We can first find the first login date of each player, and then perform a left join with the original table, with the join condition being that the player ID is the same and the date difference is $-1$, which means the player logged in again on the day after their first login. Then, we only need to calculate the fraction of players for whom the joined row is not null, i.e. the average of `b.event_date IS NOT NULL` over all players.
+
+**Solution 2: Window Function**
+
+We can use the `LEAD` window function to get the next login date of each player. If the next login date is one day after the current login date, it means that the player logged in on the following day, and we use a field $st$ to record this information. Then, we use the `RANK` window function to rank each player's login records in ascending order by date. Finally, among the records with a ranking of $1$ (each player's first login), we only need to calculate the ratio of players whose $st$ equals $1$.
+
 <!-- tabs:start -->
 
 ### **SQL**
@@ -62,13 +70,11 @@ Only the player with id 1 logged back in after the first day he had logged in so
 SELECT ROUND(AVG(b.event_date IS NOT NULL), 2) AS fraction
 FROM
     (
-        SELECT
-            player_id,
-            MIN(event_date) AS event_date
-        FROM activity
-        GROUP BY player_id
+        SELECT player_id, MIN(event_date) AS event_date
+        FROM Activity
+        GROUP BY 1
     ) AS a
-    LEFT JOIN activity AS b
+    LEFT JOIN Activity AS b
         ON a.player_id = b.player_id AND DATEDIFF(a.event_date, b.event_date) = -1;
 ```
 
@@ -84,20 +90,14 @@ WITH
                     ORDER BY event_date
                 ),
                 event_date
-            ) AS diff,
-            ROW_NUMBER() OVER (
+            ) = 1 AS st,
+            RANK() OVER (
                 PARTITION BY player_id
                 ORDER BY event_date
             ) AS rk
         FROM Activity
     )
-SELECT
-    ROUND(
-        COUNT(DISTINCT IF(diff = 1, player_id, NULL)) / COUNT(
-            DISTINCT player_id
-        ),
-        2
-    ) AS fraction
+SELECT ROUND(COUNT(IF(st = 1, player_id, NULL)) / COUNT(DISTINCT player_id), 2) AS fraction
 FROM T
 WHERE rk = 1;
 ```
diff --git a/solution/0500-0599/0550.Game Play Analysis IV/Solution.sql b/solution/0500-0599/0550.Game Play Analysis IV/Solution.sql
@@ -2,11 +2,9 @@
 SELECT ROUND(AVG(b.event_date IS NOT NULL), 2) AS fraction
 FROM
     (
-        SELECT
-            player_id,
-            MIN(event_date) AS event_date
-        FROM activity
-        GROUP BY player_id
+        SELECT player_id, MIN(event_date) AS event_date
+        FROM Activity
+        GROUP BY 1
     ) AS a
-    LEFT JOIN activity AS b
+    LEFT JOIN Activity AS b
         ON a.player_id = b.player_id AND DATEDIFF(a.event_date, b.event_date) = -1;
diff --git a/solution/0500-0599/0596.Classes More Than 5 Students/README.md b/solution/0500-0599/0596.Classes More Than 5 Students/README.md
@@ -63,6 +63,10 @@ Courses table:
 
 <!-- 这里可写通用的实现逻辑 -->
 
+**方法一：分组统计**
+
+我们可以使用 `GROUP BY` 语句，按照班级分组，然后使用 `HAVING` 语句，筛选出学生数量大于等于 $5$ 的班级。
+
 <!-- tabs:start -->
 
 ### **SQL**
@@ -71,7 +75,7 @@ Courses table:
 # Write your MySQL query statement below
 SELECT class
 FROM Courses
-GROUP BY class
+GROUP BY 1
 HAVING COUNT(1) >= 5;
 ```
 
diff --git a/solution/0500-0599/0596.Classes More Than 5 Students/README_EN.md b/solution/0500-0599/0596.Classes More Than 5 Students/README_EN.md
@@ -59,6 +59,10 @@ Courses table:
 
 ## Solutions
 
+**Solution 1: Grouping and Aggregation**
+
+We can use the `GROUP BY` statement to group the rows by class, and then use the `HAVING` clause to keep only the classes whose student count is greater than or equal to $5$.
+
 <!-- tabs:start -->
 
 ### **SQL**
@@ -67,7 +71,7 @@ Courses table:
 # Write your MySQL query statement below
 SELECT class
 FROM Courses
-GROUP BY class
+GROUP BY 1
 HAVING COUNT(1) >= 5;
 ```
 
diff --git a/solution/0500-0599/0596.Classes More Than 5 Students/Solution.sql b/solution/0500-0599/0596.Classes More Than 5 Students/Solution.sql
@@ -1,5 +1,5 @@
 # Write your MySQL query statement below
 SELECT class
 FROM Courses
-GROUP BY class
+GROUP BY 1
 HAVING COUNT(1) >= 5;
diff --git a/solution/0600-0699/0619.Biggest Single Number/README.md b/solution/0600-0699/0619.Biggest Single Number/README.md
@@ -91,6 +91,14 @@ MyNumbers table:
 
 <!-- 这里可写通用的实现逻辑 -->
 
+**方法一：分组 + 子查询**
+
+我们可以先将 `MyNumbers` 表按照 `num` 进行分组统计，找出只出现一次的数字，然后使用子查询找出最大的数字即可。
+
+**方法二：分组 + `CASE` 表达式**
+
+与方法一类似，我们可以先将 `MyNumbers` 表按照 `num` 进行分组统计，然后使用 `CASE` 表达式，找出只出现一次的数字，然后按数字降序排序，取第一个即可。
+
 <!-- tabs:start -->
 
 ### **SQL**
@@ -102,7 +110,7 @@ FROM
     (
         SELECT num
         FROM MyNumbers
-        GROUP BY num
+        GROUP BY 1
         HAVING COUNT(1) = 1
     ) AS t;
 ```
diff --git a/solution/0600-0699/0619.Biggest Single Number/README_EN.md b/solution/0600-0699/0619.Biggest Single Number/README_EN.md
@@ -79,6 +79,14 @@ MyNumbers table:
 
 ## Solutions
 
+**Solution 1: Grouping and Subquery**
+
+We can first group the `MyNumbers` table by `num` and count the number of occurrences of each number. Then, we can use a subquery to find the maximum number among the numbers that appear only once.
+
+**Solution 2: Grouping and `CASE` Expression**
+
+Similar to Solution 1, we can first group the `MyNumbers` table by `num` and count the number of occurrences of each number. Then, we can use a `CASE` expression to keep the numbers that appear only once (mapping the others to `NULL`), sort the result in descending order by number, and take the first row.
+
 <!-- tabs:start -->
 
 ### **SQL**
@@ -90,7 +98,7 @@ FROM
     (
         SELECT num
         FROM MyNumbers
-        GROUP BY num
+        GROUP BY 1
         HAVING COUNT(1) = 1
     ) AS t;
 ```
diff --git a/solution/0600-0699/0619.Biggest Single Number/Solution.sql b/solution/0600-0699/0619.Biggest Single Number/Solution.sql
@@ -1,10 +1,9 @@
 # Write your MySQL query statement below
-SELECT
-    CASE
-        WHEN COUNT(1) = 1 THEN num
-        ELSE NULL
-    END AS num
-FROM MyNumbers
-GROUP BY num
-ORDER BY 1 DESC
-LIMIT 1;
+SELECT MAX(num) AS num
+FROM
+    (
+        SELECT num
+        FROM MyNumbers
+        GROUP BY 1
+        HAVING COUNT(1) = 1
+    ) AS t;
diff --git a/solution/1000-1099/1045.Customers Who Bought All Products/README.md b/solution/1000-1099/1045.Customers Who Bought All Products/README.md
@@ -77,7 +77,7 @@ Product 表：
 
 <!-- 这里可写通用的实现逻辑 -->
 
-**方法一：GROUP BY + HAVING**
+**方法一：分组统计 + 子查询**
 
 我们将 `Customer` 表按照 `customer_id` 进行分组，然后使用 `HAVING` 子句筛选出购买了所有产品的客户。
 
diff --git a/solution/1000-1099/1045.Customers Who Bought All Products/README_EN.md b/solution/1000-1099/1045.Customers Who Bought All Products/README_EN.md
@@ -74,6 +74,10 @@ The customers who bought all the products (5 and 6) are customers with IDs 1 and
 
 ## Solutions
 
+**Solution 1: Grouping and Subquery**
+
+We can group the `Customer` table by `customer_id`, and then use the `HAVING` clause to filter out the customers who have not purchased all products. To do this, we can use a subquery to find the total number of distinct products, and then compare it with the number of distinct products purchased by each customer.
+
 <!-- tabs:start -->
 
 ### **SQL**
diff --git a/solution/1700-1799/1729.Find Followers Count/README.md b/solution/1700-1799/1729.Find Followers Count/README.md
@@ -58,19 +58,20 @@ Followers 表：
 
 <!-- 这里可写通用的实现逻辑 -->
 
-`GROUP BY` 实现。
+**方法一：分组统计**
+
+我们可以直接对 `Followers` 表按照 `user_id` 进行分组，然后使用 `COUNT` 函数统计每个用户的关注者数量即可。
 
 <!-- tabs:start -->
 
 ### **SQL**
 
 ```sql
-SELECT
-    user_id,
-    COUNT(1) AS followers_count
+# Write your MySQL query statement below
+SELECT user_id, COUNT(1) AS followers_count
 FROM Followers
-GROUP BY user_id
-ORDER BY user_id;
+GROUP BY 1
+ORDER BY 1;
 ```
 
 <!-- tabs:end -->
diff --git a/solution/1700-1799/1729.Find Followers Count/README_EN.md b/solution/1700-1799/1729.Find Followers Count/README_EN.md
@@ -54,17 +54,20 @@ The followers of 2 are {0,1}
 
 ## Solutions
 
+**Solution 1: Grouping and Aggregation**
+
+We can directly group the `Followers` table by `user_id`, and use the `COUNT` function to count the number of followers for each user.
+
 <!-- tabs:start -->
 
 ### **SQL**
 
 ```sql
-SELECT
-    user_id,
-    COUNT(1) AS followers_count
+# Write your MySQL query statement below
+SELECT user_id, COUNT(1) AS followers_count
 FROM Followers
-GROUP BY user_id
-ORDER BY user_id;
+GROUP BY 1
+ORDER BY 1;
 ```
 
 <!-- tabs:end -->
diff --git a/solution/1700-1799/1729.Find Followers Count/Solution.sql b/solution/1700-1799/1729.Find Followers Count/Solution.sql
@@ -1,6 +1,5 @@
-SELECT
-    user_id,
-    COUNT(1) AS followers_count
+# Write your MySQL query statement below
+SELECT user_id, COUNT(1) AS followers_count
 FROM Followers
-GROUP BY user_id
-ORDER BY user_id;
+GROUP BY 1
+ORDER BY 1;
diff --git a/solution/1700-1799/1731.The Number of Employees Which Report to Each Employee/README.md b/solution/1700-1799/1731.The Number of Employees Which Report to Each Employee/README.md
@@ -55,6 +55,10 @@ Hercy 有两个需要向他汇报的员工, 他们是 Alice and Bob. 他们的
 
 <!-- 这里可写通用的实现逻辑 -->
 
+**方法一：自连接 + 分组统计**
+
+我们可以通过自连接的方式，将每个员工的上级经理信息连接到每个员工的信息上，然后再通过分组统计的方式，统计每个经理的下属员工数量和平均年龄。
+
 <!-- tabs:start -->
 
 ### **SQL**
@@ -69,8 +73,8 @@ SELECT
 FROM
     Employees AS e1
     JOIN Employees AS e2 ON e1.reports_to = e2.employee_id
-GROUP BY e2.employee_id
-ORDER BY e2.employee_id;
+GROUP BY 1
+ORDER BY 1;
 ```
 
 <!-- tabs:end -->
diff --git a/solution/1700-1799/1731.The Number of Employees Which Report to Each Employee/README_EN.md b/solution/1700-1799/1731.The Number of Employees Which Report to Each Employee/README_EN.md
@@ -54,6 +54,10 @@ Employees table:
 
 ## Solutions
 
+**Solution 1: Self-Join + Grouping**
+
+We can self-join the `Employees` table so that each employee row is matched with the row of the manager they report to (`e1.reports_to = e2.employee_id`), and then group by manager to count the number of direct reports and compute their average age.
+
 <!-- tabs:start -->
 
 ### **SQL**
@@ -68,8 +72,8 @@ SELECT
 FROM
     Employees AS e1
     JOIN Employees AS e2 ON e1.reports_to = e2.employee_id
-GROUP BY e2.employee_id
-ORDER BY e2.employee_id;
+GROUP BY 1
+ORDER BY 1;
 ```
 
 <!-- tabs:end -->
diff --git a/solution/1700-1799/1731.The Number of Employees Which Report to Each Employee/Solution.sql b/solution/1700-1799/1731.The Number of Employees Which Report to Each Employee/Solution.sql
@@ -7,5 +7,5 @@ SELECT
 FROM
     Employees AS e1
     JOIN Employees AS e2 ON e1.reports_to = e2.employee_id
-GROUP BY e2.employee_id
-ORDER BY e2.employee_id;
+GROUP BY 1
+ORDER BY 1;
diff --git a/solution/1700-1799/1789.Primary Department for Each Employee/README.md b/solution/1700-1799/1789.Primary Department for Each Employee/README.md
@@ -71,6 +71,10 @@ Employee table:
 
 <!-- 这里可写通用的实现逻辑 -->
 
+**方法一：合并**
+
+我们可以查出所有已经有直属部门的员工，然后再查出所有只属于一个部门的员工，最后我们可以使用 `UNION` 合并两个结果集。
+
 <!-- tabs:start -->
 
 ### **SQL**
@@ -85,7 +89,7 @@ WHERE primary_flag = 'Y'
 UNION
 SELECT employee_id, department_id
 FROM Employee
-GROUP BY employee_id
+GROUP BY 1
 HAVING COUNT(1) = 1;
 ```
 
diff --git a/solution/1700-1799/1789.Primary Department for Each Employee/README_EN.md b/solution/1700-1799/1789.Primary Department for Each Employee/README_EN.md
diff --git a/solution/1700-1799/1789.Primary Department for Each Employee/Solution.sql b/solution/1700-1799/1789.Primary Department for Each Employee/Solution.sql