feat: add solutions to lcci problem: No.17.26

yanglbme · yanglbme · commit 01faba541408 · 2022-09-24T16:09:16.000+08:00
No.17.26.Sparse Similarity
diff --git a/lcci/17.26.Sparse Similarity/README.md b/lcci/17.26.Sparse Similarity/README.md
@@ -27,19 +27,154 @@
 	<li><code>docs[i].length &lt;= 500</code></li>
 	<li>相似度大于 0 的文档对数不会超过 1000</li>
 </ul>
+
 ## 解法
+
 <!-- 这里可写通用的实现逻辑 -->
+
+**方法一：哈希表**
+
+用哈希表 $d$ 记录每个单词对应了哪些文档。
+
+遍历 $d$ 的每一个文档列表，其中任意两个文档都至少共享一个单词，因此相似度大于 0；我们用哈希表 $s$ 累加每对文档共同出现的单词个数。最后遍历 $s$，对每对文档 $(i, j)$ 及其共同单词数 $v$，相似度为 $v / (|docs[i]| + |docs[j]| - v)$。
+
+时间复杂度 $O(n^3)$，其中 $n$ 为文档数量与单个文档长度中的较大值。
+
 <!-- tabs:start -->
+
 ### **Python3**
+
 <!-- 这里可写当前语言的特殊实现逻辑 -->
+
 ```python
+class Solution:
+    def computeSimilarities(self, docs: List[List[int]]) -> List[str]:
+        eps = 1e-9
+        d = defaultdict(list)
+        for i, v in enumerate(docs):
+            for x in v:
+                d[x].append(i)
+        cnt = Counter()
+        for ids in d.values():
+            n = len(ids)
+            for i in range(n):
+                for j in range(i + 1, n):
+                    cnt[(ids[i], ids[j])] += 1
+        ans = []
+        for (i, j), v in cnt.items():
+            tot = len(docs[i]) + len(docs[j]) - v
+            x = v / tot + eps
+            ans.append(f'{i},{j}: {x:.4f}')
+        return ans
+```
 
-````
 ### **Java**
+
 <!-- 这里可写当前语言的特殊实现逻辑 -->
+
 ```java
+class Solution {
+    /** Returns "i,j: sim" (4 decimals) for each document pair with a common word. */
+    public List<String> computeSimilarities(int[][] docs) {
+        Map<Integer, List<Integer>> owners = new HashMap<>(); // word -> document ids
+        for (int id = 0; id < docs.length; ++id) {
+            for (int word : docs[id]) {
+                owners.computeIfAbsent(word, w -> new ArrayList<>()).add(id);
+            }
+        }
+        // Key "a,b" -> number of words documents a and b share.
+        Map<String, Integer> shared = new HashMap<>();
+        for (List<Integer> ids : owners.values()) {
+            for (int p = 0; p < ids.size(); ++p) {
+                for (int q = p + 1; q < ids.size(); ++q) {
+                    shared.merge(ids.get(p) + "," + ids.get(q), 1, Integer::sum);
+                }
+            }
+        }
+        List<String> ans = new ArrayList<>();
+        for (Map.Entry<String, Integer> entry : shared.entrySet()) {
+            String key = entry.getKey();
+            int common = entry.getValue();
+            int comma = key.indexOf(',');
+            int a = Integer.parseInt(key.substring(0, comma));
+            int b = Integer.parseInt(key.substring(comma + 1));
+            double sim = (double) common / (docs[a].length + docs[b].length - common);
+            ans.add(String.format("%s: %.4f", key, sim));
+        }
+        return ans;
+    }
+}
+```
+
+### **C++**
 
-````
+```cpp
+using pii = pair<int, int>;
+
+class Solution {
+public:
+    // Returns "i,j: sim" (4 decimals) for every document pair sharing a word.
+    vector<string> computeSimilarities(vector<vector<int>>& docs) {
+        const double eps = 1e-9; // nudge added to the ratio before formatting
+        unordered_map<int, vector<int>> d; // word -> ids of documents containing it
+        for (int i = 0; i < (int) docs.size(); ++i) {
+            for (int v : docs[i]) {
+                d[v].push_back(i);
+            }
+        }
+        map<pii, int> cnt; // (i, j) -> number of words the two documents share
+        for (auto& [_, ids] : d) {
+            int n = ids.size();
+            for (int i = 0; i < n; ++i) {
+                for (int j = i + 1; j < n; ++j) {
+                    cnt[{ids[i], ids[j]}]++;
+                }
+            }
+        }
+        vector<string> ans;
+        for (auto& [k, v] : cnt) {
+            auto [i, j] = k;
+            int tot = docs[i].size() + docs[j].size() - v; // size of the union
+            double x = (double) v / tot + eps;
+            char t[40]; // room for two full-width ints; snprintf bounds the write
+            snprintf(t, sizeof(t), "%d,%d: %0.4lf", i, j, x);
+            ans.push_back(t);
+        }
+        return ans;
+    }
+};
+```
+
+### **Go**
+
+```go
+func computeSimilarities(docs [][]int) []string {
+	// owners maps each word to the ids of the documents that contain it.
+	owners := map[int][]int{}
+	for id, words := range docs {
+		for _, w := range words {
+			owners[w] = append(owners[w], id)
+		}
+	}
+	type pair struct{ i, j int }
+	shared := map[pair]int{} // document pair -> number of common words
+	for _, ids := range owners {
+		for p, a := range ids {
+			for _, b := range ids[p+1:] {
+				shared[pair{a, b}]++
+			}
+		}
+	}
+	// Similarity = common / (len(a) + len(b) - common), printed with 4 decimals.
+	ans := make([]string, 0, len(shared))
+	for key, common := range shared {
+		union := len(docs[key.i]) + len(docs[key.j]) - common
+		sim := float64(common)/float64(union) + 1e-9
+		label := strconv.Itoa(key.i) + "," + strconv.Itoa(key.j) + ": "
+		ans = append(ans, label+fmt.Sprintf("%.4f", sim))
+	}
+	return ans
+}
+```
 
 ### **...**
 
diff --git a/lcci/17.26.Sparse Similarity/README_EN.md b/lcci/17.26.Sparse Similarity/README_EN.md
@@ -49,13 +49,131 @@
 ### **Python3**
 
 ```python
-
+class Solution:
+    def computeSimilarities(self, docs: List[List[int]]) -> List[str]:
+        eps = 1e-9
+        d = defaultdict(list)
+        for i, v in enumerate(docs):
+            for x in v:
+                d[x].append(i)
+        cnt = Counter()
+        for ids in d.values():
+            n = len(ids)
+            for i in range(n):
+                for j in range(i + 1, n):
+                    cnt[(ids[i], ids[j])] += 1
+        ans = []
+        for (i, j), v in cnt.items():
+            tot = len(docs[i]) + len(docs[j]) - v
+            x = v / tot + eps
+            ans.append(f'{i},{j}: {x:.4f}')
+        return ans
 ```
 
 ### **Java**
 
 ```java
+class Solution {
+    /** Returns "i,j: sim" (4 decimals) for each document pair with a common word. */
+    public List<String> computeSimilarities(int[][] docs) {
+        Map<Integer, List<Integer>> owners = new HashMap<>(); // word -> document ids
+        for (int id = 0; id < docs.length; ++id) {
+            for (int word : docs[id]) {
+                owners.computeIfAbsent(word, w -> new ArrayList<>()).add(id);
+            }
+        }
+        // Key "a,b" -> number of words documents a and b share.
+        Map<String, Integer> shared = new HashMap<>();
+        for (List<Integer> ids : owners.values()) {
+            for (int p = 0; p < ids.size(); ++p) {
+                for (int q = p + 1; q < ids.size(); ++q) {
+                    shared.merge(ids.get(p) + "," + ids.get(q), 1, Integer::sum);
+                }
+            }
+        }
+        List<String> ans = new ArrayList<>();
+        for (Map.Entry<String, Integer> entry : shared.entrySet()) {
+            String key = entry.getKey();
+            int common = entry.getValue();
+            int comma = key.indexOf(',');
+            int a = Integer.parseInt(key.substring(0, comma));
+            int b = Integer.parseInt(key.substring(comma + 1));
+            double sim = (double) common / (docs[a].length + docs[b].length - common);
+            ans.add(String.format("%s: %.4f", key, sim));
+        }
+        return ans;
+    }
+}
+```
+
+### **C++**
+
+```cpp
+using pii = pair<int, int>;
+
+class Solution {
+public:
+    // Returns "i,j: sim" (4 decimals) for every document pair sharing a word.
+    vector<string> computeSimilarities(vector<vector<int>>& docs) {
+        const double eps = 1e-9; // nudge added to the ratio before formatting
+        unordered_map<int, vector<int>> d; // word -> ids of documents containing it
+        for (int i = 0; i < (int) docs.size(); ++i) {
+            for (int v : docs[i]) {
+                d[v].push_back(i);
+            }
+        }
+        map<pii, int> cnt; // (i, j) -> number of words the two documents share
+        for (auto& [_, ids] : d) {
+            int n = ids.size();
+            for (int i = 0; i < n; ++i) {
+                for (int j = i + 1; j < n; ++j) {
+                    cnt[{ids[i], ids[j]}]++;
+                }
+            }
+        }
+        vector<string> ans;
+        for (auto& [k, v] : cnt) {
+            auto [i, j] = k;
+            int tot = docs[i].size() + docs[j].size() - v; // size of the union
+            double x = (double) v / tot + eps;
+            char t[40]; // room for two full-width ints; snprintf bounds the write
+            snprintf(t, sizeof(t), "%d,%d: %0.4lf", i, j, x);
+            ans.push_back(t);
+        }
+        return ans;
+    }
+};
+```
 
+### **Go**
+
+```go
+func computeSimilarities(docs [][]int) []string {
+	// owners maps each word to the ids of the documents that contain it.
+	owners := map[int][]int{}
+	for id, words := range docs {
+		for _, w := range words {
+			owners[w] = append(owners[w], id)
+		}
+	}
+	type pair struct{ i, j int }
+	shared := map[pair]int{} // document pair -> number of common words
+	for _, ids := range owners {
+		for p, a := range ids {
+			for _, b := range ids[p+1:] {
+				shared[pair{a, b}]++
+			}
+		}
+	}
+	// Similarity = common / (len(a) + len(b) - common), printed with 4 decimals.
+	ans := make([]string, 0, len(shared))
+	for key, common := range shared {
+		union := len(docs[key.i]) + len(docs[key.j]) - common
+		sim := float64(common)/float64(union) + 1e-9
+		label := strconv.Itoa(key.i) + "," + strconv.Itoa(key.j) + ": "
+		ans = append(ans, label+fmt.Sprintf("%.4f", sim))
+	}
+	return ans
+}
 ```
 
 ### **...**
diff --git a/lcci/17.26.Sparse Similarity/Solution.cpp b/lcci/17.26.Sparse Similarity/Solution.cpp
@@ -0,0 +1,33 @@
+using pii = pair<int, int>;
+
+class Solution {
+public:
+    // Returns "i,j: sim" (4 decimals) for every document pair sharing a word.
+    vector<string> computeSimilarities(vector<vector<int>>& docs) {
+        const double eps = 1e-9; // nudge added to the ratio before formatting
+        unordered_map<int, vector<int>> d; // word -> ids of documents containing it
+        for (int i = 0; i < (int) docs.size(); ++i) {
+            for (int v : docs[i]) {
+                d[v].push_back(i);
+            }
+        }
+        map<pii, int> cnt; // (i, j) -> number of words the two documents share
+        for (auto& [_, ids] : d) {
+            int n = ids.size();
+            for (int i = 0; i < n; ++i) {
+                for (int j = i + 1; j < n; ++j) {
+                    cnt[{ids[i], ids[j]}]++;
+                }
+            }
+        }
+        vector<string> ans;
+        for (auto& [k, v] : cnt) {
+            auto [i, j] = k;
+            int tot = docs[i].size() + docs[j].size() - v; // size of the union
+            double x = (double) v / tot + eps;
+            char t[40]; // room for two full-width ints; snprintf bounds the write
+            snprintf(t, sizeof(t), "%d,%d: %0.4lf", i, j, x);
+            ans.push_back(t);
+        }
+        return ans;
+    }
+};
diff --git a/lcci/17.26.Sparse Similarity/Solution.go b/lcci/17.26.Sparse Similarity/Solution.go
@@ -0,0 +1,27 @@
+func computeSimilarities(docs [][]int) []string {
+	// owners maps each word to the ids of the documents that contain it.
+	owners := map[int][]int{}
+	for id, words := range docs {
+		for _, w := range words {
+			owners[w] = append(owners[w], id)
+		}
+	}
+	type pair struct{ i, j int }
+	shared := map[pair]int{} // document pair -> number of common words
+	for _, ids := range owners {
+		for p, a := range ids {
+			for _, b := range ids[p+1:] {
+				shared[pair{a, b}]++
+			}
+		}
+	}
+	// Similarity = common / (len(a) + len(b) - common), printed with 4 decimals.
+	ans := make([]string, 0, len(shared))
+	for key, common := range shared {
+		union := len(docs[key.i]) + len(docs[key.j]) - common
+		sim := float64(common)/float64(union) + 1e-9
+		label := strconv.Itoa(key.i) + "," + strconv.Itoa(key.j) + ": "
+		ans = append(ans, label+fmt.Sprintf("%.4f", sim))
+	}
+	return ans
+}
diff --git a/lcci/17.26.Sparse Similarity/Solution.java b/lcci/17.26.Sparse Similarity/Solution.java
@@ -0,0 +1,31 @@
+class Solution {
+    /** Returns "i,j: sim" (4 decimals) for each document pair with a common word. */
+    public List<String> computeSimilarities(int[][] docs) {
+        Map<Integer, List<Integer>> owners = new HashMap<>(); // word -> document ids
+        for (int id = 0; id < docs.length; ++id) {
+            for (int word : docs[id]) {
+                owners.computeIfAbsent(word, w -> new ArrayList<>()).add(id);
+            }
+        }
+        // Key "a,b" -> number of words documents a and b share.
+        Map<String, Integer> shared = new HashMap<>();
+        for (List<Integer> ids : owners.values()) {
+            for (int p = 0; p < ids.size(); ++p) {
+                for (int q = p + 1; q < ids.size(); ++q) {
+                    shared.merge(ids.get(p) + "," + ids.get(q), 1, Integer::sum);
+                }
+            }
+        }
+        List<String> ans = new ArrayList<>();
+        for (Map.Entry<String, Integer> entry : shared.entrySet()) {
+            String key = entry.getKey();
+            int common = entry.getValue();
+            int comma = key.indexOf(',');
+            int a = Integer.parseInt(key.substring(0, comma));
+            int b = Integer.parseInt(key.substring(comma + 1));
+            double sim = (double) common / (docs[a].length + docs[b].length - common);
+            ans.add(String.format("%s: %.4f", key, sim));
+        }
+        return ans;
+    }
+}
diff --git a/lcci/17.26.Sparse Similarity/Solution.py b/lcci/17.26.Sparse Similarity/Solution.py