|
27 | 27 | <li><code>docs[i].length <= 500</code></li>
|
28 | 28 | <li>相似度大于 0 的文档对数不会超过 1000</li>
|
29 | 29 | </ul>
|
| 30 | + |
30 | 31 | ## 解法
|
| 32 | + |
31 | 33 | <!-- 这里可写通用的实现逻辑 -->
|
| 34 | + |
| 35 | +**方法一:哈希表** |
| 36 | + |
| 37 | +用哈希表 $d$ 记录每个单词对应了哪些文档。 |
| 38 | + |
| 39 | +遍历 $d$ 的每一个文档列表,其任意两个文档都有相似度,我们用哈希表 $s$ 累加两个文档同时出现的单词个数。最后遍历 $s$,计算相似度。 |
| 40 | + |
| 41 | +时间复杂度 $O(n^2 \times m)$,其中 $n$ 为文档数量,$m$ 为单个文档的最大长度;本题中两者均不超过 $500$,因此也可记为 $O(n^3)$。 |
| 42 | + |
32 | 43 | <!-- tabs:start -->
|
| 44 | + |
33 | 45 | ### **Python3**
|
| 46 | + |
34 | 47 | <!-- 这里可写当前语言的特殊实现逻辑 -->
|
| 48 | + |
35 | 49 | ```python
|
class Solution:
    def computeSimilarities(self, docs: List[List[int]]) -> List[str]:
        """Report the similarity of every pair of documents sharing a word.

        The similarity of two documents is the number of words they have in
        common divided by the number of distinct words across both.

        Args:
            docs: ``docs[i]`` is the list of word ids in document ``i``.
                Assumes each document lists a word at most once (as the
                problem statement guarantees); duplicates are not removed.

        Returns:
            One string ``"i,j: s"`` per pair with at least one shared word,
            where ``s`` has four decimals. Pairs appear in first-seen order.
        """
        # Nudges exact ties such as 0.03125 upwards so that they round
        # half up instead of following the formatter's default rounding.
        eps = 1e-9

        # word id -> ids of the documents containing that word
        word_to_docs = defaultdict(list)
        for doc_id, words in enumerate(docs):
            for word in words:
                word_to_docs[word].append(doc_id)

        # (i, j) -> number of words shared by documents i and j.
        # Document ids are appended in increasing order, so i <= j here.
        shared = Counter()
        for ids in word_to_docs.values():
            n = len(ids)
            for a in range(n):
                for b in range(a + 1, n):
                    shared[(ids[a], ids[b])] += 1

        ans = []
        for (i, j), common in shared.items():
            # |A ∪ B| = |A| + |B| - |A ∩ B|
            union = len(docs[i]) + len(docs[j]) - common
            ratio = common / union + eps
            ans.append(f'{i},{j}: {ratio:.4f}')
        return ans
| 69 | +``` |
36 | 70 |
|
37 |
| -```` |
38 | 71 | ### **Java**
|
| 72 | + |
39 | 73 | <!-- 这里可写当前语言的特殊实现逻辑 -->
|
| 74 | + |
40 | 75 | ```java
|
| 76 | +class Solution { |
| 77 | + public List<String> computeSimilarities(int[][] docs) { |
| 78 | + Map<Integer, List<Integer>> d = new HashMap<>(); |
| 79 | + for (int i = 0; i < docs.length; ++i) { |
| 80 | + for (int v : docs[i]) { |
| 81 | + d.computeIfAbsent(v, k -> new ArrayList<>()).add(i); |
| 82 | + } |
| 83 | + } |
| 84 | + Map<String, Integer> cnt = new HashMap<>(); |
| 85 | + for (var ids : d.values()) { |
| 86 | + int n = ids.size(); |
| 87 | + for (int i = 0; i < n; ++i) { |
| 88 | + for (int j = i + 1; j < n; ++j) { |
| 89 | + String k = ids.get(i) + "," + ids.get(j); |
| 90 | + cnt.put(k, cnt.getOrDefault(k, 0) + 1); |
| 91 | + } |
| 92 | + } |
| 93 | + } |
| 94 | + List<String> ans = new ArrayList<>(); |
| 95 | + for (var e : cnt.entrySet()) { |
| 96 | + String k = e.getKey(); |
| 97 | + int v = e.getValue(); |
| 98 | + String[] t = k.split(","); |
| 99 | + int i = Integer.parseInt(t[0]), j = Integer.parseInt(t[1]); |
| 100 | + int tot = docs[i].length + docs[j].length - v; |
| 101 | + double x = (double) v / tot; |
| 102 | + ans.add(String.format("%s: %.4f", k, x)); |
| 103 | + } |
| 104 | + return ans; |
| 105 | + } |
| 106 | +} |
| 107 | +``` |
| 108 | + |
| 109 | +### **C++** |
41 | 110 |
|
42 |
| -```` |
| 111 | +```cpp |
using pii = std::pair<int, int>;

class Solution {
public:
    // Reports the similarity of every pair of documents that share a word.
    //
    // The similarity of two documents is the number of words they have in
    // common divided by the number of distinct words across both.
    //
    // docs[i] holds the word ids of document i; a document is assumed to list
    // each word at most once (duplicates are not removed).
    //
    // Returns one string "i,j: s" per pair with at least one shared word, with
    // s printed to four decimals. Pairs come out ordered by (i, j) because the
    // counts are kept in an ordered map.
    std::vector<std::string> computeSimilarities(std::vector<std::vector<int>>& docs) {
        // Nudges exact ties such as 0.03125 upwards so they round half up.
        const double eps = 1e-9;

        // word id -> ids of the documents containing that word
        std::unordered_map<int, std::vector<int>> d;
        for (int i = 0; i < static_cast<int>(docs.size()); ++i) {
            for (int v : docs[i]) {
                d[v].push_back(i);
            }
        }

        // (i, j) -> number of words shared by documents i and j.
        // Document ids are appended in increasing order, so i <= j in each key.
        std::map<pii, int> cnt;
        for (auto& [_, ids] : d) {
            int n = static_cast<int>(ids.size());
            for (int i = 0; i < n; ++i) {
                for (int j = i + 1; j < n; ++j) {
                    ++cnt[{ids[i], ids[j]}];
                }
            }
        }

        std::vector<std::string> ans;
        ans.reserve(cnt.size());
        for (auto& [k, v] : cnt) {
            auto [i, j] = k;
            // |A ∪ B| = |A| + |B| - |A ∩ B|
            int tot = static_cast<int>(docs[i].size()) + static_cast<int>(docs[j].size()) - v;
            double x = static_cast<double>(v) / tot + eps;
            // snprintf bounds the write to the buffer, whatever the index widths.
            char t[32];
            std::snprintf(t, sizeof(t), "%d,%d: %.4f", i, j, x);
            ans.emplace_back(t);
        }
        return ans;
    }
};
| 145 | +``` |
| 146 | +
|
| 147 | +### **Go** |
| 148 | +
|
| 149 | +```go |
// computeSimilarities reports the similarity of every pair of documents that
// share at least one word.
//
// The similarity of two documents is the number of words they have in common
// divided by the number of distinct words across both. docs[i] holds the word
// ids of document i; a document is assumed to list each word at most once
// (duplicates are not removed).
//
// Each result has the form "i,j: s" with s printed to four decimals. Results
// come from map iteration, so their order is unspecified.
func computeSimilarities(docs [][]int) []string {
	// Nudges exact ties such as 0.03125 upwards so they round half up.
	const eps = 1e-9

	// word id -> ids of the documents containing that word
	d := map[int][]int{}
	for i, words := range docs {
		for _, w := range words {
			d[w] = append(d[w], i)
		}
	}

	// (i, j) -> number of words shared by documents i and j.
	// Document ids are appended in increasing order, so i <= j in each key.
	type pair struct{ i, j int }
	cnt := map[pair]int{}
	for _, ids := range d {
		n := len(ids)
		for a := 0; a < n; a++ {
			for b := a + 1; b < n; b++ {
				cnt[pair{ids[a], ids[b]}]++
			}
		}
	}

	ans := make([]string, 0, len(cnt))
	for k, v := range cnt {
		// |A ∪ B| = |A| + |B| - |A ∩ B|
		tot := len(docs[k.i]) + len(docs[k.j]) - v
		x := float64(v)/float64(tot) + eps
		ans = append(ans, fmt.Sprintf("%d,%d: %.4f", k.i, k.j, x))
	}
	return ans
}
| 177 | +``` |
43 | 178 |
|
44 | 179 | ### **...**
|
45 | 180 |
|
|
0 commit comments