Skip to content

Commit 7344d12

Browse files
committed
Update the graph data structure
Use Compressed Sparse Row (CSR) to store graphs instead of N vectors to save memory.
1 parent b084bd7 commit 7344d12

File tree

4 files changed

+116
-40
lines changed

4 files changed

+116
-40
lines changed

.clang-format

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ UseTab: Never
44
BreakBeforeBraces: Linux
55
AllowShortIfStatementsOnASingleLine: false
66
IndentCaseLabels: false
7-
ForEachMacros: ['RANGES_FOR', 'FOREACH', 'rep']
7+
ForEachMacros: ['RANGES_FOR', 'FOREACH', 'rep', 'repv']

src/neighbor_partitioner.cpp

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,10 @@ NeighborPartitioner::NeighborPartitioner(std::string basefilename)
3535
assigned_edges = 0;
3636
max_sample_size = num_vertices * 2;
3737
local_average_degree = 2 * (double)max_sample_size / num_vertices;
38-
sample_size = 0;
3938
capacity = (double)num_edges * 1.05 / p + 1;
4039
occupied.assign(p, 0);
4140
adj_out.resize(num_vertices);
4241
adj_in.resize(num_vertices);
43-
rep (i, num_vertices) {
44-
adj_out[i].reserve(local_average_degree);
45-
adj_in[i].reserve(local_average_degree);
46-
}
4742
is_cores.assign(p, boost::dynamic_bitset<>(num_vertices));
4843
is_boundarys.assign(p, boost::dynamic_bitset<>(num_vertices));
4944
master.assign(num_vertices, -1);
@@ -57,27 +52,30 @@ NeighborPartitioner::NeighborPartitioner(std::string basefilename)
5752

5853
void NeighborPartitioner::read_more()
5954
{
60-
while (sample_size < max_sample_size && fin_ptr < fin_end) {
55+
while (sample_edges.size() < max_sample_size && fin_ptr < fin_end) {
6156
edge_t *e = (edge_t *)fin_ptr;
6257
fin_ptr += sizeof(edge_t);
6358
if (check_edge(e)) {
64-
sample_size++;
65-
adj_out[e->first].push_back(e->second);
66-
adj_in[e->second].push_back(e->first);
59+
sample_edges.push_back(*e);
6760
}
6861
}
62+
63+
adj_out.build(sample_edges);
64+
65+
adj_in.build_reverse(sample_edges);
66+
67+
sample_edges.clear();
6968
}
7069

7170
void NeighborPartitioner::read_remaining()
7271
{
7372
auto &is_boundary = is_boundarys[p - 1], &is_core = is_cores[p - 1];
7473

75-
rep (u, num_vertices)
76-
for (auto &v : adj_out[u]) {
77-
is_boundary[u] = true;
78-
is_boundary[v] = true;
79-
assign_edge(p - 1, u, v);
80-
}
74+
for (auto &e : sample_edges) {
75+
is_boundary[e.first] = true;
76+
is_boundary[e.second] = true;
77+
assign_edge(p - 1, e.first, e.second);
78+
}
8179

8280
while (fin_ptr < fin_end) {
8381
edge_t *e = (edge_t *)fin_ptr;
@@ -89,7 +87,7 @@ void NeighborPartitioner::read_remaining()
8987
}
9088
}
9189

92-
rep (i, num_vertices) {
90+
repv (i, num_vertices) {
9391
if (is_boundary[i]) {
9492
is_core[i] = true;
9593
rep (j, p - 1)
@@ -103,18 +101,12 @@ void NeighborPartitioner::read_remaining()
103101

104102
void NeighborPartitioner::clean_samples()
105103
{
106-
rep (u, num_vertices) {
104+
repv (u, num_vertices) {
107105
adjlist_t &neighbors = adj_out[u];
108-
for (size_t i = 0; i < neighbors.size();) {
109-
vid_t &v = neighbors[i];
110-
edge_t e(u, v);
111-
if (!check_edge(&e)) {
112-
sample_size--;
113-
erase_one(adj_in[v], u);
114-
std::swap(v, neighbors.back());
115-
neighbors.pop_back();
116-
} else
117-
i++;
106+
for (size_t i = 0; i < neighbors.size(); i++) {
107+
edge_t e(u, neighbors[i]);
108+
if (check_edge(&e))
109+
sample_edges.push_back(e);
118110
}
119111
}
120112
}
@@ -170,14 +162,16 @@ void NeighborPartitioner::split()
170162
Timer read_timer, compute_timer;
171163

172164
min_heap.reserve(num_vertices);
165+
sample_edges.reserve(max_sample_size);
166+
LOG(INFO) << "partitioning...";
173167
for (bucket = 0; bucket < p - 1; bucket++) {
174-
DLOG(INFO) << "start partition " << bucket;
168+
std::cerr << bucket << ", ";
175169
read_timer.start();
176170
read_more();
177171
read_timer.stop();
178-
DLOG(INFO) << "sample size: " << sample_size;
172+
DLOG(INFO) << "sample size: " << adj_out.num_edges();
179173
compute_timer.start();
180-
local_capacity = sample_size / (p - bucket);
174+
local_capacity = adj_out.num_edges() / (p - bucket);
181175
while (occupied[bucket] < local_capacity) {
182176
vid_t d, vid;
183177
if (!min_heap.get_min(d, vid)) {
@@ -199,7 +193,7 @@ void NeighborPartitioner::split()
199193
compute_timer.stop();
200194
}
201195
bucket = p - 1;
202-
DLOG(INFO) << "start partition " << bucket;
196+
std::cerr << bucket << std::endl;
203197
read_timer.start();
204198
read_remaining();
205199
read_timer.stop();

src/neighbor_partitioner.hpp

Lines changed: 89 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,109 @@
1515
#include "util.hpp"
1616
#include "min_heap.hpp"
1717

18-
class NeighborPartitioner
18+
class adjlist_t
19+
{
20+
private:
21+
vid_t *start;
22+
vid_t len;
23+
24+
public:
25+
adjlist_t() : start(NULL), len(0) {}
26+
adjlist_t(vid_t *start, vid_t len) : start(start), len(len) {}
27+
vid_t *begin() { return start; }
28+
vid_t *end() { return start + len; }
29+
void increment() { len++; }
30+
size_t size() const { return len; }
31+
vid_t &operator[](size_t idx) { return start[idx]; };
32+
const vid_t &operator[](size_t idx) const { return start[idx]; };
33+
vid_t &back() { return start[len - 1]; };
34+
const vid_t &back() const { return start[len - 1]; };
35+
void pop_back() { len--; }
36+
void clear() { len = 0; }
37+
};
38+
39+
class graph_t
1940
{
2041
private:
21-
typedef std::vector<vid_t> adjlist_t;
22-
typedef std::vector<adjlist_t> graph_t;
42+
vid_t num_vertices;
43+
std::vector<vid_t> neighbors;
44+
std::vector<adjlist_t> vdata;
45+
46+
public:
47+
void resize(vid_t _num_vertices)
48+
{
49+
num_vertices = _num_vertices;
50+
vdata.resize(num_vertices);
51+
}
52+
53+
size_t num_edges() const { return neighbors.size(); }
54+
55+
void build(std::vector<edge_t> &edges)
56+
{
57+
repv (v, num_vertices)
58+
vdata[v].clear();
59+
std::sort(edges.begin(), edges.end(),
60+
[](const edge_t &a, const edge_t &b) {
61+
return a.first < b.first;
62+
});
63+
neighbors.resize(edges.size());
64+
vid_t last_v = -1;
65+
vid_t *start_ptr = &neighbors[0];
66+
for (size_t i = 0; i < edges.size(); i++) {
67+
neighbors[i] = edges[i].second;
68+
if (edges[i].first == last_v) {
69+
vdata[last_v].increment();
70+
} else {
71+
vdata[edges[i].first] = adjlist_t(start_ptr + i, 1);
72+
last_v = edges[i].first;
73+
}
74+
}
75+
}
76+
77+
void build_reverse(std::vector<edge_t> &edges)
78+
{
79+
repv (v, num_vertices)
80+
vdata[v].clear();
81+
std::sort(edges.begin(), edges.end(),
82+
[](const edge_t &a, const edge_t &b) {
83+
return a.second < b.second;
84+
});
85+
neighbors.resize(edges.size());
86+
vid_t last_v = -1;
87+
vid_t *start_ptr = &neighbors[0];
88+
for (size_t i = 0; i < edges.size(); i++) {
89+
neighbors[i] = edges[i].first;
90+
if (edges[i].second == last_v) {
91+
vdata[last_v].increment();
92+
} else {
93+
vdata[edges[i].second] = adjlist_t(start_ptr + i, 1);
94+
last_v = edges[i].second;
95+
}
96+
}
97+
}
98+
99+
adjlist_t &operator[](size_t idx) { return vdata[idx]; };
100+
const adjlist_t &operator[](size_t idx) const { return vdata[idx]; };
101+
};
23102

103+
class NeighborPartitioner
104+
{
105+
private:
24106
std::string basefilename;
25107

26108
vid_t num_vertices;
27109
size_t num_edges, assigned_edges;
28110
int p, bucket;
29111
double average_degree, local_average_degree;
30-
size_t sample_size, max_sample_size;
112+
size_t max_sample_size;
31113
size_t capacity, local_capacity;
32114

33115
// use mmap for file input
34116
int fin;
35117
off_t filesize;
36118
char *fin_map, *fin_ptr, *fin_end;
37119

120+
std::vector<edge_t> sample_edges;
38121
graph_t adj_out, adj_in;
39122
MinHeap<vid_t, vid_t> min_heap;
40123
std::vector<size_t> occupied;
@@ -126,13 +209,11 @@ class NeighborPartitioner
126209
for (size_t i = 0; i < neighbors.size();) {
127210
vid_t &u = neighbors[i];
128211
if (is_core[u]) {
129-
sample_size--;
130212
assign_edge(bucket, direction ? vid : u, direction ? u : vid);
131213
min_heap.decrease_key(vid);
132214
std::swap(u, neighbors.back());
133215
neighbors.pop_back();
134216
} else if (is_boundary[u] && occupied[bucket] < local_capacity) {
135-
sample_size--;
136217
assign_edge(bucket, direction ? vid : u, direction ? u : vid);
137218
min_heap.decrease_key(vid);
138219
erase_one(adj_r[u], vid);
@@ -155,11 +236,11 @@ class NeighborPartitioner
155236

156237
add_boundary(vid);
157238

158-
for (auto& w : adj_out[vid])
239+
for (auto &w : adj_out[vid])
159240
add_boundary(w);
160241
adj_out[vid].clear();
161242

162-
for (auto& w : adj_in[vid])
243+
for (auto &w : adj_in[vid])
163244
add_boundary(w);
164245
adj_in[vid].clear();
165246
}

src/util.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "threadpool11/threadpool11.hpp"
1212

1313
#define rep(i, n) for (int i = 0; i < (int)(n); ++i)
14+
// Like rep, but the index has type vid_t; n is parenthesised so expression arguments keep their meaning.
#define repv(i, n) for (vid_t i = 0; i < (n); ++i)
1415

1516
DECLARE_int32(p);
1617
DECLARE_uint64(memsize);

0 commit comments

Comments
 (0)