Skip to content

Commit 2a402ac

Browse files
committed
fix the unbalance issue
1 parent 6996da2 commit 2a402ac

File tree

4 files changed

+38
-16
lines changed

4 files changed

+38
-16
lines changed

src/main.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ DEFINE_uint64(memsize, 4096, "memory size in megabytes");
1515
DEFINE_string(filename, "", "the file name of the input graph");
1616
DEFINE_string(filetype, "edgelist",
1717
"the type of input file (supports 'edgelist' and 'adjlist')");
18+
DEFINE_bool(inmem, false, "in-memory mode");
1819
DEFINE_double(sample_ratio, 2, "the sample size divided by num_vertices");
1920
DEFINE_string(method, "neighbor",
2021
"partition method: neighbor, random, and dbh");

src/neighbor_partitioner.cpp

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,22 @@ NeighborPartitioner::NeighborPartitioner(std::string basefilename)
2929

3030
LOG(INFO) << "num_vertices: " << num_vertices
3131
<< ", num_edges: " << num_edges;
32+
CHECK_EQ(sizeof(vid_t) + sizeof(size_t) + num_edges * sizeof(edge_t), filesize);
3233

3334
p = FLAGS_p;
3435
average_degree = (double)num_edges * 2 / num_vertices;
3536
assigned_edges = 0;
36-
LOG(INFO) << "sample_ratio: " << FLAGS_sample_ratio;
37-
max_sample_size = num_vertices * FLAGS_sample_ratio;
37+
LOG(INFO) << "inmem: " << FLAGS_inmem;
38+
if (!FLAGS_inmem) {
39+
LOG(INFO) << "sample_ratio: " << FLAGS_sample_ratio;
40+
max_sample_size = num_vertices * FLAGS_sample_ratio;
41+
} else
42+
max_sample_size = num_edges;
3843
local_average_degree = 2 * (double)max_sample_size / num_vertices;
3944
capacity = (double)num_edges * 1.05 / p + 1;
45+
BUFFER_SIZE = std::min(64 * 1024 / sizeof(edge_t),
46+
std::max((size_t)1, (size_t)(num_edges * 0.05 / p + 1)));
47+
LOG(INFO) << "buffer size: " << BUFFER_SIZE;
4048
occupied.assign(p, 0);
4149
adj_out.resize(num_vertices);
4250
adj_in.resize(num_vertices);
@@ -122,27 +130,36 @@ void NeighborPartitioner::read_remaining()
122130
}
123131
}
124132

125-
void NeighborPartitioner::clean_samples()
133+
void NeighborPartitioner::clean_buffer()
126134
{
127-
repv (u, num_vertices)
128-
for (auto &v : adj_out[u])
129-
sample_edges.emplace_back(u, v);
130-
131-
results.resize(sample_edges.size());
135+
results.resize(buffer.size());
132136

133137
#pragma omp parallel for
134-
for (size_t i = 0; i < sample_edges.size(); i++)
135-
results[i] = check_edge(&sample_edges[i]);
138+
for (size_t i = 0; i < buffer.size(); i++)
139+
results[i] = check_edge(&buffer[i]);
136140

137-
for (size_t i = 0; i < sample_edges.size();)
141+
for (size_t i = 0; i < buffer.size();)
138142
if (results[i] < p) {
139-
assign_edge(results[i], sample_edges[i].first, sample_edges[i].second);
143+
assign_edge(results[i], buffer[i].first, buffer[i].second);
140144
std::swap(results[i], results.back());
141145
results.pop_back();
142-
std::swap(sample_edges[i], sample_edges.back());
143-
sample_edges.pop_back();
146+
std::swap(buffer[i], buffer.back());
147+
buffer.pop_back();
144148
} else
145149
i++;
150+
sample_edges.insert(sample_edges.end(), buffer.begin(), buffer.end());
151+
buffer.clear();
152+
}
153+
154+
void NeighborPartitioner::clean_samples()
155+
{
156+
repv (u, num_vertices)
157+
for (auto &v : adj_out[u]) {
158+
buffer.emplace_back(u, v);
159+
if (buffer.size() >= BUFFER_SIZE)
160+
clean_buffer();
161+
}
162+
clean_buffer();
146163
}
147164

148165
void NeighborPartitioner::assign_master()
@@ -205,7 +222,8 @@ void NeighborPartitioner::split()
205222
read_timer.stop();
206223
DLOG(INFO) << "sample size: " << adj_out.num_edges();
207224
compute_timer.start();
208-
local_capacity = adj_out.num_edges() / (p - bucket);
225+
local_capacity =
226+
FLAGS_inmem ? capacity : adj_out.num_edges() / (p - bucket);
209227
while (occupied[bucket] < local_capacity) {
210228
vid_t d, vid;
211229
if (!min_heap.get_min(d, vid)) {

src/neighbor_partitioner.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ class graph_t
105105
class NeighborPartitioner : public Partitioner
106106
{
107107
private:
108-
const size_t BUFFER_SIZE = 64 * 1024 / sizeof(edge_t);
108+
size_t BUFFER_SIZE;
109109
std::string basefilename;
110110

111111
vid_t num_vertices;
@@ -120,6 +120,7 @@ class NeighborPartitioner : public Partitioner
120120
off_t filesize;
121121
char *fin_map, *fin_ptr, *fin_end;
122122

123+
std::vector<edge_t> buffer;
123124
std::vector<edge_t> sample_edges;
124125
graph_t adj_out, adj_in;
125126
MinHeap<vid_t, vid_t> min_heap;
@@ -268,6 +269,7 @@ class NeighborPartitioner : public Partitioner
268269

269270
void read_more();
270271
void read_remaining();
272+
void clean_buffer();
271273
void clean_samples();
272274
void assign_master();
273275
size_t count_mirrors();

src/util.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ DECLARE_int32(p);
1717
DECLARE_uint64(memsize);
1818
DECLARE_string(filename);
1919
DECLARE_string(filetype);
20+
DECLARE_bool(inmem);
2021
DECLARE_double(sample_ratio);
2122

2223
typedef uint32_t vid_t;

0 commit comments

Comments
 (0)