Skip to content

Commit 04a36fe

Browse files
committed
pack-objects: fill a packbuilder from a walk
Most use-cases for the object packer communicate in terms of commits which each side has. We already have an object to specify this relationship between commits, namely git_revwalk. By knowing which commits we want to pack and which the other side already has, we can perform similar optimisations to git, by marking each tree as interesting or uninteresting only once, and not sending those trees which we know the other side has.
1 parent b63b76e commit 04a36fe

File tree

3 files changed

+265
-1
lines changed

3 files changed

+265
-1
lines changed

include/git2/pack.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,19 @@ GIT_EXTERN(int) git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *
114114
*/
115115
GIT_EXTERN(int) git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *id);
116116

117+
/**
118+
* Insert objects as given by the walk
119+
*
120+
* Those commits and all objects they reference will be inserted into
121+
* the packbuilder.
122+
*
123+
* @param pb the packbuilder
124+
* @param walk the revwalk to use to fill the packbuilder
125+
*
126+
* @return 0 or an error code
127+
*/
128+
GIT_EXTERN(int) git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk);
129+
117130
/**
118131
* Write the contents of the packfile to an in-memory buffer
119132
*

src/pack-objects.c

Lines changed: 242 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include "thread-utils.h"
1616
#include "tree.h"
1717
#include "util.h"
18+
#include "revwalk.h"
19+
#include "commit_list.h"
1820

1921
#include "git2/pack.h"
2022
#include "git2/commit.h"
@@ -126,10 +128,16 @@ int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
126128
GITERR_CHECK_ALLOC(pb);
127129

128130
pb->object_ix = git_oidmap_alloc();
129-
130131
if (!pb->object_ix)
131132
goto on_error;
132133

134+
pb->walk_objects = git_oidmap_alloc();
135+
if (!pb->walk_objects)
136+
goto on_error;
137+
138+
if (git_pool_init(&pb->object_pool, sizeof(git_walk_object), 0) < 0)
139+
goto on_error;
140+
133141
pb->repo = repo;
134142
pb->nr_threads = 1; /* do not spawn any thread by default */
135143

@@ -1347,6 +1355,7 @@ const git_oid *git_packbuilder_hash(git_packbuilder *pb)
13471355
return &pb->pack_oid;
13481356
}
13491357

1358+
13501359
static int cb_tree_walk(
13511360
const char *root, const git_tree_entry *entry, void *payload)
13521361
{
@@ -1405,6 +1414,235 @@ uint32_t git_packbuilder_written(git_packbuilder *pb)
14051414
return pb->nr_written;
14061415
}
14071416

1417+
/*
 * Allocate a fresh walk object for `id` out of the packbuilder's
 * object pool.
 *
 * Despite the name, this performs no lookup: it always creates a new
 * entry and does not register it in pb->walk_objects; that is left to
 * the caller.
 *
 * NOTE(review): the flags are presumably zeroed by git_pool_mallocz;
 * confirm against pool.h.
 *
 * @param out receives the newly allocated object
 * @param pb the packbuilder whose pool owns the allocation
 * @param id the object id copied into the new entry
 * @return 0 on success, -1 (with the OOM error set) if allocation fails
 */
static int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
{
	git_walk_object *obj;

	obj = git_pool_mallocz(&pb->object_pool, 1);
	if (!obj) {
		giterr_set_oom();
		return -1;
	}

	git_oid_cpy(&obj->id, id);

	*out = obj;
	return 0;
}
1432+
1433+
/*
 * Fetch the walk object tracking `id`, creating and registering one in
 * pb->walk_objects if it is not known yet.
 *
 * @param out receives the existing or newly created walk object
 * @param pb the packbuilder holding the walk_objects map
 * @param id the object id to look up
 * @return 0 on success, a negative value if the object could not be
 *         allocated or stored in the map
 */
static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
{
	int error;
	khiter_t pos;
	git_walk_object *obj;

	pos = git_oidmap_lookup_index(pb->walk_objects, id);
	if (git_oidmap_valid_index(pb->walk_objects, pos)) {
		obj = git_oidmap_value_at(pb->walk_objects, pos);
	} else {
		if ((error = lookup_walk_object(&obj, pb, id)) < 0)
			return error;

		/*
		 * The insert reports its status through `error`.
		 * NOTE(review): assumed to follow khash's kh_put convention
		 * (negative on allocation failure, >= 0 otherwise); confirm
		 * against oidmap.h. Without this check a failed insert would
		 * hand back an object that the map does not know about.
		 */
		git_oidmap_insert(pb->walk_objects, &obj->id, obj, error);
		if (error < 0) {
			giterr_set_oom();
			return -1;
		}
	}

	*out = obj;
	return 0;
}
1452+
1453+
/*
 * Flag the blob `id` as uninteresting in the packbuilder's walk-object
 * table, creating its entry on demand.
 *
 * Returns 0 on success or the negative error from retrieve_object().
 */
static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
{
	git_walk_object *blob;
	int ret = retrieve_object(&blob, pb, id);

	if (ret < 0)
		return ret;

	blob->uninteresting = 1;
	return 0;
}
1465+
1466+
/*
 * Flag the tree `id` and, recursively, every tree and blob reachable
 * from it as uninteresting.
 *
 * A tree that already carries the flag is not descended into again, so
 * each tree is expanded at most once.
 *
 * Returns 0 on success or a negative error code from the lookup or
 * from marking one of the children.
 */
static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
{
	git_walk_object *walk_obj;
	git_tree *tree;
	size_t idx;
	int ret;

	ret = retrieve_object(&walk_obj, pb, id);
	if (ret < 0)
		return ret;

	/* already handled on an earlier visit */
	if (walk_obj->uninteresting)
		return 0;

	walk_obj->uninteresting = 1;

	ret = git_tree_lookup(&tree, pb->repo, id);
	if (ret < 0)
		return ret;

	for (idx = 0; idx < git_tree_entrycount(tree); idx++) {
		const git_tree_entry *child = git_tree_entry_byindex(tree, idx);
		const git_oid *child_id = git_tree_entry_id(child);

		switch (git_tree_entry_type(child)) {
		case GIT_OBJ_TREE:
			ret = mark_tree_uninteresting(pb, child_id);
			break;
		case GIT_OBJ_BLOB:
			ret = mark_blob_uninteresting(pb, child_id);
			break;
		default:
			/* it's a submodule or something unknown, we don't want it */
			continue;
		}

		/* stop at the first child that fails; the tree is freed below */
		if (ret < 0)
			break;
	}

	git_tree_free(tree);
	return ret;
}
1506+
1507+
/*
1508+
* Mark the edges of the graph uninteresting. Since we start from a
1509+
* git_revwalk, the commits are already uninteresting, but we need to
1510+
* mark the trees and blobs.
1511+
*/
1512+
static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
1513+
{
1514+
int error;
1515+
git_commit_list *list;
1516+
git_commit *commit;
1517+
1518+
for (list = commits; list; list = list->next) {
1519+
if (!list->item->uninteresting)
1520+
continue;
1521+
1522+
if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0)
1523+
return error;
1524+
1525+
error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
1526+
git_commit_free(commit);
1527+
1528+
if (error < 0)
1529+
return error;
1530+
}
1531+
1532+
return 0;
1533+
}
1534+
1535+
/*
 * Insert `tree` and everything reachable from it into the packbuilder,
 * skipping whatever has been flagged uninteresting.
 *
 * A tree that was already seen, or that is marked uninteresting, is
 * skipped together with its whole subtree. Blobs marked uninteresting
 * are skipped individually. Entries that are neither trees nor blobs
 * are ignored.
 *
 * @param pb the packbuilder to fill
 * @param tree the tree to insert; ownership stays with the caller
 * @return 0 on success, or the error code of the failing call
 */
static int insert_tree(git_packbuilder *pb, git_tree *tree)
{
	size_t i;
	int error;
	git_tree *subtree;
	git_walk_object *obj, *entry_obj;
	const char *name;

	if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
		return error;

	/*
	 * Honour the uninteresting mark here: without it, every tree and
	 * blob flagged as uninteresting would be packed anyway.
	 */
	if (obj->seen || obj->uninteresting)
		return 0;

	obj->seen = 1;

	if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
		return error;

	for (i = 0; i < git_tree_entrycount(tree); i++) {
		const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
		const git_oid *entry_id = git_tree_entry_id(entry);

		switch (git_tree_entry_type(entry)) {
		case GIT_OBJ_TREE:
			if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
				return error;

			/* the recursive call applies the seen/uninteresting checks */
			error = insert_tree(pb, subtree);
			git_tree_free(subtree);

			if (error < 0)
				return error;

			break;
		case GIT_OBJ_BLOB:
			if ((error = retrieve_object(&entry_obj, pb, entry_id)) < 0)
				return error;

			/* flagged uninteresting, leave it out of the pack */
			if (entry_obj->uninteresting)
				continue;

			name = git_tree_entry_name(entry);
			if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
				return error;
			break;
		default:
			/* it's a submodule or something unknown, we don't want it */
			;
		}
	}

	return error;
}
1583+
1584+
/*
 * Insert the commit tracked by `obj`, followed by its root tree and
 * everything insert_tree() adds for that tree.
 *
 * The object is flagged as seen before anything is inserted, so it
 * stays flagged even if a later step fails.
 *
 * @param pb the packbuilder to fill
 * @param obj walk object whose id names the commit to insert
 * @return 0 on success or a negative error code
 */
static int insert_commit(git_packbuilder *pb, git_walk_object *obj)
{
	int error;
	git_commit *commit = NULL;
	git_tree *tree = NULL;

	obj->seen = 1;

	if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
		return error;

	if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0)
		return error;

	/* tree is still NULL if this lookup fails; cleanup frees both pointers */
	if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0)
		goto cleanup;

	error = insert_tree(pb, tree);

cleanup:
	git_commit_free(commit);
	git_tree_free(tree);
	return error;
}
1609+
1610+
/*
 * Fill the packbuilder with the commits produced by `walk`, plus the
 * trees and blobs they reference.
 *
 * First the trees and blobs of the uninteresting commits in the walk's
 * user input are marked uninteresting; then every commit the walk
 * yields is inserted unless it was already seen or is uninteresting.
 *
 * @param pb the packbuilder to fill
 * @param walk the revwalk to iterate; it is advanced until exhausted
 * @return 0 on success or a negative error code
 */
int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
{
	int error;
	git_oid id;
	git_walk_object *obj;

	assert(pb && walk);

	if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
		return error;

	/*
	 * TODO: git marks the parents of the edges
	 * uninteresting. This may provide a speed advantage, but does
	 * seem to assume the remote does not have a single-commit
	 * history on the other end.
	 */

	/* walk each commit's tree down to the blobs and insert them, stopping when uninteresting */
	while ((error = git_revwalk_next(&id, walk)) == 0) {
		if ((error = retrieve_object(&obj, pb, &id)) < 0)
			return error;

		if (obj->seen || obj->uninteresting)
			continue;

		if ((error = insert_commit(pb, obj)) < 0)
			return error;
	}

	/* running out of commits is the normal way for the loop to end */
	if (error == GIT_ITEROVER)
		error = 0;

	/* any other failure of the walk must reach the caller */
	return error;
}
1645+
14081646
int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
14091647
{
14101648
if (!pb)
@@ -1438,6 +1676,9 @@ void git_packbuilder_free(git_packbuilder *pb)
14381676
if (pb->object_list)
14391677
git__free(pb->object_list);
14401678

1679+
git_oidmap_free(pb->walk_objects);
1680+
git_pool_clear(&pb->object_pool);
1681+
14411682
git_hash_ctx_cleanup(&pb->ctx);
14421683
git_zstream_free(&pb->zstream);
14431684

src/pack-objects.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "oidmap.h"
1616
#include "netops.h"
1717
#include "zstream.h"
18+
#include "pool.h"
1819

1920
#include "git2/oid.h"
2021
#include "git2/pack.h"
@@ -50,6 +51,12 @@ typedef struct git_pobject {
5051
filled:1;
5152
} git_pobject;
5253

54+
typedef struct {
55+
git_oid id;
56+
unsigned int uninteresting:1,
57+
seen:1;
58+
} git_walk_object;
59+
5360
struct git_packbuilder {
5461
git_repository *repo; /* associated repository */
5562
git_odb *odb; /* associated object database */
@@ -66,6 +73,9 @@ struct git_packbuilder {
6673

6774
git_oidmap *object_ix;
6875

76+
git_oidmap *walk_objects;
77+
git_pool object_pool;
78+
6979
git_oid pack_oid; /* hash of written pack */
7080

7181
/* synchronization objects */

0 commit comments

Comments
 (0)