@@ -88,10 +88,9 @@ typedef enum {
8888 GIT_DIFF_INCLUDE_UNTRACKED = (1 << 8 ),
8989 /** Include unmodified files in the diff list */
9090 GIT_DIFF_INCLUDE_UNMODIFIED = (1 << 9 ),
91- /** Even with the GIT_DIFF_INCLUDE_UNTRACKED flag, when an untracked
92- * directory is found, only a single entry for the directory is added
93- * to the diff list; with this flag, all files under the directory will
94- * be included, too.
91+ /** Even with GIT_DIFF_INCLUDE_UNTRACKED, an entire untracked directory
92+ * will be marked with only a single entry in the diff list; this flag
93+ * adds all files under the directory as UNTRACKED entries, too.
9594 */
9695 GIT_DIFF_RECURSE_UNTRACKED_DIRS = (1 << 10 ),
9796 /** If the pathspec is set in the diff options, this flags means to
@@ -120,6 +119,11 @@ typedef enum {
120119 GIT_DIFF_INCLUDE_TYPECHANGE_TREES = (1 << 16 ),
121120 /** Ignore file mode changes */
122121 GIT_DIFF_IGNORE_FILEMODE = (1 << 17 ),
122+ /** Even with GIT_DIFF_INCLUDE_IGNORED, an entire ignored directory
123+ * will be marked with only a single entry in the diff list; this flag
124+ * adds all files under the directory as IGNORED entries, too.
125+ */
126+ GIT_DIFF_RECURSE_IGNORED_DIRS = (1 << 18 ),
123127} git_diff_option_t ;
124128
125129/**
@@ -133,20 +137,18 @@ typedef enum {
133137typedef struct git_diff_list git_diff_list ;
134138
135139/**
136- * Flags for the file object on each side of a diff .
140+ * Flags for the delta object and the file objects on each side .
137141 *
138- * Note: most of these flags are just for **internal** consumption by
139- * libgit2, but some of them may be interesting to external users.
142+ * These flags are used for both the `flags` value of the `git_diff_delta`
143+ * and the flags for the `git_diff_file` objects representing the old and
144+ * new sides of the delta. Values outside of this public range should be
145+ * considered reserved for internal or future use.
140146 */
141147typedef enum {
142- GIT_DIFF_FILE_VALID_OID = (1 << 0 ), /** `oid` value is known correct */
143- GIT_DIFF_FILE_FREE_PATH = (1 << 1 ), /** `path` is allocated memory */
144- GIT_DIFF_FILE_BINARY = (1 << 2 ), /** should be considered binary data */
145- GIT_DIFF_FILE_NOT_BINARY = (1 << 3 ), /** should be considered text data */
146- GIT_DIFF_FILE_FREE_DATA = (1 << 4 ), /** internal file data is allocated */
147- GIT_DIFF_FILE_UNMAP_DATA = (1 << 5 ), /** internal file data is mmap'ed */
148- GIT_DIFF_FILE_NO_DATA = (1 << 6 ), /** file data should not be loaded */
149- } git_diff_file_flag_t ;
148+ GIT_DIFF_FLAG_BINARY = (1 << 0 ), /**< file(s) treated as binary data */
149+ GIT_DIFF_FLAG_NOT_BINARY = (1 << 1 ), /**< file(s) treated as text data */
150+ GIT_DIFF_FLAG_VALID_OID = (1 << 2 ), /**< `oid` value is known correct */
151+ } git_diff_flag_t ;
150152
151153/**
152154 * What type of change is described by a git_diff_delta?
@@ -186,18 +188,17 @@ typedef enum {
186188 *
187189 * `size` is the size of the entry in bytes.
188190 *
189- * `flags` is a combination of the `git_diff_file_flag_t` types, but those
190- * are largely internal values.
191+ * `flags` is a combination of the `git_diff_flag_t` values.
191192 *
192193 * `mode` is, roughly, the stat() `st_mode` value for the item. This will
193194 * be restricted to one of the `git_filemode_t` values.
194195 */
195196typedef struct {
196- git_oid oid ;
197+ git_oid oid ;
197198 const char * path ;
198- git_off_t size ;
199- unsigned int flags ;
200- uint16_t mode ;
199+ git_off_t size ;
200+ uint32_t flags ;
201+ uint16_t mode ;
201202} git_diff_file ;
202203
203204/**
@@ -219,16 +220,17 @@ typedef struct {
219220 *
220221 * Under some circumstances, in the name of efficiency, not all fields will
221222 * be filled in, but we generally try to fill in as much as possible. One
222- * example is that the "binary" field will not examine file contents if you
223- * do not pass in hunk and/or line callbacks to the diff foreach iteration
224- * function. It will just use the git attributes for those files.
223+ * example is that the "flags" field may not have either the `BINARY` or the
224+ * `NOT_BINARY` flag set to avoid examining file contents if you do not pass
225+ * in hunk and/or line callbacks to the diff foreach iteration function. It
226+ * will just use the git attributes for those files.
225227 */
226228typedef struct {
227229 git_diff_file old_file ;
228230 git_diff_file new_file ;
229231 git_delta_t status ;
230- unsigned int similarity ; /**< for RENAMED and COPIED, value 0-100 */
231- int binary ;
232+ uint32_t similarity ; /**< for RENAMED and COPIED, value 0-100 */
233+ uint32_t flags ; /**< combination of `git_diff_flag_t` values */
232234} git_diff_delta ;
233235
234236/**
@@ -377,7 +379,7 @@ typedef struct git_diff_patch git_diff_patch;
377379typedef enum {
378380 /** look for renames? (`--find-renames`) */
379381 GIT_DIFF_FIND_RENAMES = (1 << 0 ),
380- /** consider old size of modified for renames? (`--break-rewrites=N`) */
382+ /** consider old side of modified for renames? (`--break-rewrites=N`) */
381383 GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1 << 1 ),
382384
383385 /** look for copies? (a la `--find-copies`) */
@@ -387,10 +389,49 @@ typedef enum {
387389
388390 /** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */
389391 GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4 ),
392+
393+ /** turn on all finding features */
394+ GIT_DIFF_FIND_ALL = (0x1f ),
395+
396+ /** measure similarity ignoring leading whitespace (default) */
397+ GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0 ,
398+ /** measure similarity ignoring all whitespace */
399+ GIT_DIFF_FIND_IGNORE_WHITESPACE = (1 << 6 ),
400+ /** measure similarity including all data */
401+ GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1 << 7 ),
390402} git_diff_find_t ;
391403
404+ /**
405+ * Pluggable similarity metric: signature, comparison and cleanup callbacks
406+ */
407+ typedef struct {
408+ int (* file_signature )(
409+ void * * out , const git_diff_file * file ,
410+ const char * fullpath , void * payload );
411+ int (* buffer_signature )(
412+ void * * out , const git_diff_file * file ,
413+ const char * buf , size_t buflen , void * payload );
414+ void (* free_signature )(void * sig , void * payload );
415+ int (* similarity )(int * score , void * siga , void * sigb , void * payload );
416+ void * payload ; /**< NOTE(review): presumably handed to each callback as its `payload` argument; confirm */
417+ } git_diff_similarity_metric ;
418+
392419/**
393420 * Control behavior of rename and copy detection
421+ *
422+ * These options mostly mimic parameters that can be passed to git-diff.
423+ *
424+ * - `rename_threshold` is the same as the -M option with a value
425+ * - `copy_threshold` is the same as the -C option with a value
426+ * - `rename_from_rewrite_threshold` matches the top of the -B option
427+ * - `break_rewrite_threshold` matches the bottom of the -B option
428+ * - `target_limit` matches the -l option
429+ *
430+ * The `metric` option allows you to plug in a custom similarity metric.
431+ * Set it to NULL for the default internal metric which is based on sampling
432+ * hashes of ranges of data in the file. The default metric is a pretty
433+ * good similarity approximation that should work fairly well for both text
434+ * and binary data, and is pretty fast with fixed memory overhead.
394435 */
395436typedef struct {
396437 unsigned int version ;
@@ -411,6 +452,9 @@ typedef struct {
411452 * the `diff.renameLimit` config) (default 200)
412453 */
413454 unsigned int target_limit ;
455+
456+ /** Pluggable similarity metric; pass NULL to use internal metric */
457+ git_diff_similarity_metric * metric ;
414458} git_diff_find_options ;
415459
416460#define GIT_DIFF_FIND_OPTIONS_VERSION 1
@@ -856,11 +900,12 @@ GIT_EXTERN(int) git_diff_patch_to_str(
856900 *
857901 * NULL is allowed for either `old_blob` or `new_blob` and will be treated
858902 * as an empty blob, with the `oid` set to NULL in the `git_diff_file` data.
903+ * Passing NULL for both blobs is a noop; no callbacks will be made at all.
859904 *
860- * We do run a binary content check on the two blobs and if either of the
861- * blobs looks like binary data, the `git_diff_delta` binary attribute will
862- * be set to 1 and no call to the hunk_cb nor line_cb will be made (unless
863- * you pass `GIT_DIFF_FORCE_TEXT` of course).
905+ * We do run a binary content check on the blob content and if either blob
906+ * looks like binary data, the `git_diff_delta` flags will have
907+ * `GIT_DIFF_FLAG_BINARY` set and no call to the hunk_cb nor line_cb will be
908+ * made (unless you pass `GIT_DIFF_FORCE_TEXT` of course).
864909 *
865910 * @return 0 on success, GIT_EUSER on non-zero callback, or error code
866911 */
@@ -880,6 +925,11 @@ GIT_EXTERN(int) git_diff_blobs(
880925 * so the `git_diff_file` parameters to the callbacks will be faked a la the
881926 * rules for `git_diff_blobs()`.
882927 *
928+ * Passing NULL for `old_blob` will be treated as an empty blob (i.e. the
929+ * `file_cb` will be invoked with GIT_DELTA_ADDED and the diff will be the
930+ * entire content of the buffer added). Passing NULL for the buffer will do
931+ * the reverse, with GIT_DELTA_DELETED and blob content removed.
932+ *
883933 * @return 0 on success, GIT_EUSER on non-zero callback, or error code
884934 */
885935GIT_EXTERN (int ) git_diff_blob_to_buffer (
0 commit comments