@@ -159,6 +159,16 @@ void DC_set_data(DeltaChunk* dc, const uchar* data, Py_ssize_t dlen, bool shared
159
159
160
160
}
161
161
162
+ // Make the given data our own. It is assumed to have the size stored in our instance
163
+ // and will be managed by us.
164
+ inline
165
+ void DC_set_data_with_ownership (DeltaChunk * dc , const uchar * data )
166
+ {
167
+ assert (data );
168
+ DC_deallocate_data (dc );
169
+ dc -> data = data ;
170
+ }
171
+
162
172
inline
163
173
ull DC_rbound (const DeltaChunk * dc )
164
174
{
@@ -214,7 +224,6 @@ void DC_offset_copy_to(const DeltaChunk* src, DeltaChunk* dest, ull ofs, ull siz
214
224
if (src -> data ){
215
225
DC_set_data (dest , src -> data + ofs , size , 0 );
216
226
} else {
217
- dest -> data = NULL ;
218
227
dest -> data_shared = 0 ;
219
228
}
220
229
}
@@ -825,6 +834,8 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
825
834
const unsigned long rbound = cp_off + cp_size ;
826
835
if (rbound < cp_size ||
827
836
rbound > base_size ){
837
+ // this really shouldn't happen
838
+ error = 1 ;
828
839
assert (0 );
829
840
break ;
830
841
}
@@ -834,16 +845,53 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
834
845
835
846
} else if (cmd ) {
836
847
// TODO: Compress nodes by parsing them in advance
837
- // NOTE: Compression only necessary for all other deltas, not
838
- // for the first one, as we will share the data. It really depends
839
- // What's faster
840
848
// Compression reduces fragmentation though, which is why we do it
841
849
// in all cases.
842
- DeltaChunk * dc = DCV_append (& dcv );
843
- DC_init (dc , tbw , cmd , 0 );
844
- DC_set_data (dc , data , cmd , is_shared_data );
845
- tbw += cmd ;
850
+ const uchar * add_start = data - 1 ;
851
+ const uchar * add_end = dend ;
852
+ ull num_bytes = cmd ;
846
853
data += cmd ;
854
+ ull num_chunks = 1 ;
855
+ while (data < dend ){
856
+ fprintf (stderr , "looping\n" );
857
+ const char c = * data ;
858
+ if (c & 0x80 ){
859
+ add_end = data ;
860
+ break ;
861
+ } else {
862
+ num_chunks += 1 ;
863
+ data += c + 1 ; // advance by 1 to skip add cmd
864
+ num_bytes += c ;
865
+ }
866
+ }
867
+
868
+ fprintf (stderr , "add bytes = %i\n" , (int )num_bytes );
869
+ #ifdef DEBUG
870
+ assert (add_end - add_start > 0 );
871
+ if (num_chunks > 1 ){
872
+ fprintf (stderr , "Compression worked, got %i bytes of %i chunks\n" , (int )num_bytes , (int )num_chunks );
873
+ }
874
+ #endif
875
+
876
+ DeltaChunk * dc = DCV_append (& dcv );
877
+ DC_init (dc , tbw , num_bytes , 0 );
878
+
879
+ // gather the data, or (possibly) share single blocks
880
+ if (num_chunks > 1 ){
881
+ uchar * dcdata = PyMem_Malloc (num_bytes );
882
+ while (add_start < add_end ){
883
+ const char bytes = * add_start ++ ;
884
+ fprintf (stderr , "Copying %i bytes\n" , bytes );
885
+ memcpy ((void * )dcdata , (void * )add_start , bytes );
886
+ dcdata += bytes ;
887
+ add_start += bytes ;
888
+ }
889
+ DC_set_data_with_ownership (dc , dcdata );
890
+ } else {
891
+ DC_set_data (dc , data - cmd , cmd , is_shared_data );
892
+ }
893
+
894
+ tbw += num_bytes ;
847
895
} else {
848
896
error = 1 ;
849
897
PyErr_SetString (PyExc_RuntimeError , "Encountered an unsupported delta cmd: 0" );
0 commit comments