Skip to content

Commit 072cca7

Browse files
author
sasha@mysql.sashanet.com
committed
handle tree overflow in count(distinct)
test heap table/tree overflow in count(distinct)
1 parent ac379c4 commit 072cca7

File tree

5 files changed

+94
-4
lines changed

5 files changed

+94
-4
lines changed

mysql-test/r/count_distinct2.result

+8
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,11 @@ count(distinct n2) n1
7474
1 NULL
7575
1 1
7676
3 2
77+
count(distinct n)
78+
5000
79+
Variable_name Value
80+
Created_tmp_disk_tables 1
81+
count(distinct s)
82+
5000
83+
Variable_name Value
84+
Created_tmp_disk_tables 1
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
-O max_heap_table_size=16384

mysql-test/t/count_distinct2.test

+29
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,32 @@ select count(distinct n1), count(distinct n2) from t1;
4343

4444
select count(distinct n2), n1 from t1 group by n1;
4545
drop table t1;
46+
47+
# test the conversion from tree to MyISAM
48+
create table t1 (n int);
49+
let $1=5000;
50+
while ($1)
51+
{
52+
eval insert into t1 values($1);
53+
dec $1;
54+
}
55+
56+
flush status;
57+
select count(distinct n) from t1;
58+
show status like 'Created_tmp_disk_tables';
59+
drop table t1;
60+
61+
#test conversion from heap to MyISAM
62+
create table t1 (s text);
63+
let $1=5000;
64+
while ($1)
65+
{
66+
eval insert into t1 values('$1');
67+
dec $1;
68+
}
69+
70+
flush status;
71+
select count(distinct s) from t1;
72+
show status like 'Created_tmp_disk_tables';
73+
drop table t1;
74+

sql/item_sum.cc

+42-1
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,26 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
830830
return 0;
831831
}
832832

833+
// helper function for walking the tree when we dump it to MyISAM -
834+
// tree_walk will call it for each
835+
// leaf
833836

837+
// Tree-walk callback used when the in-memory tree is dumped into the
// table: tree_walk invokes it once per tree element.
//
//   key   - the element stored in the tree (record bytes past rec_offset)
//   count - unused
//   item  - the COUNT(DISTINCT) item that owns the tree and the table
//
// Returns 0 when the row was written or the write failed only because of
// a duplicate key; returns 1 on any other write error.
int dump_leaf(byte* key, uint32 count __attribute__((unused)),
              Item_sum_count_distinct* item)
{
  char* row = item->table->record[0];

  // The tree only holds the part of the record that follows the leading
  // rec_offset marker bytes, so rebuild those bytes (all 0xff) before
  // copying the key in behind them.
  memset(row, 0xff, item->rec_offset);
  memcpy(row + item->rec_offset, key, item->tree.size_of_element);

  const int error = item->table->file->write_row(row);
  if (!error)
    return 0;

  // Duplicate-key errors are tolerated; anything else aborts the walk.
  const bool is_duplicate = (error == HA_ERR_FOUND_DUPP_KEY ||
                             error == HA_ERR_FOUND_DUPP_UNIQUE);
  return is_duplicate ? 0 : 1;
}
834853

835854
Item_sum_count_distinct::~Item_sum_count_distinct()
836855
{
@@ -916,11 +935,29 @@ bool Item_sum_count_distinct::setup(THD *thd)
916935
key_len, compare_key, 0, 0);
917936
tree.cmp_arg = cmp_arg;
918937
use_tree = 1;
938+
939+
// the only time key_len could be 0 is if someone does
940+
// count(distinct) on a char(0) field - stupid thing to do,
941+
// but this has to be handled - otherwise someone can crash
942+
// the server with a DoS attack
943+
max_elements_in_tree = (key_len) ? max_heap_table_size/key_len :
944+
max_heap_table_size;
919945
}
920946

921947
return 0;
922948
}
923949

950+
int Item_sum_count_distinct::tree_to_myisam()
951+
{
952+
if(create_myisam_from_heap(table, tmp_table_param,
953+
HA_ERR_RECORD_FILE_FULL, 1) ||
954+
tree_walk(&tree, (tree_walk_action)&dump_leaf, (void*)this,
955+
left_root_right))
956+
return 1;
957+
delete_tree(&tree);
958+
use_tree = 0;
959+
return 0;
960+
}
924961

925962
void Item_sum_count_distinct::reset()
926963
{
@@ -947,7 +984,11 @@ bool Item_sum_count_distinct::add()
947984

948985
if(use_tree)
949986
{
950-
if(!tree_insert(&tree, table->record[0] + rec_offset, 0))
987+
// if the tree got too big, convert to MyISAM, otherwise
988+
// insert into the tree
989+
if((tree.elements_in_tree > max_elements_in_tree && tree_to_myisam())
990+
||
991+
!tree_insert(&tree, table->record[0] + rec_offset, 0))
951992
return 1;
952993
}
953994
else if ((error=table->file->write_row(table->record[0])))

sql/item_sum.h

+14-3
Original file line numberDiff line numberDiff line change
@@ -148,15 +148,26 @@ class Item_sum_count_distinct :public Item_sum_int
148148
bool fix_fields(THD *thd,TABLE_LIST *tables);
149149
TMP_TABLE_PARAM *tmp_table_param;
150150
TREE tree;
151-
bool use_tree; // If there are no blobs, we can use a tree, which
151+
uint max_elements_in_tree;
152+
// calculated based on max_heap_table_size. If reached,
153+
// walk the tree and dump it into MyISAM table
154+
155+
bool use_tree;
156+
// If there are no blobs, we can use a tree, which
152157
// is faster than heap table. In that case, we still use the table
153158
// to help get things set up, but we insert nothing in it
154-
int rec_offset; // the first few bytes of record ( at least one)
159+
160+
int rec_offset;
161+
// the first few bytes of record ( at least one)
155162
// are just markers for deleted and NULLs. We want to skip them since
156163
// they will just bloat the tree without providing any valuable info
157164

158-
friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
165+
int tree_to_myisam();
159166

167+
friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
168+
friend int dump_leaf(byte* key, uint32 count __attribute__((unused)),
169+
Item_sum_count_distinct* item);
170+
160171
public:
161172
Item_sum_count_distinct(List<Item> &list)
162173
:Item_sum_int(list),table(0),used_table_cache(~(table_map) 0),

0 commit comments

Comments
 (0)