# Collection of SQL Snippets Here are find some SQL queries to inspect the current OA postgres DB. # Basic Stats ```sql -- tables row counts (select 'user' as "table", count(*) from "user") union (select 'task', count(*) from task) union (select 'message_tree_state', count(*) from message_tree_state) union (select 'message_reaction', count(*) from message_reaction) union (select 'text_labels', count(*) from text_labels) union (select 'message', count(*) from message) union (select 'journal', count(*) from journal); ``` # Messages ```sql -- only human by role select role, count(*) from message where not deleted and review_result and not synthetic group by role; ``` ```sql -- language distribution of messages (incl. synthetic) select lang, count(*), synthetic from message where not deleted and review_result group by lang, synthetic; ``` ```sql -- only human generated messages by lang select lang, count(*) from message where not deleted and review_result and not synthetic group by lang; ``` ## Message Trees ```sql -- total count of message trees select count(*) from message_tree_state; ``` ```sql -- message tree counts by state select state, count(*) from message_tree_state group by state; ``` ```sql -- count of waiting initial prompts by language select m.lang, count(*) from message_tree_state mts join message m on mts.message_tree_id = m.id where mts.state = 'prompt_lottery_waiting' group by m.lang; ``` ```sql -- message trees by lang in ready_for_export or growing state select m.lang, mts.state, count(*) from message_tree_state mts join message m on mts.message_tree_id = m.id where mts.state in ('ready_for_export', 'growing') group by mts.state, m.lang order by lang, state; ``` ```sql -- select message tree counts select mts.message_tree_id, count(m.id), max(m.depth), count(m.id) filter (where m.role='prompter') as prompter, count(m.id) filter (where m.role='assistant') as assistant from message_tree_state mts join message m on mts.message_tree_id = m.message_tree_id where mts.state='growing' and not m.deleted and m.review_result=true and m.lang='en' and mts.active group by mts.message_tree_id order by count(m.id) desc; ``` ```sql -- show top 100 largest trees select mts.message_tree_id, mts.goal_tree_size, mts.state, count(m.id) as message_count from message_tree_state mts join message m on mts.message_tree_id = m.message_tree_id where not m.deleted and m.review_result=true group by mts.message_tree_id, mts.state order by count(m.id) desc limit 100; ``` ```sql -- active trees, current & goal_size select mts.message_tree_id, mts.state, mts.goal_tree_size, count(m.id) AS tree_size, max(m.depth) AS max_depth from message_tree_state mts join message m ON mts.message_tree_id = m.message_tree_id WHERE mts.active and not m.deleted and m.review_result group by mts.message_tree_id, mts.goal_tree_size; ``` ## Users ```sql -- count users that accepted tos select count(*) from "user" where tos_acceptance_date is not null; ``` ```sql -- last 25 active users select u.id, u.username, u.auth_method, u.display_name, u.last_activity_date, age(current_timestamp, last_activity_date) from "user" u WHERE u.last_activity_date is not null order by u.last_activity_date desc limit 25; select id, display_name, username, auth_method, last_activity_date from "user" where age(last_activity_date) < interval '1 minutes' order by last_activity_date desc limit 25; ``` ```sql -- count active users in last 5 mins select count(*) from "user" u where age(current_timestamp, last_activity_date) < interval '5 mins'; ``` ```sql -- total count of non-deleted messages (human + synth) select count(*) from message where deleted=false and review_result=true; ``` ```sql -- count max, mean message counts per tree for a given language with t(message_tree_id, tree_size, state) as (select mts.message_tree_id, count(m.id), mts.state from message_tree_state mts join message m on mts.message_tree_id = m.message_tree_id where not m.deleted and m.review_result=true and m.lang = 'en' group by mts.message_tree_id) select state, count(t.*) as trees, sum(t.tree_size) as total_msgs, max(t.tree_size), avg(t.tree_size) from t group by t.state; ``` ## Tasks ```sql -- average time between task creation and completion (select t.payload#>>'{payload, type}' as type, count(*), avg(r.created_date-t.created_date) from task t join message_reaction r on t.id = task_id where t.done and not t.skipped group by t.payload#>>'{payload, type}') union (select t.payload#>>'{payload, type}' as type, count(*), avg(l.created_date-t.created_date) from task t join text_labels l on t.id = l.task_id where t.done and not t.skipped group by t.payload#>>'{payload, type}') union ( select t.payload#>>'{payload, type}' as type, count(*), avg(m.created_date-t.created_date) from task t join message m on t.id = m.task_id where t.done and not t.skipped group by t.payload#>>'{payload, type}'); ``` ## Connections ```sql -- from https://dba.stackexchange.com/questions/161760/number-of-active-connections-and-remaining-connections select max_conn,used,res_for_super,max_conn-used-res_for_super res_for_normal from (select count(*) used from pg_stat_activity) t1, (select setting::int res_for_super from pg_settings where name=$$superuser_reserved_connections$$) t2, (select setting::int max_conn from pg_settings where name=$$max_connections$$) t3; ```