Skip to content

Commit 451c2b3

Browse files
authoredApr 17, 2023
add draft OpenAssistant OASST1 data notebook (LAION-AI#2551)
- minimal little notebook as a sort of "Getting Started" Notebook for the [dataset](https://huggingface.co/datasets/OpenAssistant/oasst1)
1 parent 82c46c9 commit 451c2b3

File tree

2 files changed

+567
-0
lines changed

2 files changed

+567
-0
lines changed
 
+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Open Assistant - OASST1 Dataset Notebooks
2+
3+
This folder contains some helper notebooks for playing around with the
4+
[Open Assistant - OASST1 Dataset](https://huggingface.co/datasets/OpenAssistant/oasst1)
5+
6+
- [`getting-started.ipynb`](./getting-started.ipynb) - Getting started with the
7+
dataset.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,560 @@
1+
{
2+
"cells": [
3+
{
4+
"attachments": {},
5+
"cell_type": "markdown",
6+
"metadata": {},
7+
"source": [
8+
"# Getting started with OpenAssistant OASST1 data\n",
9+
"\n",
10+
"- https://huggingface.co/datasets/OpenAssistant/oasst1"
11+
]
12+
},
13+
{
14+
"attachments": {},
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/master/notebooks/openassistant-oasst1/getting-started.ipynb)"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": 1,
24+
"metadata": {},
25+
"outputs": [],
26+
"source": [
27+
"# uncomment and run the line below to install the dependencies when running in Colab\n",
28+
"#!pip install datasets pandas"
29+
]
30+
},
31+
{
32+
"cell_type": "code",
33+
"execution_count": 2,
34+
"metadata": {},
35+
"outputs": [
36+
{
37+
"name": "stderr",
38+
"output_type": "stream",
39+
"text": [
40+
"c:\\Users\\andre\\Documents\\repos\\Open-Assistant\\venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
41+
" from .autonotebook import tqdm as notebook_tqdm\n"
42+
]
43+
}
44+
],
45+
"source": [
46+
"import pandas as pd\n",
47+
"from datasets import load_dataset\n",
48+
"\n",
49+
"# set some pandas options to make the output more readable\n",
50+
"pd.set_option(\"display.max_rows\", 500)\n",
51+
"pd.set_option(\"display.max_columns\", 500)\n",
52+
"pd.set_option(\"display.width\", 1000)"
53+
]
54+
},
55+
{
56+
"cell_type": "code",
57+
"execution_count": 3,
58+
"metadata": {},
59+
"outputs": [
60+
{
61+
"name": "stderr",
62+
"output_type": "stream",
63+
"text": [
64+
"Found cached dataset parquet (C:/Users/andre/.cache/huggingface/datasets/OpenAssistant___parquet/OpenAssistant--oasst1-2e4c5fea546c88d6/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n",
65+
"100%|██████████| 2/2 [00:00<00:00, 15.88it/s]\n"
66+
]
67+
}
68+
],
69+
"source": [
70+
"# load the dataset with the Hugging Face datasets library\n",
71+
"ds = load_dataset(\"OpenAssistant/oasst1\")"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": 4,
77+
"metadata": {},
78+
"outputs": [],
79+
"source": [
80+
"# let's convert the train split to a pandas DataFrame\n",
81+
"df = ds[\"train\"].to_pandas()"
82+
]
83+
},
84+
{
85+
"cell_type": "code",
86+
"execution_count": 5,
87+
"metadata": {},
88+
"outputs": [
89+
{
90+
"name": "stdout",
91+
"output_type": "stream",
92+
"text": [
93+
"<class 'pandas.core.frame.DataFrame'>\n",
94+
"RangeIndex: 84437 entries, 0 to 84436\n",
95+
"Data columns (total 18 columns):\n",
96+
" # Column Non-Null Count Dtype \n",
97+
"--- ------ -------------- ----- \n",
98+
" 0 message_id 84437 non-null object \n",
99+
" 1 parent_id 74591 non-null object \n",
100+
" 2 user_id 84437 non-null object \n",
101+
" 3 created_date 84437 non-null object \n",
102+
" 4 text 84437 non-null object \n",
103+
" 5 role 84437 non-null object \n",
104+
" 6 lang 84437 non-null object \n",
105+
" 7 review_count 84437 non-null int32 \n",
106+
" 8 review_result 83732 non-null object \n",
107+
" 9 deleted 84437 non-null bool \n",
108+
" 10 rank 48730 non-null float64\n",
109+
" 11 synthetic 84437 non-null bool \n",
110+
" 12 model_name 0 non-null object \n",
111+
" 13 detoxify 72297 non-null object \n",
112+
" 14 message_tree_id 84437 non-null object \n",
113+
" 15 tree_state 84437 non-null object \n",
114+
" 16 emojis 71496 non-null object \n",
115+
" 17 labels 84199 non-null object \n",
116+
"dtypes: bool(2), float64(1), int32(1), object(14)\n",
117+
"memory usage: 10.1+ MB\n"
118+
]
119+
}
120+
],
121+
"source": [
122+
"# look at the df info\n",
123+
"df.info(verbose=True, memory_usage=True, show_counts=True)"
124+
]
125+
},
126+
{
127+
"cell_type": "code",
128+
"execution_count": 9,
129+
"metadata": {},
130+
"outputs": [
131+
{
132+
"data": {
133+
"text/plain": [
134+
"{35238: {'message_id': 'cb9c1893-2eda-42ee-b0a7-adeb0e142867',\n",
135+
" 'parent_id': 'd24480be-94bd-4dc8-b7df-5eb973ed786f',\n",
136+
" 'user_id': '50f76166-c558-47be-80b1-5e2b2df6ff09',\n",
137+
" 'created_date': '2023-02-07T11:53:26.527725+00:00',\n",
138+
" 'text': 'Good job, are there other riddles like this that you know?',\n",
139+
" 'role': 'prompter',\n",
140+
" 'lang': 'en',\n",
141+
" 'review_count': 3,\n",
142+
" 'review_result': True,\n",
143+
" 'deleted': False,\n",
144+
" 'rank': nan,\n",
145+
" 'synthetic': False,\n",
146+
" 'model_name': None,\n",
147+
" 'detoxify': {'toxicity': 0.021441340446472168,\n",
148+
" 'severe_toxicity': 3.3909338526427746e-05,\n",
149+
" 'obscene': 0.0014250462409108877,\n",
150+
" 'identity_attack': 0.0003495119162835181,\n",
151+
" 'insult': 0.012425446882843971,\n",
152+
" 'threat': 5.629667793982662e-05,\n",
153+
" 'sexual_explicit': 7.260362326633185e-05},\n",
154+
" 'message_tree_id': '63f26a43-8b7d-4bb2-ad36-4309034c84cd',\n",
155+
" 'tree_state': 'ready_for_export',\n",
156+
" 'emojis': {'name': array(['+1', '_skip_reply'], dtype=object),\n",
157+
" 'count': array([2, 1])},\n",
158+
" 'labels': {'name': array(['spam', 'lang_mismatch', 'pii', 'not_appropriate', 'hate_speech',\n",
159+
" 'sexual_content', 'quality', 'toxicity', 'humor', 'creativity',\n",
160+
" 'violence'], dtype=object),\n",
161+
" 'value': array([0. , 0. , 0. , 0. , 0. ,\n",
162+
" 0. , 0.66666667, 0.375 , 0.75 , 0.25 ,\n",
163+
" 0.125 ]),\n",
164+
" 'count': array([3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2])}}}"
165+
]
166+
},
167+
"execution_count": 9,
168+
"metadata": {},
169+
"output_type": "execute_result"
170+
}
171+
],
172+
"source": [
173+
"# look at one randomly sampled row as a nested dict, which is easier to read than a wide table row\n",
174+
"df.sample(1).transpose().to_dict()"
175+
]
176+
},
177+
{
178+
"cell_type": "code",
179+
"execution_count": 7,
180+
"metadata": {},
181+
"outputs": [
182+
{
183+
"name": "stdout",
184+
"output_type": "stream",
185+
"text": [
186+
"e63d94b6-6f92-410f-b9d8-5cbd9367f2e3\n"
187+
]
188+
}
189+
],
190+
"source": [
191+
"# let's grab the message tree id of a randomly sampled row\n",
192+
"message_tree_id = df[\"message_tree_id\"].sample(1).values[0]\n",
193+
"print(message_tree_id)"
194+
]
195+
},
196+
{
197+
"cell_type": "code",
198+
"execution_count": 8,
199+
"metadata": {},
200+
"outputs": [
201+
{
202+
"data": {
203+
"text/html": [
204+
"<div>\n",
205+
"<style scoped>\n",
206+
" .dataframe tbody tr th:only-of-type {\n",
207+
" vertical-align: middle;\n",
208+
" }\n",
209+
"\n",
210+
" .dataframe tbody tr th {\n",
211+
" vertical-align: top;\n",
212+
" }\n",
213+
"\n",
214+
" .dataframe thead th {\n",
215+
" text-align: right;\n",
216+
" }\n",
217+
"</style>\n",
218+
"<table border=\"1\" class=\"dataframe\">\n",
219+
" <thead>\n",
220+
" <tr style=\"text-align: right;\">\n",
221+
" <th></th>\n",
222+
" <th>message_id</th>\n",
223+
" <th>parent_id</th>\n",
224+
" <th>user_id</th>\n",
225+
" <th>created_date</th>\n",
226+
" <th>text</th>\n",
227+
" <th>role</th>\n",
228+
" <th>lang</th>\n",
229+
" <th>review_count</th>\n",
230+
" <th>review_result</th>\n",
231+
" <th>deleted</th>\n",
232+
" <th>rank</th>\n",
233+
" <th>synthetic</th>\n",
234+
" <th>model_name</th>\n",
235+
" <th>detoxify</th>\n",
236+
" <th>message_tree_id</th>\n",
237+
" <th>tree_state</th>\n",
238+
" <th>emojis</th>\n",
239+
" <th>labels</th>\n",
240+
" </tr>\n",
241+
" </thead>\n",
242+
" <tbody>\n",
243+
" <tr>\n",
244+
" <th>71993</th>\n",
245+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
246+
" <td>None</td>\n",
247+
" <td>a63181b7-107c-4fcc-913c-6871e68a9ffe</td>\n",
248+
" <td>2023-02-05T22:58:35.549190+00:00</td>\n",
249+
" <td>can you tell me a few good strategies to win a...</td>\n",
250+
" <td>prompter</td>\n",
251+
" <td>en</td>\n",
252+
" <td>3</td>\n",
253+
" <td>True</td>\n",
254+
" <td>False</td>\n",
255+
" <td>NaN</td>\n",
256+
" <td>False</td>\n",
257+
" <td>None</td>\n",
258+
" <td>{'toxicity': 0.0033605224452912807, 'severe_to...</td>\n",
259+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
260+
" <td>ready_for_export</td>\n",
261+
" <td>{'name': ['+1', '_skip_reply', '_skip_ranking'...</td>\n",
262+
" <td>{'name': ['spam', 'lang_mismatch', 'pii', 'not...</td>\n",
263+
" </tr>\n",
264+
" <tr>\n",
265+
" <th>71996</th>\n",
266+
" <td>8a58e588-f3c4-4190-a1e4-538c86abb17b</td>\n",
267+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
268+
" <td>df6e7d47-7c0e-4958-8bc4-4558af0ece38</td>\n",
269+
" <td>2023-02-06T03:37:06.246378+00:00</td>\n",
270+
" <td>Sure, here are a few strategies that you can u...</td>\n",
271+
" <td>assistant</td>\n",
272+
" <td>en</td>\n",
273+
" <td>3</td>\n",
274+
" <td>True</td>\n",
275+
" <td>False</td>\n",
276+
" <td>1.0</td>\n",
277+
" <td>False</td>\n",
278+
" <td>None</td>\n",
279+
" <td>{'toxicity': 0.000288925803033635, 'severe_tox...</td>\n",
280+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
281+
" <td>ready_for_export</td>\n",
282+
" <td>{'name': ['+1'], 'count': [3]}</td>\n",
283+
" <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
284+
" </tr>\n",
285+
" <tr>\n",
286+
" <th>71994</th>\n",
287+
" <td>4c27698c-01c7-42db-ac25-a2d6af5da4f4</td>\n",
288+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
289+
" <td>7de84fb1-4cf5-4142-989d-f490fc526bfd</td>\n",
290+
" <td>2023-02-06T03:56:59.785890+00:00</td>\n",
291+
" <td>Certainly! Here are a few effective tactics fo...</td>\n",
292+
" <td>assistant</td>\n",
293+
" <td>en</td>\n",
294+
" <td>3</td>\n",
295+
" <td>True</td>\n",
296+
" <td>False</td>\n",
297+
" <td>0.0</td>\n",
298+
" <td>False</td>\n",
299+
" <td>None</td>\n",
300+
" <td>{'toxicity': 0.00032153952633962035, 'severe_t...</td>\n",
301+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
302+
" <td>ready_for_export</td>\n",
303+
" <td>{'name': ['_skip_reply'], 'count': [1]}</td>\n",
304+
" <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
305+
" </tr>\n",
306+
" <tr>\n",
307+
" <th>72000</th>\n",
308+
" <td>d4154bbe-d137-4f3c-9382-8a3b306402f8</td>\n",
309+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
310+
" <td>f14aa565-a0ed-4d18-b949-0daccb549f03</td>\n",
311+
" <td>2023-02-06T04:21:18.793582+00:00</td>\n",
312+
" <td>Settlers of Catan is a game of strategy, teamw...</td>\n",
313+
" <td>assistant</td>\n",
314+
" <td>en</td>\n",
315+
" <td>3</td>\n",
316+
" <td>True</td>\n",
317+
" <td>False</td>\n",
318+
" <td>2.0</td>\n",
319+
" <td>False</td>\n",
320+
" <td>None</td>\n",
321+
" <td>{'toxicity': 0.000376795360352844, 'severe_tox...</td>\n",
322+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
323+
" <td>ready_for_export</td>\n",
324+
" <td>{'name': ['+1', '-1'], 'count': [1, 1]}</td>\n",
325+
" <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
326+
" </tr>\n",
327+
" <tr>\n",
328+
" <th>71997</th>\n",
329+
" <td>50416bbf-bf73-4022-833c-10c7ca062ad6</td>\n",
330+
" <td>8a58e588-f3c4-4190-a1e4-538c86abb17b</td>\n",
331+
" <td>0b53f16f-910a-453b-8101-dfe64c6dd30c</td>\n",
332+
" <td>2023-02-06T06:23:21.276590+00:00</td>\n",
333+
" <td>Do the Settlers of Catan expansions require di...</td>\n",
334+
" <td>prompter</td>\n",
335+
" <td>en</td>\n",
336+
" <td>3</td>\n",
337+
" <td>True</td>\n",
338+
" <td>False</td>\n",
339+
" <td>NaN</td>\n",
340+
" <td>False</td>\n",
341+
" <td>None</td>\n",
342+
" <td>{'toxicity': 0.0004958086065016687, 'severe_to...</td>\n",
343+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
344+
" <td>ready_for_export</td>\n",
345+
" <td>{'name': ['_skip_reply', '_skip_ranking'], 'co...</td>\n",
346+
" <td>{'name': ['spam', 'lang_mismatch', 'pii', 'not...</td>\n",
347+
" </tr>\n",
348+
" <tr>\n",
349+
" <th>72001</th>\n",
350+
" <td>81ba6358-9ac8-45b2-a202-723b605b8c95</td>\n",
351+
" <td>d4154bbe-d137-4f3c-9382-8a3b306402f8</td>\n",
352+
" <td>6a4224c2-6b7f-4981-85d3-5b26a75a5324</td>\n",
353+
" <td>2023-02-06T08:25:07.775154+00:00</td>\n",
354+
" <td>What are good long term strategies for getting...</td>\n",
355+
" <td>prompter</td>\n",
356+
" <td>en</td>\n",
357+
" <td>3</td>\n",
358+
" <td>True</td>\n",
359+
" <td>False</td>\n",
360+
" <td>NaN</td>\n",
361+
" <td>False</td>\n",
362+
" <td>None</td>\n",
363+
" <td>{'toxicity': 0.00035797784221358597, 'severe_t...</td>\n",
364+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
365+
" <td>ready_for_export</td>\n",
366+
" <td>{'name': ['+1', '_skip_reply', '_skip_ranking'...</td>\n",
367+
" <td>{'name': ['spam', 'lang_mismatch', 'pii', 'not...</td>\n",
368+
" </tr>\n",
369+
" <tr>\n",
370+
" <th>71999</th>\n",
371+
" <td>ed3fe459-77eb-4dd3-9672-46e5dcddbd2e</td>\n",
372+
" <td>50416bbf-bf73-4022-833c-10c7ca062ad6</td>\n",
373+
" <td>224c3d09-c385-46cd-93e2-5f3682ac2624</td>\n",
374+
" <td>2023-02-06T08:43:38.464529+00:00</td>\n",
375+
" <td>The expansions in \"Settlers of Catan\" can have...</td>\n",
376+
" <td>assistant</td>\n",
377+
" <td>en</td>\n",
378+
" <td>3</td>\n",
379+
" <td>True</td>\n",
380+
" <td>False</td>\n",
381+
" <td>1.0</td>\n",
382+
" <td>False</td>\n",
383+
" <td>None</td>\n",
384+
" <td>{'toxicity': 0.00036860740510746837, 'severe_t...</td>\n",
385+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
386+
" <td>ready_for_export</td>\n",
387+
" <td>{'name': ['+1'], 'count': [1]}</td>\n",
388+
" <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
389+
" </tr>\n",
390+
" <tr>\n",
391+
" <th>71998</th>\n",
392+
" <td>0936e043-4cbb-4441-adaa-e5443c4cd99d</td>\n",
393+
" <td>50416bbf-bf73-4022-833c-10c7ca062ad6</td>\n",
394+
" <td>03d8200e-9023-423c-a3f0-47828a5b4189</td>\n",
395+
" <td>2023-02-06T08:58:01.473538+00:00</td>\n",
396+
" <td>Yes, the expansions for Settlers of Catan can ...</td>\n",
397+
" <td>assistant</td>\n",
398+
" <td>en</td>\n",
399+
" <td>3</td>\n",
400+
" <td>True</td>\n",
401+
" <td>False</td>\n",
402+
" <td>0.0</td>\n",
403+
" <td>False</td>\n",
404+
" <td>None</td>\n",
405+
" <td>{'toxicity': 0.00033097557025030255, 'severe_t...</td>\n",
406+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
407+
" <td>ready_for_export</td>\n",
408+
" <td>{'name': ['_skip_labeling'], 'count': [1]}</td>\n",
409+
" <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
410+
" </tr>\n",
411+
" <tr>\n",
412+
" <th>71995</th>\n",
413+
" <td>43431488-6f0f-481e-9001-8407ab25b6cf</td>\n",
414+
" <td>4c27698c-01c7-42db-ac25-a2d6af5da4f4</td>\n",
415+
" <td>ecaa4818-48da-45a8-bab5-ae6de585ad8a</td>\n",
416+
" <td>2023-02-06T10:19:04.483254+00:00</td>\n",
417+
" <td>Is there an online community? Can i play it on...</td>\n",
418+
" <td>prompter</td>\n",
419+
" <td>en</td>\n",
420+
" <td>3</td>\n",
421+
" <td>True</td>\n",
422+
" <td>False</td>\n",
423+
" <td>NaN</td>\n",
424+
" <td>False</td>\n",
425+
" <td>None</td>\n",
426+
" <td>{'toxicity': 0.000970872410107404, 'severe_tox...</td>\n",
427+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
428+
" <td>ready_for_export</td>\n",
429+
" <td>None</td>\n",
430+
" <td>{'name': ['spam', 'lang_mismatch', 'pii', 'not...</td>\n",
431+
" </tr>\n",
432+
" <tr>\n",
433+
" <th>72003</th>\n",
434+
" <td>c0e39213-4fd0-4612-ab34-f5bc2b74300e</td>\n",
435+
" <td>81ba6358-9ac8-45b2-a202-723b605b8c95</td>\n",
436+
" <td>8a9d78b7-5b39-4033-8829-1b5486fdba76</td>\n",
437+
" <td>2023-02-06T11:53:46.248417+00:00</td>\n",
438+
" <td>A good long term strategy for getting the long...</td>\n",
439+
" <td>assistant</td>\n",
440+
" <td>en</td>\n",
441+
" <td>3</td>\n",
442+
" <td>True</td>\n",
443+
" <td>False</td>\n",
444+
" <td>1.0</td>\n",
445+
" <td>False</td>\n",
446+
" <td>None</td>\n",
447+
" <td>{'toxicity': 0.0003667851269710809, 'severe_to...</td>\n",
448+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
449+
" <td>ready_for_export</td>\n",
450+
" <td>{'name': ['+1', '_skip_labeling'], 'count': [3...</td>\n",
451+
" <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
452+
" </tr>\n",
453+
" <tr>\n",
454+
" <th>72004</th>\n",
455+
" <td>f11e5cc5-2c08-4450-b619-89b8c8ec2862</td>\n",
456+
" <td>81ba6358-9ac8-45b2-a202-723b605b8c95</td>\n",
457+
" <td>e14803c4-5c0d-49f3-808d-f1b6dc80e8ed</td>\n",
458+
" <td>2023-02-06T13:18:54.555828+00:00</td>\n",
459+
" <td>In the board game \"Catan,\" \"the longest road\" ...</td>\n",
460+
" <td>assistant</td>\n",
461+
" <td>en</td>\n",
462+
" <td>3</td>\n",
463+
" <td>True</td>\n",
464+
" <td>False</td>\n",
465+
" <td>2.0</td>\n",
466+
" <td>False</td>\n",
467+
" <td>None</td>\n",
468+
" <td>{'toxicity': 0.0003078073204960674, 'severe_to...</td>\n",
469+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
470+
" <td>ready_for_export</td>\n",
471+
" <td>{'name': ['_skip_labeling'], 'count': [1]}</td>\n",
472+
" <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
473+
" </tr>\n",
474+
" <tr>\n",
475+
" <th>72002</th>\n",
476+
" <td>7a60b98a-e15b-44b6-a049-63b96ce3cf43</td>\n",
477+
" <td>81ba6358-9ac8-45b2-a202-723b605b8c95</td>\n",
478+
" <td>f53ff129-eb80-4bfb-9468-ff6b6630d356</td>\n",
479+
" <td>2023-02-06T13:28:06.829895+00:00</td>\n",
480+
" <td>Getting the Longest Road is a key victory cond...</td>\n",
481+
" <td>assistant</td>\n",
482+
" <td>en</td>\n",
483+
" <td>3</td>\n",
484+
" <td>True</td>\n",
485+
" <td>False</td>\n",
486+
" <td>0.0</td>\n",
487+
" <td>False</td>\n",
488+
" <td>None</td>\n",
489+
" <td>{'toxicity': 0.0003417894768062979, 'severe_to...</td>\n",
490+
" <td>e63d94b6-6f92-410f-b9d8-5cbd9367f2e3</td>\n",
491+
" <td>ready_for_export</td>\n",
492+
" <td>None</td>\n",
493+
" <td>{'name': ['spam', 'fails_task', 'lang_mismatch...</td>\n",
494+
" </tr>\n",
495+
" </tbody>\n",
496+
"</table>\n",
497+
"</div>"
498+
],
499+
"text/plain": [
500+
" message_id parent_id user_id created_date text role lang review_count review_result deleted rank synthetic model_name detoxify message_tree_id tree_state emojis labels\n",
501+
"71993 e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 None a63181b7-107c-4fcc-913c-6871e68a9ffe 2023-02-05T22:58:35.549190+00:00 can you tell me a few good strategies to win a... prompter en 3 True False NaN False None {'toxicity': 0.0033605224452912807, 'severe_to... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['+1', '_skip_reply', '_skip_ranking'... {'name': ['spam', 'lang_mismatch', 'pii', 'not...\n",
502+
"71996 8a58e588-f3c4-4190-a1e4-538c86abb17b e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 df6e7d47-7c0e-4958-8bc4-4558af0ece38 2023-02-06T03:37:06.246378+00:00 Sure, here are a few strategies that you can u... assistant en 3 True False 1.0 False None {'toxicity': 0.000288925803033635, 'severe_tox... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['+1'], 'count': [3]} {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
503+
"71994 4c27698c-01c7-42db-ac25-a2d6af5da4f4 e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 7de84fb1-4cf5-4142-989d-f490fc526bfd 2023-02-06T03:56:59.785890+00:00 Certainly! Here are a few effective tactics fo... assistant en 3 True False 0.0 False None {'toxicity': 0.00032153952633962035, 'severe_t... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['_skip_reply'], 'count': [1]} {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
504+
"72000 d4154bbe-d137-4f3c-9382-8a3b306402f8 e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 f14aa565-a0ed-4d18-b949-0daccb549f03 2023-02-06T04:21:18.793582+00:00 Settlers of Catan is a game of strategy, teamw... assistant en 3 True False 2.0 False None {'toxicity': 0.000376795360352844, 'severe_tox... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['+1', '-1'], 'count': [1, 1]} {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
505+
"71997 50416bbf-bf73-4022-833c-10c7ca062ad6 8a58e588-f3c4-4190-a1e4-538c86abb17b 0b53f16f-910a-453b-8101-dfe64c6dd30c 2023-02-06T06:23:21.276590+00:00 Do the Settlers of Catan expansions require di... prompter en 3 True False NaN False None {'toxicity': 0.0004958086065016687, 'severe_to... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['_skip_reply', '_skip_ranking'], 'co... {'name': ['spam', 'lang_mismatch', 'pii', 'not...\n",
506+
"72001 81ba6358-9ac8-45b2-a202-723b605b8c95 d4154bbe-d137-4f3c-9382-8a3b306402f8 6a4224c2-6b7f-4981-85d3-5b26a75a5324 2023-02-06T08:25:07.775154+00:00 What are good long term strategies for getting... prompter en 3 True False NaN False None {'toxicity': 0.00035797784221358597, 'severe_t... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['+1', '_skip_reply', '_skip_ranking'... {'name': ['spam', 'lang_mismatch', 'pii', 'not...\n",
507+
"71999 ed3fe459-77eb-4dd3-9672-46e5dcddbd2e 50416bbf-bf73-4022-833c-10c7ca062ad6 224c3d09-c385-46cd-93e2-5f3682ac2624 2023-02-06T08:43:38.464529+00:00 The expansions in \"Settlers of Catan\" can have... assistant en 3 True False 1.0 False None {'toxicity': 0.00036860740510746837, 'severe_t... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['+1'], 'count': [1]} {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
508+
"71998 0936e043-4cbb-4441-adaa-e5443c4cd99d 50416bbf-bf73-4022-833c-10c7ca062ad6 03d8200e-9023-423c-a3f0-47828a5b4189 2023-02-06T08:58:01.473538+00:00 Yes, the expansions for Settlers of Catan can ... assistant en 3 True False 0.0 False None {'toxicity': 0.00033097557025030255, 'severe_t... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['_skip_labeling'], 'count': [1]} {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
509+
"71995 43431488-6f0f-481e-9001-8407ab25b6cf 4c27698c-01c7-42db-ac25-a2d6af5da4f4 ecaa4818-48da-45a8-bab5-ae6de585ad8a 2023-02-06T10:19:04.483254+00:00 Is there an online community? Can i play it on... prompter en 3 True False NaN False None {'toxicity': 0.000970872410107404, 'severe_tox... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export None {'name': ['spam', 'lang_mismatch', 'pii', 'not...\n",
510+
"72003 c0e39213-4fd0-4612-ab34-f5bc2b74300e 81ba6358-9ac8-45b2-a202-723b605b8c95 8a9d78b7-5b39-4033-8829-1b5486fdba76 2023-02-06T11:53:46.248417+00:00 A good long term strategy for getting the long... assistant en 3 True False 1.0 False None {'toxicity': 0.0003667851269710809, 'severe_to... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['+1', '_skip_labeling'], 'count': [3... {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
511+
"72004 f11e5cc5-2c08-4450-b619-89b8c8ec2862 81ba6358-9ac8-45b2-a202-723b605b8c95 e14803c4-5c0d-49f3-808d-f1b6dc80e8ed 2023-02-06T13:18:54.555828+00:00 In the board game \"Catan,\" \"the longest road\" ... assistant en 3 True False 2.0 False None {'toxicity': 0.0003078073204960674, 'severe_to... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export {'name': ['_skip_labeling'], 'count': [1]} {'name': ['spam', 'fails_task', 'lang_mismatch...\n",
512+
"72002 7a60b98a-e15b-44b6-a049-63b96ce3cf43 81ba6358-9ac8-45b2-a202-723b605b8c95 f53ff129-eb80-4bfb-9468-ff6b6630d356 2023-02-06T13:28:06.829895+00:00 Getting the Longest Road is a key victory cond... assistant en 3 True False 0.0 False None {'toxicity': 0.0003417894768062979, 'severe_to... e63d94b6-6f92-410f-b9d8-5cbd9367f2e3 ready_for_export None {'name': ['spam', 'fails_task', 'lang_mismatch..."
513+
]
514+
},
515+
"execution_count": 8,
516+
"metadata": {},
517+
"output_type": "execute_result"
518+
}
519+
],
520+
"source": [
521+
"# look at all data for this message tree\n",
522+
"df.query(f\"message_tree_id == '{message_tree_id}'\").sort_values(\"created_date\")"
523+
]
524+
},
525+
{
526+
"cell_type": "code",
527+
"execution_count": null,
528+
"metadata": {},
529+
"outputs": [],
530+
"source": []
531+
}
532+
],
533+
"metadata": {
534+
"kernelspec": {
535+
"display_name": "Python 3",
536+
"language": "python",
537+
"name": "python3"
538+
},
539+
"language_info": {
540+
"codemirror_mode": {
541+
"name": "ipython",
542+
"version": 3
543+
},
544+
"file_extension": ".py",
545+
"mimetype": "text/x-python",
546+
"name": "python",
547+
"nbconvert_exporter": "python",
548+
"pygments_lexer": "ipython3",
549+
"version": "3.10.7"
550+
},
551+
"orig_nbformat": 4,
552+
"vscode": {
553+
"interpreter": {
554+
"hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4"
555+
}
556+
}
557+
},
558+
"nbformat": 4,
559+
"nbformat_minor": 2
560+
}

0 commit comments

Comments
 (0)
Please sign in to comment.