forked from LAION-AI/Open-Assistant
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path__init__.py
44 lines (41 loc) · 2.24 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Plain-text corpora: short dataset name -> "<owner>/<dataset>" identifier.
# NOTE(review): the values look like Hugging Face Hub dataset repo ids —
# confirm against the code that consumes this mapping.
TEXT_DATASETS = {
    # Gutenberg eBooks in English
    "gutenberg_english": "sedthh/gutenberg_english",
    # Gutenberg eBooks in foreign languages
    "gutenberg_multilang": "sedthh/gutenberg_multilang",
    # TV and Movie dialogues and transcripts
    "tv_dialogue": "sedthh/tv_dialogue",
    # TV and Movie dialogues and transcripts from ForeverDreaming
    "fd_dialogue": "sedthh/fd_dialogue",
    # Thai classical literature texts
    "tlcv2.0_oa": "pythainlp/tlcv2.0_oa",
    # Isna Persian News
    "fa-isna-news": "pourmand1376/isna-news",
    # Farsi Wikipedia texts
    "fa-wikipedia": "pourmand1376/fa-wikipedia",
}
# Instruction-style datasets: short dataset name -> "<owner>/<dataset>"
# identifier.
# NOTE(review): the values look like Hugging Face Hub dataset repo ids —
# confirm against the code that consumes this mapping.
INSTRUCTION_DATASETS = {
    "humaneval_mbpp_codegen_qa": "OllieStanley/humaneval-mbpp-codegen-qa",
    "humaneval_mbpp_testgen_qa": "OllieStanley/humaneval-mbpp-testgen-qa",
    "grade_school_math_instructions": "qwedsacf/grade-school-math-instructions",
    "recipes": "dctanner/oa_recipes",
    "ubuntu_dialogue_qa": "sedthh/ubuntu_dialogue_qa",
    "cmu_wiki_qa": "sedthh/cmu_wiki_qa",
    "youtube_subs_howto100M": "totuta/youtube_subs_howto100M",
    "iapp_wiki_qa_squad": "wannaphong/iapp_wiki_qa_squad_oa",
    "zhihu-kol": "wangrui6/zhihu-kol",
    "tell_a_joke": "mikegarts/oa_tell_a_joke_20000",
    "oa_wiki_qa_bart_10000row": "michaelthwan/oa_wiki_qa_bart_10000row",
    "biostars_qa": "cannin/biostars_qa",
    "oa_leet10k": "ehartford/oa_leet10k",
    "LogicInference_OA": "KK04/LogicInference_OA",
    "oa_dolly_15k": "OllieStanley/oa_dolly_15k",
    "TSSB-3M": "zirui3/TSSB-3M-instructions",
    "poetry_instruction": "checkai/instruction-poems",
    "oa_stackexchange": "donfu/oa-stackexchange",
    "stable_diffusion_instructional_dataset": "MadVoyager/stable_diffusion_instructional_dataset",
    # NOTE(review): the key says "337" while the identifier says "377".
    # The key is kept unchanged because callers may look it up by name —
    # confirm which number is intended before renaming.
    "ru_riddles_337": "0x22almostEvil/ru-riddles-377",
    "instructional_codesearchnet_python": "Nan-Do/instructional_code-search-net-python",
    "tatoeba_mt_qna_oa": "0x22almostEvil/tatoeba-mt-qna-oa",
    "reasoning_bg_oa": "0x22almostEvil/reasoning_bg_oa",
    "reasoning_gsm_qna_oa": "0x22almostEvil/reasoning-gsm-qna-oa",
    "semantics_ws_qna_oa": "0x22almostEvil/semantics-ws-qna-oa",
}
# Safety datasets: short dataset name -> "<owner>/<dataset>" identifier.
# NOTE(review): the values look like Hugging Face Hub dataset repo ids —
# confirm against the code that consumes this mapping.
SAFETY_DATASETS = {
    "prosocial-dialog": "allenai/prosocial-dialog",
    "prosocial-confessions": "shahules786/prosocial-confessions",
}
# Multi-turn dialogue datasets: currently an empty mapping (no entries
# are registered in this module).
MULTI_TURN_DIALOG_DATASETS = {}