forked from LAION-AI/Open-Assistant
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.py
287 lines (221 loc) · 10.2 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
from pathlib import Path
from typing import Any, Dict, List, Optional
from oasst_shared.schemas.protocol import TextLabel
from pydantic import AnyHttpUrl, BaseModel, BaseSettings, FilePath, PostgresDsn, validator
class TreeManagerConfiguration(BaseModel):
"""TreeManager configuration settings"""
max_active_trees: int = 10
"""Maximum number of concurrently active message trees in the database.
No new initial prompt tasks are handed out to users if this
number is reached."""
max_initial_prompt_review: int = 100
"""Maximum number of initial prompts under review before no more initial prompt tasks will be handed out."""
max_tree_depth: int = 3
"""Maximum depth of message tree."""
max_children_count: int = 3
"""Maximum number of reply messages per tree node."""
num_prompter_replies: int = 1
"""Number of prompter replies to collect per assistant reply."""
goal_tree_size: int = 12
"""Total number of messages to gather per tree."""
random_goal_tree_size: bool = False
"""If set to true goal tree sizes will be generated randomly within range [min_goal_tree_size, goal_tree_size]."""
min_goal_tree_size: int = 5
"""Minimum tree size for random goal sizes."""
num_reviews_initial_prompt: int = 3
"""Number of peer review checks to collect in INITIAL_PROMPT_REVIEW state."""
num_reviews_reply: int = 3
"""Number of peer review checks to collect per reply (other than initial_prompt)."""
auto_mod_enabled: bool = True
"""Flag to enable/disable auto moderation."""
auto_mod_max_skip_reply: int = 25
"""Automatically set tree state to `halted_by_moderator` when more than the specified number
of users skip replying to a message. (auto moderation)"""
auto_mod_red_flags: int = 4
"""Delete messages that receive more than this number of red flags if it is a reply or
set the tree to `aborted_low_grade` when a prompt is flagged. (auto moderation)"""
p_full_labeling_review_prompt: float = 1.0
"""Probability of full text-labeling (instead of mandatory only) for initial prompts."""
p_full_labeling_review_reply_assistant: float = 1.0
"""Probability of full text-labeling (instead of mandatory only) for assistant replies."""
p_full_labeling_review_reply_prompter: float = 0.25
"""Probability of full text-labeling (instead of mandatory only) for prompter replies."""
acceptance_threshold_initial_prompt: float = 0.6
"""Threshold for accepting an initial prompt."""
acceptance_threshold_reply: float = 0.6
"""Threshold for accepting a reply."""
num_required_rankings: int = 3
"""Number of rankings in which the message participated."""
p_activate_backlog_tree: float = 0.1
"""Probability to activate a message tree in BACKLOG_RANKING state when another tree enters
a terminal state."""
min_active_rankings_per_lang: int = 0
"""When the number of active ranking tasks is below this value when a tree enters a terminal
state an available trees in BACKLOG_RANKING will be activated (i.e. enters the RANKING state)."""
labels_initial_prompt: list[TextLabel] = [
TextLabel.spam,
TextLabel.lang_mismatch,
TextLabel.quality,
TextLabel.creativity,
TextLabel.humor,
TextLabel.toxicity,
TextLabel.violence,
TextLabel.not_appropriate,
TextLabel.pii,
TextLabel.hate_speech,
TextLabel.sexual_content,
]
labels_assistant_reply: list[TextLabel] = [
TextLabel.spam,
TextLabel.lang_mismatch,
TextLabel.fails_task,
TextLabel.quality,
TextLabel.helpfulness,
TextLabel.creativity,
TextLabel.humor,
TextLabel.toxicity,
TextLabel.violence,
TextLabel.not_appropriate,
TextLabel.pii,
TextLabel.hate_speech,
TextLabel.sexual_content,
]
labels_prompter_reply: list[TextLabel] = [
TextLabel.spam,
TextLabel.lang_mismatch,
TextLabel.quality,
TextLabel.creativity,
TextLabel.humor,
TextLabel.toxicity,
TextLabel.violence,
TextLabel.not_appropriate,
TextLabel.pii,
TextLabel.hate_speech,
TextLabel.sexual_content,
]
mandatory_labels_initial_prompt: Optional[list[TextLabel]] = [TextLabel.spam]
"""Mandatory labels in text-labeling tasks for initial prompts."""
mandatory_labels_assistant_reply: Optional[list[TextLabel]] = [TextLabel.spam]
"""Mandatory labels in text-labeling tasks for assistant replies."""
mandatory_labels_prompter_reply: Optional[list[TextLabel]] = [TextLabel.spam]
"""Mandatory labels in text-labeling tasks for prompter replies."""
rank_prompter_replies: bool = False
lonely_children_count: int = 2
"""Number of children below which parents are preferred during sampling for reply tasks."""
p_lonely_child_extension: float = 0.75
"""Probability to select a prompter message parent with less than lonely_children_count children."""
recent_tasks_span_sec: int = 5 * 60 # 5 min
"""Time in seconds of recent tasks to consider for exclusion during task selection."""
max_pending_tasks_per_user: int = 8
"""Maximum number of pending tasks (neither canceled nor completed) by a single user within
the time span defined by `recent_tasks_span_sec`."""
max_prompt_lottery_waiting: int = 250
"""Maximum number of prompts in prompt_lottery_waiting state per language. If this value
is exceeded no new initial prompt tasks for that language are generated."""
init_prompt_disabled_langs: str = ""
@property
def init_prompt_disabled_langs_list(self) -> list[str]:
return self.init_prompt_disabled_langs.split(",")
class Settings(BaseSettings):
PROJECT_NAME: str = "open-assistant backend"
API_V1_STR: str = "/api/v1"
OFFICIAL_WEB_API_KEY: str = "1234"
# Encryption fields for handling the web generated JSON Web Tokens.
# These fields need to be shared with the web's auth settings in order to
# correctly decrypt the web tokens.
AUTH_INFO: bytes = b"NextAuth.js Generated Encryption Key"
AUTH_SALT: bytes = b""
AUTH_LENGTH: int = 32
AUTH_SECRET: bytes = b"O/M2uIbGj+lDD2oyNa8ax4jEOJqCPJzO53UbWShmq98="
AUTH_COOKIE_NAME: str = "next-auth.session-token"
AUTH_ALGORITHM: str = "HS256"
AUTH_ACCESS_TOKEN_EXPIRE_MINUTES: int = 30
AUTH_DISCORD_CLIENT_ID: str = ""
AUTH_DISCORD_CLIENT_SECRET: str = ""
POSTGRES_HOST: str = "localhost"
POSTGRES_PORT: str = "5432"
POSTGRES_USER: str = "postgres"
POSTGRES_PASSWORD: str = "postgres"
POSTGRES_DB: str = "postgres"
DATABASE_URI: Optional[PostgresDsn] = None
DATABASE_MAX_TX_RETRY_COUNT: int = 3
DATABASE_POOL_SIZE = 75
DATABASE_MAX_OVERFLOW = 20
RATE_LIMIT: bool = True
MESSAGE_SIZE_LIMIT: int = 2000
REDIS_HOST: str = "localhost"
REDIS_PORT: str = "6379"
DEBUG_USE_SEED_DATA: bool = False
DEBUG_USE_SEED_DATA_PATH: Optional[FilePath] = (
Path(__file__).parent.parent / "test_data/realistic/realistic_seed_data.json"
)
DEBUG_ALLOW_SELF_LABELING: bool = False # allow users to label their own messages
DEBUG_ALLOW_SELF_RANKING: bool = False # allow users to rank their own messages
DEBUG_ALLOW_DUPLICATE_TASKS: bool = False # offer users tasks to which they already responded
DEBUG_SKIP_EMBEDDING_COMPUTATION: bool = False
DEBUG_SKIP_TOXICITY_CALCULATION: bool = False
DEBUG_DATABASE_ECHO: bool = False
DEBUG_IGNORE_TOS_ACCEPTANCE: bool = ( # ignore whether users accepted the ToS
True # TODO: set False after ToS acceptance UI was added to web-frontend
)
DUPLICATE_MESSAGE_FILTER_WINDOW_MINUTES: int = 120
HUGGING_FACE_API_KEY: str = ""
ROOT_TOKENS: List[str] = ["1234"] # supply a string that can be parsed to a json list
ENABLE_PROM_METRICS: bool = True # enable prometheus metrics at /metrics
@validator("DATABASE_URI", pre=True)
def assemble_db_connection(cls, v: Optional[str], values: Dict[str, Any]) -> Any:
if isinstance(v, str):
return v
return PostgresDsn.build(
scheme="postgresql",
user=values.get("POSTGRES_USER"),
password=values.get("POSTGRES_PASSWORD"),
host=values.get("POSTGRES_HOST"),
port=values.get("POSTGRES_PORT"),
path=f"/{values.get('POSTGRES_DB') or ''}",
)
BACKEND_CORS_ORIGINS_CSV: Optional[str] # allow setting CORS origins as comma separated values
BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = []
@validator("BACKEND_CORS_ORIGINS", pre=True)
def assemble_cors_origins(cls, v: Optional[List[str]], values: Dict[str, Any]) -> List[str]:
s = values.get("BACKEND_CORS_ORIGINS_CSV")
if isinstance(s, str):
v = [i.strip() for i in s.split(",")]
return v
return v
UPDATE_ALEMBIC: bool = True
tree_manager: Optional[TreeManagerConfiguration] = TreeManagerConfiguration()
USER_STATS_INTERVAL_DAY: int = 5 # minutes
USER_STATS_INTERVAL_WEEK: int = 15 # minutes
USER_STATS_INTERVAL_MONTH: int = 60 # minutes
USER_STATS_INTERVAL_TOTAL: int = 240 # minutes
USER_STREAK_UPDATE_INTERVAL: int = 4 # Hours
@validator(
"USER_STATS_INTERVAL_DAY",
"USER_STATS_INTERVAL_WEEK",
"USER_STATS_INTERVAL_MONTH",
"USER_STATS_INTERVAL_TOTAL",
"USER_STREAK_UPDATE_INTERVAL",
)
def validate_user_stats_intervals(cls, v: int):
if v < 1:
raise ValueError(v)
return v
CACHED_STATS_UPDATE_INTERVAL: int = 60 # minutes
RATE_LIMIT_TASK_USER_TIMES: int = 30
RATE_LIMIT_TASK_USER_MINUTES: int = 4
RATE_LIMIT_TASK_API_TIMES: int = 10_000
RATE_LIMIT_TASK_API_MINUTES: int = 1
RATE_LIMIT_ASSISTANT_USER_TIMES: int = 4
RATE_LIMIT_ASSISTANT_USER_MINUTES: int = 2
RATE_LIMIT_PROMPTER_USER_TIMES: int = 8
RATE_LIMIT_PROMPTER_USER_MINUTES: int = 2
TASK_VALIDITY_MINUTES: int = 60 * 24 * 2 # tasks expire after 2 days
DISCORD_API_KEY: str | None = None
DISCORD_CHANNEL_ID: str | None = None
class Config:
env_file = ".env"
env_file_encoding = "utf-8"
case_sensitive = False
env_nested_delimiter = "__"
settings = Settings()