Commit 3760f78

update pre-commit to use black 23.1.0 (LAION-AI#1791)
Today we ran into an interesting pre-commit loop:

- `pre-commit run --files oasst-shared/oasst_shared/schemas/inference.py` said everything was OK
- `pre-commit run -a` wanted to re-format the file, but after the file was staged pre-commit changed its mind and wanted the old format back

In the hope of fixing this, I updated to [black 23.1.0](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html). The new black version removes a couple of empty lines and parentheses.
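For illustration, here is a minimal, self-contained sketch (not code from this repository; the names are made up) of the two kinds of reformatting the new black version applies across the files below:

```python
# Sketch of the black 23.1.0 style changes seen in this diff.

# 1) The blank line that black 22.12.0 tolerated directly after a function
#    signature is removed, so the body starts on the very next line.
def first_item(items: list) -> object:
    return items[0]


# 2) Redundant parentheses around tuple targets in for-loops are dropped,
#    e.g. `for (x, y, _) in triples:` is rewritten to the form below.
triples = [(1, 2, "a"), (3, 4, "b")]
pairs = []
for x, y, _ in triples:
    pairs.append((x, y))

print(first_item(["ok"]), pairs)
```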
1 parent 0b9f4ac commit 3760f78

28 files changed: +14 / -75 lines

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ repos:
       - id: end-of-file-fixer

   - repo: https://github.com/psf/black
-    rev: 22.12.0
+    rev: 23.1.0
     hooks:
       - id: black-jupyter

backend/export.py

Lines changed: 0 additions & 1 deletion
@@ -71,7 +71,6 @@ def fetch_tree_messages_and_avg_labels(
     lang: Optional[str] = None,
     review_result: Optional[bool] = None,
 ) -> List[Message]:
-
     args = [Message]

     for l in TextLabel:

backend/oasst_backend/api/v1/messages.py

Lines changed: 0 additions & 1 deletion
@@ -204,7 +204,6 @@ def get_message_tree_state(
     api_client: ApiClient = Depends(deps.get_api_client),
     db: Session = Depends(deps.get_db),
 ) -> MessageTreeStateResponse:
-
     pr = PromptRepository(db, api_client, frontend_user=frontend_user)
     message = pr.fetch_message(message_id=message_id, fail_if_missing=True)
     mts = pr.fetch_tree_state(message.message_tree_id)

backend/oasst_backend/prompt_repository.py

Lines changed: 0 additions & 3 deletions
@@ -357,7 +357,6 @@ def store_ranking(self, ranking: protocol_schema.MessageRanking) -> tuple[Messag
         )

         match type(task_payload):
-
             case db_payload.RankPrompterRepliesPayload | db_payload.RankAssistantRepliesPayload:
                 # validate ranking
                 if sorted(ranking.ranking) != list(range(num_replies := len(task_payload.reply_messages))):
@@ -736,7 +735,6 @@ def fetch_message(self, message_id: UUID, fail_if_missing: bool = True) -> Optio
         return message

     def fetch_non_task_text_labels(self, message_id: UUID, user_id: UUID) -> Optional[TextLabels]:
-
         query = (
             self.db.query(TextLabels)
             .outerjoin(Task, Task.id == TextLabels.id)
@@ -1189,7 +1187,6 @@ def fetch_flagged_messages(self, max_count: Optional[int]) -> list[FlaggedMessag
         return qry.all()

     def process_flagged_message(self, message_id: UUID) -> FlaggedMessage:
-
         message = self.db.query(FlaggedMessage).get(message_id)

         if not message:

backend/oasst_backend/tree_manager.py

Lines changed: 0 additions & 8 deletions
@@ -255,7 +255,6 @@ def _prompt_lottery(self, lang: str, max_activate: int = 1) -> int:
         activated = 0

         while True:
-
             stats = self.tree_counts_by_state_stats(lang=lang, only_active=True)

             remaining_prompt_review = max(0, self.cfg.max_initial_prompt_review - stats.initial_prompt_review)
@@ -267,7 +266,6 @@ def _prompt_lottery(self, lang: str, max_activate: int = 1) -> int:

         @managed_tx_function(CommitMode.COMMIT)
         def activate_one(db: Session) -> int:
-
             # select among distinct users
             authors_qry = (
                 db.query(Message.user_id)
@@ -397,7 +395,6 @@ def next_task(
         desired_task_type: protocol_schema.TaskRequestType = protocol_schema.TaskRequestType.random,
         lang: str = "en",
     ) -> Tuple[protocol_schema.Task, Optional[UUID], Optional[UUID]]:
-
         logger.debug(f"TreeManager.next_task({desired_task_type=}, {lang=})")

         self.pr.ensure_user_is_enabled()
@@ -537,7 +534,6 @@ def next_task(
                 message_tree_id = messages[-1].message_tree_id

             case TaskType.LABEL_REPLY:
-
                 if task_role == TaskRole.PROMPTER:
                     replies_need_review = list(filter(lambda m: m.role == "prompter", replies_need_review))
                 elif task_role == TaskRole.ASSISTANT:
@@ -610,7 +606,6 @@ def next_task(
                 message_tree_id = message.message_tree_id

             case TaskType.REPLY:
-
                 if task_role == TaskRole.PROMPTER:
                     extendible_parents = list(filter(lambda x: x.parent_role == "assistant", extendible_parents))
                 elif task_role == TaskRole.ASSISTANT:
@@ -920,7 +915,6 @@ def check_condition_for_scoring_state(self, message_tree_id: UUID) -> bool:
     def update_message_ranks(
         self, message_tree_id: UUID, rankings_by_message: dict[UUID, list[MessageReaction]]
     ) -> bool:
-
         mts = self.pr.fetch_tree_state(message_tree_id)
         # check state, allow retry if in SCORING_FAILED state
         if mts.state not in (message_tree_state.State.READY_FOR_SCORING, message_tree_state.State.SCORING_FAILED):
@@ -1015,7 +1009,6 @@ def _calculate_acceptance(self, labels: list[TextLabels]):
     def _query_need_review(
         self, state: message_tree_state.State, required_reviews: int, root: bool, lang: str
     ) -> list[Message]:
-
         need_review = (
             self.db.query(Message)
             .select_from(MessageTreeState)
@@ -1668,7 +1661,6 @@ def purge_user_messages(
         min_date: datetime = None,
         max_date: datetime = None,
     ):
-
         # find all affected message trees
         replies_by_tree, prompts = self.get_user_messages_by_tree(user_id, min_date, max_date)
         total_messages = sum(len(x) for x in replies_by_tree.values())

backend/oasst_backend/user_repository.py

Lines changed: 0 additions & 1 deletion
@@ -261,7 +261,6 @@ def query_users_ordered_by_display_name(
         limit: Optional[int] = 100,
         desc: bool = False,
     ) -> list[User]:
-
         if not self.api_client.trusted:
             if not api_client_id:
                 # Let unprivileged api clients query their own users without api_client_id being set

backend/oasst_backend/utils/hugging_face.py

Lines changed: 0 additions & 1 deletion
@@ -27,7 +27,6 @@ def __init__(
         self,
         api_url: str,
     ):
-
         # The API endpoint we want to access
         self.api_url: str = api_url

backend/oasst_backend/utils/ranking.py

Lines changed: 1 addition & 2 deletions
@@ -110,7 +110,7 @@ def ranked_pairs(ranks: List[List[int]]):
     sorted_majorities = np.array(sorted(sorted_majorities, key=lambda x: x[2], reverse=True))
     # now do lock ins
     lock_ins = []
-    for (x, y, _) in sorted_majorities:
+    for x, y, _ in sorted_majorities:
         # invariant: lock_ins has no cycles here
         lock_ins.append((x, y))
         # print("lock ins are now",np.array(lock_ins))
@@ -130,7 +130,6 @@ def ranked_pairs(ranks: List[List[int]]):


 if __name__ == "__main__":
-
     ranks = """ (
         [("w", "x", "z", "y") for _ in range(1)]
         + [("w", "y", "x", "z") for _ in range(2)]

model/model_training/custom_datasets/qa_datasets.py

Lines changed: 1 addition & 6 deletions
@@ -167,13 +167,11 @@ def __len__(self):
         return self.length

     def __getitem__(self, idx):
-
         data = self.dataset[idx]
         return format_pair(self.index_fn(data))


 class WebGPT(Dataset):
-
     name = "webgpt"

     def __init__(self) -> None:
@@ -206,7 +204,6 @@ def __getitem__(self, index):


 class SODA(Dataset):
-
     name = "soda"

     def process_soda_convo(self, data):
@@ -252,7 +249,7 @@ def __init__(self, cache_dir, input_max_length=1024) -> None:
         dataset = load_dataset("allenai/soda", cache_dir=cache_dir)["train"]
         for data in dataset:
             data_pair = self.process_soda_convo(data)
-            for (prompt, answer) in data_pair:
+            for prompt, answer in data_pair:
                 if len(prompt) < input_max_length:
                     self.pairs.append((prompt, answer))

@@ -268,7 +265,6 @@ class SODADialogue(Dataset):
     url = "https://drive.google.com/uc?id=1TOGQfr419n8wpzJpYLLw4nB3tSKD8zXV"

     def __init__(self, cache_dir, verbose=True):
-
         path = os.path.join(cache_dir, "soda_dialog.jsonl")

         if not os.path.exists(path):
@@ -316,7 +312,6 @@ def __getitem__(self, index):


 class JokeExplaination(Dataset):
-
     name = "joke"
     url = "https://gist.github.com/theblackcat102/42b697e24a13fdb499e20edfbf618361/raw/1834dca207898c15f93b809d1195f6f6e47c9e1e/joke_explained.jsonl"

model/model_training/custom_datasets/translation.py

Lines changed: 0 additions & 1 deletion
@@ -131,7 +131,6 @@ def __init__(self, pair="zh-en", split="train", mix_prob=0.2, maximum_size=10000


 class DiveMT(TranslationPair):
-
     REMAP = {"tur": "tr", "ita": "it", "ukr": "uk", "nld": "nl", "vie": "vi", "ara": "ar"}

     def __init__(self, split="train", mix_prob=0.2) -> None:
