|
| 1 | +from oasst_backend.models import CachedStats, Message, MessageTreeState, User |
| 2 | +from oasst_shared.exceptions.oasst_api_error import OasstError, OasstErrorCode |
| 3 | +from oasst_shared.schemas.protocol import AllCachedStatsResponse, CachedStatsName, CachedStatsResponse |
| 4 | +from oasst_shared.utils import log_timing, utcnow |
| 5 | +from sqlalchemy.orm.attributes import flag_modified |
| 6 | +from sqlmodel import Session, func, not_ |
| 7 | + |
| 8 | + |
def row_to_dict(r) -> dict:
    """Copy a keyed row-like object into a plain dict.

    Args:
        r: Any object exposing ``keys()`` and item access by those keys.

    Returns:
        A new dict mapping every key reported by ``r.keys()`` to ``r[key]``.
    """
    result = {}
    for key in r.keys():
        result[key] = r[key]
    return result
| 11 | + |
| 12 | + |
class CachedStatsRepository:
    """Computes aggregate statistics and reads/writes them as ``CachedStats`` rows."""

    def __init__(self, db: Session):
        # Session used for every query and for staging CachedStats rows.
        self.db = db

    def qry_human_messages_by_lang(self) -> dict[str, int]:
        """Count messages grouped by ``Message.lang``.

        Only rows that are not deleted, have a truthy ``review_result`` and are
        not synthetic are counted.
        """
        message_count = func.count(Message.id).label("count")
        rows = (
            self.db.query(Message.lang, message_count)
            .filter(not_(Message.deleted), Message.review_result, not_(Message.synthetic))
            .group_by(Message.lang)
        )
        counts = {}
        for row in rows:
            counts[row["lang"]] = row["count"]
        return counts

    def qry_human_messages_by_role(self) -> dict[str, int]:
        """Count messages grouped by ``Message.role``.

        Applies the same filter as ``qry_human_messages_by_lang``: not deleted,
        truthy ``review_result``, not synthetic.
        """
        message_count = func.count(Message.id).label("count")
        rows = (
            self.db.query(Message.role, message_count)
            .filter(not_(Message.deleted), Message.review_result, not_(Message.synthetic))
            .group_by(Message.role)
        )
        counts = {}
        for row in rows:
            counts[row["role"]] = row["count"]
        return counts

    def qry_message_trees_by_state(self) -> dict[str, int]:
        """Count message trees grouped by ``MessageTreeState.state``."""
        tree_count = func.count(MessageTreeState.message_tree_id).label("count")
        rows = self.db.query(MessageTreeState.state, tree_count).group_by(MessageTreeState.state)
        counts = {}
        for row in rows:
            counts[row["state"]] = row["count"]
        return counts

    def qry_message_trees_states_by_lang(self) -> list:
        """Count message trees per (lang, state) pair.

        Each tree state is joined to the message whose id equals the tree id,
        and that message's ``lang`` is used for grouping.

        Returns:
            A list of dicts with keys ``lang``, ``state`` and ``count``,
            ordered by lang and then state.
        """
        tree_count = func.count(MessageTreeState.message_tree_id).label("count")
        rows = (
            self.db.query(Message.lang, MessageTreeState.state, tree_count)
            .select_from(MessageTreeState)
            .join(Message, MessageTreeState.message_tree_id == Message.id)
            .group_by(MessageTreeState.state, Message.lang)
            .order_by(Message.lang, MessageTreeState.state)
        )
        return list(map(row_to_dict, rows))

    def qry_users_accepted_tos(self) -> dict[str, int]:
        """Count enabled users whose ``tos_acceptance_date`` is set.

        Returns:
            A single-entry dict ``{"count": <number of users>}``.
        """
        accepted = self.db.query(func.count(User.id)).filter(
            User.enabled, User.tos_acceptance_date.is_not(None)
        )
        total = accepted.scalar()
        return {"count": total}

    @log_timing(level="INFO")
    def update_all_cached_stats(self):
        """Recompute every statistic and stage the results in the session.

        Each query runs immediately before its result is stored, in the order
        listed below. This method does not commit; the caller must.
        """
        refreshers = (
            (CachedStatsName.human_messages_by_lang, self.qry_human_messages_by_lang),
            (CachedStatsName.human_messages_by_role, self.qry_human_messages_by_role),
            (CachedStatsName.message_trees_by_state, self.qry_message_trees_by_state),
            (CachedStatsName.message_trees_states_by_lang, self.qry_message_trees_states_by_lang),
            (CachedStatsName.users_accepted_tos, self.qry_users_accepted_tos),
        )
        for stats_name, run_query in refreshers:
            self._insert_cached_stats(stats_name, run_query())

    def _insert_cached_stats(self, name: CachedStatsName, stats: dict | list):
        """Create or overwrite the ``CachedStats`` row identified by ``name``.

        Args:
            name: Which statistic the row holds.
            stats: The freshly computed payload to store.
        """
        existing: CachedStats | None = (
            self.db.query(CachedStats).filter(CachedStats.name == name).one_or_none()
        )
        if not existing:
            self.db.add(CachedStats(name=name, modified_date=utcnow(), stats=stats))
            return

        existing.modified_date = utcnow()
        existing.stats = stats
        # Explicitly mark the column as changed so the new payload is written out.
        flag_modified(existing, "stats")

    def get_stats(self, name: CachedStatsName) -> CachedStatsResponse:
        """Return the stored statistic called ``name``.

        Raises:
            OasstError: With ``CACHED_STATS_NOT_AVAILABLE`` when no row exists
                for ``name``.
        """
        found: CachedStats | None = (
            self.db.query(CachedStats).filter(CachedStats.name == name).one_or_none()
        )
        if found:
            return CachedStatsResponse(name=found.name, last_updated=found.modified_date, stats=found.stats)
        raise OasstError(f"Cached stats '{name.value}' not found.", OasstErrorCode.CACHED_STATS_NOT_AVAILABLE)

    def get_stats_all(self) -> AllCachedStatsResponse:
        """Return every stored statistic, keyed by its name."""
        by_name: dict[CachedStatsName, CachedStatsResponse] = {
            entry.name: CachedStatsResponse(name=entry.name, last_updated=entry.modified_date, stats=entry.stats)
            for entry in self.db.query(CachedStats)
        }
        return AllCachedStatsResponse(stats_by_name=by_name)
| 94 | + |
| 95 | + |
if __name__ == "__main__":
    # Manual entry point: recompute all cached statistics once and persist them.
    # The engine import is kept local so importing this module has no database side effects.
    from oasst_backend.database import engine

    with Session(engine) as db:
        csr = CachedStatsRepository(db)
        # Invoke the method exactly once. It has no return statement, so its
        # result must not be called again; doing so would raise before commit().
        csr.update_all_cached_stats()
        db.commit()
0 commit comments