cohere-ai
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎CONTRIBUTORS.md
+16 b/‎CONTRIBUTORS.md
+16
diff --git a/‎LICENSE
+21 b/‎LICENSE
+21
diff --git a/‎README.md
+76 b/‎README.md
+76
diff --git a/‎cli_demo.py
+27 b/‎cli_demo.py
+27
diff --git a/‎discord_bot.py
+73 b/‎discord_bot.py
+73
diff --git a/‎qa/__init__.py
+7 b/‎qa/__init__.py
+7
diff --git a/‎qa/answer.py
+85 b/‎qa/answer.py
+85
diff --git a/‎qa/bot.py
+58 b/‎qa/bot.py
+58
@@ -0,0 +1 @@
+qa/__pycache__/
@@ -0,0 +1,16 @@
+Thank you for your interest in contributing to this repository. To help maintain
+the quality of the codebase and ensure a quick review of your pull request, you
+should:
+1. Write clear, clean code and format it in line with the style used in the 
+repository.
+2. Leave comments, and use docstrings where appropriate.
+3. Add unit tests for any new functionality you introduce, if a set of test cases
+is already set up in the repository.
+4. Use git commit messages to leave an informative trace of what additions and
+changes were made.
+5. Write an informative high level description of the pull request, changes made,
+and the reason for these changes before submitting the pull request.
+
+If you have not signed our Contributor License Agreement, you will be asked to
+sign one by our automated system when you submit your first pull request to
+a Cohere repository.
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Cohere Inc. and its affiliates.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,76 @@
+```
+################################################################################
+#    ____      _                     ____                  _ _                 #
+#   / ___|___ | |__   ___ _ __ ___  / ___|  __ _ _ __   __| | |__   _____  __  #
+#  | |   / _ \| '_ \ / _ \ '__/ _ \ \___ \ / _` | '_ \ / _` | '_ \ / _ \ \/ /  #
+#  | |__| (_) | | | |  __/ | |  __/  ___) | (_| | | | | (_| | |_) | (_) >  <   #
+#   \____\___/|_| |_|\___|_|  \___| |____/ \__,_|_| |_|\__,_|_.__/ \___/_/\_\  #
+#                                                                              #
+# This project is part of Cohere Sandbox, Cohere's Experimental Open Source    #
+# offering. This project provides a library, tooling, or demo making use of    #
+# the Cohere Platform. You should expect (self-)documented, high quality code  #
+# but be warned that this is EXPERIMENTAL. Therefore, also expect rough edges, #
+# non-backwards compatible changes, or potential changes in functionality as   #
+# the library, tool, or demo evolves. Please consider referencing a specific   #
+# git commit or version if depending upon the project in any mission-critical  #
+# code as part of your own projects.                                           #
+#                                                                              #
+# Please don't hesitate to raise issues or submit pull requests, and thanks    #
+# for checking out this project!                                               #
+#                                                                              #
+################################################################################
+```
+
+**Maintainer:** [nickfrosst](https://github.com/nickfrosst) \
+**Project maintained until at least (YYYY-MM-DD):** 2023-01-01
+
+# Grounded Question Answering
+
+This is a Cohere API / Serp API powered contextualized factual question answering bot! 
+
+It responds to questions in Discord or in the CLI by understanding the context,
+searching Google for what it believes to be the appropriate question, finding relevant
+information on the Google result pages, and then answering the question based on
+what it found.
+
+## Motivation
+
+Language models are very good at creating sensible answers to complex questions. They are not, however, very good at creating truthful answers. This is because language models don't have a mechanism for determining truth. They are trained on data from the web, and so pick up statistical correlations between words that make them OK at answering simple and static questions (things like "how far away is the moon from the earth", which has a single and unchanging factual answer), but more nuanced questions, or questions whose factual answers change over time (things like "who is the prime minister of the UK"), are difficult or impossible for language models to answer.  
+
+Google search, on the other hand, is very good at retrieving factual information about these time-sensitive questions. Google makes use of a consensus mechanism for determining truth. Google search results are heavily affected by human user behaviour: which links people click, which links they stay on, and which ones they revisit all affect the ordering of the results. In this way, Google determines which links are truthful through user consensus. Google, however, is quite poor at responding to contextual questions, and at responding to complex questions in natural language.  
+
+This project attempts to join the best of both of these methods: it uses Cohere language models to contextualize the given questions and create a natural language answer, but it uses Google search as a source of truth.  
+
+## Installation and Demo Use
+
+To use this library, you will need:
+* A Serp API key, which you can obtain by registering at https://serpapi.com/users/welcome.
+* A Cohere API key: sign up for a free key at https://dashboard.cohere.ai/welcome/register.
+* (Optional) A Discord key, which is the Discord bot token obtained when creating and configuring a Discord bot. See [docs](https://discord.com/developers/docs/topics/oauth2) for more info.
+
+1. Clone the repository.
+2. Install all the dependencies
+```sh
+pip install -r requirements.txt
+```
+3. Try the demo by running the cli tool
+```sh
+python3 cli_demo.py --cohere_api_key <API_KEY> --serp_api_key <API_KEY>
+```
+4. (Optional) Run the discord bot demo:  
+You can create a Discord bot with this functionality by creating a bot account with message read and write permissions at https://discord.com/developers and then running the following command:
+```sh
+python3 discord_bot.py --cohere_api_key <API_KEY> --serp_api_key <API_KEY> --discord_key <DISCORD_KEY>
+```
+
+# Get support
+If you have any questions or comments, please file an issue or reach out to us on [Discord](https://discord.gg/co-mmunity).
+
+# Contributors
+If you would like to contribute to this project, please read `CONTRIBUTORS.md`
+in this repository, and sign the Contributor License Agreement before submitting
+any pull requests. A link to sign the Cohere CLA will be generated the first time 
+you make a pull request to a Cohere repository.
+
+# License
+Grounded Question Answering has an MIT license, as found in the LICENSE file.
@@ -0,0 +1,27 @@
+# Copyright (c) 2022 Cohere Inc. and its affiliates.
+#
+# Licensed under the MIT License (the "License");
+# you may not use this file except in compliance with the License.
+#
+# You may obtain a copy of the License in the LICENSE file at the top
+# level of this repository.
+
+# This is a CLI demo of the bot. You can run it and ask questions directly in your terminal.
+
+import argparse
+
+from qa.bot import GroundedQaBot
+
+# Command-line options: both API keys are mandatory, the verbosity level is optional.
+parser = argparse.ArgumentParser(description="A grounded QA bot with cohere and google search")
+parser.add_argument("--cohere_api_key", type=str, help="api key for cohere", required=True)
+parser.add_argument("--serp_api_key", type=str, help="api key for serpAPI", required=True)
+parser.add_argument("--verbosity", type=int, default=0, help="verbosity level")
+args = parser.parse_args()
+
+# One bot instance serves the whole session, so every question goes through
+# the same object.
+bot = GroundedQaBot(args.cohere_api_key, args.serp_api_key)
+
+if __name__ == "__main__":
+    # Prompt / answer loop; runs until stdin is closed (Ctrl-D) or the user
+    # interrupts the prompt (Ctrl-C).
+    while True:
+        try:
+            question = input("question: ")
+        except (EOFError, KeyboardInterrupt):
+            # Leave quietly instead of dumping a traceback at the prompt.
+            print()
+            break
+        if not question.strip():
+            # Nothing to look up: ask again without calling the remote APIs.
+            continue
+        reply = bot.answer(question, verbosity=args.verbosity, n_paragraphs=2)
+        print("answer: " + reply)
@@ -0,0 +1,73 @@
+# Copyright (c) 2022 Cohere Inc. and its affiliates.
+#
+# Licensed under the MIT License (the "License");
+# you may not use this file except in compliance with the License.
+#
+# You may obtain a copy of the License in the LICENSE file at the top
+# level of this repository.
+
+# this is a demo discord bot. You can make a discord bot token by visiting https://discord.com/developers
+
+import argparse
+
+import discord
+from discord import Embed
+from discord.ext import commands
+
+from qa.bot import GroundedQaBot
+
+# Command-line options: the two API keys and the Discord bot token are
+# mandatory, the verbosity level is optional.
+parser = argparse.ArgumentParser(description="A grounded QA bot with cohere and google search")
+parser.add_argument("--cohere_api_key", type=str, help="api key for cohere", required=True)
+parser.add_argument("--serp_api_key", type=str, help="api key for serpAPI", required=True)
+parser.add_argument("--discord_key", type=str, help="api key for discord bot", required=True)
+parser.add_argument("--verbosity", type=int, default=0, help="verbosity level")
+args = parser.parse_args()
+
+# Module-level QA bot used by the Discord client defined below.
+bot = GroundedQaBot(args.cohere_api_key, args.serp_api_key)
+
+
+class MyClient(discord.Client):
+    """Discord client that answers questions with the module-level QA `bot`.
+
+    A question is taken either from a direct message sent to the bot or from
+    any message that a user tags with a "❓" reaction.
+    """
+
+    async def on_ready(self):
+        """Initializes bot: prints the logged-in user and each connected guild."""
+        print(f"We have logged in as {self.user}")
+
+        for guild in self.guilds:
+            print(f"{self.user} is connected to the following guild:\n"
+                  f"{guild.name}(id: {guild.id})")
+
+    async def answer(self, message):
+        """Answers a question based on the context of the conversation and information from the web.
+
+        Args:
+            message: the Discord message holding the question. The reply is
+                sent to the same channel as a reference to this message.
+        """
+        # Gather up to six earlier messages of the channel as "user: ..." /
+        # "bot: ..." lines. They are appended in iteration order and reversed
+        # afterwards, which yields the same list as prepending each one.
+        history = []
+        async for historic_msg in message.channel.history(limit=6, before=message):
+            if historic_msg.content:
+                # Compare the author itself rather than its name (as
+                # on_message does), so that a user who merely shares the
+                # bot's name is not labelled as the bot.
+                name = "bot" if historic_msg.author == self.user else "user"
+                history.append(f"{name}: {historic_msg.clean_content}")
+        history.reverse()
+
+        print(history)
+        bot.set_chat_history(history)
+
+        async with message.channel.typing():
+            # NOTE(review): bot.answer is a plain synchronous call, so this
+            # coroutine does not yield to the event loop while it runs.
+            # The verbosity comes from the --verbosity command-line flag.
+            reply = bot.answer(message.clean_content, verbosity=args.verbosity, n_paragraphs=3)
+            response_msg = await message.channel.send(reply, reference=message)
+            # presumably hides the link previews of the source URLs - see the
+            # discord.py documentation of Message.edit(suppress=...).
+            await response_msg.edit(suppress=True)
+
+    async def on_message(self, message):
+        """Handles query messages triggered by direct messages to the bot."""
+        # Ignore the bot's own messages; otherwise it would answer itself.
+        if isinstance(message.channel, discord.channel.DMChannel) and message.author != self.user:
+            await self.answer(message)
+
+    async def on_reaction_add(self, reaction, user):
+        """Handles query messages triggered by emoji from user."""
+        # Only the first "❓" on a message triggers an answer, and reactions
+        # added by the bot itself are ignored.
+        if user != self.user and str(reaction.emoji) == "❓" and reaction.count == 1:
+            await self.answer(reaction.message)
+
+
+if __name__ == "__main__":
+    # Create the client with every gateway intent switched on and log in with
+    # the token supplied through --discord_key.
+    client = MyClient(intents=discord.Intents.all())
+    client.run(args.discord_key)
@@ -0,0 +1,7 @@
+# Copyright (c) 2022 Cohere Inc. and its affiliates.
+#
+# Licensed under the MIT License (the "License");
+# you may not use this file except in compliance with the License.
+#
+# You may obtain a copy of the License in the LICENSE file at the top
+# level of this repository.
@@ -0,0 +1,85 @@
+# Copyright (c) 2022 Cohere Inc. and its affiliates.
+#
+# Licensed under the MIT License (the "License");
+# you may not use this file except in compliance with the License.
+#
+# You may obtain a copy of the License in the LICENSE file at the top
+# level of this repository.
+
+import numpy as np
+
+from qa.model import get_sample_answer
+from qa.search import embedding_search, get_results_paragraphs_multi_process
+from qa.util import pretty_print
+
+
+def trim_stop_sequences(s, stop_sequences):
+    """Remove a stop sequence found at the end of returned generated text.
+
+    Args:
+        s: the generated text.
+        stop_sequences: candidate suffixes, checked in the given order.
+
+    Returns:
+        `s` without the first stop sequence it ends with, or `s` unchanged
+        when none of them matches.
+    """
+
+    for stop_sequence in stop_sequences:
+        # Skip empty sequences: every string "ends with" "", and s[:-0] would
+        # then throw away the whole text.
+        if stop_sequence and s.endswith(stop_sequence):
+            return s[:-len(stop_sequence)]
+    return s
+
+
+def answer(question, context, co, model, chat_history=""):
+    """Answer a question given some context.
+
+    Args:
+        question: the question to answer.
+        context: text the answer should be based on.
+        co: Cohere client; its `tokenize` and `generate` methods are called.
+        model: model name handed to `co.generate`.
+        chat_history: optional chat log. When non-empty, a chat-style prompt
+            ending in "bot:" replaces the question/answer prompt.
+
+    Returns:
+        The stripped text of the non-empty generation with the highest
+        likelihood, or "" when every generation came back empty.
+    """
+
+    if chat_history:
+        prompt = ("This is an example of factual question answering chat bot. It "
+                  "takes the text context and answers related questions:\n "
+                  f"Context:-{context}\nChat Log\n{chat_history}\nbot:")
+    else:
+        prompt = ("This is an example of question answering based on a text passage:\n "
+                  f"Context:-{context}\nQuestion:\n-{question}\nAnswer:\n-")
+
+    stop_sequences = ["\n"]
+    num_generations = 4
+
+    # Keep only the last 1900 tokens of the prompt - presumably to stay below
+    # the model's input limit; confirm against the Cohere documentation.
+    prompt = "".join(co.tokenize(text=prompt).token_strings[-1900:])
+    prediction = co.generate(model=model,
+                             prompt=prompt,
+                             max_tokens=100,
+                             temperature=0.3,
+                             stop_sequences=stop_sequences,
+                             num_generations=num_generations,
+                             return_likelihoods="GENERATION")
+
+    # Pair each returned generation with its likelihood. Iterating over what
+    # was actually returned avoids an IndexError when fewer than
+    # `num_generations` come back, and blank texts are dropped here because
+    # "".isspace() is False and so never filtered them out.
+    candidates = []
+    for generation in prediction.generations:
+        text = trim_stop_sequences(generation.text.strip(), stop_sequences)
+        if text.strip():
+            candidates.append((text, generation.likelihood))
+
+    if not candidates:
+        # Nothing usable was generated.
+        return ""
+
+    best_text, _ = max(candidates, key=lambda candidate: candidate[1])
+    return best_text.strip()
+
+
+def answer_with_search(question,
+                       co,
+                       serp_api_token,
+                       chat_history="",
+                       model="xlarge",
+                       embedding_model="small",
+                       url=None,
+                       n_paragraphs=1,
+                       verbosity=0):
+    """Generates completion based on search results.
+
+    Returns:
+        A `(response, source_urls, source_texts)` tuple for the paragraphs
+        used as context. When the search yields no paragraphs the tuple is
+        `("", "", "")`, i.e. three empty strings rather than lists.
+    """
+
+    paragraphs, paragraph_sources = get_results_paragraphs_multi_process(question, serp_api_token, url=url)
+    if not paragraphs:
+        return ("", "", "")
+
+    sample_answer = get_sample_answer(question, co)
+    ranked = embedding_search(paragraphs, paragraph_sources, sample_answer, co, model=embedding_model)
+
+    if verbosity > 1:
+        everything = "\n".join([entry[0] for entry in ranked])
+        pretty_print("OKGREEN", f"all search result context: {everything}")
+
+    # Only the last `n_paragraphs` entries are used as context - presumably
+    # the search helper puts the best matches last; verify in qa.search.
+    ranked = ranked[-n_paragraphs:]
+    source_texts = [entry[0] for entry in ranked]
+    source_urls = [entry[1] for entry in ranked]
+    context = "\n".join(source_texts)
+
+    if verbosity:
+        pretty_print("OKCYAN", "relevant result context: " + context)
+
+    response = answer(question, context, co, chat_history=chat_history, model=model)
+
+    return (response, source_urls, source_texts)
@@ -0,0 +1,58 @@
+# Copyright (c) 2022 Cohere Inc. and its affiliates.
+#
+# Licensed under the MIT License (the "License");
+# you may not use this file except in compliance with the License.
+#
+# You may obtain a copy of the License in the LICENSE file at the top
+# level of this repository.
+
+from sys import settrace
+
+import cohere
+
+from qa.answer import answer_with_search
+from qa.model import get_contextual_search_query
+from qa.util import pretty_print
+
+
+class GroundedQaBot():
+    """A class yielding Grounded question-answering conversational agents.
+
+    The conversation is kept as a list of "user: ..." / "bot: ..." lines that
+    `answer` extends on every call.
+    """
+
+    def __init__(self, cohere_api_key, serp_api_key):
+        """Store both API keys and create the Cohere client.
+
+        Args:
+            cohere_api_key: key used to build the `cohere.Client`.
+            serp_api_key: key handed to the search step of `answer`.
+        """
+        self._cohere_api_key = cohere_api_key
+        self._serp_api_key = serp_api_key
+        self._chat_history = []
+        self._co = cohere.Client(self._cohere_api_key)
+
+    @property
+    def chat_history(self):
+        """The list of conversation lines (the live list, not a copy)."""
+        return self._chat_history
+
+    def set_chat_history(self, chat_history):
+        """Replace the conversation history with `chat_history`.
+
+        The list is stored as-is, so later calls to `answer` append to the
+        very object passed in.
+        """
+        self._chat_history = chat_history
+
+    def answer(self, question, verbosity=0, n_paragraphs=1):
+        """Answer a question, based on recent conversational history.
+
+        Args:
+            question: the user's latest message.
+            verbosity: forwarded to the query and search helpers.
+            n_paragraphs: forwarded to `answer_with_search`.
+
+        Returns:
+            The reply text: the answer followed by its source URLs, or a
+            fallback sentence when no usable answer was produced.
+        """
+
+        self._chat_history.append("user: " + question)
+
+        # Only the six most recent lines are passed on; the helper's result
+        # replaces `question` for the search below.
+        history = "\n".join(self._chat_history[-6:])
+        question = get_contextual_search_query(history, self._co, verbosity=verbosity)
+
+        answer_text, source_urls, source_texts = answer_with_search(question,
+                                                                    self._co,
+                                                                    self._serp_api_key,
+                                                                    verbosity=verbosity,
+                                                                    n_paragraphs=n_paragraphs)
+
+        self._chat_history.append("bot: " + answer_text)
+
+        if not source_texts or "".join(source_texts).isspace():
+            reply = "Sorry, I could not find any relevant information for that question."
+        elif answer_text.strip() == question.strip():
+            # The model merely echoed the search query.
+            reply = "I had trouble answering the question, but maybe this link on the right will help."
+        else:
+            # Drop duplicate URLs while keeping their original order, so the
+            # reply is the same from one run to the next (a set is not).
+            sources_str = "\n".join(dict.fromkeys(source_urls))
+            reply = f"{answer_text}\nSource:\n{sources_str}"
+
+        return reply