Re-Modifications #5


Merged: 12 commits, Jul 13, 2022
167 changes: 167 additions & 0 deletions 5_spacy_lang_processing_pipeline/language_processing_exercise.ipynb
@@ -0,0 +1,167 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "Yrci22GYhTQP"
},
"source": [
"### **spaCy Language Processing Pipelines: Exercises**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YUMPkcohhgam"
},
"outputs": [],
"source": [
"# import the spaCy library\n",
"import spacy\n",
"\n",
"nlp = spacy.load(\"en_core_web_sm\") # load spaCy's small pre-trained English pipeline"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hxtliEGIh4gS"
},
"source": [
"#### **Exercise 1**\n",
"\n",
"- Collect all the proper nouns from the given text in a list, and count them.\n",
"- A **proper noun** is a noun that names a particular person, place, or thing."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lRGfbeEshFf-",
"outputId": "f8d6beed-c03a-479c-b7bd-4a21173aba55"
},
"outputs": [],
"source": [
"text = '''Ravi and Raju are the best friends from school days.They wanted to go for a world tour and \n",
"visit famous cities like Paris, London, Dubai, Rome etc and also they called their another friend Mohan to take part of this world tour.\n",
"They started their journey from Hyderabad and spent next 3 months travelling all the wonderful cities in the world and cherish a happy moments!\n",
"'''\n",
"\n",
"# https://spacy.io/usage/linguistic-features\n",
"\n",
"# create a Doc object by running the pipeline on the text\n",
"doc = nlp(text)\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "WfU6CRIWhFh8"
},
"source": [
"**Expected Output**\n",
"\n",
"Proper Nouns: [Ravi, Raju, Paris, London, Dubai, Rome, Mohan, Hyderabad]\n",
"\n",
"Count: 8\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "FUr2rnbYmdlv"
},
"source": [
"#### **Exercise 2**\n",
"\n",
"- Collect all the company names from the given text, and count them.\n",
"- **Hint**: Use spaCy's **NER** (named entity recognition) functionality."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LLf4xyGEmZ2P",
"outputId": "e9582d9f-4f1e-4574-e3d8-a5526a4fb6cb"
},
"outputs": [],
"source": [
"text = '''The Top 5 companies in USA are Tesla, Walmart, Amazon, Microsoft, Google and the top 5 companies in \n",
"India are Infosys, Reliance, HDFC Bank, Hindustan Unilever and Bharti Airtel'''\n",
"\n",
"\n",
"doc = nlp(text)\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4JK5eMsCmZ5i"
},
"source": [
"**Expected Output**\n",
"\n",
"\n",
"Company Names: [Tesla, Walmart, Amazon, Microsoft, Google, Infosys, Reliance, HDFC Bank, Hindustan Unilever, Bharti Airtel]\n",
"\n",
"Count: 10"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "HkbNaNVChFoB"
},
"source": [
"## [**Solution**](./language_processing_exercise_solutions.ipynb)"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Language Processing_exercise.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
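
For readers skimming the diff, here is a minimal standalone sketch (not part of the notebook) of what the loaded pipeline exposes. It assumes `en_core_web_sm` has been installed, e.g. via `python -m spacy download en_core_web_sm`; the exact component list depends on the installed model version.

```python
import spacy

# Load the small pre-trained English pipeline.
nlp = spacy.load("en_core_web_sm")

# The pipeline is a sequence of named components applied in order.
print(nlp.pipe_names)
# typically: ['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

doc = nlp("Ravi visited Paris.")
for token in doc:
    print(token.text, token.pos_)  # part-of-speech tags from the tagger
for ent in doc.ents:
    print(ent.text, ent.label_)    # named entities from the 'ner' component
```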
164 changes: 164 additions & 0 deletions 5_spacy_lang_processing_pipeline/language_processing_exercise_solutions.ipynb
@@ -0,0 +1,164 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "Yrci22GYhTQP"
},
"source": [
"### **spaCy Language Processing Pipelines: Solutions**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "YUMPkcohhgam"
},
"outputs": [],
"source": [
"# import the spaCy library\n",
"import spacy\n",
"\n",
"nlp = spacy.load(\"en_core_web_sm\") # load spaCy's small pre-trained English pipeline"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hxtliEGIh4gS"
},
"source": [
"#### **Exercise 1**\n",
"\n",
"- Collect all the proper nouns from the given text in a list, and count them.\n",
"- A **proper noun** is a noun that names a particular person, place, or thing."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lRGfbeEshFf-",
"outputId": "f8d6beed-c03a-479c-b7bd-4a21173aba55"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Proper Nouns: [Ravi, Raju, Paris, London, Dubai, Rome, Mohan, Hyderabad]\n",
"Count: 8\n"
]
}
],
"source": [
"text = '''Ravi and Raju are the best friends from school days.They wanted to go for a world tour and \n",
"visit famous cities like Paris, London, Dubai, Rome etc and also they called their another friend Mohan to take part of this world tour.\n",
"They started their journey from Hyderabad and spent next 3 months travelling all the wonderful cities in the world and cherish a happy moments!\n",
"'''\n",
"\n",
"# https://spacy.io/usage/linguistic-features\n",
"\n",
"# create a Doc object by running the pipeline on the text\n",
"doc = nlp(text)\n",
"\n",
"# list to store the proper nouns\n",
"all_proper_nouns = []\n",
"\n",
"for token in doc:\n",
"    if token.pos_ == \"PROPN\":  # check whether the token's part-of-speech tag is \"PROPN\" (proper noun)\n",
"        all_proper_nouns.append(token)\n",
"\n",
"# print the results\n",
"print(\"Proper Nouns: \", all_proper_nouns)\n",
"print(\"Count: \", len(all_proper_nouns))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "FUr2rnbYmdlv"
},
"source": [
"#### **Exercise 2**\n",
"\n",
"- Collect all the company names from the given text, and count them.\n",
"- **Hint**: Use spaCy's **NER** (named entity recognition) functionality."
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LLf4xyGEmZ2P",
"outputId": "e9582d9f-4f1e-4574-e3d8-a5526a4fb6cb"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Company Names: [Tesla, Walmart, Amazon, Microsoft, Google, Infosys, Reliance, HDFC Bank, Hindustan Unilever, Bharti Airtel]\n",
"Count: 10\n"
]
}
],
"source": [
"text = '''The Top 5 companies in USA are Tesla, Walmart, Amazon, Microsoft, Google and the top 5 companies in \n",
"India are Infosys, Reliance, HDFC Bank, Hindustan Unilever and Bharti Airtel'''\n",
"\n",
"doc = nlp(text)\n",
"\n",
"# list to store the company names\n",
"all_company_names = []\n",
"\n",
"for ent in doc.ents:\n",
"    if ent.label_ == 'ORG':  # check whether the entity's label is 'ORG' (organization)\n",
"        all_company_names.append(ent)\n",
"\n",
"# print the results\n",
"print(\"Company Names: \", all_company_names)\n",
"print(\"Count: \", len(all_company_names))"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Language Processing_exercise.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
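
As a follow-up note on the two solutions above: the same logic can be written more compactly with list comprehensions. This is just an equivalent sketch using the same spaCy APIs (`token.pos_`, `doc.ents`, `ent.label_`), not a change proposed in this PR.

```python
import spacy

nlp = spacy.load("en_core_web_sm")

# Exercise 1, compact form: proper nouns via token.pos_
doc = nlp("Ravi and Raju visited Paris, London and Dubai.")
proper_nouns = [token for token in doc if token.pos_ == "PROPN"]
print("Proper Nouns:", proper_nouns, "Count:", len(proper_nouns))

# Exercise 2, compact form: organizations via doc.ents and ent.label_
doc = nlp("Tesla, Infosys and HDFC Bank are large companies.")
companies = [ent for ent in doc.ents if ent.label_ == "ORG"]
print("Company Names:", companies, "Count:", len(companies))
```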