From bbd8d2dbc1b1150446788a38705d2957c218c9aa Mon Sep 17 00:00:00 2001
From: Paul Tremberth <paul.tremberth@gmail.com>
Date: Tue, 7 Jun 2016 18:31:10 +0200
Subject: [PATCH 1/4] Use six for Python 3 compatibility

---
 dirbot/pipelines.py | 4 +++-
 setup.py            | 4 ++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/dirbot/pipelines.py b/dirbot/pipelines.py
index d301b59..b7c9f3c 100644
--- a/dirbot/pipelines.py
+++ b/dirbot/pipelines.py
@@ -1,3 +1,5 @@
+import six
+
 from scrapy.exceptions import DropItem
 
 
@@ -10,7 +12,7 @@ class FilterWordsPipeline(object):
 
     def process_item(self, item, spider):
         for word in self.words_to_filter:
-            if word in unicode(item['description']).lower():
+            if word in six.text_type(item['description']).lower():
                 raise DropItem("Contains forbidden word: %s" % word)
         else:
             return item
diff --git a/setup.py b/setup.py
index fac72ad..af95442 100644
--- a/setup.py
+++ b/setup.py
@@ -5,4 +5,8 @@
     version='1.0',
     packages=find_packages(),
     entry_points={'scrapy': ['settings = dirbot.settings']},
+    install_requires=[
+        'scrapy',
+        'six'
+    ]
 )

From 6d0318e0d86b41d13bdd411c1be8704fadf5298f Mon Sep 17 00:00:00 2001
From: Paul Tremberth <paul.tremberth@gmail.com>
Date: Wed, 8 Jun 2016 11:54:20 +0200
Subject: [PATCH 2/4] Use single values when building items

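Also reverts the six dependency: item fields are now single strings, so
the pipeline no longer needs to coerce the description to a text type.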
---
 dirbot/pipelines.py    |  4 +---
 dirbot/spiders/dmoz.py | 12 +++++++-----
 setup.py               |  4 ----
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/dirbot/pipelines.py b/dirbot/pipelines.py
index b7c9f3c..be30566 100644
--- a/dirbot/pipelines.py
+++ b/dirbot/pipelines.py
@@ -1,5 +1,3 @@
-import six
-
 from scrapy.exceptions import DropItem
 
 
@@ -12,7 +10,7 @@ class FilterWordsPipeline(object):
 
     def process_item(self, item, spider):
         for word in self.words_to_filter:
-            if word in six.text_type(item['description']).lower():
+            if word in item['description'].lower():
                 raise DropItem("Contains forbidden word: %s" % word)
         else:
             return item
diff --git a/dirbot/spiders/dmoz.py b/dirbot/spiders/dmoz.py
index 919fe97..250cf6c 100644
--- a/dirbot/spiders/dmoz.py
+++ b/dirbot/spiders/dmoz.py
@@ -20,15 +20,17 @@ def parse(self, response):
         @url http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/
         @scrapes name
         """
-        sel = Selector(response)
-        sites = sel.xpath('//ul[@class="directory-url"]/li')
+        sites = response.css('#site-list-content > div.site-item > div.title-and-desc')
         items = []
 
         for site in sites:
             item = Website()
-            item['name'] = site.xpath('a/text()').extract()
-            item['url'] = site.xpath('a/@href').extract()
-            item['description'] = site.xpath('text()').re('-\s[^\n]*\\r')
+            item['name'] = site.css(
+                'a > div.site-title::text').extract_first().strip()
+            item['url'] = site.xpath(
+                'a/@href').extract_first().strip()
+            item['description'] = site.css(
+                'div.site-descr').xpath('text()[1]').extract_first().strip()
             items.append(item)
 
         return items
diff --git a/setup.py b/setup.py
index af95442..fac72ad 100644
--- a/setup.py
+++ b/setup.py
@@ -5,8 +5,4 @@
     version='1.0',
     packages=find_packages(),
     entry_points={'scrapy': ['settings = dirbot.settings']},
-    install_requires=[
-        'scrapy',
-        'six'
-    ]
 )

From 810eae1563f78c0402d054b4edfb12729da64887 Mon Sep 17 00:00:00 2001
From: Paul Tremberth <paul.tremberth@gmail.com>
Date: Wed, 8 Jun 2016 12:02:57 +0200
Subject: [PATCH 3/4] Simplify description extraction

---
 dirbot/spiders/dmoz.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dirbot/spiders/dmoz.py b/dirbot/spiders/dmoz.py
index 250cf6c..6ecb2ff 100644
--- a/dirbot/spiders/dmoz.py
+++ b/dirbot/spiders/dmoz.py
@@ -30,7 +30,7 @@ def parse(self, response):
             item['url'] = site.xpath(
                 'a/@href').extract_first().strip()
             item['description'] = site.css(
-                'div.site-descr').xpath('text()[1]').extract_first().strip()
+                'div.site-descr::text').extract_first().strip()
             items.append(item)
 
         return items

From 8e995447361ad83f6693d2e07d17d90912268ace Mon Sep 17 00:00:00 2001
From: Paul Tremberth <paul.tremberth@gmail.com>
Date: Thu, 30 Mar 2017 16:10:19 +0200
Subject: [PATCH 4/4] Update README.rst

---
 README.rst | 49 +++++++------------------------------------------
 1 file changed, 7 insertions(+), 42 deletions(-)

diff --git a/README.rst b/README.rst
index dc4e978..9955c53 100644
--- a/README.rst
+++ b/README.rst
@@ -2,48 +2,13 @@
 dirbot
 ======
 
-This is a Scrapy project to scrape websites from public web directories.
+Deprecation notice (March 2017)
+===============================
 
-This project is only meant for educational purposes.
+**This project is now deprecated.**
 
-Items
-=====
+http://dmoz.org has shut down, and Scrapy's tutorial has been rewritten
+against http://quotes.toscrape.com/.
 
-The items scraped by this project are websites, and the item is defined in the
-class::
-
-    dirbot.items.Website
-
-See the source code for more details.
-
-Spiders
-=======
-
-This project contains one spider called ``dmoz`` that you can see by running::
-
-    scrapy list
-
-Spider: dmoz
-------------
-
-The ``dmoz`` spider scrapes the Open Directory Project (dmoz.org), and it's
-based on the dmoz spider described in the `Scrapy tutorial`_
-
-This spider doesn't crawl the entire dmoz.org site but only a few pages by
-default (defined in the ``start_urls`` attribute). These pages are:
-
-* http://www.dmoz.org/Computers/Programming/Languages/Python/Books/
-* http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/
-
-So, if you run the spider regularly (with ``scrapy crawl dmoz``) it will scrape
-only those two pages.
-
-.. _Scrapy tutorial: http://doc.scrapy.org/en/latest/intro/tutorial.html
-
-Pipelines
-=========
-
-This project uses a pipeline to filter out websites containing certain
-forbidden words in their description. This pipeline is defined in the class::
-
-    dirbot.pipelines.FilterWordsPipeline
+Please refer to https://github.com/scrapy/quotesbot for a more relevant
+and up-to-date educational project on how to get started with Scrapy.