
Commit 482f33f

chenbjin authored and Yibing Liu committed
Cherry pick PaddleNLP emotion_detection and ernie from release1.6 to develop (#3815)
* update PaddleNLP emotion_detection and ernie for Release/1.6 (#3608)
* emotion-detection => 1.6
* ERNIE => 1.6
* [PaddleNLP] update emotion_detection readme
* [PaddleNLP] emotion_detection add download.py (#3649)
* emotion-detection => 1.6
* ERNIE => 1.6
* [PaddleNLP] update emotion_detection readme
* [PaddleNLP] emotion_detection add download.py for windows user
* [PaddleNLP] fix emotion_detection open problem, add paddlehub version (#3706)
* update emotion-detection readme and fix open problem
* fix ernie
1 parent bafd5b9 commit 482f33f

11 files changed (+233 / -120 lines)

PaddleNLP/emotion_detection/README.md

+16 -11
@@ -25,15 +25,15 @@
| BERT | 93.6% | 92.3% | 78.6% |
| ERNIE | 94.4% | 94.0% | 80.6% |

-We also recommend that users check out the [IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122291)
+We also recommend that users check out the [IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122291)

## Quick Start

### Installation

1. Install PaddlePaddle

-This project depends on PaddlePaddle Fluid 1.3.2 or later; please follow the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it
+This project depends on PaddlePaddle Fluid 1.6 or later; please follow the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it

2. Install the code

@@ -46,7 +46,7 @@

3. Environment dependencies

-Please refer to the PaddlePaddle [installation notes](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html) for details
+Python 2 requires version 2.7.15+ and Python 3 requires 3.5.1+/3.6/3.7; for other environment requirements, please refer to the PaddlePaddle [installation notes](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html)

### Code Structure

@@ -56,7 +56,8 @@
.
├── config.json          # configuration file
├── config.py            # configuration loading interface
-├── inference_model.py   # script for saving the inference model
+├── download.py          # script for downloading the dataset and pretrained models
+├── inference_model.py   # script for saving the inference model
├── reader.py            # data reading interface
├── run_classifier.py    # main program entry point: training, prediction, evaluation
├── run.sh               # shell script for training, prediction, evaluation
@@ -86,15 +87,15 @@ python tokenizer.py --test_data_dir ./test.txt.utf8 --batch_size 1 > test.txt.ut

#### Public Dataset

-Here we provide a labeled chatbot conversation dataset that has already been pre-processed with word segmentation. Simply run the data download script ```sh download_data.sh```; once it completes, a ```data``` directory is generated with the following structure:
+Here we provide a labeled chatbot conversation dataset that has already been pre-processed with word segmentation. Simply run the data download script ```sh download_data.sh``` or ```python download.py dataset```; once it completes, a ```data``` directory is generated with the following structure:

```text
.
-├── train.tsv   # training set
-├── dev.tsv     # validation set
-├── test.tsv    # test set
-├── infer.tsv   # data to be predicted
-├── vocab.txt   # vocabulary
+├── train.tsv   # training set
+├── dev.tsv     # validation set
+├── test.tsv    # test set
+├── infer.tsv   # data to be predicted
+├── vocab.txt   # vocabulary
```

### Single-machine Training
@@ -181,6 +182,8 @@ tar xvf emotion_detection_ernie_finetune-1.0.0.tar.gz

```shell
sh download_model.sh
+# or
+python download.py model
```

Both of the above approaches save the pretrained TextCNN and ERNIE models under the ```pretrain_models``` directory; you can then point the ```init_checkpoint``` parameter in the ```run.sh``` script at them for evaluation and prediction.
@@ -302,7 +305,7 @@ Final test result:

We also provide the option of loading the ERNIE model with PaddleHub. PaddleHub is PaddlePaddle's pre-trained model management tool: it can load a pre-trained model with a single line of code, simplifying the use of pre-trained models and transfer learning. For more details, see [PaddleHub](https://github.com/PaddlePaddle/PaddleHub)

-Note: this option requires PaddleHub to be installed first; the installation command is
+Note: this option requires PaddleHub >= 1.2.0 to be installed first; the installation command is
```shell
pip install paddlehub
```
@@ -333,6 +336,8 @@ sh run_ernie.sh infer

## Changelog

+2019/10/21 Adapted to PaddlePaddle 1.6 and added the download.py script.
+
2019/08/26 Standardized the use of configuration, refactored the data-processing code inside the module, and updated the README structure to improve usability.

2019/06/13 Added the option of calling ERNIE through PaddleHub.
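
The PaddleHub route above is only touched by this commit through the new `>= 1.2.0` requirement. As a rough illustration of what the "one line of code" loading step looks like, the sketch below follows the PaddleHub 1.x fine-tuning API; the module name, the `context()` call and the `max_seq_len` value are assumptions drawn from PaddleHub's documentation, not from this diff.

```python
# Hedged sketch, assuming PaddleHub >= 1.2.0 is installed (pip install paddlehub).
# Not part of this commit: module name and context() usage follow PaddleHub 1.x docs.
import paddlehub as hub

module = hub.Module(name="ernie")          # one line loads the pre-trained ERNIE model
inputs, outputs, program = module.context(
    trainable=True, max_seq_len=128)       # placeholder sequence length

# "pooled_output" is the sentence-level feature a classifier head would consume.
print(outputs["pooled_output"].name)
```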

PaddleNLP/emotion_detection/config.py

+2 -1
@@ -19,6 +19,7 @@
from __future__ import division
from __future__ import print_function

+import io
import os
import six
import json
@@ -122,7 +123,7 @@ def load_json(self, file_path):
            return

        try:
-            with open(file_path, "r") as fin:
+            with io.open(file_path, "r") as fin:
                self.json_config = json.load(fin)
        except Exception as e:
            raise IOError("Error in parsing json config file '%s'" % file_path)
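
The `open` to `io.open` switch is what keeps `load_json` behaving identically on Python 2.7 and Python 3, matching the interpreter versions the updated README lists. A minimal sketch of the same pattern, with an explicit UTF-8 encoding added for illustration (the commit itself passes only the mode):

```python
# Minimal sketch, not part of the commit. io.open exists on Python 2 and 3 and
# accepts an explicit encoding, so a UTF-8 JSON config parses the same way under
# either interpreter. "config.json" is just a placeholder path.
import io
import json

def load_json_config(file_path):
    # On Python 2, io.open behaves like Python 3's built-in open().
    with io.open(file_path, "r", encoding="utf-8") as fin:
        return json.load(fin)

print(sorted(load_json_config("config.json").keys()))
```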

PaddleNLP/emotion_detection/download.py

+153
@@ -0,0 +1,153 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Download script, download dataset and pretrain models.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import io
+import os
+import sys
+import time
+import hashlib
+import tarfile
+import requests
+
+
+def usage():
+    desc = ("\nDownload datasets and pretrained models for EmotionDetection task.\n"
+            "Usage:\n"
+            " 1. python download.py dataset\n"
+            " 2. python download.py model\n")
+    print(desc)
+
+
+def md5file(fname):
+    hash_md5 = hashlib.md5()
+    with io.open(fname, "rb") as fin:
+        for chunk in iter(lambda: fin.read(4096), b""):
+            hash_md5.update(chunk)
+    return hash_md5.hexdigest()
+
+
+def extract(fname, dir_path):
+    """
+    Extract tar.gz file
+    """
+    try:
+        tar = tarfile.open(fname, "r:gz")
+        file_names = tar.getnames()
+        for file_name in file_names:
+            tar.extract(file_name, dir_path)
+            print(file_name)
+        tar.close()
+    except Exception as e:
+        raise e
+
+
+def download(url, filename, md5sum):
+    """
+    Download file and check md5
+    """
+    retry = 0
+    retry_limit = 3
+    chunk_size = 4096
+    while not (os.path.exists(filename) and md5file(filename) == md5sum):
+        if retry < retry_limit:
+            retry += 1
+        else:
+            raise RuntimeError("Cannot download dataset ({0}) with retry {1} times.".
+                               format(url, retry_limit))
+        try:
+            start = time.time()
+            size = 0
+            res = requests.get(url, stream=True)
+            filesize = int(res.headers['content-length'])
+            if res.status_code == 200:
+                print("[Filesize]: %0.2f MB" % (filesize / 1024 / 1024))
+                # save by chunk
+                with io.open(filename, "wb") as fout:
+                    for chunk in res.iter_content(chunk_size=chunk_size):
+                        if chunk:
+                            fout.write(chunk)
+                            size += len(chunk)
+                            pr = '>' * int(size * 50 / filesize)
+                            print('\r[Process ]: %s%.2f%%' % (pr, float(size / filesize*100)), end='')
+            end = time.time()
+            print("\n[CostTime]: %.2f s" % (end - start))
+        except Exception as e:
+            print(e)
+
+
+def download_dataset(dir_path):
+    BASE_URL = "https://baidu-nlp.bj.bcebos.com/"
+    DATASET_NAME = "emotion_detection-dataset-1.0.0.tar.gz"
+    DATASET_MD5 = "512d256add5f9ebae2c101b74ab053e9"
+    file_path = os.path.join(dir_path, DATASET_NAME)
+    url = BASE_URL + DATASET_NAME
+
+    if not os.path.exists(dir_path):
+        os.makedirs(dir_path)
+    # download dataset
+    print("Downloading dataset: %s" % url)
+    download(url, file_path, DATASET_MD5)
+    # extract dataset
+    print("Extracting dataset: %s" % file_path)
+    extract(file_path, dir_path)
+    os.remove(file_path)
+
+
+def download_model(dir_path):
+    MODELS = {}
+    BASE_URL = "https://baidu-nlp.bj.bcebos.com/"
+    CNN_NAME = "emotion_detection_textcnn-1.0.0.tar.gz"
+    CNN_MD5 = "b7ee648fcd108835c880a5f5fce0d8ab"
+    ERNIE_NAME = "emotion_detection_ernie_finetune-1.0.0.tar.gz"
+    ERNIE_MD5 = "dfeb68ddbbc87f466d3bb93e7d11c03a"
+    MODELS[CNN_NAME] = CNN_MD5
+    MODELS[ERNIE_NAME] = ERNIE_MD5
+
+    if not os.path.exists(dir_path):
+        os.makedirs(dir_path)
+
+    for model in MODELS:
+        url = BASE_URL + model
+        model_path = os.path.join(dir_path, model)
+        print("Downloading model: %s" % url)
+        # download model
+        download(url, model_path, MODELS[model])
+        # extract model.tar.gz
+        print("Extracting model: %s" % model_path)
+        extract(model_path, dir_path)
+        os.remove(model_path)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        usage()
+        sys.exit(1)
+
+    if sys.argv[1] == "dataset":
+        pwd = os.path.join(os.path.dirname(__file__), './')
+        download_dataset(pwd)
+    elif sys.argv[1] == "model":
+        pwd = os.path.join(os.path.dirname(__file__), './pretrain_models')
+        download_model(pwd)
+    else:
+        usage()
+
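
The new helpers can also be called directly instead of through the command line. The sketch below reuses `download`, `md5file` and `extract` with the dataset URL and checksum hard-coded in `download_dataset()` above; it assumes it runs next to download.py, and the target directory name is made up.

```python
# Hedged sketch: programmatic reuse of the helpers defined in download.py above.
# URL and MD5 are the real dataset constants from download_dataset(); the output
# directory "./data_manual" is an arbitrary placeholder.
from download import download, extract, md5file

ARCHIVE = "emotion_detection-dataset-1.0.0.tar.gz"
URL = "https://baidu-nlp.bj.bcebos.com/" + ARCHIVE
MD5 = "512d256add5f9ebae2c101b74ab053e9"

download(URL, ARCHIVE, MD5)           # retries up to 3 times until the checksum matches
assert md5file(ARCHIVE) == MD5        # archive verified on disk
extract(ARCHIVE, "./data_manual")     # unpack the tar.gz into the target directory
```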

PaddleNLP/emotion_detection/download_model.sh

+2 -2
@@ -1,7 +1,7 @@
#!/bin/bash

-mkdir -p models
-cd models
+mkdir -p pretrain_models
+cd pretrain_models

# download pretrain model file to ./models/
MODEL_CNN=https://baidu-nlp.bj.bcebos.com/emotion_detection_textcnn-1.0.0.tar.gz

PaddleNLP/emotion_detection/inference_model.py

+4 -19
@@ -1,4 +1,4 @@
-# -*- encoding: utf8 -*-
+# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -44,9 +44,8 @@ def do_save_inference_model(args):

    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
-            infer_pyreader, probs, feed_target_names = create_model(
+            infer_loader, probs, feed_target_names = create_model(
                args,
-                pyreader_name='infer_reader',
                num_labels=args.num_labels,
                is_prediction=True)

@@ -79,20 +78,7 @@ def test_inference_model(args, texts):
    dev_count = int(os.environ.get('CPU_NUM', 1))
    place = fluid.CPUPlace()

-    test_prog = fluid.default_main_program()
-    startup_prog = fluid.default_startup_program()
-
-    with fluid.program_guard(test_prog, startup_prog):
-        with fluid.unique_name.guard():
-            infer_pyreader, probs, feed_target_names = create_model(
-                args,
-                pyreader_name='infer_reader',
-                num_labels=args.num_labels,
-                is_prediction=True)
-
-    test_prog = test_prog.clone(for_test=True)
    exe = fluid.Executor(place)
-    exe.run(startup_prog)

    assert (args.inference_model_dir)
    infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
@@ -107,9 +93,8 @@ def test_inference_model(args, texts):
        wids, seq_len = utils.pad_wid(wids)
        data.append(wids)
        seq_lens.append(seq_len)
-    batch_size = len(data)
-    data = np.array(data).reshape((batch_size, 128, 1))
-    seq_lens = np.array(seq_lens).reshape((batch_size, 1))
+    data = np.array(data)
+    seq_lens = np.array(seq_lens)

    pred = exe.run(infer_program,
                   feed={
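
After this change `test_inference_model` relies entirely on the program deserialized by `fluid.io.load_inference_model` instead of rebuilding the network first. A standalone sketch of that pattern is below; the model directory, feed order, shapes and dtypes are placeholders rather than values taken from this file.

```python
# Hedged sketch, assuming PaddlePaddle Fluid 1.6 and a model previously written by
# fluid.io.save_inference_model under ./inference_model (placeholder path).
import numpy as np
import paddle.fluid as fluid

exe = fluid.Executor(fluid.CPUPlace())
infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
    "./inference_model", exe)

# Toy batch: two padded word-id sequences plus their true lengths (made-up shapes).
wids = np.zeros((2, 128), dtype="int64")
seq_lens = np.array([5, 7], dtype="int64")

probs = exe.run(infer_program,
                feed={feed_names[0]: wids, feed_names[1]: seq_lens},
                fetch_list=fetch_targets)
print(probs[0].shape)
```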

PaddleNLP/emotion_detection/reader.py

+4 -4
@@ -96,16 +96,16 @@ def data_generator(self, batch_size, phase='train', epoch=1):
        Generate data for train, dev or test
        """
        if phase == "train":
-            return paddle.batch(
+            return fluid.io.batch(
                self.get_train_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
        elif phase == "dev":
-            return paddle.batch(
+            return fluid.io.batch(
                self.get_dev_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
        elif phase == "test":
-            return paddle.batch(
+            return fluid.io.batch(
                self.get_test_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
        elif phase == "infer":
-            return paddle.batch(
+            return fluid.io.batch(
                self.get_infer_examples(self.data_dir, epoch, self.max_seq_len), batch_size)
        else:
            raise ValueError(
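
These four call sites track the move of `paddle.batch` under `fluid.io.batch` in PaddlePaddle 1.6; the wrapper's contract is unchanged. A toy sketch of that contract (the sample reader and its data are made up):

```python
# Minimal sketch, assuming PaddlePaddle 1.6: fluid.io.batch turns a sample-level
# reader into a batch-level reader, just like the older paddle.batch.
import paddle.fluid as fluid

def toy_reader():
    # Yields (word_ids, label) samples; purely illustrative data.
    for i in range(10):
        yield [i, i + 1], i % 2

batched_reader = fluid.io.batch(toy_reader, batch_size=4)
for batch in batched_reader():
    print(len(batch), batch)   # lists of up to 4 (word_ids, label) tuples
```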
