Commit c471fe3

first commit
1 parent 4624700 commit c471fe3

File tree

17 files changed: +3675 -0 lines changed


.pylintrc

Lines changed: 408 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
# abstruct
Adding structure to scientific abstracts

## Setting up

```
# Create a Python environment with your environment manager:

conda create -n abstruct python=3.6

source activate abstruct

conda env create -f environment.yml
```

## Tests

```
pytest tests/
```

data/CSAbstruct/dev.jsonl

Lines changed: 295 additions & 0 deletions
Large diffs are not rendered by default.

data/CSAbstruct/test.jsonl

Lines changed: 226 additions & 0 deletions
Large diffs are not rendered by default.

data/CSAbstruct/train.jsonl

Lines changed: 1668 additions & 0 deletions
Large diffs are not rendered by default.

requirements.txt

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
jsonlines
-e git://github.com/ibeltagy/allennlp@fp16_and_others#egg=allennlp
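
The CSAbstruct splits above are JSON Lines files, and the `jsonlines` package listed here can read them directly. As a quick sanity check after setup, something along these lines should print the record count per split (a sketch, assuming it is run from the repository root):

```
import jsonlines

# Count records in each CSAbstruct split; paths are relative to the repo root.
for split in ("train", "dev", "test"):
    with jsonlines.open("data/CSAbstruct/{}.jsonl".format(split)) as reader:
        print(split, sum(1 for _ in reader))
```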

scripts/sci_sum_eval/__init__.py

Whitespace-only changes.
Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
# When downloading the data, both train and test papers ended up with their data in the same folder
# This script filters out the data for the test paper ids into a separate file
import os
import json

def main():
    sci_sum_data_path = os.path.join(os.getcwd(), "data", "sci_sum")
    test_ids_path = os.path.join(sci_sum_data_path, "cspubsum_test_ids.txt")
    with open(test_ids_path) as _test_ids_file:
        test_ids = set([url_id.split('/')[-1] for url_id in _test_ids_file.read().split('\n')[:-1]])

    print("Number of test ids:", len(test_ids))

    train_path = os.path.join(sci_sum_data_path, "train.jsonl")
    dev_path = os.path.join(sci_sum_data_path, "dev.jsonl")
    test_path = os.path.join(sci_sum_data_path, "test.jsonl")

    filtered_train_examples = []
    filtered_dev_examples = []
    filtered_test_examples = []

    actual_test_examples = []

    with open(train_path) as _train_jsonl_file:
        train_lines = [json.loads(line) for line in _train_jsonl_file.read().split('\n')[:-1]]

    with open(dev_path) as _dev_jsonl_file:
        dev_lines = [json.loads(line) for line in _dev_jsonl_file.read().split('\n')[:-1]]

    with open(test_path) as _test_jsonl_file:
        test_lines = [json.loads(line) for line in _test_jsonl_file.read().split('\n')[:-1]]

    print("Train examples read:", len(train_lines))
    print("Dev examples read:", len(dev_lines))
    print("Test examples read:", len(test_lines))

    ids_filtered_out = set()
    for line in train_lines:
        paper_id = line["abstract_id"]
        if paper_id in test_ids:
            ids_filtered_out.add(paper_id)
            actual_test_examples.append(line)
        else:
            filtered_train_examples.append(line)

    for line in dev_lines:
        paper_id = line["abstract_id"]
        if paper_id in test_ids:
            ids_filtered_out.add(paper_id)
            actual_test_examples.append(line)
        else:
            filtered_dev_examples.append(line)

    for line in test_lines:
        paper_id = line["abstract_id"]
        if paper_id in test_ids:
            ids_filtered_out.add(paper_id)
            actual_test_examples.append(line)
        else:
            filtered_test_examples.append(line)

    print("Number of examples filtered out:", len(actual_test_examples))
    print("Ids not found in original set:", test_ids - ids_filtered_out)
    print("Train examples to write:", len(filtered_train_examples))
    print("Dev examples to write:", len(filtered_dev_examples))
    print("Test examples to write:", len(filtered_test_examples))

    new_train_path = os.path.join(sci_sum_data_path, "train_new.jsonl")
    new_dev_path = os.path.join(sci_sum_data_path, "dev_new.jsonl")
    new_test_path = os.path.join(sci_sum_data_path, "test_new.jsonl")
    actual_test_path = os.path.join(sci_sum_data_path, "rouge_test.jsonl")

    # Commented out writing to file to prevent me from accidentally overwriting data files again
    # with open(new_train_path, "w") as _new_train_file:
    #     for line in filtered_train_examples:
    #         _new_train_file.write(json.dumps(line))
    #         _new_train_file.write('\n')

    # with open(new_dev_path, "w") as _new_dev_file:
    #     for line in filtered_dev_examples:
    #         _new_dev_file.write(json.dumps(line))
    #         _new_dev_file.write('\n')

    # with open(new_test_path, "w") as _new_test_file:
    #     for line in filtered_test_examples:
    #         _new_test_file.write(json.dumps(line))
    #         _new_test_file.write('\n')

    # with open(actual_test_path, "w") as _actual_test_file:
    #     for line in actual_test_examples:
    #         _actual_test_file.write(json.dumps(line))
    #         _actual_test_file.write('\n')

if __name__ == "__main__":
    main()
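
The write step above is deliberately commented out to avoid clobbering the existing data files. If it is ever re-enabled, a guarded helper along these lines would keep that protection; this is only a sketch, and `write_jsonl` is a hypothetical name, not something defined in this commit:

```
import json
import os

def write_jsonl(path, examples, overwrite=False):
    # Refuse to overwrite an existing split unless explicitly asked to.
    if os.path.exists(path) and not overwrite:
        raise FileExistsError("{} already exists; pass overwrite=True to replace it".format(path))
    with open(path, "w") as out_file:
        for example in examples:
            out_file.write(json.dumps(example))
            out_file.write("\n")

# e.g. write_jsonl(new_train_path, filtered_train_examples)
```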

scripts/sci_sum_eval/rouge_eval.py

Lines changed: 276 additions & 0 deletions
@@ -0,0 +1,276 @@
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir, os.pardir))))

import json
from tqdm import tqdm
import numpy as np
import argparse

from allennlp.models.archival import load_archive
from abstruct.models.abstruct_baseline_model_before_multi_sep import AbstructBaselineModelBeforeMultiSep
from abstruct.models.abstruct_baseline_model import AbstructBaselineModel
from abstruct.models.abstruct_predictor import AbstructPredictor
from abstruct.data.abstruct_dataset_reader import AbstructDatasetReader
from abstruct.data.abstruct_dataset_reader_before_multi_sep import AbstructDatasetReaderBeforeMultiSep
from allennlp.service.predictors import Predictor
from allennlp.common.params import Params

# Rouge computation is taken from https://github.com/EdCo95/scientific-paper-summarisation/blob/master/Evaluation/rouge.py
#
# File Name : https://github.com/EdCo95/scientific-paper-summarisation/blob/master/Evaluation/rouge.py
#
# Description : Computes ROUGE-L metric as described by Lin and Hovy (2004)
#
# Creation Date : 2015-01-07 06:03
# Author : Ramakrishna Vedantam <vrama91@vt.edu>

def my_lcs(string, sub):
    """
    Calculates longest common subsequence for a pair of tokenized strings
    :param string : list of str : tokens from a string split using whitespace
    :param sub : list of str : shorter string, also split using whitespace
    :returns: length (int): length of the longest common subsequence between the two strings

    Note: my_lcs only gives length of the longest common subsequence, not the actual LCS
    """
    if len(string) < len(sub):
        sub, string = string, sub

    lengths = [[0 for i in range(0, len(sub) + 1)] for j in range(0, len(string) + 1)]

    for j in range(1, len(sub) + 1):
        for i in range(1, len(string) + 1):
            if string[i - 1] == sub[j - 1]:
                lengths[i][j] = lengths[i - 1][j - 1] + 1
            else:
                lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1])

    return lengths[len(string)][len(sub)]
class Rouge():
    '''
    Class for computing ROUGE-L score for a set of candidate sentences for the MS COCO test set

    '''
    def __init__(self):
        # vrama91: updated the value below based on discussion with Hovy
        self.beta = 1.2

    def calc_score(self, candidate, refs):
        """
        Compute ROUGE-L score given one candidate and references for an image
        :param candidate: str : candidate sentence to be evaluated
        :param refs: list of str : COCO reference sentences for the particular image to be evaluated
        :returns score: float (ROUGE-L score for the candidate evaluated against references)
        """
        assert len(candidate) == 1
        assert len(refs) > 0
        prec = []
        rec = []

        # split into tokens
        token_c = candidate[0].split(" ")

        for reference in refs:
            # split into tokens
            token_r = reference.split(" ")
            # compute the longest common subsequence
            lcs = my_lcs(token_r, token_c)
            prec.append(lcs / float(len(token_c)))
            rec.append(lcs / float(len(token_r)))

        prec_max = max(prec)
        rec_max = max(rec)

        if prec_max != 0 and rec_max != 0:
            score = ((1 + self.beta**2) * prec_max * rec_max) / float(rec_max + self.beta**2 * prec_max)
        else:
            score = 0.0
        return score

    def compute_score(self, gts, res):
        """
        Computes Rouge-L score given a set of reference and candidate sentences for the dataset
        Invoked by evaluate_captions.py
        :param gts: dict : reference MS-COCO sentences with "image name" key and "tokenized sentences" as values
        :param res: dict : candidate / test sentences with "image name" key and "tokenized sentences" as values
        :returns: average_score: float (mean ROUGE-L score computed by averaging scores for all the images)
        """
        assert gts.keys() == res.keys()
        imgIds = gts.keys()

        score = []
        for id in imgIds:
            hypo = res[id]
            ref = gts[id]

            score.append(self.calc_score(hypo, ref))

            # Sanity check.
            assert type(hypo) is list
            assert len(hypo) == 1
            assert type(ref) is list
            assert len(ref) > 0

        average_score = np.mean(np.array(score))
        return average_score, np.array(score)

    def method(self):
        return "Rouge"
def main(model_path: str, test_jsonl_file: str, test_highlights_path: str, test_abstracts_path: str, model_type: str, reader_type: str):
    rouge = Rouge()
    # Load paper highlights
    with open(test_highlights_path) as _highlights_json_file:
        highlights_by_id = json.load(_highlights_json_file)

    with open(test_abstracts_path) as _abstracts_json_file:
        abstracts_by_id = json.load(_abstracts_json_file)

    # Load allennlp model
    text_field_embedder = {"token_embedders": {"bert": {"pretrained_model": "/net/nfs.corp/s2-research/scibert/scibert_scivocab_uncased.tar.gz"}}}
    token_indexers = {"bert": {"pretrained_model": "/net/nfs.corp/s2-research/scibert/scivocab_uncased.vocab"}}
    overrides = {"model": {"type": model_type, "text_field_embedder": text_field_embedder},
                 "dataset_reader": {"type": reader_type, "token_indexers": token_indexers}}
    model_archive = load_archive(model_path, overrides=json.dumps(overrides), cuda_device=0)
    predictor = Predictor.from_archive(model_archive, 'abstruct-predictor')

    # Load papers to predict on
    with open(test_jsonl_file) as _test_jsonl_file:
        test_lines = [json.loads(line) for line in _test_jsonl_file.read().split('\n')[:-1]]

    print("{} test lines loaded".format(len(test_lines)))

    num_sentences_limit = 500
    train_on_highlights = True
    dataset_reader = AbstructDatasetReader.from_params(Params({
        "use_lexical_features": False,
        "use_umls_features": False,
        "lazy": True,
        "sent_len_limit": 40,
        "num_sentences_limit": num_sentences_limit,
        "umls_features_path": "data/PubMed_20k_formatted/umls_features.json",
        "word_splitter": "just_spaces",
        "token_indexers": {
            "bert": {
                "type": "bert-pretrained",
                "pretrained_model": "/net/nfs.corp/s2-research/scibert/scivocab_uncased.vocab",
                "do_lowercase": True,
                "use_starting_offsets": False
            }
        },
        "use_sep": 'no',  # 'all'
        "sci_sum_context_size": -1,
        "max_sent_per_example": 25,  # 10
        "predict": True,
        "sci_sum": True,
        "use_abstract_scores": True,  # False
        "use_sentence_index": True,  # False
        "train_on_highlights": train_on_highlights,
    }))

    pos_index = 2
    neg_index = 1
    neutral_index = 0

    abstract_total_score = 0
    abstract_total_instances = 0
    # Using abstracts as the predictions
    for line in test_lines:
        paper_id = line["abstract_id"]
        abstract_sentences = abstracts_by_id[paper_id]
        highlights = highlights_by_id[paper_id]

        summary_score = 0
        summary_sentences = 0
        for sentence in abstract_sentences:
            score = rouge.calc_score([sentence], highlights)
            summary_score += score
            summary_sentences += 1

        avg_summary_score = summary_score / summary_sentences
        abstract_total_score += avg_summary_score
        abstract_total_instances += 1

    print("final score:", abstract_total_score / abstract_total_instances)

    test_jsons = []
    with open(test_jsonl_file) as f:
        for line in f:
            test_jsons.append(json.loads(line))

    print("{} test jsons loaded".format(len(test_jsons)))

    # Predict on said papers

    total_score = 0
    total_instances = 0
    for json_dict in tqdm(test_jsons, desc="Predicting..."):
        instances = dataset_reader.read_one_example(json_dict)
        if not isinstance(instances, list):  # if the data reader returns one instance, put it in a list
            instances = [instances]

        sentences = json_dict['sentences'][:num_sentences_limit]
        gold_scores_list = json_dict['highlight_scores'][:num_sentences_limit]
        paper_id = instances[0].fields["abstract_id"].metadata
        highlights = highlights_by_id[paper_id]

        scores_list = []
        for instance in instances:
            prediction = predictor.predict_instance(instance)
            probs = prediction['action_probs']
            if not train_on_highlights:
                probs = [p[pos_index] for p in probs]
            scores_list.extend(probs)

        assert len(sentences) == len(scores_list)
        assert len(sentences) == len(gold_scores_list)

        sentences_with_scores = list(zip(sentences, scores_list))

        # Note: the following line should get Oracle performance
        # sentences_with_scores = list(zip(sentences, gold_scores_list))
        sentences_with_scores = sorted(sentences_with_scores, key=lambda x: x[1], reverse=True)

        top_10_sentences = [s[0] for s in sentences_with_scores[:10]]

        summary_score = 0
        summary_sentences = 0
        for sentence in top_10_sentences:
            score = rouge.calc_score([sentence], highlights)
            summary_score += score
            summary_sentences += 1

        avg_summary_score = summary_score / summary_sentences
        total_score += avg_summary_score
        total_instances += 1

    print("final score:", total_score / total_instances)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--path_to_model",
        help="Path to the model to evaluate"
    )
    parser.add_argument(
        "--model_type",
        help="The AllenNLP registered model type",
        default="AbstructBaselineModel"
    )
    parser.add_argument(
        "--reader_type",
        help="The AllenNLP registered dataset reader type",
        default="AbstructDatasetReader"
    )

    args = parser.parse_args()

    abstruct_root_dir = os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir, os.pardir)))
    test_jsonl_file = os.path.join(abstruct_root_dir, "data", "sci_sum", "abstruct_regen_data_test.jsonl")
    test_highlights_path = os.path.join(abstruct_root_dir, "data", "sci_sum", "test_highlights.json")
    test_abstracts_path = os.path.join(abstruct_root_dir, "data", "sci_sum", "test_abstracts.json")

    main(args.path_to_model, test_jsonl_file, test_highlights_path, test_abstracts_path, args.model_type, args.reader_type)
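
For intuition about what `Rouge.calc_score` returns: it is an LCS-based F-measure with beta = 1.2, so with a single reference it reduces to the formula below. This standalone sketch restates that computation on toy sentences rather than importing the script above:

```
def lcs_len(a, b):
    # Classic dynamic-programming LCS length over token lists.
    table = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            if a[i - 1] == b[j - 1]:
                table[i][j] = table[i - 1][j - 1] + 1
            else:
                table[i][j] = max(table[i - 1][j], table[i][j - 1])
    return table[len(a)][len(b)]

candidate = "the cat sat on the mat".split()
reference = "the cat was on the mat".split()
beta = 1.2

lcs = lcs_len(reference, candidate)
prec = lcs / len(candidate)
rec = lcs / len(reference)
score = ((1 + beta ** 2) * prec * rec) / (rec + beta ** 2 * prec) if prec and rec else 0.0
print(round(score, 3))  # LCS = 5 tokens -> precision = recall = 5/6 -> score ≈ 0.833
```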
