@@ -75,6 +75,8 @@ def read_one_example(self, json_dict):
75
75
else :
76
76
labels = None
77
77
78
+ confidences = json_dict .get ("confs" , None )
79
+
78
80
additional_features = None
79
81
if self .sci_sum :
80
82
if self .sci_sum_fake_scores :
@@ -98,18 +100,19 @@ def read_one_example(self, json_dict):
98
100
if len (sentences ) == 0 :
99
101
return []
100
102
101
- for sentences_loop , labels_loop , additional_features_loop in \
102
- self .enforce_max_sent_per_example (sentences , labels , additional_features ):
103
+ for sentences_loop , labels_loop , confidences_loop , additional_features_loop in \
104
+ self .enforce_max_sent_per_example (sentences , labels , confidences , additional_features ):
103
105
104
106
instance = self .text_to_instance (
105
107
sentences = sentences_loop ,
106
108
labels = labels_loop ,
109
+ confidences = confidences_loop ,
107
110
additional_features = additional_features_loop ,
108
111
)
109
112
instances .append (instance )
110
113
return instances
111
114
112
- def enforce_max_sent_per_example (self , sentences , labels = None , additional_features = None ):
115
+ def enforce_max_sent_per_example (self , sentences , labels = None , confidences = None , additional_features = None ):
113
116
"""
114
117
Splits examples with len(sentences) > self.max_sent_per_example into multiple smaller examples
115
118
with len(sentences) <= self.max_sent_per_example.
@@ -121,20 +124,24 @@ def enforce_max_sent_per_example(self, sentences, labels=None, additional_featur
121
124
"""
122
125
if labels is not None :
123
126
assert len (sentences ) == len (labels )
127
+ if confidences is not None :
128
+ assert len (sentences ) == len (confidences )
124
129
if additional_features is not None :
125
130
assert len (sentences ) == len (additional_features )
126
131
127
132
if len (sentences ) > self .max_sent_per_example and self .max_sent_per_example > 0 :
128
133
i = len (sentences ) // 2
129
134
l1 = self .enforce_max_sent_per_example (
130
135
sentences [:i ], None if labels is None else labels [:i ],
136
+ None if confidences is None else confidences [:i ],
131
137
None if additional_features is None else additional_features [:i ])
132
138
l2 = self .enforce_max_sent_per_example (
133
139
sentences [i :], None if labels is None else labels [i :],
140
+ None if confidences is None else confidences [i :],
134
141
None if additional_features is None else additional_features [i :])
135
142
return l1 + l2
136
143
else :
137
- return [(sentences , labels , additional_features )]
144
+ return [(sentences , labels , confidences , additional_features )]
138
145
139
146
def is_bad_sentence (self , sentence : str ):
140
147
if len (sentence ) > 10 and len (sentence ) < 600 :
@@ -171,10 +178,13 @@ def filter_bad_sci_sum_sentences(self, sentences, labels):
171
178
def text_to_instance (self ,
172
179
sentences : List [str ],
173
180
labels : List [str ] = None ,
181
+ confidences : List [float ] = None ,
174
182
additional_features : List [float ] = None ,
175
183
) -> Instance :
176
184
if not self .predict :
177
185
assert len (sentences ) == len (labels )
186
+ if confidences is not None :
187
+ assert len (sentences ) == len (confidences )
178
188
if additional_features is not None :
179
189
assert len (sentences ) == len (additional_features )
180
190
@@ -209,6 +219,8 @@ def text_to_instance(self,
209
219
LabelField (str (label )+ "_label" ) for label in labels
210
220
])
211
221
222
+ if confidences is not None :
223
+ fields ['confidences' ] = ArrayField (np .array (confidences ))
212
224
if additional_features is not None :
213
225
fields ["additional_features" ] = ArrayField (np .array (additional_features ))
214
226
0 commit comments