 from typing import Optional, Union
 import torch

+from modules.whisper.data_classes import *
 from modules.utils.paths import DIARIZATION_MODELS_DIR
 from modules.diarize.audio_loader import load_audio, SAMPLE_RATE

@@ -44,7 +45,8 @@ def __call__(self, audio: Union[str, np.ndarray], min_speakers=None, max_speaker
 def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False):
     transcript_segments = transcript_result["segments"]
     for seg in transcript_segments:
-        seg = seg.dict()
+        if isinstance(seg, Segment):
+            seg = seg.model_dump()
         # assign speaker to segment (if any)
         diarize_df['intersection'] = np.minimum(diarize_df['end'], seg['end']) - np.maximum(diarize_df['start'],
                                                                                             seg['start'])
@@ -64,7 +66,7 @@ def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False):
             seg["speaker"] = speaker

         # assign speaker to words
-        if 'words' in seg:
+        if 'words' in seg and seg['words'] is not None:
             for word in seg['words']:
                 if 'start' in word:
                     diarize_df['intersection'] = np.minimum(diarize_df['end'], word['end']) - np.maximum(
@@ -89,7 +91,7 @@ def assign_word_speakers(diarize_df, transcript_result, fill_nearest=False):
     return transcript_result


-class Segment:
+class DiarizationSegment:
     def __init__(self, start, end, speaker=None):
         self.start = start
         self.end = end
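
Context for the change above: `from modules.whisper.data_classes import *` brings a Pydantic `Segment` model into scope, which is why the local helper class is renamed to `DiarizationSegment` and why segments are now converted with `model_dump()`, the Pydantic v2 replacement for the deprecated `.dict()`, only when they actually are model instances; plain dicts pass through untouched. A minimal sketch of that guard, using a stand-in `Segment` model whose fields are assumed for illustration:

from typing import Optional
from pydantic import BaseModel

class Segment(BaseModel):
    # Stand-in for modules.whisper.data_classes.Segment; the real model's
    # fields may differ.
    start: Optional[float] = None
    end: Optional[float] = None
    text: Optional[str] = None

segments = [
    Segment(start=0.0, end=1.5, text="hello"),    # Pydantic model instance
    {"start": 1.5, "end": 3.0, "text": "world"},  # plain dict
]

for seg in segments:
    # Only model instances need converting; model_dump() replaces the
    # deprecated .dict() in Pydantic v2, and dicts are left as they are.
    if isinstance(seg, Segment):
        seg = seg.model_dump()
    print(seg["start"], seg["end"])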
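The speaker assignment itself hinges on the interval-overlap formula visible in both hunks: min(end_a, end_b) - max(start_a, start_b), which is positive only where a diarization turn and a transcript segment (or word) overlap. A small numeric illustration with made-up turns, plus a plausible reconstruction of the pick-the-speaker-with-most-overlap step that lives in the unchanged part of the function:

import numpy as np
import pandas as pd

# Hypothetical diarization output: one row per speaker turn.
diarize_df = pd.DataFrame({
    "start":   [0.0, 2.0, 5.0],
    "end":     [2.0, 5.0, 9.0],
    "speaker": ["SPEAKER_00", "SPEAKER_01", "SPEAKER_00"],
})

seg = {"start": 1.5, "end": 4.0}  # one transcript segment

# Overlap of the segment with every turn; negative means no overlap.
diarize_df["intersection"] = (np.minimum(diarize_df["end"], seg["end"])
                              - np.maximum(diarize_df["start"], seg["start"]))

# Speaker whose turns overlap the segment the most.
overlapping = diarize_df[diarize_df["intersection"] > 0]
speaker = overlapping.groupby("speaker")["intersection"].sum().idxmax()
print(speaker)  # SPEAKER_01 (2.0 s of overlap vs 0.5 s for SPEAKER_00)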