Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
if: (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags')) && github.actor != 'dependabot[bot]'
with:
context: .
platforms: linux/amd64
platforms: linux/amd64, linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
Expand Down
21 changes: 1 addition & 20 deletions ingestors/media/audio.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
import logging
from datetime import datetime
from followthemoney import model
from pymediainfo import MediaInfo
from normality import stringify

from ingestors.ingestor import Ingestor
from ingestors.support.timestamp import TimestampSupport
from ingestors.exc import ProcessingException
from ingestors.support.transcription import TranscriptionSupport

log = logging.getLogger(__name__)


class AudioIngestor(Ingestor, TimestampSupport, TranscriptionSupport):
class AudioIngestor(Ingestor, TimestampSupport):
MIME_TYPES = [
"audio/mpeg",
"audio/mp3",
Expand Down Expand Up @@ -59,22 +56,6 @@ def ingest(self, file_path, entity):
entity.add("duration", track.duration)
except Exception as ex:
raise ProcessingException(f"Could not read audio: {ex}") from ex
try:
start = datetime.now()
log.info(f"Attempting to transcribe {file_path}")
self.transcribe(file_path, entity)
elapsed_time = datetime.now() - start
# Note: timedelta.seconds is capped at [0, 86400) and cannot represent 24h or more;
# total_seconds() is used below, which covers the full elapsed duration.
elapsed_time = divmod(elapsed_time.total_seconds(), 60)[0]
log.info(
f"Transcription duration: {elapsed_time} minutes (audio duration: {entity.get('duration')})"
)
except Exception as ex:
# If the transcription fails, the file processing should still count as a success.
# The existence of a transcription is not mandatory, for now.
entity.set("processingError", stringify(ex))
log.error(ex)

@classmethod
def match(cls, file_path, entity):
Expand Down
22 changes: 1 addition & 21 deletions ingestors/media/video.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
import logging
from datetime import datetime
from followthemoney import model
from pymediainfo import MediaInfo
from normality import stringify

from ingestors.ingestor import Ingestor
from ingestors.support.timestamp import TimestampSupport
from ingestors.exc import ProcessingException
from ingestors.support.transcription import TranscriptionSupport

log = logging.getLogger(__name__)


class VideoIngestor(Ingestor, TimestampSupport, TranscriptionSupport):
class VideoIngestor(Ingestor, TimestampSupport):
MIME_TYPES = [
"application/x-shockwave-flash",
"video/quicktime",
Expand Down Expand Up @@ -47,23 +44,6 @@ def ingest(self, file_path, entity):
entity.add("duration", track.duration)
except Exception as ex:
raise ProcessingException("Could not read video: %r", ex) from ex
try:
start = datetime.now()
log.info(f"Attempting to transcribe {file_path}")
audio_only_file = self.extract_audio(file_path)
self.transcribe(audio_only_file, entity)
elapsed_time = datetime.now() - start
# Note: timedelta.seconds is capped at [0, 86400) and cannot represent 24h or more;
# total_seconds() is used below, which covers the full elapsed duration.
elapsed_time = divmod(elapsed_time.total_seconds(), 60)[0]
log.info(
f"Transcription duration: {elapsed_time} minutes (audio duration: {entity.get('duration')})"
)
except Exception as ex:
# If the transcription fails, the file processing should still count as a success.
# The existence of a transcription is not mandatory, for now.
entity.set("processingError", stringify(ex))
log.error(ex)

@classmethod
def match(cls, file_path, entity):
Expand Down
Loading