From 19ce0ab24652c17e81d2f31847470991186c2f71 Mon Sep 17 00:00:00 2001 From: Brannon King Date: Tue, 11 Feb 2020 12:23:19 -0700 Subject: [PATCH 1/5] ogg -> ogv, ignore files that aren't video --- lbry/extras/daemon/daemon.py | 5 ++++- lbry/file_analysis.py | 21 ++++++++++----------- lbry/schema/mime_types.py | 2 ++ scripts/check_video.py | 4 ++-- tests/integration/other/test_transcoding.py | 4 ++-- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/lbry/extras/daemon/daemon.py b/lbry/extras/daemon/daemon.py index 256a8d812..6552f5eb0 100644 --- a/lbry/extras/daemon/daemon.py +++ b/lbry/extras/daemon/daemon.py @@ -3142,7 +3142,10 @@ class Daemon(metaclass=JSONRPCServerType): f"Use --allow-duplicate-name flag to override." ) - file_path = await self._video_file_analyzer.verify_or_repair(validate_file, optimize_file, file_path) + try: + file_path = await self._video_file_analyzer.verify_or_repair(validate_file, optimize_file, file_path) + except ValueError: + pass # it's not a video file claim = Claim() claim.stream.update(file_path=file_path, sd_hash='0' * 96, **kwargs) diff --git a/lbry/file_analysis.py b/lbry/file_analysis.py index 59abafa1c..c2106a896 100644 --- a/lbry/file_analysis.py +++ b/lbry/file_analysis.py @@ -257,12 +257,12 @@ class VideoFileAnalyzer: continue codec = stream["codec_name"].split(",") if "theora" in codec: - return "ogg" + return "ogv" if {"vp8", "vp9", "av1"}.intersection(codec): return "webm" if "theora" in video_encoder: - return "ogg" + return "ogv" elif re.search(r"vp[89x]|av1", video_encoder.split(" ", 1)[0]): return "webm" return "mp4" @@ -274,16 +274,15 @@ class VideoFileAnalyzer: scan_data = json.loads(result) except Exception as e: log.debug("Failure in JSON parsing ffprobe results. Message: %s", str(e)) - if validate: - raise Exception(f'Invalid video file: {file_path}') - log.info("Unable to optimize %s . FFmpeg output was unreadable.", file_path) - return + raise ValueError(f'Absent or unreadable video file: {file_path}') - if "format" not in scan_data: - if validate: - raise FileNotFoundError(f'Unexpected or absent video file contents at: {file_path}') - log.info("Unable to optimize %s . FFmpeg output is missing the format section.", file_path) - return + if "format" not in scan_data or "duration" not in scan_data["format"]: + log.debug("Format data is missing from ffprobe results for: %s", file_path) + raise ValueError(f'Media file does not appear to contain video content at: {file_path}') + + if float(scan_data["format"]["duration"]) < 0.1: + log.debug("Media file appears to be an image: %s", file_path) + raise ValueError(f'Assuming image file at: {file_path}') return scan_data diff --git a/lbry/schema/mime_types.py b/lbry/schema/mime_types.py index f873db897..00505e762 100644 --- a/lbry/schema/mime_types.py +++ b/lbry/schema/mime_types.py @@ -148,6 +148,7 @@ types_map = { '.mobi': ('application/x-mobipocket-ebook', 'document'), '.oga': ('audio/ogg', 'audio'), '.ogv': ('video/ogg', 'video'), + '.ogg': ('video/ogg', 'video'), '.pct': ('image/pict', 'image'), '.pic': ('image/pict', 'image'), '.pict': ('image/pict', 'image'), @@ -162,6 +163,7 @@ types_map = { def guess_media_type(path): + # should we be using "file --mime-type -b $filename" on linux? _, ext = os.path.splitext(path) extension = ext.strip().lower() if extension[1:]: diff --git a/scripts/check_video.py b/scripts/check_video.py index ee3a26899..61331142b 100755 --- a/scripts/check_video.py +++ b/scripts/check_video.py @@ -26,8 +26,8 @@ async def process_video(analyzer, video_file): try: await analyzer.verify_or_repair(True, False, video_file) print("No concerns. Ship it!") - except FileNotFoundError as e: - print(str(e)) + except (FileNotFoundError, ValueError) as e: + print("Analysis failed.", str(e)) except Exception as e: print(str(e)) transcode = input("Would you like to make a repaired clone now? [y/N] ") diff --git a/tests/integration/other/test_transcoding.py b/tests/integration/other/test_transcoding.py index c832ec4ec..60cad7992 100644 --- a/tests/integration/other/test_transcoding.py +++ b/tests/integration/other/test_transcoding.py @@ -130,7 +130,7 @@ class TranscodeValidation(ClaimTestCase): scan_data = await self.analyzer._get_scan_data(True, self.video_file_ogg) extension = self.analyzer._get_best_container_extension(scan_data, "") - self.assertEqual(extension, "ogg") + self.assertEqual(extension, "ogv") scan_data = await self.analyzer._get_scan_data(True, self.video_file_webm) extension = self.analyzer._get_best_container_extension(scan_data, "") @@ -143,7 +143,7 @@ class TranscodeValidation(ClaimTestCase): self.assertEqual("webm", extension) extension = self.analyzer._get_best_container_extension("", "libtheora") - self.assertEqual("ogg", extension) + self.assertEqual("ogv", extension) async def test_no_ffmpeg(self): self.conf.ffmpeg_folder = "I don't really exist/" From a3294d4a0dd6b6abb06f91a2862e66e94f16689f Mon Sep 17 00:00:00 2001 From: Brannon King Date: Tue, 3 Mar 2020 17:17:32 -0700 Subject: [PATCH 2/5] make bit_rate check support maximum --- lbry/conf.py | 10 +++++++--- lbry/file_analysis.py | 34 ++++++++++++++++++---------------- lbry/schema/mime_types.py | 1 - lbry/testcase.py | 3 +++ 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/lbry/conf.py b/lbry/conf.py index fa9b6fed4..0f168a3ee 100644 --- a/lbry/conf.py +++ b/lbry/conf.py @@ -467,12 +467,16 @@ class TranscodeConfig(BaseConfig): ffmpeg_folder = String('The path to ffmpeg and ffprobe', '') video_encoder = String('FFmpeg codec and parameters for the video encoding. ' 'Example: libaom-av1 -crf 25 -b:v 0 -strict experimental', - 'libx264 -crf 18 -vf "format=yuv420p"') + 'libx264 -crf 21 -preset faster -pix_fmt yuv420p') + video_bitrate_maximum = Integer('Maximum bits per second allowed for video streams (0 to disable).', 8400000) + video_scaler = String('FFmpeg scaling parameters for reducing bitrate. ' + 'Example: -vf "scale=-2:720,fps=24" -maxrate 5M -bufsize 3M', + '-vf "scale=if(gte(iw\,ih)\,min(2560\,iw)\,-2):if(lt(iw\,ih)\,min(2560\,ih)\,-2)" -maxrate 8400K -bufsize 5000K') audio_encoder = String('FFmpeg codec and parameters for the audio encoding. ' 'Example: libopus -b:a 128k', - 'aac -b:a 192k') + 'aac -b:a 160k') volume_filter = String('FFmpeg filter for audio normalization.', '-af loudnorm') - volume_analysis_time = Integer('Maximum seconds into the file that we examine audio volume (0 to disable).', '240') + volume_analysis_time = Integer('Maximum seconds into the file that we examine audio volume (0 to disable).', 240) class CLIConfig(TranscodeConfig): diff --git a/lbry/file_analysis.py b/lbry/file_analysis.py index c2106a896..6e2964a66 100644 --- a/lbry/file_analysis.py +++ b/lbry/file_analysis.py @@ -97,24 +97,21 @@ class VideoFileAnalyzer: return "" - @staticmethod - def _verify_bitrate(scan_data: json): - if "bit_rate" not in scan_data["format"]: + def _verify_bitrate(self, scan_data: json, file_path): + bit_rate_max = float(self._conf.video_bitrate_maximum) + if bit_rate_max <= 0: return "" - bit_rate = float(scan_data["format"]["bit_rate"]) - log.debug(" Detected bitrate is %s Mbps", str(bit_rate / 1000000.0)) - pixels = -1.0 - for stream in scan_data["streams"]: - if stream["codec_type"] == "video": - pieces = stream["r_frame_rate"].split('/', 1) - frame_rate = float(pieces[0]) if len(pieces) == 1 \ - else float(pieces[0]) / float(pieces[1]) - pixels = max(pixels, float(stream["height"]) * float(stream["width"]) * frame_rate) + if "bit_rate" in scan_data["format"]: + bit_rate = float(scan_data["format"]["bit_rate"]) + else: + bit_rate = os.stat(file_path).st_size / float(scan_data["format"]["duration"]) + log.debug(" Detected bitrate is %s Mbps. Allowed is %s Mbps", + str(bit_rate / 1000000.0), str(bit_rate_max / 1000000.0)) - if pixels > 0.0 and pixels / bit_rate < 3.0: - return "Bits per second is excessive for this data; this may impact web streaming performance. " \ - f"Actual: {str(bit_rate / 1000000.0)} Mbps" + if bit_rate > bit_rate_max: + return "The bit rate is above the configured maximum. Actual: " \ + f"{bit_rate / 1000000.0} Mbps; Allowed: {bit_rate_max / 1000000.0} Mbps" return "" @@ -178,6 +175,9 @@ class VideoFileAnalyzer: # https://developers.google.com/media/vp9/settings/vod/ return int(-0.011 * height + 40) + def _get_video_scaler(self): + return self._conf.video_scaler + async def _get_video_encoder(self, scan_data): # use what the user said if it's there: # if it's not there, use h264 if we can because it's way faster than the others @@ -297,7 +297,7 @@ class VideoFileAnalyzer: log.debug("Analyzing %s:", file_path) log.debug(" Detected faststart is %s", "false" if fast_start_msg else "true") container_msg = self._verify_container(scan_data) - bitrate_msg = self._verify_bitrate(scan_data) + bitrate_msg = self._verify_bitrate(scan_data, file_path) video_msg = self._verify_video_encoding(scan_data) audio_msg = self._verify_audio_encoding(scan_data) volume_msg = await self._verify_audio_volume(self._conf.volume_analysis_time, file_path) @@ -323,6 +323,8 @@ class VideoFileAnalyzer: if video_msg or bitrate_msg: video_encoder = await self._get_video_encoder(scan_data) transcode_command.append(video_encoder) + # could do the scaling only if bitrate_msg, but if we're going to the effort to re-encode anyway... + transcode_command.append(self._get_video_scaler()) else: transcode_command.append("copy") diff --git a/lbry/schema/mime_types.py b/lbry/schema/mime_types.py index 00505e762..cfb2d9f82 100644 --- a/lbry/schema/mime_types.py +++ b/lbry/schema/mime_types.py @@ -163,7 +163,6 @@ types_map = { def guess_media_type(path): - # should we be using "file --mime-type -b $filename" on linux? _, ext = os.path.splitext(path) extension = ext.strip().lower() if extension[1:]: diff --git a/lbry/testcase.py b/lbry/testcase.py index 4878f36fb..5520f5f95 100644 --- a/lbry/testcase.py +++ b/lbry/testcase.py @@ -220,6 +220,9 @@ class IntegrationTestCase(AsyncioTestCase): self.account: Optional[Account] = None async def asyncSetUp(self): + if sys.version_info < (3, 8): + # hide warning about TaskWakeupMethWrapper, see bugs.python.org/issue38608 + asyncio.get_running_loop().set_debug(False) self.conductor = Conductor(seed=self.SEED) await self.conductor.start_blockchain() self.addCleanup(self.conductor.stop_blockchain) From e060df5367ce3dc359755fe8bc8008e1144f791e Mon Sep 17 00:00:00 2001 From: Brannon King Date: Tue, 3 Mar 2020 17:27:07 -0700 Subject: [PATCH 3/5] hide ValueError --- lbry/extras/daemon/daemon.py | 7 ++----- lbry/file_analysis.py | 11 ++++++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/lbry/extras/daemon/daemon.py b/lbry/extras/daemon/daemon.py index 6552f5eb0..b1717283d 100644 --- a/lbry/extras/daemon/daemon.py +++ b/lbry/extras/daemon/daemon.py @@ -3142,11 +3142,8 @@ class Daemon(metaclass=JSONRPCServerType): f"Use --allow-duplicate-name flag to override." ) - try: - file_path = await self._video_file_analyzer.verify_or_repair(validate_file, optimize_file, file_path) - except ValueError: - pass # it's not a video file - + file_path = await self._video_file_analyzer.verify_or_repair(validate_file, optimize_file, + file_path, ignore_non_video=True) claim = Claim() claim.stream.update(file_path=file_path, sd_hash='0' * 96, **kwargs) tx = await Transaction.claim_create( diff --git a/lbry/file_analysis.py b/lbry/file_analysis.py index 6e2964a66..b5714af86 100644 --- a/lbry/file_analysis.py +++ b/lbry/file_analysis.py @@ -47,7 +47,7 @@ class VideoFileAnalyzer: return await self._verify_executable("ffprobe") version = await self._verify_executable("ffmpeg") - self._which = shutil.which("ffmpeg") + self._which = shutil.which(os.path.join(self._conf.ffmpeg_folder, "ffmpeg")) self._ffmpeg_installed = True log.debug("Using %s at %s", version.splitlines()[0].split(" Copyright")[0], self._which) @@ -286,12 +286,17 @@ class VideoFileAnalyzer: return scan_data - async def verify_or_repair(self, validate, repair, file_path): + async def verify_or_repair(self, validate, repair, file_path, ignore_non_video=False): if not validate and not repair: return file_path await self._verify_ffmpeg_installed() - scan_data = await self._get_scan_data(validate, file_path) + try: + scan_data = await self._get_scan_data(validate, file_path) + except ValueError: + if ignore_non_video: + return file_path + raise fast_start_msg = await self._verify_fast_start(scan_data, file_path) log.debug("Analyzing %s:", file_path) From 926b3e56b9d01fc8c31f601584b2d7f2aea57597 Mon Sep 17 00:00:00 2001 From: Brannon King Date: Tue, 3 Mar 2020 17:37:16 -0700 Subject: [PATCH 4/5] take a hint, lint --- lbry/conf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lbry/conf.py b/lbry/conf.py index 0f168a3ee..8b39570a1 100644 --- a/lbry/conf.py +++ b/lbry/conf.py @@ -471,7 +471,8 @@ class TranscodeConfig(BaseConfig): video_bitrate_maximum = Integer('Maximum bits per second allowed for video streams (0 to disable).', 8400000) video_scaler = String('FFmpeg scaling parameters for reducing bitrate. ' 'Example: -vf "scale=-2:720,fps=24" -maxrate 5M -bufsize 3M', - '-vf "scale=if(gte(iw\,ih)\,min(2560\,iw)\,-2):if(lt(iw\,ih)\,min(2560\,ih)\,-2)" -maxrate 8400K -bufsize 5000K') + r'-vf "scale=if(gte(iw\,ih)\,min(2560\,iw)\,-2):if(lt(iw\,ih)\,min(2560\,ih)\,-2)" ' + r'-maxrate 8400K -bufsize 5000K') audio_encoder = String('FFmpeg codec and parameters for the audio encoding. ' 'Example: libopus -b:a 128k', 'aac -b:a 160k') From ee39880fb5fa0adf802e52caeff70571266215c0 Mon Sep 17 00:00:00 2001 From: Brannon King Date: Tue, 3 Mar 2020 21:38:24 -0700 Subject: [PATCH 5/5] fix items from review --- lbry/extras/daemon/daemon.py | 5 +++-- lbry/testcase.py | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lbry/extras/daemon/daemon.py b/lbry/extras/daemon/daemon.py index b1717283d..2a26f0544 100644 --- a/lbry/extras/daemon/daemon.py +++ b/lbry/extras/daemon/daemon.py @@ -3142,8 +3142,9 @@ class Daemon(metaclass=JSONRPCServerType): f"Use --allow-duplicate-name flag to override." ) - file_path = await self._video_file_analyzer.verify_or_repair(validate_file, optimize_file, - file_path, ignore_non_video=True) + file_path = await self._video_file_analyzer.verify_or_repair( + validate_file, optimize_file, file_path, ignore_non_video=True + ) claim = Claim() claim.stream.update(file_path=file_path, sd_hash='0' * 96, **kwargs) tx = await Transaction.claim_create( diff --git a/lbry/testcase.py b/lbry/testcase.py index 5520f5f95..4878f36fb 100644 --- a/lbry/testcase.py +++ b/lbry/testcase.py @@ -220,9 +220,6 @@ class IntegrationTestCase(AsyncioTestCase): self.account: Optional[Account] = None async def asyncSetUp(self): - if sys.version_info < (3, 8): - # hide warning about TaskWakeupMethWrapper, see bugs.python.org/issue38608 - asyncio.get_running_loop().set_debug(False) self.conductor = Conductor(seed=self.SEED) await self.conductor.start_blockchain() self.addCleanup(self.conductor.stop_blockchain)