[cleanup] Misc fixes

Closes #4027
This commit is contained in:
pukkandan
2022-06-11 00:33:54 +05:30
parent d05460e5fe
commit 56ba69e4c9
13 changed files with 72 additions and 83 deletions

View File

@@ -2570,7 +2570,7 @@ class YoutubeDL:
format['dynamic_range'] = 'SDR'
if (info_dict.get('duration') and format.get('tbr')
and not format.get('filesize') and not format.get('filesize_approx')):
format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
# Add HTTP headers, so that external programs can use them from the
# json output
@@ -3059,16 +3059,15 @@ class YoutubeDL:
return file
success = True
merger = FFmpegMergerPP(self)
fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
if fd is not FFmpegFD and (
info_dict.get('section_start') or info_dict.get('section_end')):
msg = ('This format cannot be partially downloaded' if merger.available
else 'You have requested downloading the video partially, but ffmpeg is not installed')
if not self.params.get('ignoreerrors'):
self.report_error(f'{msg}. Aborting due to --abort-on-error')
merger, fd = FFmpegMergerPP(self), None
if info_dict.get('url'):
fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
if fd is not FFmpegFD and (
info_dict.get('section_start') or info_dict.get('section_end')):
msg = ('This format cannot be partially downloaded' if merger.available
else 'You have requested downloading the video partially, but ffmpeg is not installed')
self.report_error(f'{msg}. Aborting')
return
self.report_warning(f'{msg}. The entire video will be downloaded')
if info_dict.get('requested_formats') is not None:

View File

@@ -337,14 +337,11 @@ class ChromeCookieDecryptor:
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
if sys.platform in ('linux', 'linux2'):
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
elif sys.platform == 'darwin':
if sys.platform == 'darwin':
return MacChromeCookieDecryptor(browser_keyring_name, logger)
elif sys.platform == 'win32':
elif sys.platform in ('win32', 'cygwin'):
return WindowsChromeCookieDecryptor(browser_root, logger)
else:
raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):

View File

@@ -1487,7 +1487,7 @@ class InfoExtractor:
# however some websites are using 'Text' type instead.
# 1. https://schema.org/VideoObject
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
'filesize': float_or_none(e.get('contentSize')),
'filesize': int_or_none(float_or_none(e.get('contentSize'))),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
'height': int_or_none(e.get('height')),

View File

@@ -534,7 +534,6 @@ from .foxnews import (
)
from .foxsports import FoxSportsIE
from .fptplay import FptplayIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
from .francetv import (
FranceTVIE,
@@ -1348,7 +1347,7 @@ from .radiocanada import (
from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .radiofrance import FranceCultureIE, RadioFranceIE
from .radiozet import RadioZetPodcastIE
from .radiokapital import (
RadioKapitalIE,

View File

@@ -1,46 +0,0 @@
from .common import InfoExtractor
from ..utils import int_or_none, parse_duration, unified_strdate
class FranceCultureIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/franceculture/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
_TESTS = [
{
'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
'info_dict': {
'id': '8440487',
'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau',
'ext': 'mp3',
'title': 'La physique dEinstein aiderait-elle à comprendre le cerveau ?',
'description': 'Existerait-il un pont conceptuel entre la physique de lespace-temps et les neurosciences ?',
'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
'upload_date': '20220514',
'duration': 2750,
},
},
]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
webpage = self._download_webpage(url, display_id)
# _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'\s*"@type"\s*:\s*"AudioObject"\s*.+')
return {
'id': video_id,
'display_id': display_id,
'url': video_data['contentUrl'],
'ext': video_data.get('encodingFormat'),
'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
'duration': parse_duration(video_data.get('duration')),
'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
webpage, 'title', default=self._og_search_title(webpage)),
'description': self._html_search_regex(
r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': self._html_search_regex(
r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
'upload_date': unified_strdate(self._search_regex(
r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
}

View File

@@ -2,11 +2,7 @@ import itertools
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
traverse_obj,
urlencode_postdata,
)
from ..utils import int_or_none, traverse_obj, urlencode_postdata
class FreeTvBaseIE(InfoExtractor):

View File

@@ -1,6 +1,7 @@
import re
from .common import InfoExtractor
from ..utils import parse_duration, unified_strdate
class RadioFranceIE(InfoExtractor):
@@ -54,3 +55,47 @@ class RadioFranceIE(InfoExtractor):
'description': description,
'uploader': uploader,
}
class FranceCultureIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/franceculture/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
_TESTS = [
{
'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
'info_dict': {
'id': '8440487',
'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau',
'ext': 'mp3',
'title': 'La physique dEinstein aiderait-elle à comprendre le cerveau ?',
'description': 'Existerait-il un pont conceptuel entre la physique de lespace-temps et les neurosciences ?',
'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
'upload_date': '20220514',
'duration': 2750,
},
},
]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
webpage = self._download_webpage(url, display_id)
# _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'\s*"@type"\s*:\s*"AudioObject"\s*.+')
return {
'id': video_id,
'display_id': display_id,
'url': video_data['contentUrl'],
'ext': video_data.get('encodingFormat'),
'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
'duration': parse_duration(video_data.get('duration')),
'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
webpage, 'title', default=self._og_search_title(webpage)),
'description': self._html_search_regex(
r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': self._html_search_regex(
r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
'upload_date': unified_strdate(self._search_regex(
r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
}

View File

@@ -3674,8 +3674,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_data = None
if webpage:
initial_data = self._search_json(
self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', video_id, fatal=False)
initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
if not initial_data:
query = {'videoId': video_id}
query.update(self._get_checkok_params())

View File

@@ -45,9 +45,6 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
an initial argument and then with the returned value of the previous
PostProcessor.
The chain will be stopped if one of them ever returns None or the end
of the chain is reached.
PostProcessor objects follow a "mutual registration" process similar
to InfoExtractor objects.

View File

@@ -3498,13 +3498,13 @@ def match_filter_func(filters):
def download_range_func(chapters, ranges):
def inner(info_dict, ydl):
warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
else 'Chapter information is unavailable')
else 'Cannot match chapters since chapter information is unavailable')
for regex in chapters or []:
for i, chapter in enumerate(info_dict.get('chapters') or []):
if re.search(regex, chapter['title']):
warning = None
yield {**chapter, 'index': i}
if warning:
if chapters and warning:
ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])
@@ -4903,9 +4903,9 @@ def to_high_limit_path(path):
return path
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=None):
val = traverse_obj(obj, *variadic(field))
if val in ignore:
if (not val and val != 0) if ignore is NO_DEFAULT else val in ignore:
return default
return template % (func(val) if func else val)