@ -1,8 +1,9 @@
import re
import logging
import requests
import lxml . html
from urllib . parse import urlparse
from PyQt5 . QtWidgets import QMessageBox
# Sentinel domain identifier value.
# NOTE(review): presumably stands for "domain could not be recognised" — confirm against the code that uses it.
DOMAIN_UNKNOWN = - 1
@ -43,6 +44,10 @@ class DomainIdentifier:
class DomainLinkGenerator :
"""
Base class for classes that generate a link to the file on their domain, given a sample of a filename pattern, and
that implement a method to scrape the available metadata.
"""
def __init__(self, domain: DomainIdentifier):
    """
    Remember the domain this generator is responsible for.

    :param domain: identifier of the domain served by this generator; it is kept in the private
        ``__identifier`` attribute (name-mangled by Python to ``_DomainLinkGenerator__identifier``).
    """
    self.__identifier = domain
@ -166,8 +171,16 @@ class LinkGenerator:
url_domain = urlparse ( url ) . netloc
for g in self . __link_generators :
if g . get_identifier ( ) == domain or g . get_domain_name ( ) == url_domain :
try :
try :
return g . scrape_tags ( url = url , headers = headers , file_name = file_name )
except requests . exceptions . ConnectionError as e :
logging . warning ( f " Encountered connection error when trying to scrape tags from \" { url } \" . \n "
f " See also: \n { e } " )
QMessageBox . warning ( None , " Connection Error " ,
f " The http connection to \" { url } \" ran into an error. Check your network. \n "
f " \n { e } " )
break
except NotImplementedError :
pass
return None