|
|
@ -73,11 +73,14 @@ class DomainLinkGenerator:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
|
def scrape_tags(self, url: str, headers: dict) -> list:
|
|
|
|
def scrape_tags(self, url: str, headers: dict, file_name: str) -> list:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Scrape the tags from the given url for all tags associated with the work.
|
|
|
|
Scrape the tags from the given url for all tags associated with the work.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
The file_name can also be used to check the given url against prediction results.
|
|
|
|
:param url:
|
|
|
|
:param url:
|
|
|
|
:param headers:
|
|
|
|
:param headers:
|
|
|
|
|
|
|
|
:param file_name:
|
|
|
|
:return:
|
|
|
|
:return:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
raise NotImplementedError
|
|
|
|
raise NotImplementedError
|
|
|
@ -149,11 +152,12 @@ class LinkGenerator:
|
|
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def scrape_tags(self, url: str, domain: int) -> dict:
|
|
|
|
def scrape_tags(self, url: str, domain: int, file_name: str) -> dict:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Scrapes the tags from the given url
|
|
|
|
Scrapes the tags from the given url
|
|
|
|
:param url:
|
|
|
|
:param url:
|
|
|
|
:param domain:
|
|
|
|
:param domain:
|
|
|
|
|
|
|
|
:param file_name:
|
|
|
|
:return:
|
|
|
|
:return:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
headers = {
|
|
|
|
headers = {
|
|
|
@ -163,7 +167,7 @@ class LinkGenerator:
|
|
|
|
for g in self.__link_generators:
|
|
|
|
for g in self.__link_generators:
|
|
|
|
if g.get_identifier() == domain or g.get_domain_name() == url_domain:
|
|
|
|
if g.get_identifier() == domain or g.get_domain_name() == url_domain:
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
return g.scrape_tags(url, headers)
|
|
|
|
return g.scrape_tags(url=url, headers=headers, file_name=file_name)
|
|
|
|
except NotImplementedError:
|
|
|
|
except NotImplementedError:
|
|
|
|
pass
|
|
|
|
pass
|
|
|
|
return None
|
|
|
|
return None
|
|
|
|