From 6fab7546124edabc176da61e99940c8938427ac5 Mon Sep 17 00:00:00 2001 From: Peery Date: Mon, 3 Jan 2022 20:21:34 +0100 Subject: [PATCH] URL prediction verification with file name pattern Introduced the file name as a parameter so that URL predictions can also be verified in link_generator.py --- ArtNet/artnet_manager.py | 6 +++--- ArtNet/web/link_generator.py | 10 +++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ArtNet/artnet_manager.py b/ArtNet/artnet_manager.py index dc84dc6..0566271 100644 --- a/ArtNet/artnet_manager.py +++ b/ArtNet/artnet_manager.py @@ -100,13 +100,13 @@ class ArtNetManager: if url is None: return None - #tags = ArtNet.web.Scrap_Tags.scrap_tags(file_name, url, ArtNet.web.link_generator.predict_domain(file_name)) - tags = LinkGenerator.get_instance().scrape_tags(url, LinkGenerator.get_instance().predict_domain(file_name)) + tags = LinkGenerator.get_instance().scrape_tags(url=url, file_name=file_name, + domain=LinkGenerator.get_instance().predict_domain(file_name)) if tags is None: return None already_applied_tags = self.db_connection.get_art_tags_by_ID(art_ID) - for i in range(len(already_applied_tags)): + for i in range(len(already_applied_tags)): # converting the list to List[str] already_applied_tags[i] = self.db_connection.get_tag_by_ID(already_applied_tags[i])[0][1].strip() importable_tags = [] diff --git a/ArtNet/web/link_generator.py b/ArtNet/web/link_generator.py index 5de42c6..b1d4b84 100644 --- a/ArtNet/web/link_generator.py +++ b/ArtNet/web/link_generator.py @@ -73,11 +73,14 @@ class DomainLinkGenerator: """ raise NotImplementedError - def scrape_tags(self, url: str, headers: dict) -> list: + def scrape_tags(self, url: str, headers: dict, file_name: str) -> list: """ Scrape the tags from the given url for all tags associated with the work. + + The file_name can also be used to check the given url against prediction results. 
:param url: :param headers: + :param file_name: :return: """ raise NotImplementedError @@ -149,11 +152,12 @@ class LinkGenerator: return None - def scrape_tags(self, url: str, domain: int) -> dict: + def scrape_tags(self, url: str, domain: int, file_name: str) -> dict: """ Scrapes the tags from the given url :param url: :param domain: + :param file_name: :return: """ headers = { @@ -163,7 +167,7 @@ class LinkGenerator: for g in self.__link_generators: if g.get_identifier() == domain or g.get_domain_name() == url_domain: try: - return g.scrape_tags(url, headers) + return g.scrape_tags(url=url, headers=headers, file_name=file_name) except NotImplementedError: pass return None