Skip to content

Commit

Permalink
scrpr cleanup: responding to code review comments
Browse files — browse the repository at this point in the history
  • Loading branch information
nicolassaw committed Sep 22, 2024
1 parent e569d6a commit b2849f1
Showing 1 changed file with 9 additions and 15 deletions.
24 changes: 9 additions & 15 deletions src/scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def get_ody_link(self,

try:
base_url = odyssey_version = notes = None
# CSV is located in 'resources' folder
with open(
os.path.join(os.path.dirname(__file__), "..", "..", "resources", "texas_county_data.csv"),
mode="r",
Expand All @@ -172,7 +173,7 @@ def get_ody_link(self,
if not base_url or not odyssey_version:
raise Exception("The required data to scrape this county is not in /resources/texas_county_data.csv")
except Exception as e:
logger.info(f"Error getting county-specific information from csv: {e}")
logger.exception(e, "Error getting county-specific information from csv.")
raise
return base_url, odyssey_version, notes

Expand All @@ -181,6 +182,7 @@ def get_class_and_method(
county: str,
logger: logging.Logger
) -> Tuple[Type[object], Callable]:

"""
Dynamically imports a module, retrieves a class, and gets a method from it based on the county name.
Expand All @@ -189,7 +191,6 @@ def get_class_and_method(
:returns: A tuple containing the instance of the class and the method callable.
:raises ImportError: If the module cannot be imported.
:raises AttributeError: If the class or method cannot be found.
:raises Exception: For any other unexpected errors.
"""

module_name = county
Expand Down Expand Up @@ -219,10 +220,7 @@ def get_class_and_method(
return instance, method

except (FileNotFoundError, ImportError, AttributeError) as e:
logger.error(f"Error dynamically loading module or retrieving class/method: {e}")
raise
except Exception as e:
logger.error(f"Unexpected error: {e}")
logger.exception(e, "Error dynamically loading module or retrieving class/method.")
raise

def scrape_main_page(self,
Expand Down Expand Up @@ -264,8 +262,7 @@ def scrape_main_page(self,
"SignOn": "Sign On",
}

# not sure how this is being used. response doesn't seem to be used anywhere, but it may just be opening the page.
response = request_page_with_retry(
request_page_with_retry(
session=session,
url=urllib.parse.urljoin(base_url, "login.aspx"),
logger=logger,
Expand All @@ -284,7 +281,7 @@ def scrape_main_page(self,
)
main_soup = BeautifulSoup(main_page_html, "html.parser")
except Exception as e:
logger.error(f"Error scraping main page for main page HTML: {e}")
logger.exception(e, f"Error scraping main page for main page HTML.")
raise
return main_page_html, main_soup

Expand Down Expand Up @@ -582,15 +579,12 @@ def scrape_multiple_cases(
jo_id = judicial_officer_to_ID[JO_name]
logger.info(f"Searching cases on {date_string} for {JO_name}")

results_html, results_soup = self.scrape_results_page(
results_soup = self.scrape_results_page(
odyssey_version, base_url, search_url, hidden_values, jo_id, date_string, session, logger, ms_wait
)

scraper_instance, scraper_function = self.get_class_and_method(county, logger)
if scraper_instance and scraper_function:
scraper_function(base_url, results_soup, case_html_path, logger, session, ms_wait)
else:
logger.error("Error: Could not obtain parser instance or function.")
scraper_function = self.get_class_and_method(county, logger)
scraper_function(base_url, results_soup, case_html_path, logger, session, ms_wait)

def scrape(
self,
Expand Down

0 comments on commit b2849f1

Please sign in to comment.