Source code for scrapers.base

# SPDX-License-Identifier: GPL-3.0+

from __future__ import unicode_literals

import json
from datetime import date, datetime, timedelta

from neomodel import config as neomodel_config

from scrapers.teiid import Teiid


class BaseScraper(object):
    """Base scraper class to standardize the main scraper functionality."""

    teiid_host = 'virtualdb.engineering.redhat.com'
    teiid_port = 5432
    # Default start date and end date to fetch data. These are class attributes, so they
    # are computed once when the class body is evaluated, not on every instantiation.
    default_since = (datetime.utcnow() - timedelta(days=365)).strftime('%Y-%m-%d')
    default_until = str(date.today() + timedelta(days=1))

    def __init__(self, teiid_user=None, teiid_password=None, kerberos=False, neo4j_user='neo4j',
                 neo4j_password='neo4j', neo4j_server='localhost', neo4j_scheme='bolt'):
        """
        Initialize the BaseScraper class.

        This creates the Teiid connection wrapper and sets the global neomodel database URL.

        :kwarg str teiid_user: the user to connect as
        :kwarg str teiid_password: the password to connect as
        :kwarg bool kerberos: if Kerberos authentication should be used
        :kwarg str neo4j_user: the Neo4j user to connect as
        :kwarg str neo4j_password: the Neo4j user's password to connect with
        :kwarg str neo4j_server: the FQDN of the Neo4j server
        :kwarg str neo4j_scheme: the URL scheme used to connect to the Neo4j server
        """
        if kerberos:
            # In case credentials were passed in, we can wipe them since we won't be using them
            teiid_user = None
            teiid_password = None
            # A different Teiid port is used when Kerberos authentication is requested
            self.teiid_port = 5433

        self.teiid = Teiid(self.teiid_host, self.teiid_port, teiid_user, teiid_password)
        # The Neo4j port (7687) is hard-coded in the connection URL
        neomodel_config.DATABASE_URL = '{scheme}://{user}:{password}@{server}:7687'.format(
            scheme=neo4j_scheme,
            user=neo4j_user,
            password=neo4j_password,
            server=neo4j_server,
        )

    def run(self, since=None):
        """
        Run the scraper.

        :kwarg str since: a datetime to start scraping data from
        :raises NotImplementedError: if the function is not overridden
        """
        raise NotImplementedError()

    @staticmethod
    def _load_extra(build_info):
        """
        Parse the JSON stored in the ``extra`` field of a build.

        :param build_info: build info from Teiid (must support ``build_info['extra']``)
        :return: the decoded JSON object, or an empty dict if the value is missing, is not
            valid JSON, or does not decode to a JSON object
        :rtype: dict
        """
        try:
            extra_json = json.loads(build_info['extra'])
        except (ValueError, TypeError):
            return {}
        # Valid JSON is not necessarily an object (e.g. "null", "[]", "1"). Anything else has
        # no ``.get`` with dict semantics, so treat it the same as having no extra data.
        if not isinstance(extra_json, dict):
            return {}
        return extra_json

    def is_container_build(self, build_info):
        """
        Check whether a Koji build is a container build.

        :param KojiBuild build_info: build info from Teiid
        :return: boolean value indicating whether the build is a container build
        :rtype: bool
        """
        package_name = build_info['package_name']
        extra_json = self._load_extra(build_info)

        # Checking heuristics for determining if a build is a container build, since currently
        # there is no definitive way to do it.
        if extra_json.get('container_koji_build_id') or extra_json.get('container_koji_task_id'):
            return True
        if extra_json.get('image') and package_name.endswith(('-container', '-docker')):
            return True
        return False

    def is_module_build(self, build_info):
        """
        Check whether a Koji build is a module build.

        :param KojiBuild build_info: build info from Teiid
        :return: boolean value indicating whether the build is a module build
        :rtype: bool
        """
        typeinfo = self._load_extra(build_info).get('typeinfo')
        # "typeinfo" may be present but null (or another non-object value) in the JSON
        if not isinstance(typeinfo, dict):
            return False
        return bool(typeinfo.get('module'))