Source code for scrapers.bugzilla

# SPDX-License-Identifier: GPL-3.0+
from __future__ import unicode_literals

from builtins import bytes

from estuary import log
from estuary.models.bugzilla import BugzillaBug
from estuary.models.user import User
from estuary.utils.general import timestamp_to_date
from scrapers.base import BaseScraper


[docs]class BugzillaScraper(BaseScraper): """Scrapes the Bugzilla tables in Teiid."""
[docs] def run(self, since=None, until=None): """ Run the Bugzilla scraper. :param str since: a datetime to start scraping data from :param str until: a datetime to scrape data until """ log.info('Starting initial load of Bugzilla bugs') if since is None: start_date = self.default_since else: start_date = timestamp_to_date(since) if until is None: end_date = self.default_until else: end_date = timestamp_to_date(until) bugs = self.get_bugzilla_bugs(start_date, end_date) log.info('Successfully fetched {0} bugs from teiid'.format(len(bugs))) self.update_neo4j(bugs) log.info('Initial load of Bugzilla bugs complete!')
[docs] def get_bugzilla_bugs(self, start_date, end_date): """ Get the Buzilla bugs information from Teiid. :param datetime.datetime start_date: when to start scraping data from :param datetime.datetime end_date: determines until when to scrape data :return: list of dictionaries containing bug info :rtype: list """ log.info('Getting all Bugzilla bugs since {0} until {1}'.format(start_date, end_date)) sql_query = """ SELECT bugs.*, products.name AS product_name, classifications.name AS classification, assigned.login_name AS assigned_to_email, reported.login_name AS reported_by_email, qa.login_name AS qa_contact_email FROM BugzillaC.bugs AS bugs LEFT JOIN BugzillaC.products AS products ON bugs.product_id = products.id LEFT JOIN BugzillaC.classifications AS classifications ON products.classification_id = classifications.id LEFT JOIN BugzillaC.profiles AS assigned ON bugs.assigned_to = assigned.userid LEFT JOIN BugzillaC.profiles AS reported ON bugs.reporter = reported.userid LEFT JOIN BugzillaC.profiles AS qa ON bugs.qa_contact = qa.userid WHERE classifications.name = 'Red Hat' AND bugs.delta_ts >= '{0}' AND bugs.delta_ts <= '{1}' ORDER BY bugs.creation_ts DESC; """.format(start_date, end_date) return self.teiid.query(sql=sql_query, db='republic')
[docs] def create_user_node(self, email): """ Create a User node in Neo4j. :param str email: the user's email :return: User object """ # If email is a Red Hat email address, username is same as domain name # prefix in the email address else store email as username if email.split('@')[1] == 'redhat.com': username = email.split('@')[0] else: username = email user = User.create_or_update(dict( username=username, email=email ))[0] return user
[docs] def update_neo4j(self, bugs): """ Update Neo4j with Bugzilla bugs information from Teiid. :param list bugs: a list of dictionaries """ log.info('Beginning to upload data to Neo4j') count = 0 for bug_dict in bugs: bug = BugzillaBug.create_or_update(dict( id_=bug_dict['bug_id'], severity=bug_dict['bug_severity'], status=bug_dict['bug_status'], creation_time=bug_dict['creation_ts'], modified_time=bug_dict['delta_ts'], priority=bug_dict['priority'], product_name=bytes(bug_dict['product_name'], 'utf-8').decode(), product_version=bug_dict['version'], resolution=bug_dict['resolution'], target_milestone=bug_dict['target_milestone'], short_description=bytes(bug_dict['short_desc'], 'utf-8').decode() ))[0] count += 1 log.info('Uploaded {0} bugs out of {1}'.format(count, len(bugs))) # Creating User nodes and updating their relationships if bug_dict['assigned_to']: assignee = self.create_user_node(bug_dict['assigned_to_email']) bug.conditional_connect(bug.assignee, assignee) if bug_dict['reporter']: reporter = self.create_user_node(bug_dict['reported_by_email']) bug.conditional_connect(bug.reporter, reporter) if bug_dict['qa_contact']: qa_contact = self.create_user_node(bug_dict['qa_contact_email']) bug.conditional_connect(bug.qa_contact, qa_contact)