# Source code for scrapers.errata

# SPDX-License-Identifier: GPL-3.0+

from __future__ import unicode_literals

from estuary import log
from estuary.models.bugzilla import BugzillaBug
from estuary.models.errata import Advisory, ContainerAdvisory
from estuary.models.koji import ContainerKojiBuild, KojiBuild
from estuary.models.user import User
from estuary.utils.general import timestamp_to_date
from scrapers.base import BaseScraper


class ErrataScraper(BaseScraper):
    """Scrapes the Errata Tool tables in Teiid."""

    def run(self, since=None, until=None):
        """
        Run the Errata Tool scraper.

        :param str since: a datetime to start scraping data from; when omitted,
            ``self.default_since`` is used
        :param str until: a datetime to scrape data until; when omitted,
            ``self.default_until`` is used
        """
        log.info('Starting initial load of Errata advisories')
        start_date = self.default_since if since is None else timestamp_to_date(since)
        end_date = self.default_until if until is None else timestamp_to_date(until)
        advisories = self.get_advisories(start_date, end_date)
        log.info('Successfully fetched {0} advisories from Teiid'.format(len(advisories)))
        self.update_neo4j(advisories)
        log.info('Initial load of Errata advisories complete!')

    def update_neo4j(self, advisories):
        """
        Update Neo4j with Errata Tool advisories from Teiid.

        For every advisory this creates/updates the ``Advisory`` node, synchronizes its
        attached builds (connecting, re-timestamping or disconnecting them), and connects
        the assignee, the reporter and the attached Bugzilla bugs.

        :param list advisories: a list of dictionaries of advisories
        """
        total = len(advisories)
        for count, advisory in enumerate(advisories, start=1):
            log.info('Processing advisory {0}/{1}'.format(count, total))
            adv = Advisory.create_or_update({
                'actual_ship_date': advisory['actual_ship_date'],
                'advisory_name': advisory['advisory_name'],
                'created_at': advisory['created_at'],
                'id_': advisory['id'],
                'issue_date': advisory['issue_date'],
                'product_name': advisory['product_name'],
                'release_date': advisory['release_date'],
                'security_impact': advisory['security_impact'],
                'security_sla': advisory['security_sla'],
                'state': advisory['state'],
                'status_time': advisory['status_time'],
                'synopsis': advisory['synopsis'],
                'update_date': advisory['update_date'],
            })[0]
            # Tracks whether the ContainerAdvisory label was already added in this pass so
            # that it is added at most once per advisory
            container_adv = False

            for associated_build in self.get_associated_builds(advisory['id']):
                build_id = associated_build['id_']
                # Even if a node has two labels in the database, Neo4j returns the node
                # only with the specific label you asked for. Hence we check for labels
                # ContainerKojiBuild and KojiBuild separately for the same node.
                build = ContainerKojiBuild.nodes.get_or_none(id_=build_id)
                if not build:
                    build = KojiBuild.nodes.get_or_none(id_=build_id)

                # This check intentionally runs before the "removed" handling below, so a
                # known container build labels the advisory even if it was later removed
                if build and not container_adv \
                        and build.__label__ == ContainerKojiBuild.__label__:
                    adv.add_label(ContainerAdvisory.__label__)
                    container_adv = True

                # If this is set, that means it was once part of the advisory but not anymore.
                # This relationship needs to be deleted if it exists.
                if associated_build['removed_index_id']:
                    if build:
                        adv.attached_builds.disconnect(build)
                    continue

                # Query Teiid and create the entry only if the build is not present in Neo4j
                if not build:
                    attached_build = self.get_koji_build(build_id)
                    if attached_build:
                        if self.is_container_build(attached_build):
                            build_model = ContainerKojiBuild
                        else:
                            build_model = KojiBuild
                        build = build_model.get_or_create({'id_': build_id})[0]

                # This will happen only if we do not find the build we are looking for in Teiid
                # which shouldn't usually happen under normal conditions
                if not build:
                    log.warning('The Koji build with ID {} was not found in Teiid!'.format(
                        build_id))
                    continue

                # Covers builds that were only just created from the Teiid data above
                if not container_adv and build.__label__ == ContainerKojiBuild.__label__:
                    adv.add_label(ContainerAdvisory.__label__)
                    container_adv = True

                attached_rel = adv.attached_builds.relationship(build)
                time_attached = associated_build['time_attached']
                if attached_rel:
                    # Only rewrite the relationship when the timestamp actually changed
                    if attached_rel.time_attached != time_attached:
                        adv.attached_builds.replace(build, {'time_attached': time_attached})
                else:
                    adv.attached_builds.connect(build, {'time_attached': time_attached})

            # The user columns come from LEFT JOINs in get_advisories, so they may be empty
            # when the advisory has no matching user row; skip the relationship in that case
            # instead of failing on ``.split``
            if advisory['assigned_to']:
                assigned_to = User.get_or_create(
                    {'username': advisory['assigned_to'].split('@')[0]})[0]
                adv.conditional_connect(adv.assigned_to, assigned_to)

            if advisory['reporter']:
                reporter = User.get_or_create(
                    {'username': advisory['reporter'].split('@')[0]})[0]
                adv.conditional_connect(adv.reporter, reporter)

            for attached_bug in self.get_attached_bugs(advisory['id']):
                bug = BugzillaBug.get_or_create(attached_bug)[0]
                adv.attached_bugs.connect(bug)

    def get_advisories(self, since, until):
        """
        Query Teiid for the Errata Tool advisories.

        :param datetime.datetime since: determines when to start querying
        :param datetime.datetime until: determines until when to scrape data
        :return: a list of dictionaries
        :rtype: list
        """
        # NOTE(review): the bounds are interpolated directly into the SQL text, so they must
        # only ever come from trusted input
        sql = """\
            SELECT main.actual_ship_date, main.fulladvisory as advisory_name,
                assigned_users.login_name AS assigned_to, states.current as state,
                main.created_at, main.id AS id, main.issue_date, products.name as product_name,
                main.release_date, reporter_users.login_name AS reporter,
                main.security_impact, main.security_sla,
                main.status_updated_at AS status_time, main.synopsis,
                main.errata_type AS type, main.update_date
            FROM Errata_public.errata_main AS main
            LEFT JOIN Errata_public.state_indices as states
                ON main.current_state_index_id = states.id
            LEFT JOIN Errata_public.errata_products as products
                ON main.product_id = products.id
            LEFT JOIN Errata_public.users AS assigned_users
                ON main.assigned_to_id = assigned_users.id
            LEFT JOIN Errata_public.users AS reporter_users
                ON main.reporter_id = reporter_users.id
            WHERE main.update_date >= '{0}' AND main.update_date <= '{1}'
            ORDER BY main.update_date DESC;
            """.format(since, until)
        log.info('Getting Errata advisories since {0} until {1}'.format(since, until))
        return self.teiid.query(sql)

    def get_associated_builds(self, advisory_id):
        """
        Query Teiid to find the Brew builds associated with a specific advisory.

        :param int advisory_id: the advisory ID
        :return: a list of dictionaries
        :rtype: list
        """
        # NOTE(review): the ID is interpolated directly into the SQL text, so it must only
        # ever come from trusted input
        sql = """\
            SELECT brew_builds.id as id_, packages.name, brew_builds.release, removed_index_id,
                brew_builds.version, brew_mappings.created_at as time_attached
            FROM Errata_public.errata_brew_mappings as brew_mappings
            LEFT JOIN Errata_public.brew_builds AS brew_builds
                ON brew_builds.id = brew_mappings.brew_build_id
            LEFT JOIN Errata_public.packages AS packages
                ON brew_builds.package_id = packages.id
            WHERE errata_id = {0};
            """.format(advisory_id)
        log.info('Getting Brew builds tied to the advisory with ID {0}'.format(advisory_id))
        return self.teiid.query(sql)

    def get_attached_bugs(self, advisory_id):
        """
        Query Teiid to find the Bugzilla bugs attached to a specific advisory.

        :param int advisory_id: the advisory ID
        :return: a list of dictionaries
        :rtype: list
        """
        # NOTE(review): the ID is interpolated directly into the SQL text, so it must only
        # ever come from trusted input
        sql = """\
            SELECT filed_bugs.bug_id as id_
            FROM Errata_public.filed_bugs as filed_bugs
            WHERE filed_bugs.errata_id = {0};
            """.format(advisory_id)
        log.info('Getting Bugzilla bugs tied to the advisory with ID {0}'.format(advisory_id))
        return self.teiid.query(sql)

    def get_koji_build(self, build_id):
        """
        Query Teiid to find the Koji build with a specific ID.

        :param int build_id: the build ID
        :return: the first row returned by the query (with the ``extra`` and
            ``package_name`` columns), or None when the query returns nothing
        :rtype: dict or None
        """
        # NOTE(review): the ID is interpolated directly into the SQL text, so it must only
        # ever come from trusted input
        sql = """\
            SELECT brew.build.extra, brew.package.name as package_name
            FROM brew.build
            LEFT JOIN brew.package ON brew.build.pkg_id = brew.package.id
            WHERE build.id = {0};
            """.format(build_id)
        result = self.teiid.query(sql)
        return result[0] if result else None