Source code for scrapers.freshmaker

# SPDX-License-Identifier: GPL-3.0+

from __future__ import unicode_literals

import xml.etree.ElementTree as ET

import neomodel

from estuary import log
from estuary.models.errata import Advisory
from estuary.models.freshmaker import FreshmakerBuild, FreshmakerEvent
from estuary.models.koji import ContainerKojiBuild, KojiBuild
from estuary.utils.general import timestamp_to_datetime
from scrapers.base import BaseScraper
from scrapers.utils import retry_session


class FreshmakerScraper(BaseScraper):
    """Scrapes the Freshmaker API."""

    # First page of the Freshmaker events listing; later pages are followed through the
    # ``meta.next`` link returned with each response.
    freshmaker_url = 'https://freshmaker.engineering.redhat.com/api/2/events/?per_page=50'

    def run(self, since=None, until=None):
        """
        Run the Freshmaker scraper.

        The scraper always performs a full load, so ``since`` and ``until`` are accepted only
        for interface compatibility and are ignored (a warning is logged if either is set).

        :param str since: a datetime to start scraping data from (ignored)
        :param str until: a datetime to scrape data until (ignored)
        """
        if since or until:
            log.warning('Ignoring the since/until parameter; They do not apply to the '
                        'Freshmaker scraper')
        log.info('Starting initial load of Freshmaker events')
        self.query_api_and_update_neo4j()
        log.info('Initial load of Freshmaker events complete!')

    def query_api_and_update_neo4j(self):
        """
        Scrape the Freshmaker API and upload the data to Neo4j.

        Every page of events starting at ``freshmaker_url`` is fetched. For each event whose
        ``search_key`` is an integer (treated as the advisory ID) and that is not a dry run, a
        FreshmakerEvent and an Advisory are stored and connected, and then each of the event's
        builds is stored through ``_update_build``.
        """
        # Initialize session and url
        session = retry_session()
        fm_url = self.freshmaker_url
        while True:
            log.debug('Querying {0}'.format(fm_url))
            rv_json = session.get(fm_url, timeout=60).json()
            for fm_event in rv_json['items']:
                try:
                    int(fm_event['search_key'])
                except (TypeError, ValueError):
                    # Skip Freshmaker Events that don't have the search_key as the Advisory ID
                    # (TypeError covers a null search_key)
                    continue
                if fm_event.get('dry_run'):
                    # Skip events triggered by manual rebuilds in dry run mode
                    continue

                log.debug('Creating FreshmakerEvent {0}'.format(fm_event['id']))
                event_params = dict(
                    id_=fm_event['id'],
                    state_name=fm_event['state_name'],
                    state_reason=fm_event['state_reason'],
                    url=fm_event['url']
                )
                if fm_event.get('time_created'):
                    event_params['time_created'] = timestamp_to_datetime(
                        fm_event['time_created'])
                if fm_event.get('time_done'):
                    event_params['time_done'] = timestamp_to_datetime(fm_event['time_done'])
                event = FreshmakerEvent.create_or_update(event_params)[0]

                log.debug('Creating Advisory {0}'.format(fm_event['search_key']))
                advisory = Advisory.get_or_create(dict(
                    id_=fm_event['search_key']
                ))[0]
                event.conditional_connect(event.triggered_by_advisory, advisory)

                for build_dict in self._get_event_builds(session, fm_event['id']):
                    self._update_build(event, build_dict)

            next_url = rv_json['meta'].get('next')
            if not next_url:
                break
            fm_url = next_url

    @staticmethod
    def _get_event_builds(session, event_id):
        """
        Fetch all the Freshmaker builds of an event, following pagination links.

        :param session: the HTTP session used to query the Freshmaker API
        :param event_id: the ID of the Freshmaker event
        :return: the build dictionaries from every page of results
        :rtype: list
        """
        builds_url = ('https://freshmaker.engineering.redhat.com/api/2/builds/'
                      '?event_id={0}&per_page=50'.format(event_id))
        event_builds = []
        while builds_url:
            rv = session.get(builds_url, timeout=60).json()
            event_builds.extend(rv['items'])
            builds_url = rv['meta'].get('next')
        return event_builds

    def _update_build(self, event, build_dict):
        """
        Store a single Freshmaker build and, when available, the Koji build it produced.

        :param FreshmakerEvent event: the event that requested the build
        :param dict build_dict: the build as returned by the Freshmaker API
        """
        # To handle a faulty container build in Freshmaker
        if build_dict['build_id'] and int(build_dict['build_id']) < 0:
            return

        log.debug('Creating FreshmakerBuild {0}'.format(build_dict['build_id']))
        fb_params = dict(
            id_=build_dict['id'],
            dep_on=build_dict['dep_on'],
            name=build_dict['name'],
            original_nvr=build_dict['original_nvr'],
            rebuilt_nvr=build_dict['rebuilt_nvr'],
            state_name=build_dict['state_name'],
            state_reason=build_dict['state_reason'],
            time_submitted=timestamp_to_datetime(build_dict['time_submitted']),
            type_name=build_dict['type_name'],
        )
        if build_dict['time_completed']:
            fb_params['time_completed'] = timestamp_to_datetime(build_dict['time_completed'])
        if build_dict['build_id']:
            fb_params['build_id'] = build_dict['build_id']
        fb = FreshmakerBuild.create_or_update(fb_params)[0]
        event.requested_builds.connect(fb)

        # The build ID obtained from Freshmaker API is actually a Koji task ID
        task_result = None
        if build_dict['build_id']:
            task_result = self.get_koji_task_result(build_dict['build_id'])
        if not task_result:
            return

        build_id = self._extract_koji_build_id(task_result)
        if not build_id:
            return

        log.debug('Creating ContainerKojiBuild {0}'.format(build_id))
        build = self._create_container_build(build_id, build_dict['original_nvr'])
        event.successful_koji_builds.connect(build)

    @staticmethod
    def _extract_koji_build_id(task_result):
        """
        Extract the Koji build ID from a Koji task result.

        :param str task_result: the XML string of the task result
        :return: the text of the first matching ``string`` element, or None if there is none
        :rtype: str or None
        """
        xml_root = ET.fromstring(task_result)
        # TODO: Change this if a task can trigger multiple builds
        build_node = xml_root.find(".//*[name='koji_builds'].//string")
        if build_node is None:
            return None
        return build_node.text

    @staticmethod
    def _create_container_build(build_id, original_nvr):
        """
        Create or update the ContainerKojiBuild with the given ID.

        :param str build_id: the Koji build ID
        :param str original_nvr: the original NVR to store on the build
        :return: the stored build
        :rtype: ContainerKojiBuild
        :raises neomodel.exceptions.ConstraintValidationFailed: if the constraint failure
            is not explained by an existing KojiBuild with the same ID
        """
        build_params = {
            'id_': build_id,
            'original_nvr': original_nvr
        }
        try:
            return ContainerKojiBuild.create_or_update(build_params)[0]
        except neomodel.exceptions.ConstraintValidationFailed:
            # This must have errantly been created as a KojiBuild instead of a
            # ContainerKojiBuild, so let's fix that.
            build = KojiBuild.nodes.get_or_none(id_=build_id)
            if not build:
                # If there was a constraint validation failure and the build isn't just
                # the wrong label, then we can't recover.
                raise
            build.add_label(ContainerKojiBuild.__label__)
            return ContainerKojiBuild.create_or_update(build_params)[0]

    def get_koji_task_result(self, task_id):
        """
        Query Teiid for a Koji task's result attribute.

        :param int task_id: the Koji task ID to query
        :return: an XML string, or None if the query returned no usable row
        :rtype: str
        :raises ValueError: if ``task_id`` cannot be converted to an integer
        """
        # The task ID originates from the Freshmaker API, so coerce it to an integer before
        # interpolating it to make sure nothing but a number can end up in the SQL statement.
        # SQL query to fetch task related to a certain build
        sql_query = """
            SELECT result
            FROM brew.task
            WHERE id = {};
            """.format(int(task_id))

        try:
            return self.teiid.query(sql=sql_query)[0]['result']
        except (IndexError, KeyError):
            return None