From ae4687e43ed9e99b47d6382ab1a6d94a6fbc7a5c Mon Sep 17 00:00:00 2001 From: Aleksei Apaseev Date: Mon, 12 Aug 2024 11:22:30 +0800 Subject: [PATCH 1/5] ci: add redundant job to ensure 'retry_failed_jobs' job is not skipped --- .gitlab/ci/pre_check.yml | 12 ++++++++++++ .gitlab/ci/retry_failed_jobs.yml | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.gitlab/ci/pre_check.yml b/.gitlab/ci/pre_check.yml index 09a5ffb3ad..3aa4d7807b 100644 --- a/.gitlab/ci/pre_check.yml +++ b/.gitlab/ci/pre_check.yml @@ -168,3 +168,15 @@ pipeline_variables: - pipeline.env expire_in: 1 week when: always + +redundant_pass_job: + stage: pre_check + tags: [shiny, fast_run] + image: $ESP_ENV_IMAGE + dependencies: null + before_script: [] + cache: [] + extends: [] + script: + - echo "This job is redundant to ensure the 'retry_failed_jobs' job can exist and not be skipped" + when: always diff --git a/.gitlab/ci/retry_failed_jobs.yml b/.gitlab/ci/retry_failed_jobs.yml index 28a2c1e06f..8eff2ed6b4 100644 --- a/.gitlab/ci/retry_failed_jobs.yml +++ b/.gitlab/ci/retry_failed_jobs.yml @@ -1,6 +1,7 @@ retry_failed_jobs: stage: retry_failed_jobs tags: [shiny, fast_run] + allow_failure: true image: $ESP_ENV_IMAGE dependencies: null before_script: [] @@ -11,4 +12,4 @@ retry_failed_jobs: - python tools/ci/python_packages/gitlab_api.py retry_failed_jobs $CI_MERGE_REQUEST_PROJECT_ID --pipeline_id $CI_PIPELINE_ID when: manual needs: - - generate_failed_jobs_report + - redundant_pass_job From f154151e1458d476acc75b808545cb387e3c233a Mon Sep 17 00:00:00 2001 From: Aleksei Apaseev Date: Mon, 12 Aug 2024 20:52:34 +0800 Subject: [PATCH 2/5] ci: retry jobs for parent and child pipelines --- tools/ci/python_packages/gitlab_api.py | 53 +++++++++++++++++++------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/tools/ci/python_packages/gitlab_api.py b/tools/ci/python_packages/gitlab_api.py index 7c83d6f205..625e47c489 100644 --- a/tools/ci/python_packages/gitlab_api.py +++ b/tools/ci/python_packages/gitlab_api.py @@ -18,6 +18,7 @@ from typing import Optional from typing import Union import gitlab +import requests TR = Callable[..., Any] @@ -279,6 +280,26 @@ class Gitlab(object): job = self.project.jobs.get(job_id) return ','.join(job.tag_list) + def get_child_pipeline_ids(self, parent_pipeline_id: int) -> List[int]: + """ + Fetches the child pipeline IDs for a given parent pipeline ID. + + :param parent_pipeline_id: ID of the parent pipeline. + :return: List of child pipeline IDs. + """ + response = requests.get( + f'{os.getenv("CI_DASHBOARD_API", "")}/pipelines/{parent_pipeline_id}/child-ids', + headers={'Authorization': f'Bearer {os.getenv("ESPCI_TOKEN", "")}'} + ) + + if response.status_code == 200: + response_data = response.json() + child_pipeline_ids: list = response_data.get('child_pipeline_ids', []) + return child_pipeline_ids + else: + logging.error(f'Failed to fetch child pipeline IDs: {response.text}') + return [] + def retry_failed_jobs(self, pipeline_id: int, retry_allowed_failures: bool = False) -> List[int]: """ Retry failed jobs for a specific pipeline. Optionally include jobs marked as 'allowed failures'. @@ -286,20 +307,24 @@ class Gitlab(object): :param pipeline_id: ID of the pipeline whose failed jobs are to be retried. :param retry_allowed_failures: Whether to retry jobs that are marked as allowed failures. """ - pipeline = self.project.pipelines.get(pipeline_id) - jobs_to_retry = [ - job - for job in pipeline.jobs.list(scope='failed') - if retry_allowed_failures or not job.attributes.get('allow_failure', False) - ] jobs_succeeded_retry = [] - for job in jobs_to_retry: - try: - res = self.project.jobs.get(job.id).retry() - jobs_succeeded_retry.append(job.id) - logging.info(f'Retried job {job.id} with result {res}') - except Exception as e: - logging.error(f'Failed to retry job {job.id}: {str(e)}') + pipeline_ids = [pipeline_id] + self.get_child_pipeline_ids(pipeline_id) + + for pid in pipeline_ids: + pipeline = self.project.pipelines.get(pid) + jobs_to_retry = [ + job + for job in pipeline.jobs.list(scope='failed') + if retry_allowed_failures or not job.attributes.get('allow_failure', False) + ] + for job in jobs_to_retry: + try: + res = self.project.jobs.get(job.id).retry() + jobs_succeeded_retry.append(job.id) + logging.info(f'Retried job {job.id} with result {res}') + except Exception as e: + logging.error(f'Failed to retry job {job.id}: {str(e)}') + return jobs_succeeded_retry @@ -334,7 +359,7 @@ def main() -> None: print('project id: {}'.format(ret)) elif args.action == 'retry_failed_jobs': res = gitlab_inst.retry_failed_jobs(args.pipeline_id, args.retry_allowed_failures) - print('job retried successfully: {}'.format(res)) + print('jobs retried successfully: {}'.format(res)) elif args.action == 'get_job_tags': ret = gitlab_inst.get_job_tags(args.job_id) print(ret) From 4e44310ff18c690b33e0a12a2220bd1b7ae8ee65 Mon Sep 17 00:00:00 2001 From: Aleksei Apaseev Date: Wed, 14 Aug 2024 11:32:10 +0800 Subject: [PATCH 3/5] ci: update authorization from Bearer token to CI_JOB_TOKEN while making requests to CI_DASHBOARD_API service --- tools/ci/dynamic_pipelines/utils.py | 6 +++--- tools/ci/python_packages/gitlab_api.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/ci/dynamic_pipelines/utils.py b/tools/ci/dynamic_pipelines/utils.py index 08eceb1f9a..d3947be463 100644 --- a/tools/ci/dynamic_pipelines/utils.py +++ b/tools/ci/dynamic_pipelines/utils.py @@ -102,7 +102,7 @@ def fetch_failed_jobs(commit_id: str) -> t.List[GitlabJob]: """ response = requests.get( f'{CI_DASHBOARD_API}/commits/{commit_id}/jobs', - headers={'Authorization': f'Bearer {CI_JOB_TOKEN}'} + headers={'CI-Job-Token': CI_JOB_TOKEN}, ) if response.status_code != 200: print(f'Failed to fetch jobs data: {response.status_code} with error: {response.text}') @@ -117,7 +117,7 @@ def fetch_failed_jobs(commit_id: str) -> t.List[GitlabJob]: failed_job_names = [job['name'] for job in jobs if job['status'] == 'failed'] response = requests.post( f'{CI_DASHBOARD_API}/jobs/failure_ratio', - headers={'Authorization': f'Bearer {CI_JOB_TOKEN}'}, + headers={'CI-Job-Token': CI_JOB_TOKEN}, json={'job_names': failed_job_names, 'exclude_branches': [os.getenv('CI_MERGE_REQUEST_SOURCE_BRANCH_NAME', '')]}, ) if response.status_code != 200: @@ -145,7 +145,7 @@ def fetch_failed_testcases_failure_ratio(failed_testcases: t.List[TestCase], bra req_json = {'testcase_names': list(set([testcase.name for testcase in failed_testcases])), **branches_filter} response = requests.post( f'{CI_DASHBOARD_API}/testcases/failure_ratio', - headers={'Authorization': f'Bearer {CI_JOB_TOKEN}'}, + headers={'CI-Job-Token': CI_JOB_TOKEN}, json=req_json, ) if response.status_code != 200: diff --git a/tools/ci/python_packages/gitlab_api.py b/tools/ci/python_packages/gitlab_api.py index 625e47c489..1d9d11438a 100644 --- a/tools/ci/python_packages/gitlab_api.py +++ b/tools/ci/python_packages/gitlab_api.py @@ -289,7 +289,7 @@ class Gitlab(object): """ response = requests.get( f'{os.getenv("CI_DASHBOARD_API", "")}/pipelines/{parent_pipeline_id}/child-ids', - headers={'Authorization': f'Bearer {os.getenv("ESPCI_TOKEN", "")}'} + headers={'CI-Job-Token': os.getenv('CI_JOB_TOKEN', '')}, ) if response.status_code == 200: From a0c3fb4d4a49e808d5891501f17a48df7c632301 Mon Sep 17 00:00:00 2001 From: Aleksei Apaseev Date: Wed, 14 Aug 2024 19:29:18 +0800 Subject: [PATCH 4/5] ci: add name for child pipelines --- tools/ci/dynamic_pipelines/constants.py | 3 +++ .../scripts/generate_build_child_pipeline.py | 3 ++- .../scripts/generate_target_test_child_pipeline.py | 3 ++- tools/ci/dynamic_pipelines/utils.py | 6 +++++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/ci/dynamic_pipelines/constants.py b/tools/ci/dynamic_pipelines/constants.py index bbeefcfc4d..c0c90c4d27 100644 --- a/tools/ci/dynamic_pipelines/constants.py +++ b/tools/ci/dynamic_pipelines/constants.py @@ -17,6 +17,9 @@ DEFAULT_CASES_TEST_PER_JOB = 30 DEFAULT_BUILD_CHILD_PIPELINE_FILEPATH = os.path.join(IDF_PATH, 'build_child_pipeline.yml') DEFAULT_TARGET_TEST_CHILD_PIPELINE_FILEPATH = os.path.join(IDF_PATH, 'target_test_child_pipeline.yml') +DEFAULT_BUILD_CHILD_PIPELINE_NAME = 'Build Child Pipeline' +DEFAULT_TARGET_TEST_CHILD_PIPELINE_NAME = 'Target Test Child Pipeline' + TEST_RELATED_BUILD_JOB_NAME = 'build_test_related_apps' NON_TEST_RELATED_BUILD_JOB_NAME = 'build_non_test_related_apps' diff --git a/tools/ci/dynamic_pipelines/scripts/generate_build_child_pipeline.py b/tools/ci/dynamic_pipelines/scripts/generate_build_child_pipeline.py index 62fc2366ff..5c2cf826cc 100644 --- a/tools/ci/dynamic_pipelines/scripts/generate_build_child_pipeline.py +++ b/tools/ci/dynamic_pipelines/scripts/generate_build_child_pipeline.py @@ -9,6 +9,7 @@ import __init__ # noqa: F401 # inject the system path import yaml from dynamic_pipelines.constants import DEFAULT_APPS_BUILD_PER_JOB from dynamic_pipelines.constants import DEFAULT_BUILD_CHILD_PIPELINE_FILEPATH +from dynamic_pipelines.constants import DEFAULT_BUILD_CHILD_PIPELINE_NAME from dynamic_pipelines.constants import DEFAULT_TEST_PATHS from dynamic_pipelines.constants import NON_TEST_RELATED_APPS_FILENAME from dynamic_pipelines.constants import NON_TEST_RELATED_BUILD_JOB_NAME @@ -133,7 +134,7 @@ def main(arguments: argparse.Namespace) -> None: else: extra_include_yml = ['tools/ci/dynamic_pipelines/templates/test_child_pipeline.yml'] - dump_jobs_to_yaml(build_jobs, arguments.yaml_output, extra_include_yml) + dump_jobs_to_yaml(build_jobs, arguments.yaml_output, DEFAULT_BUILD_CHILD_PIPELINE_NAME, extra_include_yml) print(f'Generate child pipeline yaml file {arguments.yaml_output} with {sum(j.parallel for j in build_jobs)} jobs') diff --git a/tools/ci/dynamic_pipelines/scripts/generate_target_test_child_pipeline.py b/tools/ci/dynamic_pipelines/scripts/generate_target_test_child_pipeline.py index 8ec797b615..bea461414b 100644 --- a/tools/ci/dynamic_pipelines/scripts/generate_target_test_child_pipeline.py +++ b/tools/ci/dynamic_pipelines/scripts/generate_target_test_child_pipeline.py @@ -19,6 +19,7 @@ import yaml from dynamic_pipelines.constants import BUILD_ONLY_LABEL from dynamic_pipelines.constants import DEFAULT_CASES_TEST_PER_JOB from dynamic_pipelines.constants import DEFAULT_TARGET_TEST_CHILD_PIPELINE_FILEPATH +from dynamic_pipelines.constants import DEFAULT_TARGET_TEST_CHILD_PIPELINE_NAME from dynamic_pipelines.constants import DEFAULT_TEST_PATHS from dynamic_pipelines.constants import KNOWN_GENERATE_TEST_CHILD_PIPELINE_WARNINGS_FILEPATH from dynamic_pipelines.models import EmptyJob @@ -170,7 +171,7 @@ def generate_target_test_child_pipeline( if no_env_marker_test_cases_fail or no_runner_tags_fail: raise SystemExit('Failed to generate target test child pipeline.') - dump_jobs_to_yaml(target_test_jobs, output_filepath, extra_include_yml) + dump_jobs_to_yaml(target_test_jobs, output_filepath, DEFAULT_TARGET_TEST_CHILD_PIPELINE_NAME, extra_include_yml) print(f'Generate child pipeline yaml file {output_filepath} with {sum(j.parallel for j in target_test_jobs)} jobs') diff --git a/tools/ci/dynamic_pipelines/utils.py b/tools/ci/dynamic_pipelines/utils.py index d3947be463..aced43c84f 100644 --- a/tools/ci/dynamic_pipelines/utils.py +++ b/tools/ci/dynamic_pipelines/utils.py @@ -21,7 +21,10 @@ from .models import TestCase def dump_jobs_to_yaml( - jobs: t.List[Job], output_filepath: str, extra_include_yml: t.Optional[t.List[str]] = None + jobs: t.List[Job], + output_filepath: str, + pipeline_name: str, + extra_include_yml: t.Optional[t.List[str]] = None, ) -> None: yaml_dict = {} for job in jobs: @@ -35,6 +38,7 @@ def dump_jobs_to_yaml( '.gitlab/ci/common.yml', ], 'workflow': { + 'name': pipeline_name, 'rules': [ # always run the child pipeline, if they are created {'when': 'always'}, From c30d22630f82b52bc08bc04ec81e6e9b481fbd61 Mon Sep 17 00:00:00 2001 From: Aleksei Apaseev Date: Thu, 15 Aug 2024 11:58:42 +0800 Subject: [PATCH 5/5] ci: update the logic to use the gitlab API to fetch failed jobs --- tools/ci/python_packages/gitlab_api.py | 64 ++++++++++++++++---------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/tools/ci/python_packages/gitlab_api.py b/tools/ci/python_packages/gitlab_api.py index 1d9d11438a..6ac0ffae1f 100644 --- a/tools/ci/python_packages/gitlab_api.py +++ b/tools/ci/python_packages/gitlab_api.py @@ -18,7 +18,6 @@ from typing import Optional from typing import Union import gitlab -import requests TR = Callable[..., Any] @@ -68,6 +67,7 @@ class Gitlab(object): JOB_NAME_PATTERN = re.compile(r'(\w+)(\s+(\d+)/(\d+))?') DOWNLOAD_ERROR_MAX_RETRIES = 3 + DEFAULT_BUILD_CHILD_PIPELINE_NAME = 'Build Child Pipeline' def __init__(self, project_id: Union[int, str, None] = None): config_data_from_env = os.getenv('PYTHON_GITLAB_CONFIG') @@ -280,25 +280,38 @@ class Gitlab(object): job = self.project.jobs.get(job_id) return ','.join(job.tag_list) - def get_child_pipeline_ids(self, parent_pipeline_id: int) -> List[int]: + def get_downstream_pipeline_ids(self, main_pipeline_id: int) -> List[int]: """ - Fetches the child pipeline IDs for a given parent pipeline ID. + Retrieve the IDs of all downstream child pipelines for a given main pipeline. - :param parent_pipeline_id: ID of the parent pipeline. - :return: List of child pipeline IDs. + :param main_pipeline_id: The ID of the main pipeline to start the search. + :return: A list of IDs of all downstream child pipelines. """ - response = requests.get( - f'{os.getenv("CI_DASHBOARD_API", "")}/pipelines/{parent_pipeline_id}/child-ids', - headers={'CI-Job-Token': os.getenv('CI_JOB_TOKEN', '')}, - ) + bridge_pipeline_ids = [] + child_pipeline_ids = [] - if response.status_code == 200: - response_data = response.json() - child_pipeline_ids: list = response_data.get('child_pipeline_ids', []) - return child_pipeline_ids - else: - logging.error(f'Failed to fetch child pipeline IDs: {response.text}') - return [] + main_pipeline_bridges = self.project.pipelines.get(main_pipeline_id).bridges.list() + for bridge in main_pipeline_bridges: + downstream_pipeline = bridge.attributes.get('downstream_pipeline') + if not downstream_pipeline: + continue + bridge_pipeline_ids.append(downstream_pipeline['id']) + + for bridge_pipeline_id in bridge_pipeline_ids: + child_pipeline_ids.append(bridge_pipeline_id) + bridge_pipeline = self.project.pipelines.get(bridge_pipeline_id) + + if not bridge_pipeline.name == self.DEFAULT_BUILD_CHILD_PIPELINE_NAME: + continue + + child_bridges = bridge_pipeline.bridges.list() + for child_bridge in child_bridges: + downstream_child_pipeline = child_bridge.attributes.get('downstream_pipeline') + if not downstream_child_pipeline: + continue + child_pipeline_ids.append(downstream_child_pipeline.get('id')) + + return [pid for pid in child_pipeline_ids if pid is not None] def retry_failed_jobs(self, pipeline_id: int, retry_allowed_failures: bool = False) -> List[int]: """ @@ -308,22 +321,23 @@ class Gitlab(object): :param retry_allowed_failures: Whether to retry jobs that are marked as allowed failures. """ jobs_succeeded_retry = [] - pipeline_ids = [pipeline_id] + self.get_child_pipeline_ids(pipeline_id) - + pipeline_ids = [pipeline_id] + self.get_downstream_pipeline_ids(pipeline_id) + logging.info(f'Retrying jobs for pipelines: {pipeline_ids}') for pid in pipeline_ids: pipeline = self.project.pipelines.get(pid) - jobs_to_retry = [ - job + job_ids_to_retry = [ + job.id for job in pipeline.jobs.list(scope='failed') if retry_allowed_failures or not job.attributes.get('allow_failure', False) ] - for job in jobs_to_retry: + logging.info(f'Failed jobs for pipeline {pid}: {job_ids_to_retry}') + for job_id in job_ids_to_retry: try: - res = self.project.jobs.get(job.id).retry() - jobs_succeeded_retry.append(job.id) - logging.info(f'Retried job {job.id} with result {res}') + res = self.project.jobs.get(job_id).retry() + jobs_succeeded_retry.append(job_id) + logging.info(f'Retried job {job_id} with result {res}') except Exception as e: - logging.error(f'Failed to retry job {job.id}: {str(e)}') + logging.error(f'Failed to retry job {job_id}: {str(e)}') return jobs_succeeded_retry