mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
Update notification service (#17921)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
parent
07575e869d
commit
2c5747edfe
2 changed files with 95 additions and 33 deletions
15
.github/workflows/self-past.yml
vendored
15
.github/workflows/self-past.yml
vendored
|
|
@ -164,6 +164,11 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/download-artifact@v2
|
||||
|
||||
# Create a directory to store test failure tables in the next step
|
||||
- name: Create directory
|
||||
run: mkdir test_failure_tables
|
||||
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
|
|
@ -176,4 +181,12 @@ jobs:
|
|||
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
||||
run: |
|
||||
pip install slack_sdk
|
||||
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
|
||||
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
|
||||
|
||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
|
||||
path: test_failure_tables
|
||||
|
|
@ -233,15 +233,11 @@ class Message:
|
|||
individual_reports.append(key)
|
||||
|
||||
header = "Single | Multi | Category\n"
|
||||
category_failures_report = header + "\n".join(sorted(individual_reports))
|
||||
category_failures_report = prepare_reports(
|
||||
title="The following modeling categories had failures", header=header, reports=individual_reports
|
||||
)
|
||||
|
||||
return {
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": f"The following modeling categories had failures:\n\n```\n{category_failures_report}\n```",
|
||||
},
|
||||
}
|
||||
return {"type": "section", "text": {"type": "mrkdwn", "text": category_failures_report}}
|
||||
|
||||
@property
|
||||
def model_failures(self) -> Dict:
|
||||
|
|
@ -302,21 +298,44 @@ class Message:
|
|||
|
||||
model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n"
|
||||
sorted_model_reports = sorted(model_reports, key=lambda s: s.split("] ")[-1])
|
||||
model_failures_report = model_header + "\n".join(sorted_model_reports)
|
||||
model_failures_report = prepare_reports(
|
||||
title="These following model modules had failures", header=model_header, reports=sorted_model_reports
|
||||
)
|
||||
|
||||
module_header = "Single | Multi | Category\n"
|
||||
sorted_module_reports = sorted(other_module_reports, key=lambda s: s.split("] ")[-1])
|
||||
module_failures_report = module_header + "\n".join(sorted_module_reports)
|
||||
module_failures_report = prepare_reports(
|
||||
title="The following non-model modules had failures", header=module_header, reports=sorted_module_reports
|
||||
)
|
||||
|
||||
report = ""
|
||||
model_failure_sections = [
|
||||
{"type": "section", "text": {"type": "mrkdwn", "text": model_failures_report}},
|
||||
{"type": "section", "text": {"type": "mrkdwn", "text": module_failures_report}},
|
||||
]
|
||||
|
||||
if len(model_reports):
|
||||
report += f"These following model modules had failures:\n```\n{model_failures_report}\n```\n\n"
|
||||
# Save complete tables (for past CI) - to be uploaded as artifacts
|
||||
if ci_event.startswith("Past CI"):
|
||||
model_failures_report = prepare_reports(
|
||||
title="These following model modules had failures",
|
||||
header=model_header,
|
||||
reports=sorted_model_reports,
|
||||
to_truncate=False,
|
||||
)
|
||||
file_path = os.path.join(os.getcwd(), "test_failure_tables/model_failures_report.txt")
|
||||
with open(file_path, "w", encoding="UTF-8") as fp:
|
||||
fp.write(model_failures_report)
|
||||
|
||||
if len(other_module_reports):
|
||||
report += f"The following non-model modules had failures:\n```\n{module_failures_report}\n```\n\n"
|
||||
module_failures_report = prepare_reports(
|
||||
title="The following non-model modules had failures",
|
||||
header=module_header,
|
||||
reports=sorted_module_reports,
|
||||
to_truncate=False,
|
||||
)
|
||||
file_path = os.path.join(os.getcwd(), "test_failure_tables/module_failures_report.txt")
|
||||
with open(file_path, "w", encoding="UTF-8") as fp:
|
||||
fp.write(module_failures_report)
|
||||
|
||||
return {"type": "section", "text": {"type": "mrkdwn", "text": report}}
|
||||
return model_failure_sections
|
||||
|
||||
@property
|
||||
def additional_failures(self) -> Dict:
|
||||
|
|
@ -337,15 +356,11 @@ class Message:
|
|||
individual_reports.append(report)
|
||||
|
||||
header = "Single | Multi | Category\n"
|
||||
failures_report = header + "\n".join(sorted(individual_reports))
|
||||
failures_report = prepare_reports(
|
||||
title="The following non-modeling tests had failures", header=header, reports=individual_reports
|
||||
)
|
||||
|
||||
return {
|
||||
"type": "section",
|
||||
"text": {
|
||||
"type": "mrkdwn",
|
||||
"text": f"The following non-modeling tests had failures:\n```\n{failures_report}\n```",
|
||||
},
|
||||
}
|
||||
return {"type": "section", "text": {"type": "mrkdwn", "text": failures_report}}
|
||||
|
||||
@property
|
||||
def payload(self) -> str:
|
||||
|
|
@ -358,7 +373,10 @@ class Message:
|
|||
blocks.append(self.failures)
|
||||
|
||||
if self.n_model_failures > 0:
|
||||
blocks.extend([self.category_failures, self.model_failures])
|
||||
blocks.append(self.category_failures)
|
||||
for block in self.model_failures:
|
||||
if block["text"]["text"]:
|
||||
blocks.append(block)
|
||||
|
||||
if self.n_additional_failures > 0:
|
||||
blocks.append(self.additional_failures)
|
||||
|
|
@ -582,6 +600,29 @@ def retrieve_available_artifacts():
|
|||
return _available_artifacts
|
||||
|
||||
|
||||
def prepare_reports(title, header, reports, to_truncate=True):
|
||||
report = ""
|
||||
|
||||
MAX_ERROR_TEXT = 3000 - len("[Truncated]")
|
||||
if not to_truncate:
|
||||
MAX_ERROR_TEXT = float("inf")
|
||||
|
||||
if len(reports) > 0:
|
||||
# `text` must be less than 3001 characters in Slack SDK
|
||||
# keep some room for adding "[Truncated]" when necessary
|
||||
|
||||
for idx in range(len(reports)):
|
||||
_report = header + "\n".join(reports[: idx + 1])
|
||||
new_report = f"{title}:\n```\n{_report}\n```\n"
|
||||
if len(new_report) > MAX_ERROR_TEXT:
|
||||
# `report` here has length <= 3000
|
||||
report = report + "[Truncated]"
|
||||
break
|
||||
report = new_report
|
||||
|
||||
return report
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
org = "huggingface"
|
||||
|
|
@ -686,16 +727,24 @@ if __name__ == "__main__":
|
|||
|
||||
unclassified_model_failures = []
|
||||
|
||||
# This prefix is used to get job links below. For past CI, we use `workflow_call`, which changes the job names from
|
||||
# `Model tests (...)` to `PyTorch 1.5 / Model tests (...)` for example.
|
||||
job_name_prefix = ""
|
||||
if ci_event.startswith("Past CI - "):
|
||||
framework, version = ci_event.replace("Past CI - ", "").split("-")
|
||||
framework = "PyTorch" if framework == "pytorch" else "TensorFlow"
|
||||
job_name_prefix = f"{framework} {version}"
|
||||
|
||||
for model in model_results.keys():
|
||||
for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths:
|
||||
artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"])
|
||||
if "stats" in artifact:
|
||||
# Link to the GitHub Action job
|
||||
model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(
|
||||
# The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert`
|
||||
f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)"
|
||||
)
|
||||
|
||||
# The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert`
|
||||
job_name = f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)"
|
||||
if job_name_prefix:
|
||||
job_name = f"{job_name_prefix} / {job_name}"
|
||||
model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(job_name)
|
||||
failed, success, time_spent = handle_test_results(artifact["stats"])
|
||||
model_results[model]["success"] += success
|
||||
model_results[model]["time_spent"] += time_spent[1:-1] + ", "
|
||||
|
|
@ -809,7 +858,7 @@ if __name__ == "__main__":
|
|||
|
||||
message = Message(title, ci_title, model_results, additional_results)
|
||||
|
||||
# send report only if there is any failure
|
||||
if message.n_failures:
|
||||
# send report only if there is any failure (for push CI)
|
||||
if message.n_failures or ci_event != "push":
|
||||
message.post()
|
||||
message.post_reply()
|
||||
|
|
|
|||
Loading…
Reference in a new issue