xref: /aosp_15_r20/external/aws-crt-java/codebuild/CanaryWrapper_24_7.py (revision 3c7ae9de214676c52d19f01067dc1a404272dc11)
# Python wrapper script for collecting Canary metrics, setting up alarms, reporting metrics to Cloudwatch,
# checking the alarms to ensure everything is correct at the end of the run, and checking for new
# builds in S3, downloading them, and launching them if they exist (24/7 operation)
#
# Will only stop running if the Canary application itself has an issue - in which case the Canary application
# will need to be fixed and then the wrapper script restarted
7*3c7ae9deSAndroid Build Coastguard Worker
8*3c7ae9deSAndroid Build Coastguard Worker# Needs to be installed prior to running
9*3c7ae9deSAndroid Build Coastguard Worker# Part of standard packages in Python 3.4+
10*3c7ae9deSAndroid Build Coastguard Workerimport argparse
11*3c7ae9deSAndroid Build Coastguard Workerimport time
12*3c7ae9deSAndroid Build Coastguard Worker# Dependencies in project folder
13*3c7ae9deSAndroid Build Coastguard Workerfrom CanaryWrapper_Classes import *
14*3c7ae9deSAndroid Build Coastguard Workerfrom CanaryWrapper_MetricFunctions import *
15*3c7ae9deSAndroid Build Coastguard Worker
16*3c7ae9deSAndroid Build Coastguard Worker# TODO - Using subprocess may not work on Windows for starting/stopping the application thread.
17*3c7ae9deSAndroid Build Coastguard Worker#        Canary will likely be running on Linux, so it's probably okay, but need to confirm/check at some point....
18*3c7ae9deSAndroid Build Coastguard Worker# ================================================================================
19*3c7ae9deSAndroid Build Coastguard Worker# Code for command line argument parsing
20*3c7ae9deSAndroid Build Coastguard Worker
# Command line interface of the 24/7 Canary wrapper.
# --canary_executable and --s3_bucket_application are mandatory; every other option has a default.
# The parsed result is stored in command_parser_arguments for the rest of the script to read.
command_parser = argparse.ArgumentParser("CanaryWrapper_24_7")
command_parser.add_argument("--canary_executable", type=str, required=True,
    help="(REQUIRED) The path to the canary executable")
command_parser.add_argument("--canary_arguments", type=str, default="",
    help="(OPTIONAL, default='') The arguments to pass/launch the canary executable with")
command_parser.add_argument("--s3_bucket_name", type=str, default="canary-wrapper-folder",
    help="(OPTIONAL, default=canary-wrapper-folder) The name of the S3 bucket where success logs will be stored")
# This option is required, so the help text must not advertise it as optional with a default.
command_parser.add_argument("--s3_bucket_application", type=str, required=True,
    help="(REQUIRED) The S3 URL to monitor for changes MINUS the bucket name")
# Single quotes inside the help text: a bare "" would end the literal and silently drop the quotes.
command_parser.add_argument("--s3_bucket_application_in_zip", type=str, required=False, default="",
    help="(OPTIONAL, default='') The file path in the zip folder where the application is stored. Will be ignored if set to empty string")
# The documented default has to match the actual default value below.
command_parser.add_argument("--lambda_name", type=str, default="iot-send-email-lambda",
    help="(OPTIONAL, default='iot-send-email-lambda') The name of the Lambda used to send emails")
command_parser_arguments = command_parser.parse_args()
35*3c7ae9deSAndroid Build Coastguard Worker
36*3c7ae9deSAndroid Build Coastguard Worker# ================================================================================
37*3c7ae9deSAndroid Build Coastguard Worker# Global variables that both threads use to communicate.
38*3c7ae9deSAndroid Build Coastguard Worker# NOTE - These should likely be replaced with futures or similar for better thread safety.
39*3c7ae9deSAndroid Build Coastguard Worker#        However, these variables are only either read or written to from a single thread, no
40*3c7ae9deSAndroid Build Coastguard Worker#        thread should read and write to these variables.
41*3c7ae9deSAndroid Build Coastguard Worker
# Local path (including extension) of the Canary application managed by the wrapper.
# The same path is used as the destination when a new build is detected in S3.
# [THIS IS READ ONLY]
canary_local_application_path = command_parser_arguments.canary_executable
if not canary_local_application_path:
    # Nothing to launch without an executable
    print("ERROR - required canary_executable is empty!")
    exit(1)

# Arguments handed to the Canary application when it is started
# [THIS IS READ ONLY]
canary_local_application_arguments = command_parser_arguments.canary_arguments

# Stand-in "Git Hash" used for metrics and dimensions
# [THIS IS READ ONLY]
canary_local_git_hash_stub = "Canary"

# Stand-in "Git Repo" name used for metrics and dimensions.
# Hard-coded because this 24/7 canary should only run for MQTT
# [THIS IS READ ONLY]
canary_local_git_repo_stub = "MQTT5_24_7"

# Fixed namespace name for the Canary
# [THIS IS READ ONLY]
canary_local_git_fixed_namespace = "MQTT5_24_7_Canary"

# Name of the S3 bucket to monitor for the application; an empty value falls back to the default bucket
# [THIS IS READ ONLY]
canary_s3_bucket_name = command_parser_arguments.s3_bucket_name or "canary-wrapper-folder"

# File in the S3 bucket to monitor (application filepath and file, e.g. "canary/canary_application.exe")
# [THIS IS READ ONLY]
canary_s3_bucket_application_path = command_parser_arguments.s3_bucket_application
if not canary_s3_bucket_application_path:
    # Nothing to monitor without an S3 application path
    print("ERROR - required s3_bucket_application is empty!")
    exit(1)

# Location of the application inside the S3 zip, when the monitored S3 file is a zip.
# An empty string is normalized to None.
# [THIS IS READ ONLY]
canary_s3_bucket_application_path_zip = command_parser_arguments.s3_bucket_application_in_zip or None

# Name of the email lambda: an empty string is replaced with 'iot-send-email-lambda'.
# The parsed arguments object is updated in place because later code reads the name from it.
if not command_parser_arguments.lambda_name:
    command_parser_arguments.lambda_name = "iot-send-email-lambda"

# Region the canary is running in
# [THIS IS READ ONLY]
canary_region_stub = "us-east-1"

# Seconds to wait between gathering metrics and pushing them to Cloudwatch (10 minutes)
canary_metrics_wait_time = 10 * 60
# Seconds per pass of the Application thread loop (5 minutes).
# Should be shorter than or equal to the Canary metrics time.
canary_application_loop_wait_time = 5 * 60
88*3c7ae9deSAndroid Build Coastguard Worker
89*3c7ae9deSAndroid Build Coastguard Worker# For testing - set both to 30 seconds
90*3c7ae9deSAndroid Build Coastguard Worker# canary_metrics_wait_time = 30
91*3c7ae9deSAndroid Build Coastguard Worker# canary_application_loop_wait_time = 30
92*3c7ae9deSAndroid Build Coastguard Worker
93*3c7ae9deSAndroid Build Coastguard Worker# ================================================================================
94*3c7ae9deSAndroid Build Coastguard Worker
# Build the snapshot object that collects the metrics and reports them to Cloudwatch
_snapshot_settings = dict(
    git_hash=canary_local_git_hash_stub,
    git_repo_name=canary_local_git_repo_stub,
    git_hash_as_namespace=False,
    datetime_string=None,
    git_fixed_namespace_text=canary_local_git_fixed_namespace,
    output_log_filepath="output.txt",
    output_to_console=True,
    cloudwatch_region=canary_region_stub,
    cloudwatch_make_dashboard=True,
    cloudwatch_teardown_alarms_on_complete=True,
    cloudwatch_teardown_dashboard_on_complete=False,
    s3_bucket_name=canary_s3_bucket_name,
    s3_bucket_upload_on_complete=True,
    lambda_name=command_parser_arguments.lambda_name,
    metric_frequency=canary_metrics_wait_time,
)
data_snapshot = DataSnapshot(**_snapshot_settings)

# Bail out early if the snapshot could not be set up
if (data_snapshot.abort_due_to_internal_error == True):
    print("INFO - Stopping application due to error caused by credentials")
    print("Please fix your credentials and then restart this application again")
    exit(0)

# Metrics to register, in registration order. Each entry is the keyword-argument
# set for one register_metric call.
_metric_settings = (
    dict(
        new_metric_name="total_cpu_usage",
        new_metric_function=get_metric_total_cpu_usage,
        new_metric_unit="Percent",
        new_metric_alarm_threshold=70,
        new_metric_reports_to_skip=1,
        new_metric_alarm_severity=5,
        is_percent=True,
    ),
    dict(
        new_metric_name="total_memory_usage_value",
        new_metric_function=get_metric_total_memory_usage_value,
        new_metric_unit="Bytes",
    ),
    dict(
        new_metric_name="total_memory_usage_percent",
        new_metric_function=get_metric_total_memory_usage_percent,
        new_metric_unit="Percent",
        new_metric_alarm_threshold=70,
        new_metric_reports_to_skip=0,
        new_metric_alarm_severity=5,
        is_percent=True,
    ),
)
for _metric_kwargs in _metric_settings:
    data_snapshot.register_metric(**_metric_kwargs)

# Dashboard widgets: (title, metric names). The third positional argument is 60 for both.
_widget_settings = (
    ("Process CPU Usage - Percentage", ["total_cpu_usage"]),
    ("Process Memory Usage - Percentage", ["total_memory_usage_percent"]),
)
for _widget_title, _widget_metrics in _widget_settings:
    data_snapshot.register_dashboard_widget(_widget_title, _widget_metrics, 60)

# Output the diagnosis information (the 24/7 Canary has no dependency list to show)
data_snapshot.output_diagnosis_information("24/7 Canary cannot show dependencies!")
146*3c7ae9deSAndroid Build Coastguard Worker
def _stop_for_setup_error():
    # Shared abort path for a monitor that reports a failure right after construction.
    # NOTE(review): the message blames credentials although the flags checked are generic
    # error flags, and the exit status is 0 - confirm both are intended.
    print("INFO - Stopping application due to error caused by credentials")
    print("Please fix your credentials and then restart this application again")
    exit(0)


# S3 monitor: watches the bucket for the application file
s3_monitor = S3Monitor(
    s3_bucket_name=canary_s3_bucket_name,
    s3_file_name=canary_s3_bucket_application_path,
    s3_file_name_in_zip=canary_s3_bucket_application_path_zip,
    canary_local_application_path=canary_local_application_path,
    data_snapshot=data_snapshot)
if (s3_monitor.had_internal_error == True):
    _stop_for_setup_error()

# Snapshot (metrics) monitor
snapshot_monitor = SnapshotMonitor(
    wrapper_data_snapshot=data_snapshot,
    wrapper_metrics_wait_time=canary_metrics_wait_time)
if (snapshot_monitor.had_internal_error == True):
    _stop_for_setup_error()

# Application monitor: configured to restart the application when it finishes
application_monitor = ApplicationMonitor(
    wrapper_application_path=canary_local_application_path,
    wrapper_application_arguments=canary_local_application_arguments,
    wrapper_application_restart_on_finish=True,
    data_snapshot=data_snapshot)
if (application_monitor.error_has_occurred == True):
    _stop_for_setup_error()

# Tracks whether the wrapper stopped because of a metric alarm
stopped_due_to_metric_alarm = False
186*3c7ae9deSAndroid Build Coastguard Worker
def execution_loop():
    """Run the wrapper's main loop until one of the monitors reports an error.

    Each pass:
      1. Runs the S3 monitor and stops the loop if it reports an internal error.
      2. If the S3 monitor flags a new file, stops the application monitor, swaps
         in the new file, restarts the application monitor and re-enables ticket
         cutting on the snapshot monitor.
      3. Runs the snapshot monitor and the application monitor.
      4. Disables further ticket cutting once the snapshot monitor has cut a ticket.
      5. Stops the loop if the application monitor or the snapshot monitor reports
         an error; otherwise sleeps canary_application_loop_wait_time seconds.

    Returns nothing. The caller inspects the monitors afterwards to find out
    why the loop ended.
    """
    while True:
        s3_monitor.monitor_loop_function(time_passed=canary_application_loop_wait_time)

        # Is there an error?
        if s3_monitor.had_internal_error:
            print ("[Debug] S3 monitor had an internal error!")
            break

        # Is there a new file?
        if s3_monitor.s3_file_needs_replacing:
            # The application has to be stopped before its file is replaced
            print ("[Debug] Stopping application monitor...")
            application_monitor.stop_monitoring()
            print ("[Debug] Getting S3 file...")
            s3_monitor.replace_current_file_for_new_file()
            print ("[Debug] Starting application monitor...")
            application_monitor.start_monitoring()
            # A new build is running, so the snapshot monitor may cut a ticket again
            snapshot_monitor.can_cut_ticket = True

        snapshot_monitor.monitor_loop_function(
            time_passed=canary_application_loop_wait_time,
            psutil_process=application_monitor.application_process_psutil)
        application_monitor.monitor_loop_function(
            time_passed=canary_application_loop_wait_time)

        # Did a metric go into alarm?
        if snapshot_monitor.has_cut_ticket:
            # Do not allow it to cut any more tickets until it gets a new build
            snapshot_monitor.can_cut_ticket = False

        # Stop on the first reported error; the application monitor takes precedence
        # when both monitors report one.
        if application_monitor.error_has_occurred:
            print ("[Debug] Application monitor error occurred!")
            break
        if snapshot_monitor.had_internal_error:
            print ("[Debug] Snapshot monitor internal error occurred!")
            break

        time.sleep(canary_application_loop_wait_time)
228*3c7ae9deSAndroid Build Coastguard Worker
229*3c7ae9deSAndroid Build Coastguard Worker
230*3c7ae9deSAndroid Build Coastguard Workerdef application_thread():
231*3c7ae9deSAndroid Build Coastguard Worker    # Start the application going
232*3c7ae9deSAndroid Build Coastguard Worker    snapshot_monitor.start_monitoring()
233*3c7ae9deSAndroid Build Coastguard Worker    application_monitor.start_monitoring()
234*3c7ae9deSAndroid Build Coastguard Worker    # Allow the snapshot monitor to cut tickets
235*3c7ae9deSAndroid Build Coastguard Worker    snapshot_monitor.can_cut_ticket = True
236*3c7ae9deSAndroid Build Coastguard Worker
237*3c7ae9deSAndroid Build Coastguard Worker    start_email_body = "MQTT5 24/7 Canary Wrapper has started. This will run and continue to test new MQTT5 application builds as"
238*3c7ae9deSAndroid Build Coastguard Worker    start_email_body += " they pass CodeBuild and are uploaded to S3."
239*3c7ae9deSAndroid Build Coastguard Worker    snapshot_monitor.send_email(email_body=start_email_body, email_subject_text_append="Started")
240*3c7ae9deSAndroid Build Coastguard Worker
241*3c7ae9deSAndroid Build Coastguard Worker    # Start the execution loop
242*3c7ae9deSAndroid Build Coastguard Worker    execution_loop()
243*3c7ae9deSAndroid Build Coastguard Worker
244*3c7ae9deSAndroid Build Coastguard Worker    # Make sure everything is stopped
245*3c7ae9deSAndroid Build Coastguard Worker    snapshot_monitor.stop_monitoring()
246*3c7ae9deSAndroid Build Coastguard Worker    application_monitor.stop_monitoring()
247*3c7ae9deSAndroid Build Coastguard Worker
248*3c7ae9deSAndroid Build Coastguard Worker    # Track whether this counts as an error (and therefore we should cleanup accordingly) or not
249*3c7ae9deSAndroid Build Coastguard Worker    wrapper_error_occurred = False
250*3c7ae9deSAndroid Build Coastguard Worker
251*3c7ae9deSAndroid Build Coastguard Worker    send_finished_email = True
252*3c7ae9deSAndroid Build Coastguard Worker    finished_email_body = "MQTT5 24/7 Canary Wrapper has stopped."
253*3c7ae9deSAndroid Build Coastguard Worker    finished_email_body += "\n\n"
254*3c7ae9deSAndroid Build Coastguard Worker
255*3c7ae9deSAndroid Build Coastguard Worker    try:
256*3c7ae9deSAndroid Build Coastguard Worker        # Find out why we stopped
257*3c7ae9deSAndroid Build Coastguard Worker        # S3 Monitor
258*3c7ae9deSAndroid Build Coastguard Worker        if (s3_monitor.had_internal_error == True):
259*3c7ae9deSAndroid Build Coastguard Worker            if (s3_monitor.error_due_to_credentials == False):
260*3c7ae9deSAndroid Build Coastguard Worker                print ("ERROR - S3 monitor stopped due to internal error!")
261*3c7ae9deSAndroid Build Coastguard Worker                cut_ticket_using_cloudwatch(
262*3c7ae9deSAndroid Build Coastguard Worker                    git_repo_name=canary_local_git_repo_stub,
263*3c7ae9deSAndroid Build Coastguard Worker                    git_hash=canary_local_git_hash_stub,
264*3c7ae9deSAndroid Build Coastguard Worker                    git_hash_as_namespace=False,
265*3c7ae9deSAndroid Build Coastguard Worker                    git_fixed_namespace_text=canary_local_git_fixed_namespace,
266*3c7ae9deSAndroid Build Coastguard Worker                    cloudwatch_region=canary_region_stub,
267*3c7ae9deSAndroid Build Coastguard Worker                    ticket_description="Snapshot monitor stopped due to internal error! Reason info: " + s3_monitor.internal_error_reason,
268*3c7ae9deSAndroid Build Coastguard Worker                    ticket_reason="S3 monitor stopped due to internal error",
269*3c7ae9deSAndroid Build Coastguard Worker                    ticket_allow_duplicates=True,
270*3c7ae9deSAndroid Build Coastguard Worker                    ticket_category="AWS",
271*3c7ae9deSAndroid Build Coastguard Worker                    ticket_type="SDKs and Tools",
272*3c7ae9deSAndroid Build Coastguard Worker                    ticket_item="IoT SDK for CPP",
273*3c7ae9deSAndroid Build Coastguard Worker                    ticket_group="AWS IoT Device SDK",
274*3c7ae9deSAndroid Build Coastguard Worker                    ticket_severity=4)
275*3c7ae9deSAndroid Build Coastguard Worker                finished_email_body += "Failure due to S3 monitor stopping due to an internal error."
276*3c7ae9deSAndroid Build Coastguard Worker                finished_email_body += " Reason given for error: " + s3_monitor.internal_error_reason
277*3c7ae9deSAndroid Build Coastguard Worker                wrapper_error_occurred = True
278*3c7ae9deSAndroid Build Coastguard Worker        # Snapshot Monitor
279*3c7ae9deSAndroid Build Coastguard Worker        elif (snapshot_monitor.had_internal_error == True):
280*3c7ae9deSAndroid Build Coastguard Worker            if (snapshot_monitor.has_cut_ticket == True):
281*3c7ae9deSAndroid Build Coastguard Worker                # We do not need to cut a ticket here - it's cut by the snapshot monitor!
282*3c7ae9deSAndroid Build Coastguard Worker                print ("ERROR - Snapshot monitor stopped due to metric in alarm!")
283*3c7ae9deSAndroid Build Coastguard Worker                finished_email_body += "Failure due to required metrics being in alarm! A new ticket should have been cut!"
284*3c7ae9deSAndroid Build Coastguard Worker                finished_email_body += "\nMetrics in Alarm: " + str(snapshot_monitor.cloudwatch_current_alarms_triggered)
285*3c7ae9deSAndroid Build Coastguard Worker                finished_email_body += "\nNOTE - this shouldn't occur in the 24/7 Canary! If it does, then the wrapper needs adjusting."
286*3c7ae9deSAndroid Build Coastguard Worker                wrapper_error_occurred = True
287*3c7ae9deSAndroid Build Coastguard Worker            else:
288*3c7ae9deSAndroid Build Coastguard Worker                print ("ERROR - Snapshot monitor stopped due to internal error!")
289*3c7ae9deSAndroid Build Coastguard Worker                cut_ticket_using_cloudwatch(
290*3c7ae9deSAndroid Build Coastguard Worker                    git_repo_name=canary_local_git_repo_stub,
291*3c7ae9deSAndroid Build Coastguard Worker                    git_hash=canary_local_git_hash_stub,
292*3c7ae9deSAndroid Build Coastguard Worker                    git_hash_as_namespace=False,
293*3c7ae9deSAndroid Build Coastguard Worker                    git_fixed_namespace_text=canary_local_git_fixed_namespace,
294*3c7ae9deSAndroid Build Coastguard Worker                    cloudwatch_region=canary_region_stub,
295*3c7ae9deSAndroid Build Coastguard Worker                    ticket_description="Snapshot monitor stopped due to internal error! Reason info: " + snapshot_monitor.internal_error_reason,
296*3c7ae9deSAndroid Build Coastguard Worker                    ticket_reason="Snapshot monitor stopped due to internal error",
297*3c7ae9deSAndroid Build Coastguard Worker                    ticket_allow_duplicates=True,
298*3c7ae9deSAndroid Build Coastguard Worker                    ticket_category="AWS",
299*3c7ae9deSAndroid Build Coastguard Worker                    ticket_type="SDKs and Tools",
300*3c7ae9deSAndroid Build Coastguard Worker                    ticket_item="IoT SDK for CPP",
301*3c7ae9deSAndroid Build Coastguard Worker                    ticket_group="AWS IoT Device SDK",
302*3c7ae9deSAndroid Build Coastguard Worker                    ticket_severity=4)
303*3c7ae9deSAndroid Build Coastguard Worker                wrapper_error_occurred = True
304*3c7ae9deSAndroid Build Coastguard Worker                finished_email_body += "Failure due to Snapshot monitor stopping due to an internal error."
305*3c7ae9deSAndroid Build Coastguard Worker                finished_email_body += " Reason given for error: " + snapshot_monitor.internal_error_reason
306*3c7ae9deSAndroid Build Coastguard Worker        # Application Monitor
307*3c7ae9deSAndroid Build Coastguard Worker        elif (application_monitor.error_has_occurred == True):
308*3c7ae9deSAndroid Build Coastguard Worker            if (application_monitor.error_due_to_credentials == True):
309*3c7ae9deSAndroid Build Coastguard Worker                print ("INFO - Stopping application due to error caused by credentials")
310*3c7ae9deSAndroid Build Coastguard Worker                print ("Please fix your credentials and then restart this application again")
311*3c7ae9deSAndroid Build Coastguard Worker                wrapper_error_occurred = True
312*3c7ae9deSAndroid Build Coastguard Worker                send_finished_email = False
313*3c7ae9deSAndroid Build Coastguard Worker            else:
314*3c7ae9deSAndroid Build Coastguard Worker                # Is the error something in the canary failed?
315*3c7ae9deSAndroid Build Coastguard Worker                if (application_monitor.error_code != 0):
316*3c7ae9deSAndroid Build Coastguard Worker                    cut_ticket_using_cloudwatch(
317*3c7ae9deSAndroid Build Coastguard Worker                        git_repo_name=canary_local_git_repo_stub,
318*3c7ae9deSAndroid Build Coastguard Worker                        git_hash=canary_local_git_hash_stub,
319*3c7ae9deSAndroid Build Coastguard Worker                        git_hash_as_namespace=False,
320*3c7ae9deSAndroid Build Coastguard Worker                        git_fixed_namespace_text=canary_local_git_fixed_namespace,
321*3c7ae9deSAndroid Build Coastguard Worker                        cloudwatch_region=canary_region_stub,
322*3c7ae9deSAndroid Build Coastguard Worker                        ticket_description="The 24/7 Canary exited with a non-zero exit code! This likely means something in the canary failed.",
323*3c7ae9deSAndroid Build Coastguard Worker                        ticket_reason="The 24/7 Canary exited with a non-zero exit code",
324*3c7ae9deSAndroid Build Coastguard Worker                        ticket_allow_duplicates=True,
325*3c7ae9deSAndroid Build Coastguard Worker                        ticket_category="AWS",
326*3c7ae9deSAndroid Build Coastguard Worker                        ticket_type="SDKs and Tools",
327*3c7ae9deSAndroid Build Coastguard Worker                        ticket_item="IoT SDK for CPP",
328*3c7ae9deSAndroid Build Coastguard Worker                        ticket_group="AWS IoT Device SDK",
329*3c7ae9deSAndroid Build Coastguard Worker                        ticket_severity=3)
330*3c7ae9deSAndroid Build Coastguard Worker                    wrapper_error_occurred = True
331*3c7ae9deSAndroid Build Coastguard Worker                    finished_email_body += "Failure due to MQTT5 application exiting with a non-zero exit code!"
332*3c7ae9deSAndroid Build Coastguard Worker                    finished_email_body += " This means something in the Canary application itself failed"
333*3c7ae9deSAndroid Build Coastguard Worker                else:
334*3c7ae9deSAndroid Build Coastguard Worker                    cut_ticket_using_cloudwatch(
335*3c7ae9deSAndroid Build Coastguard Worker                        git_repo_name=canary_local_git_repo_stub,
336*3c7ae9deSAndroid Build Coastguard Worker                        git_hash=canary_local_git_hash_stub,
337*3c7ae9deSAndroid Build Coastguard Worker                        git_hash_as_namespace=False,
338*3c7ae9deSAndroid Build Coastguard Worker                        git_fixed_namespace_text=canary_local_git_fixed_namespace,
339*3c7ae9deSAndroid Build Coastguard Worker                        cloudwatch_region=canary_region_stub,
340*3c7ae9deSAndroid Build Coastguard Worker                        ticket_description="The 24/7 Canary exited with a zero exit code but did not restart!",
341*3c7ae9deSAndroid Build Coastguard Worker                        ticket_reason="The 24/7 Canary exited with a zero exit code but did not restart",
342*3c7ae9deSAndroid Build Coastguard Worker                        ticket_allow_duplicates=True,
343*3c7ae9deSAndroid Build Coastguard Worker                        ticket_category="AWS",
344*3c7ae9deSAndroid Build Coastguard Worker                        ticket_type="SDKs and Tools",
345*3c7ae9deSAndroid Build Coastguard Worker                        ticket_item="IoT SDK for CPP",
346*3c7ae9deSAndroid Build Coastguard Worker                        ticket_group="AWS IoT Device SDK",
347*3c7ae9deSAndroid Build Coastguard Worker                        ticket_severity=3)
348*3c7ae9deSAndroid Build Coastguard Worker                    wrapper_error_occurred = True
349*3c7ae9deSAndroid Build Coastguard Worker                    finished_email_body += "Failure due to MQTT5 application stopping and not automatically restarting!"
350*3c7ae9deSAndroid Build Coastguard Worker                    finished_email_body += " This shouldn't occur and means something is wrong with the Canary wrapper!"
351*3c7ae9deSAndroid Build Coastguard Worker        # Other
352*3c7ae9deSAndroid Build Coastguard Worker        else:
353*3c7ae9deSAndroid Build Coastguard Worker            print ("ERROR - 24/7 Canary stopped due to unknown reason!")
354*3c7ae9deSAndroid Build Coastguard Worker            cut_ticket_using_cloudwatch(
355*3c7ae9deSAndroid Build Coastguard Worker                git_repo_name=canary_local_git_repo_stub,
356*3c7ae9deSAndroid Build Coastguard Worker                git_hash=canary_local_git_hash_stub,
357*3c7ae9deSAndroid Build Coastguard Worker                git_hash_as_namespace=False,
358*3c7ae9deSAndroid Build Coastguard Worker                git_fixed_namespace_text=canary_local_git_fixed_namespace,
359*3c7ae9deSAndroid Build Coastguard Worker                cloudwatch_region=canary_region_stub,
360*3c7ae9deSAndroid Build Coastguard Worker                ticket_description="The 24/7 Canary stopped for an unknown reason!",
361*3c7ae9deSAndroid Build Coastguard Worker                ticket_reason="The 24/7 Canary stopped for unknown reason",
362*3c7ae9deSAndroid Build Coastguard Worker                ticket_allow_duplicates=True,
363*3c7ae9deSAndroid Build Coastguard Worker                ticket_category="AWS",
364*3c7ae9deSAndroid Build Coastguard Worker                ticket_type="SDKs and Tools",
365*3c7ae9deSAndroid Build Coastguard Worker                ticket_item="IoT SDK for CPP",
366*3c7ae9deSAndroid Build Coastguard Worker                ticket_group="AWS IoT Device SDK",
367*3c7ae9deSAndroid Build Coastguard Worker                ticket_severity=3)
368*3c7ae9deSAndroid Build Coastguard Worker            wrapper_error_occurred = True
369*3c7ae9deSAndroid Build Coastguard Worker            finished_email_body += "Failure due to unknown reason! This shouldn't happen and means something has gone wrong!"
370*3c7ae9deSAndroid Build Coastguard Worker    except Exception as e:
371*3c7ae9deSAndroid Build Coastguard Worker        print ("ERROR: Could not (possibly) cut ticket due to exception!")
372*3c7ae9deSAndroid Build Coastguard Worker        print ("Exception: " + str(e), flush=True)
373*3c7ae9deSAndroid Build Coastguard Worker
374*3c7ae9deSAndroid Build Coastguard Worker    # Clean everything up and stop
375*3c7ae9deSAndroid Build Coastguard Worker    snapshot_monitor.cleanup_monitor(error_occurred=wrapper_error_occurred)
376*3c7ae9deSAndroid Build Coastguard Worker    application_monitor.cleanup_monitor(error_occurred=wrapper_error_occurred)
377*3c7ae9deSAndroid Build Coastguard Worker    print ("24/7 Canary finished!")
378*3c7ae9deSAndroid Build Coastguard Worker
379*3c7ae9deSAndroid Build Coastguard Worker    finished_email_body += "\n\nYou can find the log file for this run at the following S3 location: "
380*3c7ae9deSAndroid Build Coastguard Worker    finished_email_body += "https://s3.console.aws.amazon.com/s3/object/"
381*3c7ae9deSAndroid Build Coastguard Worker    finished_email_body += command_parser_arguments.s3_bucket_name
382*3c7ae9deSAndroid Build Coastguard Worker    finished_email_body += "?region=" + canary_region_stub
383*3c7ae9deSAndroid Build Coastguard Worker    finished_email_body += "&prefix=" + canary_local_git_repo_stub + "/"
384*3c7ae9deSAndroid Build Coastguard Worker    if (wrapper_error_occurred == True):
385*3c7ae9deSAndroid Build Coastguard Worker        finished_email_body += "Failed_Logs/"
386*3c7ae9deSAndroid Build Coastguard Worker    finished_email_body += canary_local_git_hash_stub + ".log"
387*3c7ae9deSAndroid Build Coastguard Worker    # Send the finish email
388*3c7ae9deSAndroid Build Coastguard Worker    if (send_finished_email == True):
389*3c7ae9deSAndroid Build Coastguard Worker        if (wrapper_error_occurred == True):
390*3c7ae9deSAndroid Build Coastguard Worker            snapshot_monitor.send_email(email_body=finished_email_body, email_subject_text_append="Had an error")
391*3c7ae9deSAndroid Build Coastguard Worker        else:
392*3c7ae9deSAndroid Build Coastguard Worker            snapshot_monitor.send_email(email_body=finished_email_body, email_subject_text_append="Finished")
393*3c7ae9deSAndroid Build Coastguard Worker
394*3c7ae9deSAndroid Build Coastguard Worker    exit (-1)
395*3c7ae9deSAndroid Build Coastguard Worker
396*3c7ae9deSAndroid Build Coastguard Worker
397*3c7ae9deSAndroid Build Coastguard Worker# Start the application!
# Script entry point: this call sits at module level with no
# `if __name__ == "__main__":` guard, so it runs whenever this file is
# executed (or imported).
# NOTE(review): the visible tail of application_thread() finishes with
# `exit (-1)` after cleaning up the monitors and optionally sending the finish
# email, so this call is not expected to return normally - confirm against the
# full function body.
398*3c7ae9deSAndroid Build Coastguard Workerapplication_thread()
399