@@ -10,12 +10,12 @@ use si_data_nats::{InnerMessage, Subject};
1010// seems strange to get these cyclone_core types from si_pool_noodle?
1111use si_pool_noodle:: {
1212 ActionRunResultSuccess , CycloneClient , CycloneRequest , CycloneRequestable , ExecutionError ,
13- ManagementResultSuccess , ProgressMessage , ResolverFunctionResultSuccess ,
14- SchemaVariantDefinitionResultSuccess , SensitiveStrings , ValidationResultSuccess ,
13+ FunctionResultFailure , FunctionResultFailureError , ManagementResultSuccess , ProgressMessage ,
14+ ResolverFunctionResultSuccess , SchemaVariantDefinitionResultSuccess , SensitiveStrings ,
15+ ValidationResultSuccess ,
1516} ;
1617use std:: { collections:: HashMap , result, str:: Utf8Error , sync:: Arc , time:: Duration } ;
1718use telemetry:: prelude:: * ;
18- use telemetry_utils:: metric;
1919use thiserror:: Error ;
2020use tokio:: sync:: { oneshot, Mutex } ;
2121use veritech_core:: {
@@ -186,7 +186,6 @@ where
186186 let nats_for_publisher = state. nats . clone ( ) ;
187187 let publisher = Publisher :: new ( & nats_for_publisher, & reply_mailbox) ;
188188 let execution_id = request. execution_id ( ) . to_owned ( ) ;
189-
190189 let cyclone_request = CycloneRequest :: from_parts ( request. clone ( ) , sensitive_strings) ;
191190
192191 let ( kill_sender, kill_receiver) = oneshot:: channel :: < ( ) > ( ) ;
@@ -224,7 +223,7 @@ where
224223 trace ! ( "received heartbeat message" ) ;
225224 }
226225 Err ( err) => {
227- warn ! ( error = ?err, "next progress message was an error, bailing out" ) ;
226+ warn ! ( si . error. message = ?err, "next progress message was an error, bailing out" ) ;
228227 break ;
229228 }
230229 }
@@ -242,47 +241,76 @@ where
242241 HandlerResult :: Ok ( function_result)
243242 } ;
244243
245- // we do not want to return errors at this point as it will retry functions that may have
246- // failed for legitimate reasons and should not be retried
244+ // We do not want to return errors at this point: doing so would Nack the message and end up auto-retrying
245+ // functions that may have failed for legitimate reasons and should not be retried.
247246 let timeout = state. cyclone_client_execution_timeout ;
248247 let result = tokio:: select! {
249248 _ = tokio:: time:: sleep( timeout) => {
250- error!( "hit timeout for communicating with cyclone server" ) ;
251- kill_sender_remove_blocking( & state. kill_senders, execution_id) . await ?;
249+ error!( "hit timeout for communicating with cyclone server:{:?}" , & timeout ) ;
250+ kill_sender_remove_blocking( & state. kill_senders, execution_id. to_owned ( ) ) . await ?;
252251 Err ( HandlerError :: CycloneTimeout (
253252 timeout,
254253 ) )
255254 } ,
256255 Ok ( _) = kill_receiver => {
257- Err ( HandlerError :: Killed ( execution_id) )
256+ Err ( HandlerError :: Killed ( execution_id. clone ( ) ) )
258257 }
259258 func_result = progress_loop => {
260- kill_sender_remove_blocking( & state. kill_senders, execution_id) . await ?;
259+ kill_sender_remove_blocking( & state. kill_senders, execution_id. to_owned ( ) ) . await ?;
261260 func_result
262261 } ,
263262 } ;
264263
265264 match result {
265+ // Got an Ok - let anyone subscribing to a reply know
266266 Ok ( function_result) => {
267267 if let Err ( err) = publisher. publish_result ( & function_result) . await {
268- metric ! ( counter. function_run. action = -1 ) ;
269- error ! ( error = ?err, "failed to publish errored result" ) ;
268+ error ! ( si. error. message = ?err, "failed to publish errored result" ) ;
270269 }
271270
272271 request. dec_run_metric ( ) ;
273272 span. record_ok ( ) ;
274273 }
275- Err ( HandlerError :: CycloneTimeout ( timeout) ) => {
276- request. dec_run_metric ( ) ;
277- warn ! ( error = ?timeout, "timed out trying to run function to completion" ) ;
278- }
279- Err ( HandlerError :: Killed ( execution_id) ) => {
280- request. dec_run_metric ( ) ;
281- info ! ( error = ?execution_id, "function killed during execution via signal" ) ;
282- }
283- Err ( err) => {
274+ // Got an error that we don't want to recover from here. We still need to let anyone subscribing know
275+ // we're done, so they're not waiting forever and can decide how to proceed.
276+ // Construct the error result to propagate to subscribers.
277+ Err ( ref err) => {
278+ let func_result_error = match err {
279+ HandlerError :: CycloneTimeout ( ref timeout) => {
280+ warn ! ( si. error. message = ?err, "timed out trying to run function to completion: {:?}" , timeout) ;
281+ let func_res_failure = FunctionResultFailure :: new_for_veritech_server_error (
282+ execution_id. to_owned ( ) ,
283+ "timed out trying to run function to completion" ,
284+ timestamp ( ) ,
285+ ) ;
286+ si_pool_noodle:: FunctionResult :: Failure :: < Request > ( func_res_failure)
287+ }
288+ HandlerError :: Killed ( ref execution_id) => {
289+ warn ! ( si. error. message = ?err, si. func_run. id = ?execution_id, "function killed during execution: {:?} via signal" , execution_id) ;
290+ let func_res_failure = FunctionResultFailure :: new (
291+ execution_id. to_owned ( ) ,
292+ FunctionResultFailureError {
293+ kind : si_pool_noodle:: FunctionResultFailureErrorKind :: KilledExecution ,
294+ message : "function execution terminated" . to_owned ( ) ,
295+ } ,
296+ timestamp ( ) ,
297+ ) ;
298+ si_pool_noodle:: FunctionResult :: Failure :: < Request > ( func_res_failure)
299+ }
300+ err => {
301+ error ! ( si. error. message = ?err, si. func_run. id = ?execution_id. to_owned( ) , "failure trying to run function to completion" ) ;
302+ let func_res_failure = FunctionResultFailure :: new_for_veritech_server_error (
303+ execution_id. to_owned ( ) ,
304+ "timed out trying to run function to completion" ,
305+ timestamp ( ) ,
306+ ) ;
307+ si_pool_noodle:: FunctionResult :: Failure :: < Request > ( func_res_failure)
308+ }
309+ } ;
284310 request. dec_run_metric ( ) ;
285- error ! ( error = ?err, "failure trying to run function to completion" ) ;
311+ if let Err ( err) = publisher. publish_result ( & func_result_error) . await {
312+ error ! ( error = ?err, "failed to publish errored result" ) ;
313+ }
286314 }
287315 }
288316
0 commit comments