-
Notifications
You must be signed in to change notification settings - Fork 23
Description
I keep getting this error when I run the data load task for my project. I have tried different datasets, reference genomes (both GRCh37 and GRCh38), and projects. I also tried deleting the matrix table folder, but the error still persists.
Hail: INFO: wrote matrix table with 7279144 rows and 3 columns in 6 partitions to /tmp/8e118849-1968-4a96-a831-ce16f7e38269.mt
ERROR: [pid 1] Worker Worker(salt=9444364660, workers=1, host=pipeline-runner-api-7c454844b6-g5pck, username=root, pid=1) failed ValidateCallsetTask(reference_genome=GRCh38, dataset_type=SNV_INDEL, run_id=20251123-081803-717131, sample_type=WGS, callset_path=/var/seqr/seqr-loading-temp/Patient-450.hard-filtered.vcf.gz, project_guids=["R0004_novel_test_batch"], skip_check_sex_and_relatedness=False, skip_expect_tdr_metrics=True, skip_validation=False, is_new_gcnv_joint_call=False)
Traceback (most recent call last):
File "/usr/local/lib/python3.11/dist-packages/luigi/worker.py", line 210, in run
new_deps = self._run_get_new_deps()
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/luigi/worker.py", line 138, in _run_get_new_deps
task_gen = self.task.run()
^^^^^^^^^^^^^^^
File "/v03_pipeline/lib/tasks/base/base_update.py", line 20, in run
write(ht, self.output().path)
File "/v03_pipeline/lib/misc/io.py", line 332, in write
return t.write(destination_path, overwrite=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "", line 2, in write
File "/usr/local/lib/python3.11/dist-packages/hail/typecheck/check.py", line 585, in wrapper
return original_func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/hail/matrixtable.py", line 2810, in write
Env.backend().execute(ir.MatrixWrite(self._mir, writer))
File "/usr/local/lib/python3.11/dist-packages/hail/backend/spark_backend.py", line 217, in execute
raise err
File "/usr/local/lib/python3.11/dist-packages/hail/backend/spark_backend.py", line 209, in execute
return super().execute(ir, timed)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/hail/backend/backend.py", line 181, in execute
raise e.maybe_user_error(ir) from None
File "/usr/local/lib/python3.11/dist-packages/hail/backend/backend.py", line 179, in execute
result, timings = self._rpc(ActionTag.EXECUTE, payload)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/hail/backend/py4j_backend.py", line 221, in _rpc
raise fatal_error_from_java_error_triplet(
hail.utils.java.FatalError: HailException: RelationalSetup.writeMetadata: file already exists: /var/seqr/seqr-loading-temp/GRCh38/SNV_INDEL/imported_callsets/b0579a8c111de07ee25141e6b1a81c0c3e1315bf44b8d6592586c366330350d2.mt/rows
Java stack trace:
is.hail.utils.HailException: RelationalSetup.writeMetadata: file already exists: /var/seqr/seqr-loading-temp/GRCh38/SNV_INDEL/imported_callsets/b0579a8c111de07ee25141e6b1a81c0c3e1315bf44b8d6592586c366330350d2.mt/rows
at __C24120Compiled.__m24124begin_group_0_region6_11(Emit.scala)
at __C24120Compiled.__m24124begin_group_0(Emit.scala)
at __C24120Compiled.__m24122split_Block(Emit.scala)
at __C24120Compiled.apply(Emit.scala)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$_apply$4(CompileAndEvaluate.scala:60)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:84)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$_apply$2(CompileAndEvaluate.scala:60)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$_apply$2$adapted(CompileAndEvaluate.scala:58)
at is.hail.backend.ExecuteContext.$anonfun$scopedExecution$1(ExecuteContext.scala:144)
at is.hail.utils.package$.using(package.scala:673)
at is.hail.backend.ExecuteContext.scopedExecution(ExecuteContext.scala:144)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:58)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$apply$1(CompileAndEvaluate.scala:17)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:84)
at is.hail.expr.ir.CompileAndEvaluate$.apply(CompileAndEvaluate.scala:17)
at is.hail.expr.ir.TableWriter.apply(TableWriter.scala:51)
at is.hail.expr.ir.Interpret$.run(Interpret.scala:922)
at is.hail.expr.ir.Interpret$.alreadyLowered(Interpret.scala:66)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:20)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:59)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:64)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:83)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$3(LoweringPass.scala:32)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:84)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$1(LoweringPass.scala:32)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:84)
at is.hail.expr.ir.lowering.LoweringPass.apply(LoweringPass.scala:30)
at is.hail.expr.ir.lowering.LoweringPass.apply$(LoweringPass.scala:29)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:78)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1(LoweringPipeline.scala:21)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1$adapted(LoweringPipeline.scala:19)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:19)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:45)
at is.hail.backend.spark.SparkBackend._execute(SparkBackend.scala:578)
at is.hail.backend.spark.SparkBackend.$anonfun$execute$4(SparkBackend.scala:614)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:84)
at is.hail.backend.spark.SparkBackend.$anonfun$execute$3(SparkBackend.scala:609)
at is.hail.backend.spark.SparkBackend.$anonfun$execute$3$adapted(SparkBackend.scala:608)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:78)
at is.hail.utils.package$.using(package.scala:673)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:78)
at is.hail.utils.package$.using(package.scala:673)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:13)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:65)
at is.hail.backend.spark.SparkBackend.$anonfun$withExecuteContext$2(SparkBackend.scala:411)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:55)
at is.hail.utils.ExecutionTimer$.logTime(ExecutionTimer.scala:62)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:397)
at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:608)
at is.hail.backend.BackendHttpHandler.handle(BackendServer.scala:88)
at jdk.httpserver/com.sun.net.httpserver.Filter$Chain.doFilter(Filter.java:77)
at jdk.httpserver/sun.net.httpserver.AuthFilter.doFilter(AuthFilter.java:82)
at jdk.httpserver/com.sun.net.httpserver.Filter$Chain.doFilter(Filter.java:80)
at jdk.httpserver/sun.net.httpserver.ServerImpl$Exchange$LinkHandler.handle(ServerImpl.java:848)
at jdk.httpserver/com.sun.net.httpserver.Filter$Chain.doFilter(Filter.java:77)
at jdk.httpserver/sun.net.httpserver.ServerImpl$Exchange.run(ServerImpl.java:817)
at jdk.httpserver/sun.net.httpserver.ServerImpl$DefaultExecutor.execute(ServerImpl.java:201)
at jdk.httpserver/sun.net.httpserver.ServerImpl$Dispatcher.handle(ServerImpl.java:560)
at jdk.httpserver/sun.net.httpserver.ServerImpl$Dispatcher.run(ServerImpl.java:526)
at java.base/java.lang.Thread.run(Thread.java:829)