I started running PySR on a cluster with the following configuration:
PySRRegressor(
binary_operators=["+", "-", "*"],
unary_operators=["square", "cube", "fourth(x) = x^4"],
extra_sympy_mappings={"fourth": lambda x: x**4},
expression_spec=template,
niterations=100,
batching=True,
batch_size=4096,
populations=150,
population_size=128,
maxdepth=7,
maxsize=32,
parsimony=1e-4,
elementwise_loss="L2DistLoss()",
procs=30,
cluster_manager="slurm",
parallelism="multiprocessing",
turbo=True,
ncycles_per_iteration=100,
)
When I run with this configuration, I notice that on each worker, once Julia has been initialised and the code starts running, an error is raised during compilation:
cpu-bind=MASK - chuck-29, task 0 0 [6082]: mask 0x3fffffff set
Compiling Julia backend...
Traceback (most recent call last):
File "/work/chuck/anemmani/git-repos/symbolic-quark/scripts/internal-params/mass-symbolic.py", line 623, in <module>
main()
~~~~^^
File "/work/chuck/anemmani/git-repos/symbolic-quark/scripts/internal-params/mass-symbolic.py", line 604, in main
model.fit(train_x, train_y)
~~~~~~~~~^^^^^^^^^^^^^^^^^^
File "/home/anemmani/softwares/anaconda3/envs/pysr/lib/python3.14/site-packages/pysr/sr.py", line 2318, in fit
self._run(X, y, runtime_params, weights=weights, seed=seed, category=category)
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/anemmani/softwares/anaconda3/envs/pysr/lib/python3.14/site-packages/pysr/sr.py", line 2109, in _run
out = SymbolicRegression.equation_search(
jl_X,
...<27 lines>...
logger=logger,
)
File "/home/anemmani/.julia/packages/PythonCall/avYrV/src/JlWrap/any.jl", line 262, in __call__
return self._jl_callmethod($(pyjl_methodnum(pyjlany_call)), args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
juliacall.JuliaError: On worker 2:
UndefVarError: `ComposableExpressionModule` not defined in `SymbolicRegression`
Stacktrace:
[1] deserialize_module
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:1034
[2] handle_deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:933
[3] deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:851
[4] deserialize_datatype
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:1543
[5] handle_deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:904
[6] deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:851
[7] deserialize_datatype
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:1555
[8] handle_deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:904
[9] deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:851
[10] deserialize_datatype
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:1568
[11] handle_deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:904
[12] deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:851
[13] deserialize_datatype
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:1559
[14] handle_deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:904
[15] deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:851
[16] deserialize_datatype
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:1568
[17] handle_deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:904
[18] deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:851
[19] handle_deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:911
[20] deserialize
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Serialization/src/Serialization.jl:851 [inlined]
[21] deserialize_msg
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Distributed/src/messages.jl:87
[22] message_handler_loop
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Distributed/src/process_messages.jl:176
[23] process_tcp_streams
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Distributed/src/process_messages.jl:133
[24] #process_messages##0
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Distributed/src/process_messages.jl:121
Stacktrace:
[1] #remotecall_fetch#109
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Distributed/src/remotecall.jl:465
[2] remotecall_fetch
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Distributed/src/remotecall.jl:454
[3] remotecall_fetch
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Distributed/src/remotecall.jl:492 [inlined]
[4] call_on_owner
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Distributed/src/remotecall.jl:565 [inlined]
[5] fetch
@ ~/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/share/julia/stdlib/v1.12/Distributed/src/remotecall.jl:619
[6] test_function_on_workers(example_inputs::Tuple{@NamedTuple{f::ComposableExpression{Float64, Node{Float64}, @NamedTuple{operators::OperatorEnum{Tuple{typeof(+), typeof(-), typeof(*)}, Tuple{typeof(square), typeof(cube), typeof(fourth)}}, variable_names::Nothing, eval_options::EvalOptions{true, false, true, Nothing}}}}, @NamedTuple{p1::SymbolicRegression.TemplateExpressionModule.ParamVector{Float64}, p2::SymbolicRegression.TemplateExpressionModule.ParamVector{Float64}}, Vector{ValidVector{Vector{Float64}}}}, op::typeof(__sr_template_2337148491627256653), procs::Vector{Int64})
@ SymbolicRegression ~/.julia/packages/SymbolicRegression/L5TJa/src/Configure.jl:237
[7] move_functions_to_workers(procs::Vector{Int64}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, OperatorEnum{Tuple{typeof(+), typeof(-), typeof(*)}, Tuple{typeof(square), typeof(cube), typeof(fourth)}}, Node, TemplateExpression, @NamedTuple{structure::TemplateStructure{(:f,), (:p1, :p2), typeof(__sr_template_2337148491627256653), @NamedTuple{f::Int64}, @NamedTuple{p1::Int64, p2::Int64}}}, MutationWeights, true, false, nothing, Nothing, 8}, dataset::SymbolicRegression.CoreModule.DatasetModule.BasicDataset{Float64, Float64, Matrix{Float64}, Vector{Float64}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}, verbosity::Int64)
@ SymbolicRegression ~/.julia/packages/SymbolicRegression/L5TJa/src/Configure.jl:211
[8] #configure_workers#15
@ ~/.julia/packages/SymbolicRegression/L5TJa/src/Configure.jl:389 [inlined]
[9] _create_workers(datasets::Vector{SymbolicRegression.CoreModule.DatasetModule.BasicDataset{Float64, Float64, Matrix{Float64}, Vector{Float64}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}, ropt::SymbolicRegression.SearchUtilsModule.RuntimeOptions{:multiprocessing, 1, true, Nothing}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, OperatorEnum{Tuple{typeof(+), typeof(-), typeof(*)}, Tuple{typeof(square), typeof(cube), typeof(fourth)}}, Node, TemplateExpression, @NamedTuple{structure::TemplateStructure{(:f,), (:p1, :p2), typeof(__sr_template_2337148491627256653), @NamedTuple{f::Int64}, @NamedTuple{p1::Int64, p2::Int64}}}, MutationWeights, true, false, nothing, Nothing, 8})
@ SymbolicRegression ~/.julia/packages/SymbolicRegression/L5TJa/src/SymbolicRegression.jl:633
[10] _equation_search(datasets::Vector{SymbolicRegression.CoreModule.DatasetModule.BasicDataset{Float64, Float64, Matrix{Float64}, Vector{Float64}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}, ropt::SymbolicRegression.SearchUtilsModule.RuntimeOptions{:multiprocessing, 1, true, Nothing}, options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, OperatorEnum{Tuple{typeof(+), typeof(-), typeof(*)}, Tuple{typeof(square), typeof(cube), typeof(fourth)}}, Node, TemplateExpression, @NamedTuple{structure::TemplateStructure{(:f,), (:p1, :p2), typeof(__sr_template_2337148491627256653), @NamedTuple{f::Int64}, @NamedTuple{p1::Int64, p2::Int64}}}, MutationWeights, true, false, nothing, Nothing, 8}, saved_state::Nothing)
@ SymbolicRegression ~/.julia/packages/SymbolicRegression/L5TJa/src/SymbolicRegression.jl:568
[11] equation_search(datasets::Vector{SymbolicRegression.CoreModule.DatasetModule.BasicDataset{Float64, Float64, Matrix{Float64}, Vector{Float64}, Nothing, @NamedTuple{}, Nothing, Nothing, Nothing, Nothing}}; options::Options{SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping{Int64, Int64}, OperatorEnum{Tuple{typeof(+), typeof(-), typeof(*)}, Tuple{typeof(square), typeof(cube), typeof(fourth)}}, Node, TemplateExpression, @NamedTuple{structure::TemplateStructure{(:f,), (:p1, :p2), typeof(__sr_template_2337148491627256653), @NamedTuple{f::Int64}, @NamedTuple{p1::Int64, p2::Int64}}}, MutationWeights, true, false, nothing, Nothing, 8}, saved_state::Nothing, runtime_options::Nothing, runtime_options_kws::@Kwargs{niterations::Int64, parallelism::String, numprocs::Int64, procs::Nothing, addprocs_function::typeof(addprocs_slurm), heap_size_hint_in_bytes::Nothing, worker_imports::Nothing, runtests::Bool, return_state::Bool, run_id::String, verbosity::Int64, logger::Nothing, progress::Bool, v_dim_out::Val{1}})
@ SymbolicRegression ~/.julia/packages/SymbolicRegression/L5TJa/src/SymbolicRegression.jl:561
[12] equation_search
@ ~/.julia/packages/SymbolicRegression/L5TJa/src/SymbolicRegression.jl:542 [inlined]
[13] #equation_search#16
@ ~/.julia/packages/SymbolicRegression/L5TJa/src/SymbolicRegression.jl:511 [inlined]
[14] equation_search
@ ~/.julia/packages/SymbolicRegression/L5TJa/src/SymbolicRegression.jl:456 [inlined]
[15] #equation_search#17
@ ~/.julia/packages/SymbolicRegression/L5TJa/src/SymbolicRegression.jl:535 [inlined]
[16] pyjlany_call(self::typeof(equation_search), args_::Py, kwargs_::Py)
@ PythonCall.JlWrap ~/.julia/packages/PythonCall/avYrV/src/JlWrap/any.jl:44
[17] _pyjl_callmethod
@ ~/.julia/packages/PythonCall/avYrV/src/JlWrap/base.jl:73
[18] _pyjl_callmethod
@ ~/.julia/packages/PythonCall/avYrV/src/JlWrap/C.jl:63
[ Info: Automatically setting `--heap-size-hint=4169M` on each Julia process. You can configure this with the `heap_size_hint_in_bytes` parameter.
┌ Warning: The Slurm functionality in the `ClusterManagers.jl` package is deprecated (including `ClusterManagers.addprocs_slurm` and `ClusterManagers.SlurmManager`). It will be removed from ClusterManagers.jl in a future release. We recommend migrating to the [https://github.com/JuliaParallel/SlurmClusterManager.jl](https://github.com/JuliaParallel/SlurmClusterManager.jl) package instead.
│ caller = ip:0x0
└ @ Core :-1
[ Info: Starting SLURM job julia-6153: `srun -J julia-6153 -n 30 -D /work/chuck/anemmani/git-repos/symbolic-quark/scripts/internal-params -o /work/chuck/anemmani/git-repos/symbolic-quark/scripts/internal-params/./julia-6153-17684729876-%4t.out /home/anemmani/.julia/juliaup/julia-1.12.3+0.x64.linux.gnu/bin/julia --heap-size=4169M --worker=AG58K97zmQeSwBlt`
[ Info: Worker 0 (after 0 s): No output file "/work/chuck/anemmani/git-repos/symbolic-quark/scripts/internal-params/./julia-6153-17684729876-0000.out" yet
[ Info: Worker 0 (after 1 s): Output file found, but no connection details yet
[ Info: Worker 0 ready after 3 s on host 192.168.6.29, port 9816
[ Info: Worker 1 ready after 3 s on host 192.168.6.29, port 9817
[ Info: Worker 2 ready after 3 s on host 192.168.6.29, port 9818
[ Info: Worker 3 ready after 3 s on host 192.168.6.29, port 9819
[ Info: Worker 4 ready after 3 s on host 192.168.6.29, port 9820
[ Info: Worker 5 ready after 3 s on host 192.168.6.29, port 9821
[ Info: Worker 6 ready after 3 s on host 192.168.6.29, port 9822
[ Info: Worker 7 ready after 3 s on host 192.168.6.29, port 9823
[ Info: Worker 8 ready after 3 s on host 192.168.6.29, port 9824
[ Info: Worker 9 ready after 3 s on host 192.168.6.29, port 9825
[ Info: Worker 10 ready after 3 s on host 192.168.6.29, port 9826
[ Info: Worker 11 ready after 3 s on host 192.168.6.29, port 9827
[ Info: Worker 12 ready after 3 s on host 192.168.6.29, port 9828
[ Info: Worker 13 ready after 3 s on host 192.168.6.29, port 9829
[ Info: Worker 14 ready after 3 s on host 192.168.6.29, port 9830
[ Info: Worker 15 ready after 3 s on host 192.168.6.29, port 9831
[ Info: Worker 16 ready after 3 s on host 192.168.6.29, port 9832
[ Info: Worker 17 ready after 3 s on host 192.168.6.29, port 9833
[ Info: Worker 18 ready after 3 s on host 192.168.6.29, port 9834
[ Info: Worker 19 ready after 3 s on host 192.168.6.29, port 9835
[ Info: Worker 20 ready after 3 s on host 192.168.6.29, port 9836
[ Info: Worker 21 ready after 3 s on host 192.168.6.29, port 9837
[ Info: Worker 22 ready after 3 s on host 192.168.6.29, port 9838
[ Info: Worker 23 ready after 3 s on host 192.168.6.29, port 9839
[ Info: Worker 24 ready after 3 s on host 192.168.6.29, port 9840
[ Info: Worker 25 ready after 3 s on host 192.168.6.29, port 9841
[ Info: Worker 26 ready after 3 s on host 192.168.6.29, port 9842
[ Info: Worker 27 ready after 3 s on host 192.168.6.29, port 9843
[ Info: Worker 28 ready after 3 s on host 192.168.6.29, port 9844
[ Info: Worker 29 ready after 3 s on host 192.168.6.29, port 9845
[ Info: Importing SymbolicRegression on workers as well as extensions ClusterManagers, LoopVectorization.
[ Info: Finished!
[ Info: Copying definition of fourth to workers...
[ Info: Finished!
[ Info: Copying definition of __sr_template_2337148491627256653 to workers...
[ Info: Finished!
What happened?
I started running PySR on a cluster with the following configuration.
I submit the job to Slurm with the following Slurm settings.
When I run with this configuration, I notice that on each worker, once Julia has been initialised and the code starts running, an error is raised during compilation.
I have also attached the complete logs.
Version
1.5.9
Operating System
Linux
Package Manager
Conda
Interface
Script (i.e., `python3 script.py`)
Relevant log output
Extra Info
No response