Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions backends/cuda-gen/ceed-cuda-gen-operator-build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ static int CeedOperatorBuildKernelQFunction_Cuda_gen(std::ostringstream &code, C
CeedQFunctionField *qf_input_fields, CeedInt num_output_fields,
CeedOperatorField *op_output_fields, CeedQFunctionField *qf_output_fields,
std::string qfunction_name, CeedInt Q_1d, bool is_all_tensor, bool is_at_points,
bool use_3d_slices) {
bool use_3d_slices, bool is_assemble) {
std::string Q_name = is_all_tensor ? "Q_1d" : "Q";
CeedEvalMode eval_mode = CEED_EVAL_NONE;
CeedElemRestriction elem_rstr;
Expand Down Expand Up @@ -1029,6 +1029,7 @@ static int CeedOperatorBuildKernelQFunction_Cuda_gen(std::ostringstream &code, C
CeedInt comp_stride;
CeedElemRestriction elem_rstr;

if (is_assemble) break;
CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr));
CeedCallBackend(CeedElemRestrictionGetCompStride(elem_rstr, &comp_stride));
CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
Expand Down Expand Up @@ -1583,7 +1584,7 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
// -- Q function
CeedCallBackend(CeedOperatorBuildKernelQFunction_Cuda_gen(code, data, tab, max_dim, max_num_points, num_input_fields, op_input_fields,
qf_input_fields, num_output_fields, op_output_fields, qf_output_fields, qfunction_name,
Q_1d, is_all_tensor, is_at_points, use_3d_slices));
Q_1d, is_all_tensor, is_at_points, use_3d_slices, false));

// -- Output basis and restriction
code << "\n" << tab << "// -- Output field basis action and restrictions\n";
Expand Down Expand Up @@ -2008,7 +2009,7 @@ static int CeedOperatorBuildKernelAssemblyAtPoints_Cuda_gen(CeedOperator op, boo
// -- Q function
CeedCallBackend(CeedOperatorBuildKernelQFunction_Cuda_gen(code, data, tab, max_dim, max_num_points, num_input_fields, op_input_fields,
qf_input_fields, num_output_fields, op_output_fields, qf_output_fields, qfunction_name,
Q_1d, is_all_tensor, is_at_points, use_3d_slices));
Q_1d, is_all_tensor, is_at_points, use_3d_slices, true));

// -- Output basis and restriction
code << "\n" << tab << "// -- Output field basis action and restrictions\n";
Expand Down Expand Up @@ -2274,7 +2275,18 @@ extern "C" int CeedOperatorBuildKernelLinearAssembleQFunction_Cuda_gen(CeedOpera
}
}
for (CeedInt i = 0; i < num_output_fields; i++) {
code << tab << "CeedScalar *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
bool is_active = false;

{
CeedVector vec;

CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
is_active = vec == CEED_VECTOR_ACTIVE;
CeedCallBackend(CeedVectorDestroy(&vec));
}
if (is_active) {
code << tab << "CeedScalar *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
}
}

code << tab << "const CeedInt max_dim = " << max_dim << ";\n";
Expand Down Expand Up @@ -2605,7 +2617,7 @@ extern "C" int CeedOperatorBuildKernelLinearAssembleQFunction_Cuda_gen(CeedOpera
// -- Q function
CeedCallBackend(CeedOperatorBuildKernelQFunction_Cuda_gen(code, data, tab, max_dim, max_num_points, num_input_fields, op_input_fields,
qf_input_fields, num_output_fields, op_output_fields, qf_output_fields, qfunction_name,
Q_1d, is_all_tensor, is_at_points, use_3d_slices));
Q_1d, is_all_tensor, is_at_points, use_3d_slices, true));

// -- Output basis and restriction
code << "\n" << tab << "// -- Output field basis action and restrictions\n";
Expand Down
12 changes: 7 additions & 5 deletions backends/cuda-gen/ceed-cuda-gen-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,30 +294,32 @@ static int CeedOperatorApplyAdd_Cuda_gen(CeedOperator op, CeedVector input_vec,
}

static int CeedOperatorApplyAddComposite_Cuda_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) {
bool is_run_good[CEED_COMPOSITE_MAX] = {false};
bool is_run_good[CEED_COMPOSITE_MAX] = {false}, is_sequential;
CeedInt num_suboperators;
const CeedScalar *input_arr = NULL;
CeedScalar *output_arr = NULL;
Ceed ceed;
CeedOperator *sub_operators;
cudaStream_t stream = NULL;

CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
CeedCall(CeedOperatorCompositeIsSequential(op, &is_sequential));
if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(input_vec, CEED_MEM_DEVICE, &input_arr));
if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArray(output_vec, CEED_MEM_DEVICE, &output_arr));
if (is_sequential) CeedCallCuda(ceed, cudaStreamCreate(&stream));
for (CeedInt i = 0; i < num_suboperators; i++) {
CeedInt num_elem = 0;

CeedCall(CeedOperatorGetNumElements(sub_operators[i], &num_elem));
if (num_elem > 0) {
cudaStream_t stream = NULL;

CeedCallCuda(ceed, cudaStreamCreate(&stream));
if (!is_sequential) CeedCallCuda(ceed, cudaStreamCreate(&stream));
CeedCallBackend(CeedOperatorApplyAddCore_Cuda_gen(sub_operators[i], stream, input_arr, output_arr, &is_run_good[i], request));
CeedCallCuda(ceed, cudaStreamDestroy(stream));
if (!is_sequential) CeedCallCuda(ceed, cudaStreamDestroy(stream));
}
}
if (is_sequential) CeedCallCuda(ceed, cudaStreamDestroy(stream));
if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArrayRead(input_vec, &input_arr));
if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArray(output_vec, &output_arr));
CeedCallCuda(ceed, cudaDeviceSynchronize());
Expand Down
4 changes: 2 additions & 2 deletions backends/cuda-ref/ceed-cuda-ref-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ static int CeedOperatorApplyAdd_Cuda(CeedOperator op, CeedVector in_vec, CeedVec
if (eval_mode == CEED_EVAL_NONE) {
CeedScalar *e_vec_array;

CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[i], CEED_MEM_DEVICE, &e_vec_array));
CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[field], CEED_MEM_DEVICE, &e_vec_array));
CeedCallBackend(CeedVectorRestoreArray(e_vec, &e_vec_array));
}

Expand Down Expand Up @@ -942,7 +942,7 @@ static int CeedOperatorApplyAddAtPoints_Cuda(CeedOperator op, CeedVector in_vec,
if (eval_mode == CEED_EVAL_NONE) {
CeedScalar *e_vec_array;

CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[i], CEED_MEM_DEVICE, &e_vec_array));
CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[field], CEED_MEM_DEVICE, &e_vec_array));
CeedCallBackend(CeedVectorRestoreArray(e_vec, &e_vec_array));
}

Expand Down
22 changes: 17 additions & 5 deletions backends/hip-gen/ceed-hip-gen-operator-build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ static int CeedOperatorBuildKernelQFunction_Hip_gen(std::ostringstream &code, Ce
CeedQFunctionField *qf_input_fields, CeedInt num_output_fields,
CeedOperatorField *op_output_fields, CeedQFunctionField *qf_output_fields,
std::string qfunction_name, CeedInt Q_1d, bool is_all_tensor, bool is_at_points,
bool use_3d_slices) {
bool use_3d_slices, bool is_assemble) {
std::string Q_name = is_all_tensor ? "Q_1d" : "Q";
CeedEvalMode eval_mode = CEED_EVAL_NONE;
CeedElemRestriction elem_rstr;
Expand Down Expand Up @@ -1056,6 +1056,7 @@ static int CeedOperatorBuildKernelQFunction_Hip_gen(std::ostringstream &code, Ce
CeedInt comp_stride;
CeedElemRestriction elem_rstr;

if (is_assemble) break;
CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr));
CeedCallBackend(CeedElemRestrictionGetCompStride(elem_rstr, &comp_stride));
CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr));
Expand Down Expand Up @@ -1596,7 +1597,7 @@ extern "C" int CeedOperatorBuildKernel_Hip_gen(CeedOperator op, bool *is_good_bu
// -- Q function
CeedCallBackend(CeedOperatorBuildKernelQFunction_Hip_gen(code, data, tab, max_dim, max_num_points, num_input_fields, op_input_fields,
qf_input_fields, num_output_fields, op_output_fields, qf_output_fields, qfunction_name,
Q_1d, is_all_tensor, is_at_points, use_3d_slices));
Q_1d, is_all_tensor, is_at_points, use_3d_slices, false));

// -- Output basis and restriction
code << "\n" << tab << "// -- Output field basis action and restrictions\n";
Expand Down Expand Up @@ -2013,7 +2014,7 @@ static int CeedOperatorBuildKernelAssemblyAtPoints_Hip_gen(CeedOperator op, bool
// -- Q function
CeedCallBackend(CeedOperatorBuildKernelQFunction_Hip_gen(code, data, tab, max_dim, max_num_points, num_input_fields, op_input_fields,
qf_input_fields, num_output_fields, op_output_fields, qf_output_fields, qfunction_name,
Q_1d, is_all_tensor, is_at_points, use_3d_slices));
Q_1d, is_all_tensor, is_at_points, use_3d_slices, true));

// -- Output basis and restriction
code << "\n" << tab << "// -- Output field basis action and restrictions\n";
Expand Down Expand Up @@ -2270,7 +2271,18 @@ extern "C" int CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen(CeedOperat
}
}
for (CeedInt i = 0; i < num_output_fields; i++) {
code << tab << "CeedScalar *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
bool is_active = false;

{
CeedVector vec;

CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec));
is_active = vec == CEED_VECTOR_ACTIVE;
CeedCallBackend(CeedVectorDestroy(&vec));
}
if (is_active) {
code << tab << "CeedScalar *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
}
}

code << tab << "const CeedInt max_dim = " << max_dim << ";\n";
Expand Down Expand Up @@ -2601,7 +2613,7 @@ extern "C" int CeedOperatorBuildKernelLinearAssembleQFunction_Hip_gen(CeedOperat
// -- Q function
CeedCallBackend(CeedOperatorBuildKernelQFunction_Hip_gen(code, data, tab, max_dim, max_num_points, num_input_fields, op_input_fields,
qf_input_fields, num_output_fields, op_output_fields, qf_output_fields, qfunction_name,
Q_1d, is_all_tensor, is_at_points, use_3d_slices));
Q_1d, is_all_tensor, is_at_points, use_3d_slices, true));

// -- Output basis and restriction
code << "\n" << tab << "// -- Output field basis action and restrictions\n";
Expand Down
21 changes: 13 additions & 8 deletions backends/hip-gen/ceed-hip-gen-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
}

static int CeedOperatorApplyAddComposite_Hip_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) {
bool is_run_good[CEED_COMPOSITE_MAX] = {false};
bool is_run_good[CEED_COMPOSITE_MAX] = {false}, is_sequential;
CeedInt num_suboperators;
const CeedScalar *input_arr = NULL;
CeedScalar *output_arr = NULL;
Expand All @@ -264,23 +264,28 @@ static int CeedOperatorApplyAddComposite_Hip_gen(CeedOperator op, CeedVector inp
CeedCallBackend(CeedOperatorGetData(op, &impl));
CeedCallBackend(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
CeedCallBackend(CeedOperatorCompositeGetSubList(op, &sub_operators));
CeedCall(CeedOperatorCompositeIsSequential(op, &is_sequential));
if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(input_vec, CEED_MEM_DEVICE, &input_arr));
if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArray(output_vec, CEED_MEM_DEVICE, &output_arr));
for (CeedInt i = 0; i < num_suboperators; i++) {
CeedInt num_elem = 0;
CeedInt num_elem = 0;
const CeedInt stream_index = is_sequential ? 0 : i;

CeedCallBackend(CeedOperatorGetNumElements(sub_operators[i], &num_elem));
if (num_elem > 0) {
if (!impl->streams[i]) CeedCallHip(ceed, hipStreamCreate(&impl->streams[i]));
CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(sub_operators[i], impl->streams[i], input_arr, output_arr, &is_run_good[i], request));
if (!impl->streams[stream_index]) CeedCallHip(ceed, hipStreamCreate(&impl->streams[stream_index]));
CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(sub_operators[i], impl->streams[stream_index], input_arr, output_arr, &is_run_good[i],
request));
} else {
is_run_good[i] = true;
}
}

for (CeedInt i = 0; i < num_suboperators; i++) {
if (impl->streams[i]) {
if (is_run_good[i]) CeedCallHip(ceed, hipStreamSynchronize(impl->streams[i]));
if (is_sequential) CeedCallHip(ceed, hipStreamSynchronize(impl->streams[0]));
else {
for (CeedInt i = 0; i < num_suboperators; i++) {
if (impl->streams[i]) {
if (is_run_good[i]) CeedCallHip(ceed, hipStreamSynchronize(impl->streams[i]));
}
}
}
if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArrayRead(input_vec, &input_arr));
Expand Down
4 changes: 2 additions & 2 deletions backends/hip-ref/ceed-hip-ref-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ static int CeedOperatorApplyAdd_Hip(CeedOperator op, CeedVector in_vec, CeedVect
if (eval_mode == CEED_EVAL_NONE) {
CeedScalar *e_vec_array;

CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[i], CEED_MEM_DEVICE, &e_vec_array));
CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[field], CEED_MEM_DEVICE, &e_vec_array));
CeedCallBackend(CeedVectorRestoreArray(e_vec, &e_vec_array));
}

Expand Down Expand Up @@ -939,7 +939,7 @@ static int CeedOperatorApplyAddAtPoints_Hip(CeedOperator op, CeedVector in_vec,
if (eval_mode == CEED_EVAL_NONE) {
CeedScalar *e_vec_array;

CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[i], CEED_MEM_DEVICE, &e_vec_array));
CeedCallBackend(CeedVectorTakeArray(impl->q_vecs_out[field], CEED_MEM_DEVICE, &e_vec_array));
CeedCallBackend(CeedVectorRestoreArray(e_vec, &e_vec_array));
}

Expand Down
1 change: 1 addition & 0 deletions include/ceed-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ struct CeedOperator_private {
bool is_composite;
bool is_at_points;
bool has_restriction;
bool is_sequential;
CeedQFunctionAssemblyData qf_assembled;
CeedOperatorAssemblyData op_assembled;
CeedOperator *sub_operators;
Expand Down
2 changes: 2 additions & 0 deletions include/ceed/ceed.h
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,8 @@ CEED_EXTERN int CeedOperatorCompositeAddSub(CeedOperator composite_op, CeedOper
CEED_EXTERN int CeedOperatorCompositeGetNumSub(CeedOperator op, CeedInt *num_suboperators);
CEED_EXTERN int CeedOperatorCompositeGetSubList(CeedOperator op, CeedOperator **sub_operators);
CEED_EXTERN int CeedOperatorCompositeGetSubByName(CeedOperator op, const char *op_name, CeedOperator *sub_op);
CEED_EXTERN int CeedOperatorCompositeSetSequential(CeedOperator op, bool is_sequential);
CEED_EXTERN int CeedOperatorCompositeIsSequential(CeedOperator op, bool *is_sequential);
CEED_EXTERN int CeedOperatorCheckReady(CeedOperator op);
CEED_EXTERN int CeedOperatorGetActiveVectorLengths(CeedOperator op, CeedSize *input_size, CeedSize *output_size);
CEED_EXTERN int CeedOperatorSetQFunctionAssemblyReuse(CeedOperator op, bool reuse_assembly_data);
Expand Down
42 changes: 42 additions & 0 deletions interface/ceed-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -1390,6 +1390,48 @@ int CeedOperatorCompositeGetSubByName(CeedOperator op, const char *op_name, Ceed
return CEED_ERROR_SUCCESS;
}

/**
@brief Set whether the sub-operators of the composite `CeedOperator` must be run sequentially.

Note: This value currently only affects the GPU `/gpu/cuda/gen` and `/gpu/hip/gen` backends.

@param[in] op Composite `CeedOperator`
@param[in] is_sequential Flag value to set, if `true`, forces the composite `CeedOperator` to execute sequentially

@return An error code: 0 - success, otherwise - failure

@ref Advanced
**/
int CeedOperatorCompositeSetSequential(CeedOperator op, bool is_sequential) {
bool is_composite;

CeedCall(CeedOperatorIsComposite(op, &is_composite));
CeedCheck(is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_MINOR, "Only defined for a composite operator");
op->is_sequential = is_sequential;
return CEED_ERROR_SUCCESS;
}

/**
@brief Get whether the sub-operators of the composite `CeedOperator` must be run sequentially.

Note: This value currently only affects the GPU `/gpu/cuda/gen` and `/gpu/hip/gen` backends.

@param[in] op Composite `CeedOperator`
@param[out] is_sequential Variable to store sequential status

@return An error code: 0 - success, otherwise - failure

@ref Advanced
**/
int CeedOperatorCompositeIsSequential(CeedOperator op, bool *is_sequential) {
bool is_composite;

CeedCall(CeedOperatorIsComposite(op, &is_composite));
CeedCheck(is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_MINOR, "Only defined for a composite operator");
*is_sequential = op->is_sequential;
return CEED_ERROR_SUCCESS;
}

/**
@brief Check if a `CeedOperator` is ready to be used.

Expand Down
Loading