Conversation
…insensitive match
| else: | ||
| return {clinical_id: clinical_data['SAMPLE_ID'] for clinical_id, clinical_data in | ||
| self._clinical_data.items() if clinical_data['VITAL_STATUS'] == 'alive'} | ||
| self._clinical_data.items() if (clinical_data['VITAL_STATUS'] is not None and clinical_data['VITAL_STATUS'].lower() == 'alive')} |
There was a problem hiding this comment.
good catch, we should have that.
There was a problem hiding this comment.
The new ME version moves this logic around a bit. Now it ignores users with VITAL_STATUS "deceased," rather than including only users with VITAL_STATUS "alive" (i.e. everyone is alive by default).
| # recompile the query to be case insensitive | ||
| # convert the $in into a list of $or conditions so we can use $regex inside a $in | ||
| # mongo has a limitation that cannot use $regex within a $in | ||
| # using regex | ||
| if "ONCOTREE_PRIMARY_DIAGNOSIS_NAME" in query_part.query: | ||
| if "$in" in query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']: | ||
| new_conditions = [ | ||
| {'ONCOTREE_PRIMARY_DIAGNOSIS_NAME': {'$regex': f'^{old_query}$', '$options': 'i'}} for | ||
| old_query in query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']['$in']] | ||
| del query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME'] # Remove old query from query_part | ||
| query_part.query['$or'] = new_conditions # Add new conditions to query_part | ||
| else: | ||
| org_query = query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']; | ||
| ignore_case_query = {'$regex': f'^{org_query}$', '$options': 'i'} | ||
| query_part.query['ONCOTREE_PRIMARY_DIAGNOSIS_NAME'] = ignore_case_query | ||
|
|
||
| # Exclude documents where 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME' is 'NA' | ||
| new_query = { | ||
| '$and': [ | ||
| {join_field: {'$in': list(need_new)}}, | ||
| query_part.query, | ||
| {'ONCOTREE_PRIMARY_DIAGNOSIS_NAME': {'$ne': 'NA'}} | ||
| ] | ||
| } |
|
|
||
| # add mutation | ||
| if true_protein is not None: | ||
| if true_protein is not None and true_protein: |
There was a problem hiding this comment.
Good catch. @jasonhansel do we already have that fixed or 😬
There was a problem hiding this comment.
We should be able to incorporate this change without issues.
| "Renal Angiomyolipoma", | ||
| "Large Cell Neuroendocrine Carcinoma" | ||
| "Large Cell Neuroendocrine Carcinoma", | ||
| "Breast Invasive Carcinoma, NOS" |
There was a problem hiding this comment.
Is this a duplicate of the entry above?
| "Invasive Breast Carcinoma", | ||
| "Phyllodes Tumor of the Breast", | ||
| "Breast Invasive Carcinosarcoma, NOS", | ||
| "Breast Invasive Carcinoma, NOS", |
There was a problem hiding this comment.
This file is generated programmatically from the OncoTree data. We should check with PMATCH to see why they want something other than what OncoTree provides; the config JSON file allows you to specify a separate path within this folder to use for this mapping.
| new_trial_match.update({'cancer_type_match': get_cancer_type_match(trial_match)}) | ||
| # Add in additional fields we need for frontend | ||
| if ('arm_description' in trial_match.match_clause_data.match_clause_additional_attributes): | ||
| new_trial_match.update({'arm_description': trial_match.match_clause_data.match_clause_additional_attributes['arm_description']}) |
There was a problem hiding this comment.
We should be able to incorporate this change without issues.
| elif trial_value.upper() == 'FALSE': | ||
| return QueryTransformerResult({sample_key: 'Negative'}, False) | ||
| else: | ||
| return QueryTransformerResult({sample_key: trial_value}, False) |
There was a problem hiding this comment.
We should be able to incorporate this change without issues.
| if run_args.csv_output: | ||
| me.create_output_csv() | ||
| from matchengine.internals.utilities.output import create_output_csv | ||
| create_output_csv(me) |
There was a problem hiding this comment.
The latest ME version fixes this.
| subp_p.add_argument('-t', dest='trial', default=None, help=param_trials_help) | ||
| subp_p.add_argument('-c', dest='clinical', default=None, help=param_clinical_help) | ||
| subp_p.add_argument('-g', dest='extended_attributes', default=None, help=param_genomic_help) | ||
| subp_p.add_argument('-g', dest='genomic', default=None, help=param_genomic_help) |
There was a problem hiding this comment.
While we can fix this, we should discourage using this "loading" functionality for anything other than trials; users should instead load data into MongoDB directly, e.g. through an ETL process.
| subp_p.add_argument('-t', dest='trial', default=None, help=param_trials_help) | ||
| subp_p.add_argument('-c', dest='clinical', default=None, help=param_clinical_help) | ||
| subp_p.add_argument('-g', dest='extended_attributes', default=None, help=param_genomic_help) | ||
| subp_p.add_argument('-g', dest='genomic', default=None, help=param_genomic_help) |
There was a problem hiding this comment.
While we can fix this, we should discourage using this "loading" functionality for anything other than trials; users should instead load data into MongoDB directly, e.g. through an ETL process.
| # if isinstance(identifier, ObjectId) or identifier is None: | ||
| # pass | ||
| # else: | ||
| # sort_array.append(int(identifier.replace("-", ""))) |
There was a problem hiding this comment.
The new version of ME will move this into TrialMatchDocumentCreator where it can be modified more easily.
| if matchengine.report_all_clinical_reasons or \ | ||
| keys.issubset(matchengine.match_criteria_transform.valid_clinical_reasons): | ||
| should_add_reason = True | ||
| if should_add_reason: |
There was a problem hiding this comment.
The new version of ME moves this code into TrialMatchDocumentCreator where it can be modified more easily (though it seems like the change here may just be a refactor).
|
|
||
| if need_new: | ||
| new_query = {'$and': [{join_field: {'$in': list(need_new)}}, query_part.query]} | ||
| # recompile the query to be case insensitive |
There was a problem hiding this comment.
This is something we don't want to incorporate. There may be ways of doing this through the query transformers, but it will never be performant because of the cost of regex lookups. Ideally, this would be fixed at the data ingestion layer, either by lowercasing all inputs and then searching for lowercase cancer types, or by making cancer types match OncoTree.
| else: | ||
| raw_file_data = file_handle.read() | ||
| if filetype == 'yaml': | ||
| if filetype == 'yml': |
There was a problem hiding this comment.
This is just a bugfix that we can incorporate.
| "match_trial_link_id": "protocol_no", | ||
| "trial_status_key": { | ||
| "key_name": null, | ||
| "key_name": "summary", |
There was a problem hiding this comment.
Most of the changes to this file can be incorporated without issues. The one exception is the "trial_status_key," which determines how we decide if trials are open or closed; that may be something we need to keep separate for PMATCH.
add value match for wildtype when recording result
CTM-289: fix structural variation matching without report date
No description provided.