11# This code is part of OpenFE and is licensed under the MIT license.
22# For details, see https://github.com/OpenFreeEnergy/gufe
3- import ast
4- import json
53import io
64import numpy as np
75from os import PathLike
8- from typing import Union , Optional
9- from collections import defaultdict
6+ from typing import Union
107
11- from openmm import app
12- from openmm import unit as omm_unit
8+ import pdbinf
139
1410from rdkit import Chem
1511from rdkit .Chem .rdchem import Mol , Atom , Conformer , EditableMol , BondType
5147}
5248
5349
54- negative_ions = ["F" , "CL" , "Br" , "I" ]
55- positive_ions = ["NA" , "MG" , "ZN" ]
56-
57-
5850class ProteinComponent (ExplicitMoleculeComponent ):
5951 """Wrapper around a Protein representation.
6052
@@ -82,7 +74,6 @@ def __init__(self, rdkit: RDKitMol, name=""):
8274 "Consider loading via rdkit.Chem.MolFromPDBFile or similar." )
8375 super ().__init__ (rdkit = rdkit , name = name )
8476
85- # FROM
8677 @classmethod
8778 def from_pdb_file (cls , pdb_file : str , name : str = "" ):
8879 """
@@ -100,9 +91,10 @@ def from_pdb_file(cls, pdb_file: str, name: str = ""):
10091 ProteinComponent
10192 the deserialized molecule
10293 """
103- openmm_PDBFile = PDBFile (pdb_file )
104- return cls ._from_openmmPDBFile (
105- openmm_PDBFile = openmm_PDBFile , name = name
94+ return cls (
95+ rdkit = pdbinf .load_pdb_file (pdb_file ,
96+ templates = [pdbinf .STANDARD_AA_DOC ]),
97+ name = name ,
10698 )
10799
108100 @classmethod
@@ -112,7 +104,7 @@ def from_pdbx_file(cls, pdbx_file: str, name=""):
112104
113105 Parameters
114106 ----------
115- pdbxfile : str
107+ pdbx_file : str
116108 path to the pdb file.
117109 name : str, optional
118110 name of the input protein, by default ""
@@ -122,117 +114,12 @@ def from_pdbx_file(cls, pdbx_file: str, name=""):
122114 ProteinComponent
123115 the deserialized molecule
124116 """
125- openmm_PDBxFile = PDBxFile (pdbx_file )
126- return cls ._from_openmmPDBFile (
127- openmm_PDBFile = openmm_PDBxFile , name = name
117+ return cls (
118+ rdkit = pdbinf .load_pdbx_file (pdbx_file ,
119+ templates = [pdbinf .STANDARD_AA_DOC ]),
120+ name = name ,
128121 )
129122
130- @classmethod
131- def _from_openmmPDBFile (cls , openmm_PDBFile : Union [PDBFile , PDBxFile ],
132- name : str = "" ):
133- """Converts to our internal representation (rdkit Mol)
134-
135- Parameters
136- ----------
137- openmm_PDBFile : PDBFile or PDBxFile
138- object of the protein
139- name : str
140- name of the protein
141-
142- Returns
143- -------
144- ProteinComponent
145- the deserialized molecule
146- """
147- periodicTable = Chem .GetPeriodicTable ()
148- mol_topology = openmm_PDBFile .getTopology ()
149-
150- rd_mol = Mol ()
151- editable_rdmol = EditableMol (rd_mol )
152-
153- # Add Atoms
154- for atom in mol_topology .atoms ():
155- a = Atom (atom .element .atomic_number )
156-
157- atom_monomerInfo = Chem .AtomPDBResidueInfo ()
158- atom_monomerInfo .SetChainId (atom .residue .chain .id )
159- atom_monomerInfo .SetSerialNumber (int (atom .id ))
160- atom_monomerInfo .SetSegmentNumber (int (atom .residue .chain .index ))
161- atom_monomerInfo .SetInsertionCode (atom .residue .insertionCode )
162- atom_monomerInfo .SetName (atom .name )
163- atom_monomerInfo .SetResidueName (atom .residue .name )
164- atom_monomerInfo .SetResidueNumber (int (atom .residue .id ))
165- atom_monomerInfo .SetIsHeteroAtom (False ) # TODO: Do hetatoms
166-
167- a .SetMonomerInfo (atom_monomerInfo )
168-
169- # additonally possible:
170- # atom_monomerInfo.SetSecondaryStructure
171- # atom_monomerInfo.SetMonomerType
172- # atom_monomerInfo.SetAltLoc
173-
174- editable_rdmol .AddAtom (a )
175-
176- # Add Bonds
177- for bond in mol_topology .bonds ():
178- bond_order = _BONDORDERS_OPENMM_TO_RDKIT [bond .order ]
179- editable_rdmol .AddBond (
180- beginAtomIdx = bond .atom1 .index ,
181- endAtomIdx = bond .atom2 .index ,
182- order = bond_order ,
183- )
184-
185- # Set Positions
186- rd_mol = editable_rdmol .GetMol ()
187- positions = np .array (
188- openmm_PDBFile .positions .value_in_unit (omm_unit .angstrom ), ndmin = 3
189- )
190-
191- for frame_id , frame in enumerate (positions ):
192- conf = Conformer (frame_id )
193- for atom_id , atom_pos in enumerate (frame ):
194- conf .SetAtomPosition (atom_id , atom_pos )
195- rd_mol .AddConformer (conf )
196-
197- # Add Additionals
198- # Formal Charge
199- netcharge = 0
200- for a in rd_mol .GetAtoms ():
201- atomic_num = a .GetAtomicNum ()
202- atom_name = a .GetMonomerInfo ().GetName ()
203-
204- connectivity = sum (
205- int (bond .GetBondType ()) for bond in a .GetBonds ()
206- )
207- default_valence = periodicTable .GetDefaultValence (atomic_num )
208-
209- if connectivity == 0 : # ions:
210- if atom_name in positive_ions :
211- fc = default_valence # e.g. Sodium ions
212- elif atom_name in negative_ions :
213- fc = - default_valence # e.g. Chlorine ions
214- else : # -no-cov-
215- resn = a .GetMonomerInfo ().GetResidueName ()
216- resind = int (a .GetMonomerInfo ().GetResidueNumber ())
217- raise ValueError (
218- "I don't know this Ion or something really went "
219- f"wrong! \t { atom_name } \t { resn } \t -{ resind } \t "
220- f"connectivity{ connectivity } "
221- )
222- elif default_valence > connectivity :
223- fc = - (default_valence - connectivity ) # negative charge
224- elif default_valence < connectivity :
225- fc = + (connectivity - default_valence ) # positive charge
226- else :
227- fc = 0 # neutral
228-
229- a .SetFormalCharge (fc )
230- a .UpdatePropertyCache (strict = True )
231-
232- netcharge += fc
233-
234- return cls (rdkit = rd_mol , name = name )
235-
236123 @classmethod
237124 def _from_dict (cls , ser_dict : dict , name : str = "" ):
238125 """Deserialize from dict representation"""
@@ -294,15 +181,16 @@ def _from_dict(cls, ser_dict: dict, name: str = ""):
294181
295182 return cls (rdkit = rd_mol , name = name )
296183
297- # TO
298- def to_openmm_topology (self ) -> app .Topology :
184+ def to_openmm_topology (self ) -> "app.Topology" :
299185 """Convert to an openmm Topology object
300186
301187 Returns
302188 -------
303189 openmm.app.Topology
304190 resulting topology obj.
305191 """
192+ from openmm import app
193+
306194 def reskey (m ):
307195 """key for defining when a residue has changed from previous
308196
@@ -370,16 +258,18 @@ def chainkey(m):
370258
371259 return top
372260
373- def to_openmm_positions (self ) -> omm_unit . Quantity :
374- """
375- serialize the positions to openmm.unit.Quantity
261+ def to_openmm_positions (self ) -> "openmm.app.unit. Quantity" :
262+ """serialize the positions to openmm.unit.Quantity
263+
376264 ! only one frame at the moment!
377265
378266 Returns
379267 -------
380268 omm_unit.Quantity
381269 Quantity containing protein atom positions
382270 """
271+ from openmm import unit as omm_unit
272+
383273 np_pos = deserialize_numpy (self .to_dict ()["conformers" ][0 ])
384274 openmm_pos = (
385275 list (map (lambda x : np .array (x ), np_pos )) * omm_unit .angstrom
@@ -388,8 +278,7 @@ def to_openmm_positions(self) -> omm_unit.Quantity:
388278 return openmm_pos
389279
390280 def to_pdb_file (self , out_path : Union [str , bytes , PathLike [str ], PathLike [bytes ], io .TextIOBase ]) -> str :
391- """
392- serialize protein to pdb file.
281+ """Write protein to pdb file.
393282
394283 Parameters
395284 ----------
@@ -434,8 +323,7 @@ def to_pdb_file(self, out_path: Union[str, bytes, PathLike[str], PathLike[bytes]
434323 def to_pdbx_file (
435324 self , out_path : Union [str , bytes , PathLike [str ], PathLike [bytes ], io .TextIOBase ]
436325 ) -> str :
437- """
438- serialize protein to pdbx file.
326+ """Write protein to pdbx file.
439327
440328 Parameters
441329 ----------
@@ -447,6 +335,8 @@ def to_pdbx_file(
447335 str
448336 string path to the resulting pdbx.
449337 """
338+ from openmm import unit as omm_unit
339+
450340 # get top:
451341 top = self .to_openmm_topology ()
452342
@@ -472,7 +362,6 @@ def to_pdbx_file(
472362
473363 PDBxFile .writeFile (topology = top , positions = openmm_pos , file = out_file )
474364
475-
476365 if must_close :
477366 # we only close the file if we had to open it
478367 out_file .close ()
0 commit comments