Skip to content

Commit 215dace

Browse files
Use pooch to fetch the protein-ligand-benchmark (PLB) files
The previous solution passed io.StringIO objects, which RDKit does not handle (and which were never documented as an accepted input type).
1 parent 8658083 commit 215dace

File tree

2 files changed

+50
-82
lines changed

2 files changed

+50
-82
lines changed

gufe/tests/conftest.py

Lines changed: 33 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import importlib.resources
55
import urllib.request
66
from urllib.error import URLError
7-
import io
87
import functools
8+
import pooch
99
import pytest
1010
from rdkit import Chem
1111
from rdkit.Chem import AllChem
@@ -20,58 +20,42 @@
2020
else:
2121
HAS_INTERNET = True
2222

23+
PLB_files = pooch.create(
24+
path=pooch.os_cache('pdbinf'),
25+
base_url='https://github.com/openforcefield/protein-ligand-benchmark/raw/d3387602bbeb0167abf00dfb81753d8936775dd2/data/',
26+
version=None,
27+
registry={
28+
'p38/01_protein/crd/protein.pdb': '3f0bf718644e7c29f5200cd3def4240ac25ef5fb1948b2e64deb5015d8a45aa4',
29+
'mcl1/01_protein/crd/protein.pdb': 'f80ff9dd93a5d9dd6e90091e9631a8ce7fe0dc931e16543e22c1f92009660306',
30+
'cdk2/01_protein/crd/protein.pdb': '15d1e509d7951ca45ea266d51a627d5f452dcf0bb5bd48751ae57eb29e28ab69',
31+
'shp2/01_protein/crd/protein.pdb': 'd6759cbd135aaddaa658446064df4095d978d3681c014a0528b542d60b2c8770',
32+
'pde2/01_protein/crd/protein.pdb': '3b7967c1717789215452cdf919520625602d5438a9d2a18620726b8b1b3a8ef0',
33+
'cmet/01_protein/crd/protein.pdb': '155ec32941a9082dbdbbfde460ff97c88d4fe7e100e9a9577edb5a9e7b6467ae',
34+
'ptp1b/01_protein/crd/protein.pdb': 'bfa0f9204e96aa463b80946b788c4153cd24701291007eb77638a16fd156634e',
35+
'thrombin/01_protein/crd/protein.pdb': 'eb4ea18bef9c4c71dcdc922616d6719ee918112be87a0bd6b274c856eff1dd59',
36+
'cdk8/01_protein/crd/protein.pdb': 'b058774526a19775d8f438b14e9d6da331b6de74e0ef9e96db575f6c0bb067b2',
37+
'pfkfb3/01_protein/crd/protein.pdb': '4367710db0dbf284cc715ae9a8dd82d06bd77dcc3fb0885678e16632a2732dcc',
38+
'tyk2/01_protein/crd/protein.pdb': '9090684f4bdae90afbe5f2698a14c778396c024c19ceb6333de4808d9e29fae6',
39+
'syk/01_protein/crd/protein.pdb': 'f6199d0c1818eb5bb24e164426789cf39cae7aa32c8ca2e98f5f44d299a6f82f',
40+
'tnks2/01_protein/crd/protein.pdb': 'fc7681a05dbf07590aa8de133f981b6d8ae9cebcc23d54addc2c4fe80be80299',
41+
'eg5/01_protein/crd/protein.pdb': 'f2964a785c922502dc86fb4e2e5295d32d41d5b68b8c3246e989de5234c3fd0f',
42+
'hif2a/01_protein/crd/protein.pdb': '5bbf520e7c102a65cc7ba0253fd66f43562f77284c82b3b9613e997b7ac76c93',
43+
44+
},
45+
)
2346

24-
class URLFileLike:
25-
def __init__(self, url, encoding='utf-8'):
26-
self.url = url
27-
self.encoding = encoding
28-
self.data = None
2947

30-
def __call__(self):
48+
@pytest.fixture(params=['p38', 'mcl1', 'cdk2', 'shp2', 'pde2', 'cmet', 'ptp1b',
49+
'thrombin', 'cdk8', 'pfkfb3', 'tyk2', 'syk', 'tnks2',
50+
'eg5', 'hif2a', '181l'])
51+
def PDB_files(request):
52+
if request.param == '181l':
53+
with importlib.resources.path('gufe.tests.data', '181l.pdb') as file:
54+
return str(file)
55+
else:
3156
if not HAS_INTERNET: # pragma: no-cover
3257
pytest.skip("Skipping because internet seems faulty")
33-
34-
if self.data is None:
35-
req = urllib.request.urlopen(self.url)
36-
self.data = req.read().decode(self.encoding)
37-
38-
return io.StringIO(self.data)
39-
40-
41-
def get_test_filename(filename):
42-
with importlib.resources.path('gufe.tests.data', filename) as file:
43-
return str(file)
44-
45-
46-
_benchmark_pdb_names = [
47-
"cmet_protein",
48-
"hif2a_protein",
49-
"mcl1_protein",
50-
"p38_protein",
51-
"ptp1b_protein",
52-
"syk_protein",
53-
"thrombin_protein",
54-
"tnsk2_protein",
55-
"tyk2_protein",
56-
]
57-
58-
59-
_pl_benchmark_url_pattern = (
60-
"https://github.com/OpenFreeEnergy/openfe-benchmarks/blob/main/openfe_benchmarks/data/{name}.pdb?raw=true"
61-
)
62-
63-
64-
PDB_BENCHMARK_LOADERS = {
65-
name: URLFileLike(url=_pl_benchmark_url_pattern.format(name=name))
66-
for name in _benchmark_pdb_names
67-
}
68-
69-
PDB_FILE_LOADERS = {
70-
name: lambda: get_test_filename(name)
71-
for name in ["181l.pdb"]
72-
}
73-
74-
ALL_PDB_LOADERS = dict(**PDB_BENCHMARK_LOADERS, **PDB_FILE_LOADERS)
58+
return PLB_files.fetch('{}/01_protein/crd/protein.pdb'.format(request.param))
7559

7660

7761
@pytest.fixture

gufe/tests/test_proteincomponent.py

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from openmm import unit
1515
from numpy.testing import assert_almost_equal
1616

17-
from .conftest import ALL_PDB_LOADERS
17+
from .conftest import PLB_files
1818

1919

2020
@pytest.fixture
@@ -94,11 +94,8 @@ class TestProteinComponent(GufeTokenizableTestsMixin):
9494
def instance(self, PDB_181L_path):
9595
return self.cls.from_pdb_file(PDB_181L_path, name="Steve")
9696

97-
# From
98-
@pytest.mark.parametrize('in_pdb_path', ALL_PDB_LOADERS.keys())
99-
def test_from_pdb_file(self, in_pdb_path):
100-
in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
101-
p = self.cls.from_pdb_file(in_pdb_io, name="Steve")
97+
def test_from_pdb_file(self, PDB_files):
98+
p = self.cls.from_pdb_file(PDB_files, name="Steve")
10299

103100
assert isinstance(p, ProteinComponent)
104101
assert p.name == "Steve"
@@ -177,21 +174,16 @@ def test_to_pdb_input_types(self, PDB_181L_OpenMMClean_path, tmp_path,
177174
output_func=p.to_pdb_file
178175
)
179176

180-
@pytest.mark.parametrize('in_pdb_path', ALL_PDB_LOADERS.keys())
181-
def test_to_pdb_round_trip(self, in_pdb_path, tmp_path):
182-
in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
183-
184-
p = self.cls.from_pdb_file(in_pdb_io, name="Wuff")
185-
out_file_name = "tmp_"+in_pdb_path+".pdb"
177+
def test_to_pdb_round_trip(self, PDB_files, tmp_path):
178+
p = self.cls.from_pdb_file(PDB_files, name="Wuff")
179+
out_file_name = "tmp_foo.pdb"
186180
out_file = tmp_path / out_file_name
187181

188182
p.to_pdb_file(str(out_file))
189183

190-
ref_in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
191-
192184
# generate openMM reference file:
193-
openmm_pdb = pdbfile.PDBFile(ref_in_pdb_io)
194-
out_ref_file_name = "tmp_"+in_pdb_path+"_openmm_ref.pdb"
185+
openmm_pdb = pdbfile.PDBFile(PDB_files)
186+
out_ref_file_name = "tmp_foo_openmm_ref.pdb"
195187
out_ref_file = tmp_path / out_ref_file_name
196188

197189
pdbfile.PDBFile.writeFile(openmm_pdb.topology, openmm_pdb.positions, file=open(str(out_ref_file), "w"))
@@ -213,33 +205,23 @@ def test_dummy_from_dict(self, PDB_181L_OpenMMClean_path):
213205

214206
assert p == p2
215207

216-
# parametrize
217-
@pytest.mark.parametrize('in_pdb_path', ALL_PDB_LOADERS.keys())
218-
def test_to_openmm_positions(self, in_pdb_path):
219-
in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
220-
ref_in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
221-
222-
openmm_pdb = pdbfile.PDBFile(ref_in_pdb_io)
208+
def test_to_openmm_positions(self, PDB_files):
209+
openmm_pdb = pdbfile.PDBFile(PDB_files)
223210
openmm_pos = openmm_pdb.positions
224211

225-
p = self.cls.from_pdb_file(in_pdb_io, name="Bob")
212+
p = self.cls.from_pdb_file(PDB_files, name="Bob")
226213
gufe_openmm_pos = p.to_openmm_positions()
227214

228215
v1 = gufe_openmm_pos.value_in_unit(unit.nanometer)
229216
v2 = openmm_pos.value_in_unit(unit.nanometer)
230217

231218
assert_almost_equal(actual=v1, desired=v2, decimal=6)
232219

233-
# parametrize
234-
@pytest.mark.parametrize('in_pdb_path', ALL_PDB_LOADERS.keys())
235-
def test_to_openmm_topology(self, in_pdb_path):
236-
in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
237-
ref_in_pdb_io = ALL_PDB_LOADERS[in_pdb_path]()
238-
239-
openmm_pdb = pdbfile.PDBFile(ref_in_pdb_io)
220+
def test_to_openmm_topology(self, PDB_files):
221+
openmm_pdb = pdbfile.PDBFile(PDB_files)
240222
openmm_top = openmm_pdb.topology
241223

242-
p = self.cls.from_pdb_file(in_pdb_io, name="Bob")
224+
p = self.cls.from_pdb_file(PDB_files, name="Bob")
243225
gufe_openmm_top = p.to_openmm_topology()
244226
assert_topology_equal(openmm_top, gufe_openmm_top)
245227

@@ -290,7 +272,9 @@ def test_protein_total_charge(self, PDB_181L_path):
290272
assert m1.total_charge == 7
291273

292274
def test_protein_total_charge_thromb(self):
293-
m1 = self.cls.from_pdb_file(ALL_PDB_LOADERS["thrombin_protein"]())
275+
f = PLB_files.fetch('thrombin/01_protein/crd/protein.pdb')
276+
277+
m1 = self.cls.from_pdb_file(f)
294278

295279
assert m1.total_charge == 6
296280

0 commit comments

Comments
 (0)