Skip to content

Commit 00a9d08

Browse files
committed
Fix VCF tests
1 parent ab86815 commit 00a9d08

File tree

3 files changed

+14
-31
lines changed

3 files changed

+14
-31
lines changed

python/tests/test_vcf.py

Lines changed: 3 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -357,21 +357,18 @@ def test_individuals_no_nodes_default_args(self):
357357
with warnings.catch_warnings(record=True) as w:
358358
with pytest.raises(ValueError, match="No samples in resulting VCF model"):
359359
ts2.as_vcf(allow_position_zero=True)
360-
assert len(w) == 2
360+
assert len(w) == 1
361361
assert "At least one sample node does not have an individual ID" in str(
362362
w[0].message
363363
)
364-
assert "Individual 0 has no nodes associated with it." in str(w[1].message)
365364

366365
def test_individuals_no_nodes_as_argument(self):
367366
ts1 = msprime.simulate(10, mutation_rate=0.1, random_seed=2)
368367
tables = ts1.dump_tables()
369368
tables.individuals.add_row()
370369
ts2 = tables.tree_sequence()
371370
with warnings.catch_warnings(record=True) as w:
372-
with pytest.raises(
373-
ValueError, match="Individual 0 has no nodes associated with it."
374-
):
371+
with pytest.raises(ValueError, match="No samples in resulting VCF model"):
375372
ts2.as_vcf(individuals=[0])
376373
assert len(w) == 1
377374
assert "At least one sample node does not have an individual ID" in str(
@@ -395,7 +392,7 @@ def test_ploidy_with_no_node_individuals(self):
395392
def test_empty_individuals(self):
396393
ts = msprime.sim_ancestry(3, random_seed=2)
397394
ts = tsutil.insert_branch_sites(ts)
398-
with pytest.raises(ValueError, match="No samples in resulting VCF model"):
395+
with pytest.raises(ValueError, match="No individuals specified"):
399396
ts.as_vcf(individuals=[])
400397

401398
def test_duplicate_individuals(self):
@@ -404,28 +401,6 @@ def test_duplicate_individuals(self):
404401
with pytest.raises(tskit.LibraryError, match="TSK_ERR_DUPLICATE_SAMPLE"):
405402
ts.as_vcf(individuals=[0, 0], allow_position_zero=True)
406403

407-
def test_mixed_sample_non_sample_individuals(self):
408-
ts = msprime.sim_ancestry(3, random_seed=2)
409-
tables = ts.dump_tables()
410-
tables.individuals.add_row()
411-
# Add a reference to an individual from a non-sample
412-
individual = tables.nodes.individual
413-
individual[-1] = 0
414-
tables.nodes.individual = individual
415-
ts = tables.tree_sequence()
416-
ts = tsutil.insert_branch_sites(ts)
417-
with warnings.catch_warnings(record=True) as w:
418-
ts.map_to_vcf_model()
419-
assert len(w) == 2
420-
assert (
421-
"Individual 0 has both sample and non-sample nodes associated with it."
422-
in str(w[0].message)
423-
)
424-
assert "Individual 3 has no nodes associated with it." in str(w[1].message)
425-
with warnings.catch_warnings(record=True) as w:
426-
assert len(ts.as_vcf(individuals=[1, 2], allow_position_zero=True)) > 0
427-
assert len(w) == 0
428-
429404
def test_samples_with_and_without_individuals(self):
430405
ts = tskit.Tree.generate_balanced(3).tree_sequence
431406
tables = ts.dump_tables()

python/tskit/trees.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10604,6 +10604,8 @@ def map_to_vcf_model(
1060410604
else:
1060510605
if individuals is None:
1060610606
individuals = np.arange(self.num_individuals, dtype=np.int32)
10607+
if len(individuals) == 0:
10608+
raise ValueError("No individuals specified")
1060710609
if min(individuals) < 0 or max(individuals) >= self.num_individuals:
1060810610
raise ValueError("Invalid individual ID")
1060910611

python/tskit/vcf.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,18 +71,24 @@ def __init__(
7171
vcf_model = tree_sequence.map_to_vcf_model(
7272
individuals=individuals, ploidy=ploidy, individual_names=individual_names
7373
)
74-
self.individual_names = vcf_model.individuals_name
74+
# Remove individuals with zero ploidy as these cannot be
75+
# represented in VCF.
76+
individuals_nodes = vcf_model.individuals_nodes
77+
to_keep = (individuals_nodes != -1).any(axis=1)
78+
individuals_nodes = individuals_nodes[to_keep]
79+
self.individual_names = vcf_model.individuals_name[to_keep]
80+
7581
self.individual_ploidies = [
7682
len(nodes[nodes >= 0]) for nodes in vcf_model.individuals_nodes
7783
]
7884
self.num_individuals = len(self.individual_names)
7985

80-
if len(vcf_model.individuals_nodes) == 0:
86+
if len(individuals_nodes) == 0:
8187
raise ValueError("No samples in resulting VCF model")
8288

8389
# Flatten the array of node IDs, filtering out the -1 padding values
8490
self.samples = []
85-
for row in vcf_model.individuals_nodes:
91+
for row in individuals_nodes:
8692
for node_id in row:
8793
if node_id != -1:
8894
self.samples.append(node_id)

0 commit comments

Comments
 (0)