Skip to content

Commit bbfbf83

Browse files
authored
Merge pull request #51 from linkml/plotting
Add plotting
2 parents 84d7e89 + 4cc473b commit bbfbf83

File tree

7 files changed

+960
-0
lines changed

7 files changed

+960
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""
2+
Visualization and plotting functions for LinkML data.
3+
"""
4+
5+
__version__ = "0.1.0"

src/linkml_store/plotting/cli.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
"""
2+
Command-line interface for the plotting package.
3+
"""
4+
5+
import logging
6+
from pathlib import Path
7+
from typing import Optional, Union
8+
9+
import click
10+
11+
from linkml_store.plotting.heatmap import heatmap_from_file, export_heatmap_data
12+
from linkml_store.utils.format_utils import Format
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
@click.group()
def plot_cli():
    """Plotting utilities for LinkML data."""
    # Intentionally empty: this function only serves as the Click group that
    # subcommands attach to via ``@plot_cli.command()``. The docstring above is
    # shown verbatim as the group's ``--help`` text.
21+
22+
23+
def _mesh_to_records(mesh_values, row_labels: list, col_labels: list, y_column: str) -> list:
    """
    Rebuild the heatmap table from the values drawn on the plot.

    :param mesh_values: flat (1-D) sequence of cell values in row-major order
    :param row_labels: y-axis tick label texts, one per heatmap row (may contain "")
    :param col_labels: x-axis tick label texts, one per heatmap column (may contain "")
    :param y_column: name given to the column that holds the row labels
    :return: list of dicts, one per labelled row, keyed by ``y_column`` and the column labels
    """
    import pandas as pd

    # Only labelled rows/columns become part of the exported table.
    table = pd.DataFrame(
        index=[label for label in row_labels if label],
        columns=[label for label in col_labels if label],
    )

    # Values can only be mapped back to labels when there is exactly one value
    # per (row label, column label) pair; otherwise the cells are left empty.
    n_cols = len(col_labels)
    if len(mesh_values) == len(row_labels) * n_cols:
        for i, row in enumerate(row_labels):
            if not row:
                continue
            for j, col in enumerate(col_labels):
                if col:
                    table.at[row, col] = mesh_values[i * n_cols + j]

    # Turn the row labels into a regular column named after the y column.
    result = table.reset_index()
    result.rename(columns={"index": y_column}, inplace=True)
    return result.to_dict(orient="records")


@plot_cli.command()
@click.argument("input_file", required=False)
@click.option("--x-column", "-x", required=True, help="Column to use for x-axis")
@click.option("--y-column", "-y", required=True, help="Column to use for y-axis")
@click.option("--value-column", "-v", help="Column containing values (if not provided, counts will be used)")
@click.option("--title", "-t", help="Title for the heatmap")
@click.option("--width", "-w", type=int, default=10, show_default=True, help="Width of the figure in inches")
@click.option("--height", "-h", type=int, default=8, show_default=True, help="Height of the figure in inches")
@click.option("--cmap", "-c", default="YlGnBu", show_default=True, help="Colormap to use")
@click.option("--output", "-o", required=True, help="Output file path")
@click.option("--format", "-f", help="Input file format")
@click.option("--dpi", type=int, default=300, show_default=True, help="DPI for output image")
@click.option("--square/--no-square", default=False, show_default=True, help="Make cells square")
@click.option("--annotate/--no-annotate", default=True, show_default=True, help="Annotate cells with values")
@click.option("--font-size", type=int, default=10, show_default=True, help="Font size for annotations and labels")
@click.option("--robust/--no-robust", default=False, show_default=True, help="Use robust quantiles for colormap scaling")
@click.option("--remove-duplicates/--no-remove-duplicates", default=True, show_default=True,
              help="Remove duplicate x,y combinations (default) or keep all occurrences")
@click.option("--cluster", type=click.Choice(["none", "both", "x", "y"]), default="none", show_default=True,
              help="Cluster axes: none (default), both, x-axis only, or y-axis only")
@click.option("--cluster-method", type=click.Choice(["complete", "average", "single", "ward"]), default="complete", show_default=True,
              help="Linkage method for hierarchical clustering")
@click.option("--cluster-metric", type=click.Choice(["euclidean", "correlation", "cosine", "cityblock"]), default="euclidean", show_default=True,
              help="Distance metric for clustering")
@click.option("--export-data", "-e", help="Export the heatmap data to this file")
@click.option("--export-format", "-E", type=click.Choice([f.value for f in Format]), default="csv", show_default=True,
              help="Format for exported data")
def heatmap(
    input_file: Optional[str],
    x_column: str,
    y_column: str,
    value_column: Optional[str],
    title: Optional[str],
    width: int,
    height: int,
    cmap: str,
    output: str,
    format: Optional[str],
    dpi: int,
    square: bool,
    annotate: bool,
    font_size: int,
    robust: bool,
    remove_duplicates: bool,
    cluster: str,
    cluster_method: str,
    cluster_metric: str,
    export_data: Optional[str],
    export_format: Union[str, Format],
):
    """
    Create a heatmap from a tabular data file.

    Examples:
        # From a file
        linkml-store plot heatmap data.csv -x species -y country -o heatmap.png

        # From stdin
        cat data.csv | linkml-store plot heatmap -x species -y country -o heatmap.png

    This will create a heatmap showing the frequency counts of species by country.
    If you want to use a specific value column instead of counts:

        linkml-store plot heatmap data.csv -x species -y country -v population -o heatmap.png
    """
    # NOTE: the docstring above is the user-facing ``--help`` text; each
    # parameter is documented by the ``help=`` of its option decorator.

    # No positional argument means "read from stdin".
    if input_file is None:
        input_file = "-"  # format_utils treats "-" as stdin

    # The plotting function expects False (not the string "none") to disable clustering.
    use_cluster = False if cluster == "none" else cluster

    # Create the heatmap; the image is written to ``output`` by this call.
    _fig, ax = heatmap_from_file(
        file_path=input_file,
        x_column=x_column,
        y_column=y_column,
        value_column=value_column,
        title=title,
        figsize=(width, height),
        cmap=cmap,
        output_file=output,
        format=format,
        dpi=dpi,
        square=square,
        annot=annotate,
        font_size=font_size,
        robust=robust,
        remove_duplicates=remove_duplicates,
        cluster=use_cluster,
        cluster_method=cluster_method,
        cluster_metric=cluster_metric,
    )

    if export_data:
        # Reuse the data already drawn on the plot instead of loading the input
        # again; this avoids the "I/O operation on closed file" error when the
        # input is stdin.
        # Imported directly: the previous ``plt.matplotlib.collections`` lookup
        # relied on a ``plt`` name that is never imported in this module.
        from matplotlib.collections import QuadMesh

        from linkml_store.utils.format_utils import write_output

        records = None
        if hasattr(ax, "get_figure") and hasattr(ax, "get_children"):
            for child in ax.get_children():
                if not isinstance(child, QuadMesh):
                    continue
                mesh_values = child.get_array()
                if mesh_values is None:
                    continue
                # Flatten so the element count is comparable to rows * cols
                # whether the mesh stores its values as a 1-D or a 2-D array.
                records = _mesh_to_records(
                    mesh_values.ravel(),
                    [tick.get_text() for tick in ax.get_yticklabels()],
                    [tick.get_text() for tick in ax.get_xticklabels()],
                    y_column,
                )
                break  # only the first mesh is exported

        if records is None:
            # Either ``ax`` is not an Axes-like object or it holds no heatmap mesh.
            click.echo("Warning: Could not export data from the plot")
        else:
            write_output(records, format=export_format, target=export_data)
            click.echo(f"Heatmap data exported to {export_data}")

    click.echo(f"Heatmap created at {output}")
169+
170+
171+
if __name__ == "__main__":
    # Running this module directly as a script invokes the Click group.
    plot_cli()

0 commit comments

Comments
 (0)