1+ """
2+ Command-line interface for the plotting package.
3+ """
4+
5+ import logging
6+ from pathlib import Path
7+ from typing import Optional , Union
8+
9+ import click
10+
11+ from linkml_store .plotting .heatmap import heatmap_from_file , export_heatmap_data
12+ from linkml_store .utils .format_utils import Format
13+
14+ logger = logging .getLogger (__name__ )
15+
16+
@click.group()
def plot_cli() -> None:
    """Plotting utilities for LinkML data."""
    # The group has no behaviour of its own; subcommands register themselves
    # on it through the ``@plot_cli.command()`` decorator.
21+
22+
def _export_plot_data(ax, y_column: str, export_data: str, export_format: Union[str, Format]) -> bool:
    """Write the matrix drawn on *ax* to *export_data*.

    The values are recovered from the first ``QuadMesh`` artist found on the
    axes and labelled with the axes' tick labels, so the input file never has
    to be read a second time (which is impossible when it came from stdin).

    :param ax: object returned as the axes by ``heatmap_from_file``
    :param y_column: name given to the column holding the row labels
    :param export_data: path the table is written to
    :param export_format: output format handed to ``write_output``
    :return: True if a table was written, False if no mesh could be found
    """
    # Local imports, as in the original code path: only needed when exporting.
    import pandas as pd
    from matplotlib.collections import QuadMesh

    from linkml_store.utils.format_utils import write_output

    if not (hasattr(ax, "get_figure") and hasattr(ax, "get_children")):
        return False

    mesh = next((child for child in ax.get_children() if isinstance(child, QuadMesh)), None)
    if mesh is None:
        return False

    row_labels = [tick.get_text() for tick in ax.get_yticklabels()]
    col_labels = [tick.get_text() for tick in ax.get_xticklabels()]

    # Empty tick labels are dropped from the frame but still occupy a
    # position when indexing into the flattened mesh values below.
    heatmap_df = pd.DataFrame(
        index=[label for label in row_labels if label],
        columns=[label for label in col_labels if label],
    )

    # The mesh array may be 2-D (rows x cols) or already flat, presumably
    # depending on the matplotlib version; flatten it so the size check counts
    # cells rather than rows. It can also be None if no array was set.
    mesh_values = mesh.get_array()
    flat_values = mesh_values.ravel() if mesh_values is not None else ()

    n_cols = len(col_labels)
    if len(flat_values) == len(row_labels) * n_cols:
        for i, row in enumerate(row_labels):
            for j, col in enumerate(col_labels):
                if row and col:
                    heatmap_df.at[row, col] = flat_values[i * n_cols + j]
    else:
        # The table is still written (labels only), but say so instead of
        # silently producing empty cells.
        logger.warning(
            "Heatmap mesh holds %d values but the axes show %d x %d tick labels; "
            "exported cells are left empty",
            len(flat_values),
            len(row_labels),
            n_cols,
        )

    # Turn the row labels into a regular column named after the y column.
    result_df = heatmap_df.reset_index().rename(columns={"index": y_column})
    write_output(result_df.to_dict(orient="records"), format=export_format, target=export_data)
    return True


@plot_cli.command()
@click.argument("input_file", required=False)
@click.option("--x-column", "-x", required=True, help="Column to use for x-axis")
@click.option("--y-column", "-y", required=True, help="Column to use for y-axis")
@click.option("--value-column", "-v", help="Column containing values (if not provided, counts will be used)")
@click.option("--title", "-t", help="Title for the heatmap")
@click.option("--width", "-w", type=int, default=10, show_default=True, help="Width of the figure in inches")
@click.option("--height", "-h", type=int, default=8, show_default=True, help="Height of the figure in inches")
@click.option("--cmap", "-c", default="YlGnBu", show_default=True, help="Colormap to use")
@click.option("--output", "-o", required=True, help="Output file path")
@click.option("--format", "-f", help="Input file format")
@click.option("--dpi", type=int, default=300, show_default=True, help="DPI for output image")
@click.option("--square/--no-square", default=False, show_default=True, help="Make cells square")
@click.option("--annotate/--no-annotate", default=True, show_default=True, help="Annotate cells with values")
@click.option("--font-size", type=int, default=10, show_default=True, help="Font size for annotations and labels")
@click.option("--robust/--no-robust", default=False, show_default=True, help="Use robust quantiles for colormap scaling")
@click.option("--remove-duplicates/--no-remove-duplicates", default=True, show_default=True,
              help="Remove duplicate x,y combinations (default) or keep all occurrences")
@click.option("--cluster", type=click.Choice(["none", "both", "x", "y"]), default="none", show_default=True,
              help="Cluster axes: none (default), both, x-axis only, or y-axis only")
@click.option("--cluster-method", type=click.Choice(["complete", "average", "single", "ward"]), default="complete", show_default=True,
              help="Linkage method for hierarchical clustering")
@click.option("--cluster-metric", type=click.Choice(["euclidean", "correlation", "cosine", "cityblock"]), default="euclidean", show_default=True,
              help="Distance metric for clustering")
@click.option("--export-data", "-e", help="Export the heatmap data to this file")
@click.option("--export-format", "-E", type=click.Choice([f.value for f in Format]), default="csv", show_default=True,
              help="Format for exported data")
def heatmap(
    input_file: Optional[str],
    x_column: str,
    y_column: str,
    value_column: Optional[str],
    title: Optional[str],
    width: int,
    height: int,
    cmap: str,
    output: str,
    format: Optional[str],
    dpi: int,
    square: bool,
    annotate: bool,
    font_size: int,
    robust: bool,
    remove_duplicates: bool,
    cluster: str,
    cluster_method: str,
    cluster_metric: str,
    export_data: Optional[str],
    export_format: Union[str, Format],
) -> None:
    """
    Create a heatmap from a tabular data file.

    Examples:
        # From a file
        linkml-store plot heatmap data.csv -x species -y country -o heatmap.png

        # From stdin
        cat data.csv | linkml-store plot heatmap -x species -y country -o heatmap.png

    This will create a heatmap showing the frequency counts of species by country.
    If you want to use a specific value column instead of counts:

        linkml-store plot heatmap data.csv -x species -y country -v population -o heatmap.png
    """
    # NOTE: the docstring above is rendered by click as the command's --help
    # text, so parameter documentation lives in the option ``help=`` strings
    # rather than in an Args section.

    # No positional argument means "read from stdin"; format_utils treats "-" as stdin.
    if input_file is None:
        input_file = "-"

    # The CLI spells "no clustering" as the string 'none'; heatmap_from_file
    # receives False in that case and the axis name otherwise.
    use_cluster = False if cluster == "none" else cluster

    # Render the heatmap; the image itself is written to ``output`` by this call.
    _fig, ax = heatmap_from_file(
        file_path=input_file,
        x_column=x_column,
        y_column=y_column,
        value_column=value_column,
        title=title,
        figsize=(width, height),
        cmap=cmap,
        output_file=output,
        format=format,
        dpi=dpi,
        square=square,
        annot=annotate,
        font_size=font_size,
        robust=robust,
        remove_duplicates=remove_duplicates,
        cluster=use_cluster,
        cluster_method=cluster_method,
        cluster_metric=cluster_metric,
    )

    if export_data:
        # Reuse the data already drawn on the plot instead of loading the input
        # again; this avoids the "I/O operation on closed file" error when the
        # input was stdin.
        if _export_plot_data(ax, y_column, export_data, export_format):
            click.echo(f"Heatmap data exported to {export_data}")
        else:
            click.echo("Warning: Could not export data from the plot")

    click.echo(f"Heatmap created at {output}")
169+
170+
if __name__ == "__main__":
    # Run the click command group when this module is executed as a script.
    plot_cli()
0 commit comments