Skip to content

Commit 84d7e89

Browse files
authored
Merge pull request #50 from linkml/solr-composite-facets
faceting on composite for solr
2 parents 9016184 + 29a852a commit 84d7e89

File tree

1 file changed

+102
-19
lines changed

1 file changed

+102
-19
lines changed

src/linkml_store/api/stores/solr/solr_collection.py

Lines changed: 102 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import logging
44
from copy import copy
5-
from typing import Any, Dict, List, Optional, Union
5+
from typing import Any, Dict, List, Optional, Union, Tuple
66

77
import requests
88

@@ -56,6 +56,7 @@ def query(self, query: Query, **kwargs) -> QueryResult:
5656
response.raise_for_status()
5757

5858
data = response.json()
59+
logger.debug(f"Response: {data}")
5960
num_rows = data["response"]["numFound"]
6061
rows = data["response"]["docs"]
6162

@@ -64,30 +65,112 @@ def query(self, query: Query, **kwargs) -> QueryResult:
6465
def query_facets(
    self,
    where: Optional[Dict] = None,
    facet_columns: Optional[List[Union[str, Tuple[str, ...], List[str]]]] = None,
    facet_limit=DEFAULT_FACET_LIMIT,
    facet_min_count: int = 1,
    **kwargs,
) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
    """
    Query facet counts for fields or field combinations.

    Single-field facets are fetched with one ``facet.field`` request; each
    composite facet is fetched with its own ``facet.pivot`` request.

    :param where: Filter conditions
    :param facet_columns: List of fields to facet on. Elements can be:
        - Simple strings for single field facets
        - Tuples (or lists) of strings for field combinations (pivot facets)
        Elements of any other type are skipped with a warning.
    :param facet_limit: Maximum number of facet values to return
    :param facet_min_count: Minimum count for facet values to be included
    :return: Dictionary mapping fields or field tuples to lists of (value, count) tuples.
        Composite facets are always keyed by a tuple, even when supplied as a list.
    """
    solr_query = self._build_solr_query(where)

    # Separate single fields and composite (pivot) fields
    single_fields: List[str] = []
    tuple_fields: List[Tuple[str, ...]] = []
    for field in facet_columns or []:
        if isinstance(field, str):
            single_fields.append(field)
        elif isinstance(field, (tuple, list)):
            # Lists are not hashable, so normalize to a tuple for use as a result key;
            # a tuple passed in is returned unchanged by tuple().
            tuple_fields.append(tuple(field))
        else:
            # Previously such entries vanished silently; surface them instead.
            logger.warning(f"Ignoring unsupported facet column {field!r}")

    # Process regular facets
    results = {}
    if single_fields:
        solr_query["facet"] = "true"
        solr_query["facet.field"] = single_fields
        solr_query["facet.limit"] = facet_limit
        solr_query["facet.mincount"] = facet_min_count

        logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")
        response = requests.get(f"{self._collection_base}/select", params=solr_query)
        response.raise_for_status()

        data = response.json()
        facet_counts = data["facet_counts"]["facet_fields"]

        # The response is read as a flat [value, count, value, count, ...] list per field
        for facet_field, counts in facet_counts.items():
            results[facet_field] = list(zip(counts[::2], counts[1::2]))

    # Process pivot facets for composite fields
    # TODO: Add a warning if Solr < 4.0, when this was introduced
    for field_tuple in tuple_fields:
        # The comma-joined field list is both the request parameter value and
        # the key under which the pivot data is looked up in the response.
        pivot_key = ",".join(field_tuple)

        # Build a fresh query for this specific field combination
        pivot_query = self._build_solr_query(where)
        pivot_query["facet"] = "true"
        pivot_query["facet.pivot"] = pivot_key
        pivot_query["facet.pivot.mincount"] = facet_min_count
        pivot_query["facet.limit"] = facet_limit

        logger.info(f"Querying Solr collection {self.alias} for pivot facets with query: {pivot_query}")
        response = requests.get(f"{self._collection_base}/select", params=pivot_query)
        response.raise_for_status()

        data = response.json()
        pivot_facets = data.get("facet_counts", {}).get("facet_pivot", {})
        pivot_data = pivot_facets.get(pivot_key, [])

        # Flatten the nested pivot tree into a list of (value tuple, count) pairs
        pivot_results = []
        self._process_pivot_facets(pivot_data, [], pivot_results, field_tuple)

        results[field_tuple] = pivot_results

    return results
147+
148+
def _process_pivot_facets(self, pivot_data, current_values, results, field_tuple):
149+
"""
150+
Recursively process pivot facet results to extract combinations of field values.
151+
152+
:param pivot_data: The pivot facet data from Solr
153+
:param current_values: The current path of values in the recursion
154+
:param results: The result list to populate
155+
:param field_tuple: The original field tuple for reference
156+
"""
157+
for item in pivot_data:
158+
# Add the current field value
159+
value = item.get("value")
160+
count = item.get("count", 0)
161+
162+
# Update the current path with this value
163+
values = current_values + [value]
164+
165+
# If we have all the fields from the tuple, add a result
166+
if len(values) == len(field_tuple):
167+
# Create a tuple of values corresponding to the field tuple
168+
results.append((tuple(values), count))
169+
170+
# Process child pivot fields recursively
171+
pivot = item.get("pivot", [])
172+
if pivot and len(values) < len(field_tuple):
173+
self._process_pivot_facets(pivot, values, results, field_tuple)
91174

92175
def _build_solr_query(
93176
self, query: Union[Query, Dict], search_term="*:*", extra: Optional[Dict] = None

0 commit comments

Comments
 (0)