|
6 | 6 | - resolve "end" (None) to END_POS |
7 | 7 | """ |
8 | 8 |
|
9 | | -from collections import Counter, defaultdict |
| 9 | +from collections import Counter |
10 | 10 | from dataclasses import dataclass |
11 | 11 | from typing import Literal, Optional, Sequence, Tuple |
12 | 12 |
|
@@ -162,27 +162,31 @@ def compile_transcripts( |
162 | 162 | compiled_transcripts = deduplicated_transcripts |
163 | 163 |
|
164 | 164 | # Build groups structure |
165 | | - # Preserve order of first appearance |
166 | | - groups_order = [] |
167 | | - group_counts = defaultdict(int) |
| 165 | + # Groups are consecutive runs of transcripts with the same group value |
| 166 | + # Preserve order and allow the same group name to appear multiple times |
| 167 | + groups_list = [] # List of (group_name, size) tuples |
168 | 168 | SENTINEL = object() # Unique sentinel that's not None |
169 | 169 | last_group = SENTINEL |
170 | | - last_group_count = 0 |
| 170 | + current_group_size = 0 |
| 171 | + |
171 | 172 | for transcript in parsed_transcripts: |
172 | 173 | if transcript.group != last_group: |
| 174 | + # Save the previous group if it exists |
173 | 175 | if last_group is not SENTINEL: |
174 | | - group_counts[last_group] = last_group_count |
| 176 | + groups_list.append((last_group, current_group_size)) |
| 177 | + # Start a new group |
175 | 178 | last_group = transcript.group |
176 | | - groups_order.append(transcript.group) |
177 | | - last_group_count = 0 |
178 | | - last_group_count += 1 |
| 179 | + current_group_size = 0 |
| 180 | + current_group_size += 1 |
179 | 181 |
|
180 | | - group_counts[last_group] = last_group_count # for the final group |
| 182 | + # Don't forget the final group |
| 183 | + if last_group is not SENTINEL: |
| 184 | + groups_list.append((last_group, current_group_size)) |
181 | 185 |
|
182 | | - # Build groups list |
| 186 | + # Build CompiledGroup objects |
183 | 187 | compiled_groups = [ |
184 | | - CompiledGroup(name=group_name, size=group_counts[group_name]) |
185 | | - for group_name in groups_order |
| 188 | + CompiledGroup(name=group_name, size=size) |
| 189 | + for group_name, size in groups_list |
186 | 190 | ] |
187 | 191 |
|
188 | 192 | return compiled_transcripts, compiled_groups |
|
0 commit comments