Skip to content

Commit 6927767

Browse files
chg: improve choices handling and instances generation
- for choices, an Itemset class is introduced which allows calculating and storing metadata about the choice list more consistently. In particular, working out whether itext is required only once. - the initial motivation was the if block in survey.py line 391, which for a form with many choice lists resulted in a large slow down due to something approaching O(n*m) + costs from string join/split. - this allowed removing: - "_itemset*" properties from MultipleChoiceQuestion - _search_lists property from Survey - functions is_label_dynamic and has_dynamic_label - in the course of implementing this it became clear that actually the Tag class never has choices so code for that is removed. The Tag class is like an Option with respect to OsmUploadQuestion. Added some OSM-related test cases to check it's not broken. - similarly since choices/itext are handled by Survey, the MultipleChoiceQuestion class doesn't need choices children either, just a reference to the Itemset for XML and JSON output. - for instances generation, the above choices changes allowed simplification of _generate_static_instances, and otherwise the changes are mainly to avoid repeating checks or using intermediate lists by instead using generators/yield as much as possible. - test_j2x_creation.py / test_j2x_question.py / strings.ini - updated these tests since they are using internal APIs in a way that diverges significantly from what xls2json currently emits - For example test_select_one_question_multilingual had multi-lang choice labels but the expected XML string had a reference like "/test/qname/a:label" which implies choice itemsets aren't shared which has not been the case for a while. - tried to make these tests more useful by adding xpath assertions, and unlike other tests using ss_structure they may be useful for validating/showing what dict structure xlsforms can be provided as. - test_j2x_xform_build_preparation.py - removed this test since it's not clear what the expectation is. If it was intended to check that identical choice lists from separate questions are merged, then that functionality doesn't exist, and the choices should not be provided separately per question anyway. - test_dynamic_default.py / test_translations.py - updated performance test figures. - translation test benefits most from the choices changes because it has lots of choice lists. Increase in memory most likely due to Itemset instances that now wrap each choice list. - dynamic_default slightly faster due to the instances changes and earlier commits today (e.g. not calling xml_label_or_hint twice for input/text questions, etc).
1 parent d5ba008 commit 6927767

File tree

12 files changed

+603
-550
lines changed

12 files changed

+603
-550
lines changed

pyxform/builder.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -146,16 +146,20 @@ def _create_question_from_dict(
146146
)
147147

148148
if question_class:
149-
if const.CHOICES in d and choices:
150-
return question_class(
151-
question_type_dictionary=question_type_dictionary,
152-
choices=choices.get(d[const.ITEMSET], d[const.CHOICES]),
153-
**{k: v for k, v in d.items() if k != const.CHOICES},
154-
)
155-
else:
156-
return question_class(
157-
question_type_dictionary=question_type_dictionary, **d
158-
)
149+
if choices:
150+
d_choices = d.get(const.CHOICES, d.get(const.CHILDREN))
151+
if d_choices:
152+
return question_class(
153+
question_type_dictionary=question_type_dictionary,
154+
**{
155+
k: v
156+
for k, v in d.items()
157+
if k not in {const.CHOICES, const.CHILDREN}
158+
},
159+
choices=choices.get(d[const.ITEMSET], d_choices),
160+
)
161+
162+
return question_class(question_type_dictionary=question_type_dictionary, **d)
159163

160164
return ()
161165

pyxform/question.py

Lines changed: 79 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import os.path
6+
import re
67
from collections.abc import Callable, Generator, Iterable
78
from itertools import chain
89
from typing import TYPE_CHECKING
@@ -16,12 +17,12 @@
1617
EXTERNAL_INSTANCE_EXTENSIONS,
1718
)
1819
from pyxform.errors import PyXFormError
20+
from pyxform.parsing.expression import RE_ANY_PYXFORM_REF
1921
from pyxform.question_type_dictionary import QUESTION_TYPE_DICT
2022
from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement
2123
from pyxform.utils import (
2224
PYXFORM_REFERENCE_REGEX,
2325
DetachableElement,
24-
coalesce,
2526
combine_lists,
2627
default_is_dynamic,
2728
node,
@@ -32,9 +33,6 @@
3233

3334

3435
QUESTION_EXTRA_FIELDS = (
35-
"_itemset_dyn_label",
36-
"_itemset_has_media",
37-
"_itemset_multi_language",
3836
"_qtd_defaults",
3937
"_qtd_kwargs",
4038
"action",
@@ -55,7 +53,7 @@
5553
QUESTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *QUESTION_EXTRA_FIELDS)
5654

5755
SELECT_QUESTION_EXTRA_FIELDS = (
58-
constants.CHILDREN,
56+
constants.CHOICES,
5957
constants.ITEMSET,
6058
constants.LIST_NAME_U,
6159
)
@@ -65,15 +63,12 @@
6563
OSM_QUESTION_FIELDS = (*QUESTION_FIELDS, *SELECT_QUESTION_EXTRA_FIELDS)
6664

6765
OPTION_EXTRA_FIELDS = (
68-
"_choice_itext_id",
66+
"_choice_itext_ref",
6967
constants.MEDIA,
7068
"sms_option",
7169
)
7270
OPTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *OPTION_EXTRA_FIELDS)
7371

74-
TAG_EXTRA_FIELDS = (constants.CHILDREN,)
75-
TAG_FIELDS = (*SURVEY_ELEMENT_FIELDS, *TAG_EXTRA_FIELDS)
76-
7772

7873
class Question(SurveyElement):
7974
__slots__ = QUESTION_EXTRA_FIELDS
@@ -291,40 +286,60 @@ def __init__(
291286
sms_option: str | None = None,
292287
**kwargs,
293288
):
294-
self._choice_itext_id: str | None = None
289+
self._choice_itext_ref: str | None = None
295290
self.media: dict | None = media
296291
self.sms_option: str | None = sms_option
297292

298293
super().__init__(name=name, label=label, **kwargs)
299294

300-
def xml_value(self):
301-
return node("value", self.name)
302-
303-
def xml(self, survey: "Survey"):
304-
item = node("item")
305-
item.appendChild(self.xml_label(survey=survey))
306-
item.appendChild(self.xml_value())
307-
308-
return item
309-
310295
def validate(self):
311296
pass
312297

313298
def xml_control(self, survey: "Survey"):
314299
raise NotImplementedError()
315300

316-
def _translation_path(self, display_element):
317-
if self._choice_itext_id is not None:
318-
return self._choice_itext_id
319-
return super()._translation_path(display_element=display_element)
320-
321301
def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict:
322302
to_delete = (k for k in self.get_slot_names() if k.startswith("_"))
323303
if delete_keys is not None:
324304
to_delete = chain(to_delete, delete_keys)
325305
return super().to_json_dict(delete_keys=to_delete)
326306

327307

308+
class Itemset:
309+
"""Itemset details and metadata detection."""
310+
311+
__slots__ = ("name", "options", "requires_itext", "used_by_search")
312+
313+
def __init__(self, name: str, choices: Iterable[dict]):
314+
self.requires_itext: bool = False
315+
self.used_by_search: bool = False
316+
self.name: str = name
317+
self.options: tuple[Option, ...] = tuple(o for o in self.get_options(choices))
318+
319+
def get_options(self, choices: Iterable[dict]) -> Generator[Option, None, None]:
320+
requires_itext = False
321+
for c in choices:
322+
option = Option(**c)
323+
if not requires_itext:
324+
# Media: dict of image, audio, etc. Defaults to None.
325+
if option.media:
326+
requires_itext = True
327+
else:
328+
choice_label = option.label
329+
label_is_dict = isinstance(choice_label, dict)
330+
# Multi-language: dict of labels etc per language. Can be just a string.
331+
if label_is_dict:
332+
requires_itext = True
333+
# Dynamic label: string contains a pyxform reference.
334+
elif (
335+
choice_label
336+
and re.search(RE_ANY_PYXFORM_REF, choice_label) is not None
337+
):
338+
requires_itext = True
339+
yield option
340+
self.requires_itext = requires_itext
341+
342+
328343
class MultipleChoiceQuestion(Question):
329344
__slots__ = SELECT_QUESTION_EXTRA_FIELDS
330345

@@ -335,53 +350,21 @@ def get_slot_names() -> tuple[str, ...]:
335350
def __init__(
336351
self, itemset: str | None = None, list_name: str | None = None, **kwargs
337352
):
338-
# Internals
339-
self._itemset_dyn_label: bool = False
340-
self._itemset_has_media: bool = False
341-
self._itemset_multi_language: bool = False
353+
if not itemset and not list_name:
354+
raise PyXFormError(
355+
"Arguments 'itemset' and 'list_name' must not both be None or empty."
356+
)
342357

343358
# Structure
344-
self.children: tuple[Option, ...] | None = None
359+
self.choices: Itemset | None = None
345360
self.itemset: str | None = itemset
346361
self.list_name: str | None = list_name
347362

348-
# Notice that choices can be specified under choices or children.
349-
# I'm going to try to stick to just choices.
350-
# Aliases in the json format will make it more difficult
351-
# to use going forward.
352-
kw_choices = kwargs.pop(constants.CHOICES, None)
353-
kw_children = kwargs.pop(constants.CHILDREN, None)
354-
choices = coalesce(kw_choices, kw_children)
355-
if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option):
356-
self.children = choices
357-
elif choices:
358-
self.children = tuple(
359-
Option(**c) for c in combine_lists(kw_choices, kw_children)
360-
)
363+
choices = kwargs.pop(constants.CHOICES, None)
364+
if isinstance(choices, Itemset):
365+
self.choices = choices
361366
super().__init__(**kwargs)
362367

363-
def validate(self):
364-
Question.validate(self)
365-
if self.children:
366-
for child in self.children:
367-
child.validate()
368-
369-
def iter_descendants(
370-
self,
371-
condition: Callable[["SurveyElement"], bool] | None = None,
372-
iter_into_section_items: bool = False,
373-
) -> Generator["SurveyElement", None, None]:
374-
if condition is None:
375-
yield self
376-
elif condition(self):
377-
yield self
378-
if iter_into_section_items and self.children:
379-
for e in self.children:
380-
yield from e.iter_descendants(
381-
condition=condition,
382-
iter_into_section_items=iter_into_section_items,
383-
)
384-
385368
def build_xml(self, survey: "Survey"):
386369
if self.bind["type"] not in {"string", "odk:rank"}:
387370
raise PyXFormError("""Invalid value for `self.bind["type"]`.""")
@@ -403,21 +386,18 @@ def build_xml(self, survey: "Survey"):
403386
itemset_value_ref = self.parameters.get("value", itemset_value_ref)
404387
itemset_label_ref = self.parameters.get("label", itemset_label_ref)
405388

406-
multi_language = self._itemset_multi_language
407-
has_media = self._itemset_has_media
408-
has_dyn_label = self._itemset_dyn_label
409389
is_previous_question = bool(PYXFORM_REFERENCE_REGEX.search(self.itemset))
410390

411391
if file_extension in EXTERNAL_INSTANCE_EXTENSIONS:
412392
pass
413-
elif not multi_language and not has_media and not has_dyn_label:
393+
elif self.choices and self.choices.requires_itext:
414394
itemset = self.itemset
395+
itemset_label_ref = "jr:itext(itextId)"
415396
else:
416397
itemset = self.itemset
417-
itemset_label_ref = "jr:itext(itextId)"
418398

419399
choice_filter = self.choice_filter
420-
if choice_filter is not None:
400+
if choice_filter:
421401
choice_filter = survey.insert_xpaths(
422402
choice_filter, self, True, is_previous_question
423403
)
@@ -460,63 +440,43 @@ def build_xml(self, survey: "Survey"):
460440

461441
nodeset += ")"
462442

463-
itemset_children = [
464-
node("value", ref=itemset_value_ref),
465-
node("label", ref=itemset_label_ref),
466-
]
467-
result.appendChild(node("itemset", *itemset_children, nodeset=nodeset))
468-
elif self.children:
469-
for child in self.children:
470-
result.appendChild(child.xml(survey=survey))
443+
result.appendChild(
444+
node(
445+
"itemset",
446+
node("value", ref=itemset_value_ref),
447+
node("label", ref=itemset_label_ref),
448+
nodeset=nodeset,
449+
)
450+
)
451+
elif self.choices:
452+
# Options processing specific to XLSForms using the "search()" function.
453+
# The _choice_itext_ref is prepared by Survey._redirect_is_search_itext.
454+
itemset = self.choices
455+
if itemset.used_by_search:
456+
for option in itemset.options:
457+
if itemset.requires_itext:
458+
label_node = node("label", ref=option._choice_itext_ref)
459+
elif self.label:
460+
label, output_inserted = survey.insert_output_values(
461+
option.label, option
462+
)
463+
label_node = node("label", label, toParseString=output_inserted)
464+
else:
465+
label_node = node("label")
466+
result.appendChild(
467+
node("item", label_node, node("value", option.name))
468+
)
471469

472470
return result
473471

474472

475473
class Tag(SurveyElement):
476-
__slots__ = TAG_EXTRA_FIELDS
477-
478474
@staticmethod
479475
def get_slot_names() -> tuple[str, ...]:
480-
return TAG_FIELDS
481-
482-
def __init__(self, name: str, label: str | dict | None = None, **kwargs):
483-
self.children: tuple[Option, ...] | None = None
484-
485-
kw_choices = kwargs.pop(constants.CHOICES, None)
486-
kw_children = kwargs.pop(constants.CHILDREN, None)
487-
choices = coalesce(kw_choices, kw_children)
488-
if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option):
489-
self.children = choices
490-
elif choices:
491-
self.children = tuple(
492-
Option(**c) for c in combine_lists(kw_choices, kw_children)
493-
)
494-
super().__init__(name=name, label=label, **kwargs)
495-
496-
def iter_descendants(
497-
self,
498-
condition: Callable[["SurveyElement"], bool] | None = None,
499-
iter_into_section_items: bool = False,
500-
) -> Generator["SurveyElement", None, None]:
501-
if condition is None:
502-
yield self
503-
elif condition(self):
504-
yield self
505-
if iter_into_section_items and self.children:
506-
for e in self.children:
507-
yield from e.iter_descendants(
508-
condition=condition,
509-
iter_into_section_items=iter_into_section_items,
510-
)
476+
return SURVEY_ELEMENT_FIELDS
511477

512478
def xml(self, survey: "Survey"):
513-
result = node("tag", key=self.name)
514-
result.appendChild(self.xml_label(survey=survey))
515-
if self.children:
516-
for choice in self.children:
517-
result.appendChild(choice.xml(survey=survey))
518-
519-
return result
479+
return node("tag", self.xml_label(survey=survey), key=self.name)
520480

521481
def validate(self):
522482
pass

0 commit comments

Comments
 (0)