Skip to content

Commit 8b1b69c

Browse files
buhrmannines
authored and committed
Relax version requirement for stanfordnlp to include v0.2.0. (#15)
* For compatibility with native Spacy language classes allow passing of empty text strings. This will produce 0-length docs, rather than raising an exception.
* Increment minor version.
* Relax version requirement for stanfordnlp to include new 0.2.0.
* Make tests pass with stanfordnlp 0.2.0. Some POS tag predictions have flipped in ambiguous cases (e.g. DET/PRON, VERB/AUX).
1 parent bddb883 commit 8b1b69c

File tree

3 files changed

+20
-5
lines changed

3 files changed

+20
-5
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
spacy>=2.1.0
2-
stanfordnlp>=0.1.0,<0.2.0
2+
stanfordnlp>=0.1.0,<0.2.1
33
# Development dependencies
44
pytest>=4.0.0,<5.0.0

spacy_stanfordnlp/about.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
__title__ = "spacy-stanfordnlp"
2-
__version__ = "0.1.1"
2+
__version__ = "0.1.2"
33
__summary__ = "Use the latest StanfordNLP research models directly in spaCy"
44
__uri__ = "https://explosion.ai"
55
__author__ = "Ines Montani"

tests/test_language.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,32 @@ def lang():
1616
return "en"
1717

1818

19+
def tags_equal(act, exp):
    """Check that every actual tag matches its expected counterpart.

    Each entry of ``exp`` is either a single tag (a ``str``), which the
    actual tag must equal, or a collection of acceptable tags, which must
    contain the actual tag.

    Args:
        act: Iterable of actual tags.
        exp: Iterable of expected entries, aligned position by position
            with ``act``.

    Returns:
        True if both sequences have the same length and every actual tag
        satisfies its expected entry; False otherwise.
    """
    # Materialize both inputs so generators are accepted and lengths can be compared.
    act = list(act)
    exp = list(exp)
    # zip() stops at the shorter input, so without this check a missing or
    # extra token would silently compare as equal.
    if len(act) != len(exp):
        return False
    return all(a == e if isinstance(e, str) else a in e for a, e in zip(act, exp))
22+
23+
1924
def test_spacy_stanfordnlp(lang, models_dir):
20-
snlp = stanfordnlp.Pipeline(lang=lang, models_dir=models_dir)
25+
try:
26+
snlp = stanfordnlp.Pipeline(lang=lang, models_dir=models_dir)
27+
except:
28+
snlp = stanfordnlp.Pipeline(lang=lang)
2129
nlp = StanfordNLPLanguage(snlp)
2230
assert nlp.lang == "stanfordnlp_" + lang
2331

2432
doc = nlp("Hello world! This is a test.")
2533

34+
# Expected POS tags. Note: Different versions of stanfordnlp result in different POS tags.
35+
# In particular, "this" can be a determiner or pronoun, their distinction is pretty vague in
36+
# general. And "is" in "This is a test" can be interpreted either as simply a verb or as an
37+
# auxiliary (linking) verb. Neither interpretation is necessarily more or less correct.
2638
# fmt: off
39+
pos_exp = ["INTJ", "NOUN", "PUNCT", ("DET", "PRON"), ("VERB", "AUX"), "DET", "NOUN", "PUNCT"]
40+
2741
assert [t.text for t in doc] == ["Hello", "world", "!", "This", "is", "a", "test", "."]
2842
assert [t.lemma_ for t in doc] == ["hello", "world", "!", "this", "be", "a", "test", "."]
29-
assert [t.pos_ for t in doc] == ["INTJ", "NOUN", "PUNCT", "DET", "VERB", "DET", "NOUN", "PUNCT"]
43+
assert tags_equal([t.pos_ for t in doc], pos_exp)
44+
3045
assert [t.tag_ for t in doc] == ["UH", "NN", ".", "DT", "VBZ", "DT", "NN", '.']
3146
assert [t.dep_ for t in doc] == ["root", "vocative", "punct", "nsubj", "cop", "det", "root", "punct"]
3247
assert [t.is_sent_start for t in doc] == [True, None, None, True, None, None, None, None]
@@ -41,7 +56,7 @@ def test_spacy_stanfordnlp(lang, models_dir):
4156
assert docs[0].text == "Hello world"
4257
assert [t.pos_ for t in docs[0]] == ["INTJ", "NOUN"]
4358
assert docs[1].text == "This is a test"
44-
assert [t.pos_ for t in docs[1]] == ["DET", "VERB", "DET", "NOUN"]
59+
assert tags_equal([t.pos_ for t in docs[1]], pos_exp[3:-1])
4560

4661

4762
def test_get_defaults():

0 commit comments

Comments
 (0)