Skip to content

Commit 06d7163

Browse files
Merge pull request #457 from afuetterer/pytest
test: convert unittest style tests to pytest test functions
2 parents 00c971e + 43af386 commit 06d7163

File tree

4 files changed

+107
-128
lines changed

4 files changed

+107
-128
lines changed

tests/test_benchmark.py

Lines changed: 27 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -15,102 +15,83 @@
1515
# See the License for the specific language governing permissions and
1616
# limitations under the License.
1717
#
18-
# pytest --benchmark-enable --benchmark-timer=time.process_time tika/tests/test_benchmark.py
19-
# pytest --benchmark-enable --benchmark-timer=time.process_time tika/tests/test_benchmark.py
20-
import os
21-
import unittest
18+
# pytest --benchmark-enable --benchmark-timer=time.process_time tests/test_benchmark.py
19+
20+
from pathlib import Path
2221
import zlib
2322
import gzip
2423
from http import HTTPStatus
2524

2625
import tika.parser
2726

2827

28+
TEST_FILE_PATH = Path(__file__).parent / "files" / "rwservlet.pdf"
29+
HEADERS = {"Accept-Encoding": "gzip, deflate"}
30+
31+
2932
def test_local_binary(benchmark):
3033
"""parse file binary"""
31-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
32-
response = benchmark(tika_from_binary, file)
33-
34-
assert response['status'] == HTTPStatus.OK
34+
response = benchmark(tika_from_binary, TEST_FILE_PATH)
35+
assert response["status"] == HTTPStatus.OK
3536

3637

3738
def test_parser_buffer(benchmark):
3839
"""example how to send gzip file"""
39-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
40-
response = benchmark(tika_from_buffer, file)
41-
42-
assert response['status'] == HTTPStatus.OK
40+
response = benchmark(tika_from_buffer, TEST_FILE_PATH)
41+
assert response["status"] == HTTPStatus.OK
4342

4443

4544
def test_parser_buffer_zlib_input(benchmark):
4645
"""example how to send gzip file"""
47-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
48-
49-
response = benchmark(tika_from_buffer_zlib, file)
50-
51-
assert response['status'] == HTTPStatus.OK
46+
response = benchmark(tika_from_buffer_zlib, TEST_FILE_PATH)
47+
assert response["status"] == HTTPStatus.OK
5248

5349

5450
def test_parser_buffer_gzip_input(benchmark):
5551
"""parse file binary"""
56-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
57-
response = benchmark(tika_from_buffer_gzip, file)
58-
59-
assert response['status'] == HTTPStatus.OK
52+
response = benchmark(tika_from_buffer_gzip, TEST_FILE_PATH)
53+
assert response["status"] == HTTPStatus.OK
6054

6155

6256
def test_local_binary_with_gzip_output(benchmark):
6357
"""parse file binary"""
64-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
65-
response = benchmark(tika_from_binary, file, headers={'Accept-Encoding': 'gzip, deflate'})
66-
67-
assert response['status'] == HTTPStatus.OK
58+
response = benchmark(tika_from_binary, TEST_FILE_PATH, headers=HEADERS)
59+
assert response["status"] == HTTPStatus.OK
6860

6961

7062
def test_parser_buffer_with_gzip_output(benchmark):
7163
"""example how to send gzip file"""
72-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
73-
response = benchmark(tika_from_buffer, file, headers={'Accept-Encoding': 'gzip, deflate'})
74-
75-
assert response['status'] == HTTPStatus.OK
64+
response = benchmark(tika_from_buffer, TEST_FILE_PATH, headers=HEADERS)
65+
assert response["status"] == HTTPStatus.OK
7666

7767

7868
def test_parser_buffer_zlib_input_and_gzip_output(benchmark):
7969
"""example how to send gzip file"""
80-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
81-
82-
response = benchmark(tika_from_buffer_zlib, file, headers={'Accept-Encoding': 'gzip, deflate'})
83-
84-
assert response['status'] == HTTPStatus.OK
70+
response = benchmark(tika_from_buffer_zlib, TEST_FILE_PATH, headers=HEADERS)
71+
assert response["status"] == HTTPStatus.OK
8572

8673

8774
def test_parser_buffer_gzip_input_and_gzip_output(benchmark):
8875
"""parse file binary"""
89-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
90-
response = benchmark(tika_from_buffer_gzip, file, headers={'Accept-Encoding': 'gzip, deflate'})
91-
92-
assert response['status'] == HTTPStatus.OK
76+
response = benchmark(tika_from_buffer_gzip, TEST_FILE_PATH, headers=HEADERS)
77+
assert response["status"] == HTTPStatus.OK
9378

9479

9580
def tika_from_buffer_zlib(file, headers=None):
96-
with open(file, 'rb') as file_obj:
81+
with open(file, "rb") as file_obj:
9782
return tika.parser.from_buffer(zlib.compress(file_obj.read()), headers=headers)
9883

9984

10085
def tika_from_buffer_gzip(file, headers=None):
101-
with open(file, 'rb') as file_obj:
86+
with open(file, "rb") as file_obj:
10287
return tika.parser.from_buffer(gzip.compress(file_obj.read()), headers=headers)
10388

10489

10590
def tika_from_buffer(file, headers=None):
106-
with open(file, 'rb') as file_obj:
91+
with open(file, "rb") as file_obj:
10792
return tika.parser.from_buffer(file_obj.read(), headers=headers)
10893

10994

11095
def tika_from_binary(file, headers=None):
111-
with open(file, 'rb') as file_obj:
96+
with open(file, "rb") as file_obj:
11297
return tika.parser.from_file(file_obj, headers=headers)
113-
114-
115-
if __name__ == '__main__':
116-
unittest.main()

tests/test_tika.py

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -15,58 +15,57 @@
1515
# See the License for the specific language governing permissions and
1616
# limitations under the License.
1717

18-
import os
19-
import unittest
18+
from pathlib import Path
2019
from http import HTTPStatus
2120

2221
import tika.parser
2322
import tika.tika
2423

2524

26-
class CreateTest(unittest.TestCase):
27-
"""test for file types"""
25+
TEST_FILE_PATH = Path(__file__).parent / "files" / "rwservlet.pdf"
2826

29-
def test_remote_pdf(self):
30-
"""parse remote PDF"""
31-
self.assertTrue(tika.parser.from_file(
32-
'https://upload.wikimedia.org/wikipedia/commons/4/42/Article_feedback_flow_B_-_Thank_editors.pdf'))
3327

34-
def test_remote_html(self):
35-
"""parse remote HTML"""
36-
self.assertTrue(tika.parser.from_file('http://neverssl.com/index.html'))
28+
def test_remote_pdf():
29+
"""parse remote PDF"""
30+
assert tika.parser.from_file(
31+
"https://upload.wikimedia.org/wikipedia/commons/4/42/Article_feedback_flow_B_-_Thank_editors.pdf")
3732

38-
def test_remote_mp3(self):
39-
"""parse remote mp3"""
40-
self.assertTrue(tika.parser.from_file(
41-
'https://archive.org/download/Ainst-Spaceshipdemo.mp3/Ainst-Spaceshipdemo.mp3'))
4233

43-
def test_remote_jpg(self):
44-
"""parse remote jpg"""
45-
self.assertTrue(tika.parser.from_file(
46-
'https://upload.wikimedia.org/wikipedia/commons/b/b7/X_logo.jpg'))
34+
def test_remote_html():
35+
"""parse remote HTML"""
36+
assert tika.parser.from_file("http://neverssl.com/index.html")
4737

48-
def test_local_binary(self):
49-
"""parse file binary"""
50-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
51-
with open(file, 'rb') as file_obj:
52-
self.assertTrue(tika.parser.from_file(file_obj))
5338

54-
def test_local_buffer(self):
55-
response = tika.parser.from_buffer('Good evening, Dave')
56-
self.assertEqual(response['status'], HTTPStatus.OK)
39+
def test_remote_mp3():
40+
"""parse remote mp3"""
41+
assert tika.parser.from_file(
42+
"https://archive.org/download/Ainst-Spaceshipdemo.mp3/Ainst-Spaceshipdemo.mp3")
5743

58-
def test_local_path(self):
59-
"""parse file path"""
60-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
61-
self.assertTrue(tika.parser.from_file(file))
6244

63-
def test_kill_server(self):
64-
"""parse some file then kills server"""
65-
file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf')
66-
with open(file, 'rb') as file_obj:
67-
tika.parser.from_file(file_obj)
68-
self.assertIsNone(tika.tika.killServer())
45+
def test_remote_jpg():
46+
"""parse remote jpg"""
47+
assert tika.parser.from_file(
48+
"https://upload.wikimedia.org/wikipedia/commons/b/b7/X_logo.jpg")
6949

7050

71-
if __name__ == '__main__':
72-
unittest.main()
51+
def test_local_binary():
52+
"""parse file binary"""
53+
with open(TEST_FILE_PATH, "rb") as file_obj:
54+
assert tika.parser.from_file(file_obj)
55+
56+
57+
def test_local_buffer():
58+
response = tika.parser.from_buffer("Good evening, Dave")
59+
assert response["status"] == HTTPStatus.OK
60+
61+
62+
def test_local_path():
63+
"""parse file path"""
64+
assert tika.parser.from_file(str(TEST_FILE_PATH))
65+
66+
67+
def test_kill_server():
68+
"""parse some file then kills server"""
69+
with open(TEST_FILE_PATH, "rb") as file_obj:
70+
tika.parser.from_file(file_obj)
71+
assert tika.tika.killServer() is None

tests/test_unpack.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from tempfile import NamedTemporaryFile
2+
3+
from tika import unpack
4+
5+
6+
# Test data
7+
TEXT_UTF8 = "Hello, world!! 😎 👽"
8+
TEXT_ASCII = "Hello, world!!"
9+
10+
11+
def test_utf8():
12+
"""Test UTF-8 encoding"""
13+
with NamedTemporaryFile("w+b", prefix="tika-python", suffix=".txt", dir="/tmp") as f:
14+
f.write(TEXT_UTF8.encode("utf8"))
15+
f.flush()
16+
f.seek(0)
17+
parsed = unpack.from_file(f.name)
18+
assert parsed["content"].strip() == TEXT_UTF8
19+
20+
21+
def test_ascii():
22+
"""Test ASCII encoding"""
23+
with NamedTemporaryFile("w+t", prefix="tika-python", suffix=".txt", dir="/tmp") as f:
24+
f.write(TEXT_ASCII)
25+
f.flush()
26+
f.seek(0)
27+
parsed = unpack.from_file(f.name)
28+
assert parsed["content"].strip() == TEXT_ASCII
29+
30+
31+
def test_from_buffer():
32+
parsed = unpack.from_buffer("what?")
33+
assert parsed is not None
34+
assert parsed["metadata"] is not None
35+
assert parsed["metadata"]["Content-Length"] == "5"
36+
37+
38+
def test_from_buffer_with_headers():
39+
parsed = unpack.from_buffer("what?", headers={"Param": "whatever"})
40+
assert parsed is not None
41+
assert parsed["metadata"] is not None
42+
assert parsed["metadata"]["Content-Length"] == "5"

tests/tests_unpack.py

Lines changed: 0 additions & 43 deletions
This file was deleted.

0 commit comments

Comments
 (0)