|
15 | 15 | # See the License for the specific language governing permissions and |
16 | 16 | # limitations under the License. |
17 | 17 | # |
18 | | -# pytest --benchmark-enable --benchmark-timer=time.process_time tika/tests/test_benchmark.py |
19 | | -# pytest --benchmark-enable --benchmark-timer=time.process_time tika/tests/test_benchmark.py |
20 | | -import os |
21 | | -import unittest |
| 18 | +# pytest --benchmark-enable --benchmark-timer=time.process_time tests/test_benchmark.py |
| 19 | + |
| 20 | +from pathlib import Path |
22 | 21 | import zlib |
23 | 22 | import gzip |
24 | 23 | from http import HTTPStatus |
25 | 24 |
|
26 | 25 | import tika.parser |
27 | 26 |
|
28 | 27 |
|
| 28 | +TEST_FILE_PATH = Path(__file__).parent / "files" / "rwservlet.pdf" |
| 29 | +HEADERS = {"Accept-Encoding": "gzip, deflate"} |
| 30 | + |
| 31 | + |
29 | 32 | def test_local_binary(benchmark): |
30 | 33 | """parse file binary""" |
31 | | - file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') |
32 | | - response = benchmark(tika_from_binary, file) |
33 | | - |
34 | | - assert response['status'] == HTTPStatus.OK |
| 34 | + response = benchmark(tika_from_binary, TEST_FILE_PATH) |
| 35 | + assert response["status"] == HTTPStatus.OK |
35 | 36 |
|
36 | 37 |
|
37 | 38 | def test_parser_buffer(benchmark): |
38 | 39 | """example how to send gzip file""" |
39 | | - file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') |
40 | | - response = benchmark(tika_from_buffer, file) |
41 | | - |
42 | | - assert response['status'] == HTTPStatus.OK |
| 40 | + response = benchmark(tika_from_buffer, TEST_FILE_PATH) |
| 41 | + assert response["status"] == HTTPStatus.OK |
43 | 42 |
|
44 | 43 |
|
45 | 44 | def test_parser_buffer_zlib_input(benchmark): |
46 | 45 | """example how to send gzip file""" |
47 | | - file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') |
48 | | - |
49 | | - response = benchmark(tika_from_buffer_zlib, file) |
50 | | - |
51 | | - assert response['status'] == HTTPStatus.OK |
| 46 | + response = benchmark(tika_from_buffer_zlib, TEST_FILE_PATH) |
| 47 | + assert response["status"] == HTTPStatus.OK |
52 | 48 |
|
53 | 49 |
|
54 | 50 | def test_parser_buffer_gzip_input(benchmark): |
55 | 51 | """parse file binary""" |
56 | | - file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') |
57 | | - response = benchmark(tika_from_buffer_gzip, file) |
58 | | - |
59 | | - assert response['status'] == HTTPStatus.OK |
| 52 | + response = benchmark(tika_from_buffer_gzip, TEST_FILE_PATH) |
| 53 | + assert response["status"] == HTTPStatus.OK |
60 | 54 |
|
61 | 55 |
|
62 | 56 | def test_local_binary_with_gzip_output(benchmark): |
63 | 57 | """parse file binary""" |
64 | | - file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') |
65 | | - response = benchmark(tika_from_binary, file, headers={'Accept-Encoding': 'gzip, deflate'}) |
66 | | - |
67 | | - assert response['status'] == HTTPStatus.OK |
| 58 | + response = benchmark(tika_from_binary, TEST_FILE_PATH, headers=HEADERS) |
| 59 | + assert response["status"] == HTTPStatus.OK |
68 | 60 |
|
69 | 61 |
|
70 | 62 | def test_parser_buffer_with_gzip_output(benchmark): |
71 | 63 | """example how to send gzip file""" |
72 | | - file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') |
73 | | - response = benchmark(tika_from_buffer, file, headers={'Accept-Encoding': 'gzip, deflate'}) |
74 | | - |
75 | | - assert response['status'] == HTTPStatus.OK |
| 64 | + response = benchmark(tika_from_buffer, TEST_FILE_PATH, headers=HEADERS) |
| 65 | + assert response["status"] == HTTPStatus.OK |
76 | 66 |
|
77 | 67 |
|
78 | 68 | def test_parser_buffer_zlib_input_and_gzip_output(benchmark): |
79 | 69 | """example how to send gzip file""" |
80 | | - file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') |
81 | | - |
82 | | - response = benchmark(tika_from_buffer_zlib, file, headers={'Accept-Encoding': 'gzip, deflate'}) |
83 | | - |
84 | | - assert response['status'] == HTTPStatus.OK |
| 70 | + response = benchmark(tika_from_buffer_zlib, TEST_FILE_PATH, headers=HEADERS) |
| 71 | + assert response["status"] == HTTPStatus.OK |
85 | 72 |
|
86 | 73 |
|
87 | 74 | def test_parser_buffer_gzip_input_and_gzip_output(benchmark): |
88 | 75 | """parse file binary""" |
89 | | - file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') |
90 | | - response = benchmark(tika_from_buffer_gzip, file, headers={'Accept-Encoding': 'gzip, deflate'}) |
91 | | - |
92 | | - assert response['status'] == HTTPStatus.OK |
| 76 | + response = benchmark(tika_from_buffer_gzip, TEST_FILE_PATH, headers=HEADERS) |
| 77 | + assert response["status"] == HTTPStatus.OK |
93 | 78 |
|
94 | 79 |
|
95 | 80 | def tika_from_buffer_zlib(file, headers=None): |
96 | | - with open(file, 'rb') as file_obj: |
| 81 | + with open(file, "rb") as file_obj: |
97 | 82 | return tika.parser.from_buffer(zlib.compress(file_obj.read()), headers=headers) |
98 | 83 |
|
99 | 84 |
|
100 | 85 | def tika_from_buffer_gzip(file, headers=None): |
101 | | - with open(file, 'rb') as file_obj: |
| 86 | + with open(file, "rb") as file_obj: |
102 | 87 | return tika.parser.from_buffer(gzip.compress(file_obj.read()), headers=headers) |
103 | 88 |
|
104 | 89 |
|
105 | 90 | def tika_from_buffer(file, headers=None): |
106 | | - with open(file, 'rb') as file_obj: |
| 91 | + with open(file, "rb") as file_obj: |
107 | 92 | return tika.parser.from_buffer(file_obj.read(), headers=headers) |
108 | 93 |
|
109 | 94 |
|
110 | 95 | def tika_from_binary(file, headers=None): |
111 | | - with open(file, 'rb') as file_obj: |
| 96 | + with open(file, "rb") as file_obj: |
112 | 97 | return tika.parser.from_file(file_obj, headers=headers) |
113 | | - |
114 | | - |
115 | | -if __name__ == '__main__': |
116 | | - unittest.main() |
0 commit comments