diff --git a/transformations/gender_randomizer/README.md b/transformations/gender_randomizer/README.md new file mode 100644 index 000000000..2cbf18091 --- /dev/null +++ b/transformations/gender_randomizer/README.md @@ -0,0 +1,45 @@ +# Gender Randomizer 🦎 + ⌨️ → 🐍 + +Author name: Tabitha Sugumar +Author email: t.sugumar@elsevier.com +Author Affiliation: Elsevier + +## What type of a transformation is this? +This transformation changes names in English texts, randomizing selection so that there's an even chance of male and female names. Acknowledging that names are not deterministic identifiers of someone's gender/pronouns, here pronouns are modified to she/her/herself if the selected name is in names/female.txt and to he/his/him/himself if the selected name is in names/male.txt. These files can be modified or replaced as desired. + +## What tasks does it intend to benefit? +This is intended to avoid gender bias in natural language processing models. Run this transformation on text data prior to using it to train a model. + +## Data and Code Provenance +This uses the coreferee library (https://github.com/msg-systems/coreferee). This package exists as a fully developed library and would normally be included in the requirements.txt file and installed via pip. However, the code is downloaded and included locally in this transformation to allow slight modifications to the setup file to make the library compatible with, and possible to install in python 3.7, as required for this transformation. The coreferee library was designed/tested in python 3.8, and the recent update made it compatible with python 3.9. The readme within the directory provides more details, and for complete, updated information about the coreferee package, please see the linked github repository. **All code within the coreferee directory was created by the coreferee library developers.** + +The names directory comes from https://www.kaggle.com/nltkdata/names. 
The README within the directory provides more detail. + +## What are the limitations of this transformation? +This transformation does not handle gendered words such as actor/actress, waiter/waitress, etc. The handling of pronouns is limited to what the coreferee library can identify, and, in this version, is limited to the segment of text fed in one iteration (i.e., if a text is separated and fed in batches, the pronouns/names will not be consistent across the text). + +## Examples of this transformation + +Because this is a randomized transformation, in both the selection of gender and selection of name, test examples are impossible -- the output for a single sentence is expected to be different in each successive run. Instead I've provided some example sentences and outputs for reference. + +1) Input: '“Edward turned to Miss Marple. “It’s like this, you see. As Uncle Mathew grew older, he got more and more suspicious. He didn’t trust anybody.” “Very wise of him,” said Miss Marple. “The depravity of human nature is unbelievable.” ' + + Possible Output: '“Edward turned to Tandie. “It’s like this, you see. As Elvira grew older, she got more and more suspicious. She didn’t trust anybody.” “Very wise of her,” said Tandie. “The depravity of human nature is unbelievable.” ' + +2) Input: 'I think George never tells himself the truth.' + + Possible Output: 'I think Monique never tells herself the truth.' + +3) Input: 'Angela wanted to study abroad that summer but she decided to travel with her friends instead.' + + Possible Output: 'Dominique wanted to study abroad that summer but he decided to travel with his friends instead.' + +4) Input: 'I thought that Michael would go to medschool, but he told me he was applying for law.' + + Possible Output: 'I thought that Arabele would go to medschool, but she told me she was applying for law.' + +5) Input: 'Mattias went to New York for Christmas last year, but he wanted to stay with family for New Years.' 
+ + Possible Output: 'Dinah went to New York for Christmas last year, but she wanted to stay with family for New Years.' + + diff --git a/transformations/gender_randomizer/__init__.py b/transformations/gender_randomizer/__init__.py new file mode 100644 index 000000000..930cdce0b --- /dev/null +++ b/transformations/gender_randomizer/__init__.py @@ -0,0 +1 @@ +from .transformation import * diff --git a/transformations/gender_randomizer/coreferee/.gitignore b/transformations/gender_randomizer/coreferee/.gitignore new file mode 100644 index 000000000..b6e47617d --- /dev/null +++ b/transformations/gender_randomizer/coreferee/.gitignore @@ -0,0 +1,129 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/transformations/gender_randomizer/coreferee/LICENSE b/transformations/gender_randomizer/coreferee/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/transformations/gender_randomizer/coreferee/NOTICE b/transformations/gender_randomizer/coreferee/NOTICE new file mode 100644 index 000000000..483b542cf --- /dev/null +++ b/transformations/gender_randomizer/coreferee/NOTICE @@ -0,0 +1,4 @@ +Coreferee +Copyright (C) 2021 msg systems ag, Germany + +Additional licenses and attributions apply to word lists and training data used for specific languages and are documented within the /coreferee/lang/*/data directories. diff --git a/transformations/gender_randomizer/coreferee/README.md b/transformations/gender_randomizer/coreferee/README.md new file mode 100644 index 000000000..6c708dc3c --- /dev/null +++ b/transformations/gender_randomizer/coreferee/README.md @@ -0,0 +1,483 @@ +Coreferee +========= +Author: Richard Paul Hudson, msg systems ag + +- [1. Introduction](#introduction) + - [1.1 The basic idea](#the-basic-idea) + - [1.2 Getting started](#getting-started) + - [1.2.1 English](#getting-started-en) + - [1.2.2 German](#getting-started-de) + - [1.2.3 Polish](#getting-started-pl) + - [1.3 Background information](#background-information) + - [1.4 Facts and figures](#facts-and-figures) + - [1.4.1 Covered relevant linguistic features](#covered-relevant-linguistic-features) + - [1.4.2 Model performance](#model-performance) +- [2. Interacting with the data model](#interacting-with-the-data-model) +- [3. 
How it works](#how-it-works) + - [3.1 General operation and rules](#general-operation-and-rules) + - [3.1.1 Anaphor pair analysis](#anaphor-pair-analysis) + - [3.1.2 Noun pair detection](#noun-pair-detection) + - [3.1.3 Building the chains](#building-the-chains) + - [3.2 The neural ensemble](#the-neural-ensemble) +- [4. Adding support for a new language](#adding-support-for-a-new-language) +- [5. Open issues/requests for assistance](#open-issues) + + + +### 1. Introduction + + +#### 1.1 The basic idea + +Coreferences are situations where two or more words within a text refer to the same entity, e.g. *__John__ went home because __he__ was tired*. Resolving coreferences is an important general task within the natural language processing field. + +Coreferee is a Python 3 library (tested with version 3.8.7) that is used together with [spaCy](https://spacy.io/) (tested with version 3.0.5) to resolve coreferences within English, German and Polish texts. It is designed so that it is easy to add support for new languages. It uses a mixture of neural networks and programmed rules. + + +#### 1.2 Getting started + + +##### 1.2.1 English + +Presuming you have already installed [spaCy](https://spacy.io/) and one of the English spacy models, install Coreferee from the command line by typing: + +``` +python3 -m pip install coreferee +python3 -m coreferee install en +``` + +Note that: + +- the required command may be `python` rather than `python3` on some operating systems; +- in order to use the transformer-based spaCy model `en_core_web_trf` with Coreferee, you will need to install the spaCy model `en_core_web_lg` as well (see the explanation [here](#model-performance)). + +Then open a Python prompt (type `python3` or `python` at the command line): + +``` +>>> import coreferee, spacy +>>> nlp = spacy.load('en_core_web_trf') +>>> nlp.add_pipe('coreferee') + +>>> +>>> doc = nlp("Although he was very busy with his work, Peter had had enough of it. 
He and his wife decided they needed a holiday. They travelled to Spain because they loved the country very much.") +>>> +>>> doc._.coref_chains.print() +0: he(1), his(6), Peter(9), He(16), his(18) +1: work(7), it(14) +2: [He(16); wife(19)], they(21), They(26), they(31) +3: Spain(29), country(34) +>>> +>>> doc[16]._.coref_chains.print() +0: he(1), his(6), Peter(9), He(16), his(18) +2: [He(16); wife(19)], they(21), They(26), they(31) +>>> +>>> doc._.coref_chains.resolve(doc[31]) +[Peter, wife] +>>> +``` + + +##### 1.2.2 German + +Presuming you have already installed [spaCy](https://spacy.io/) and one of the German spacy models, install Coreferee from the command line by typing: + +``` +python3 -m pip install coreferee +python3 -m coreferee install de +``` + +Note that the required command may be `python` rather than `python3` on some operating systems. + +Then open a Python prompt (type `python3` or `python` at the command line): + +``` +>>> import coreferee, spacy +>>> nlp = spacy.load('de_core_news_lg') +>>> nlp.add_pipe('coreferee') + +>>> +>>> doc = nlp("Weil er mit seiner Arbeit sehr beschäftigt war, hatte Peter genug davon. Er und seine Frau haben entschieden, dass ihnen ein Urlaub gut tun würde. 
Sie sind nach Spanien gefahren, weil ihnen das Land sehr gefiel.") +>>> +>>> doc._.coref_chains.print() +0: er(1), seiner(3), Peter(10), Er(14), seine(16) +1: Arbeit(4), davon(12) +2: [Er(14); Frau(17)], ihnen(22), Sie(29), ihnen(36) +3: Spanien(32), Land(38) +>>> +>>> doc[14]._.coref_chains.print() +0: er(1), seiner(3), Peter(10), Er(14), seine(16) +2: [Er(14); Frau(17)], ihnen(22), Sie(29), ihnen(36) +>>> +>>> doc._.coref_chains.resolve(doc[36]) +[Peter, Frau] +>>> +``` + + +##### 1.2.3 Polish + +Presuming you have already installed [spaCy](https://spacy.io/) and one of the Polish spacy models, install Coreferee from the command line by typing: + +``` +python3 -m pip install coreferee +python3 -m coreferee install pl +``` + +Note that the required command may be `python` rather than `python3` on some operating systems. + +Then open a Python prompt (type `python3` or `python` at the command line): + +``` +>>> import coreferee, spacy +>>> nlp = spacy.load('pl_core_news_lg') +>>> nlp.add_pipe('coreferee') + +>>> +>>> doc = nlp("Ponieważ bardzo zajęty był swoją pracą, Janek miał jej dość. Postanowili z jego żoną, że potrzebują wakacji. Pojechali do Hiszpanii, bo bardzo im się ten kraj podobał.") +>>> +>>> doc._.coref_chains.print() +0: był(3), swoją(4), Janek(7), Postanowili(12), jego(14) +1: pracą(5), jej(9) +2: [Postanowili(12); żoną(15)], potrzebują(18), Pojechali(21), im(27) +3: Hiszpanii(23), kraj(30) +>>> +>>> doc[12]._.coref_chains.print() +0: był(3), swoją(4), Janek(7), Postanowili(12), jego(14) +2: [Postanowili(12); żoną(15)], potrzebują(18), Pojechali(21), im(27) +>>> +>>> doc._.coref_chains.resolve(doc[27]) +[Janek, żoną] +>>> +``` + + +#### 1.3 Background information + +Handling coreference resolution successfully requires training corpora that have been manually annotated with coreferences. 
The [state of the art in coreference resolution](https://paperswithcode.com/sota/coreference-resolution-on-conll-2012) is progressing rapidly, but is largely focussed on techniques that require training corpora that are larger than what is available for most languages and software developers. The [CONLL 2012 training corpus](https://cemantix.org/conll/2012/task-description.html), which is most widely used, has the following restrictions: + +- CONLL 2012 covers English, Chinese and Arabic; there is nothing of comparable size for most other languages. For example, the [corpus](#model-performance) we used to train Coreferee for German is around a tenth of the size of CONLL 2012; + +- CONLL 2012 is not publicly available and has a license that precludes non-members of the Linguistic Data Consortium from using models commercially that CONLL 2012 was used to train. + +Earlier versions of spaCy had an extension, [Neuralcoref](https://github.com/huggingface/neuralcoref), that was excellent but that was never made publicly available for any language other than English. The aim of Coreferee, on the other hand, is to get coreference resolution working for a variety of languages: our focus is less on necessarily achieving the best possible precision and recall for English than on enabling the functionality to be reproduced for new languages as easily and as quickly as possible. Because training data is in such short supply for most languages and is very effort-intensive to produce, it is important to use what is available as effectively as possible. + +There are three essential strategies that human readers employ to recognise coreferences within a text: + +1) Hard grammatical rules that completely preclude entities within a text from coreferring, e.g. *__The house__ stood tall. __They__ went on walking.* Such rules play an especially important role in languages that have grammatical gender, which includes most continental European languages. + +2) Pragmatic tendencies, e.g. 
a word that begins a sentence and that is a grammatical subject is more likely than a word that is in the middle of a sentence and that forms part of a prepositional phrase to be referred back to by a pronoun that follows it in the next sentence. + +3) Semantic restrictions, i.e. which entities can realistically do what to which entities in the world being described. For example, in the sentence *The child saddled __her__ up*, a reader's experience of the world will make it clear that *her* must refer to a horse. + +With unlimited training data, it would be possible to train a system to employ all three strategies effectively from first principles using word vectors. The features of Coreferee that allow effective learning with the limited training data that is available are: + +- Strategy 1) is covered by hardcoded rules for each language that the system is then not required to learn from the training data. Because detailed knowledge of the grammar of a specific natural language is a separate skill set from knowledge of machine learning, the two concerns have been fully separated in Coreferee: rules are covered in a separate module from tendencies. This means that a model for a new language can be generated by a competent Python programmer with no knowledge of machine learning or neural networks; + +- Because the pragmatic tendencies for strategy 2) are very complex and only partially understood by linguists, machine learning and neural networks represent the only realistic way of tackling them. In order to reduce the amount of training data required for neural networks to learn effectively, the syntactic and morphological information supplied by the spaCy models, which have typically been trained with considerably more training data than will be available for coreference resolution, is used as input to neural networks alongside the standard [word vectors](#the-neural-ensemble). 
+ +- Especially with limited training data but probably even with the largest available training datasets, it is unlikely that a system will learn more than the very simplest tendencies for strategy 3). However, making word vectors available to neural networks ensures that Coreferee can make use of whatever tendencies are discernable. + +Coreferee started life to assist the [Holmes](https://github.com/msg-systems/holmes-extractor) project, which is used for information extraction and intelligent search. Coreferee is in no way dependent on Holmes, but this original aim has led to several design decisions that may seem somewhat atypical. Several of them could easily be altered by someone with a requirement to do so: + +- A mention within Coreferee does not consist of a span, but rather of a single token or of a list of tokens that stand in a coordination relationship to one another. + +- Coreferee does not capture coreferences that are unambiguously evident from the structure of a sentence. For example, the identity of *he* and *doctor* in the sentence *__He__ was a __doctor__* is not reported by Coreferee because it can easily be derived from a simple analysis of the copular structure of the phrase. + +- Repetitions of first- and second-person pronouns (*__I__ was tired. __I__ went home*) are not captured as they add no value either for information extraction or for intelligent search. + +- Coreferee focusses heavily on anaphors (for English: pronouns). There is only relatively limited capture of coreference between noun phrases, and it is entirely rule-based. (In turn, however, this serves the aim of working with limited training data: noun-phrase coreference is a more exacting task than anaphor resolution.) + +- Because search performance is much more important for Holmes than document parsing performance, Coreferee performs all analysis eagerly as each document passes through the pipe. 
+ + +#### 1.4 Facts and figures + + +##### 1.4.1 Covered relevant linguistic features + + + + + + +
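<table>
  <tr>
    <th rowspan="2">Language</th>
    <th rowspan="2">ISO 639-1</th>
    <th colspan="3">Anaphor expression</th>
    <th rowspan="2">Agreement classes</th>
    <th colspan="2">Coordination expression</th>
  </tr>
  <tr>
    <th>Pronominal</th>
    <th>Verbal</th>
    <th>Prepositional</th>
    <th>Conjunctive</th>
    <th>Comitative</th>
  </tr>
  <tr>
    <td>English</td>
    <td>en</td>
    <td>My friend came in. He was happy.</td>
    <td>-</td>
    <td>-</td>
    <td>Three singular (natural genders) and one plural class.</td>
    <td>Peter and Mary</td>
    <td>-</td>
  </tr>
  <tr>
    <td>German</td>
    <td>de</td>
    <td>Mein Freund kam rein. Er war glücklich.</td>
    <td>-</td>
    <td>Ich benutzte das Auto und hatte damit einige Probleme.</td>
    <td>Three singular (grammatical genders) and one plural class.</td>
    <td>Peter und Maria</td>
    <td>-</td>
  </tr>
  <tr>
    <td>Polish</td>
    <td>pl</td>
    <td>Wszedł mój kolega. On był szczęśliwy.</td>
    <td>Wszedł mój kolega. Szczęśliwy był.<sup>1</sup></td>
    <td>-<sup>2</sup></td>
    <td>Three singular (grammatical genders) and two plural (natural genders) classes.</td>
    <td>Piotr i Kasia</td>
    <td>1) Piotr z Kasią przyszli;<br>2) Widziałem Piotra i przyszli z Kasią</td>
  </tr>
</table>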
+ +1. Only subject zero anaphors are covered. Object zero anaphors, e.g. Wypiłeś wodę? Tak, wypiłem. are not in scope because they are mainly used colloquially and do not normally occur in the types of text for which [Coreferee is primarily designed](#background-information). Handling them would require creating or locating a detailed dictionary of verb valencies. + +2. Polish has a restricted use of anaphoric prepositions in some formal registers, e.g. *Skończyło się to __dlań__ smutno*. Because the Polish spaCy models were trained on news texts, they do not recognise such prepositions, meaning that Coreferee cannot capture them either. + + +##### 1.4.2 Model performance + + + + + + + +
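<table>
  <tr>
    <th rowspan="2">Language</th>
    <th rowspan="2">ISO 639-1</th>
    <th rowspan="2">Training corpora</th>
    <th rowspan="2">Total words in training corpora</th>
    <th colspan="2">*_trf models</th>
    <th colspan="2">*_lg models</th>
    <th colspan="2">*_md models</th>
    <th colspan="2">*_sm models</th>
  </tr>
  <tr>
    <th>Anaphors in 20%</th>
    <th>Accuracy (%)</th>
    <th>Anaphors in 20%</th>
    <th>Accuracy (%)</th>
    <th>Anaphors in 20%</th>
    <th>Accuracy (%)</th>
    <th>Anaphors in 20%</th>
    <th>Accuracy (%)</th>
  </tr>
  <tr>
    <td>English</td>
    <td>en</td>
    <td>ParCor/ LitBank</td>
    <td>393564</td>
    <td>2967</td>
    <td>83.52</td>
    <td>2903</td>
    <td>83.98</td>
    <td>2907</td>
    <td>83.21</td>
    <td>2878</td>
    <td>82.49</td>
  </tr>
  <tr>
    <td>German</td>
    <td>de</td>
    <td>ParCor</td>
    <td>164300</td>
    <td>-</td>
    <td>-</td>
    <td>625</td>
    <td>77.28</td>
    <td>620</td>
    <td>77.10</td>
    <td>625</td>
    <td>76.00</td>
  </tr>
  <tr>
    <td>Polish</td>
    <td>pl</td>
    <td>PCC</td>
    <td>548268</td>
    <td>-</td>
    <td>-</td>
    <td>1553</td>
    <td>72.12</td>
    <td>1521</td>
    <td>71.07</td>
    <td>1383</td>
    <td>70.21</td>
  </tr>
</table>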
+ +Coreferee produces a range of neural-network models for each language corresponding to the various spaCy models for that language. The [neural network inputs](#the-neural-ensemble) include word vectors. With `_sm` (small) models, both spaCy and Coreferee use context-sensitive tensors as an alternative to word vectors. `_trf` (transformer-based) models, on the other hand, do not use or offer word vectors at all. To remedy this problem, the model configuration files (`config.cfg` in the directory for each language) allow a **vectors model** to be specified for use when a main model does not have its own vectors. Coreferee then combines the linguistic information generated by the main model with vector information returned for the individual words in each document by the vectors model. + +Because the Coreferee models are rather large (70GB-80GB for the group of models for a given language) and because many users will only be interested in one language, the group of models for a given language is installed using `python3 -m coreferee install` as demonstrated in the introduction. All Coreferee models are more or less the same size; a larger spaCy model does not equate to a larger Coreferee model. As the figures above demonstrate, the accuracy of Coreferee corresponds closely to the size of the underlying spaCy model, and users are urged to use the larger spaCy models. It is in any case unclear whether there is a situation in which it would make sense to use Coreferee with an `_sm` model as the Coreferee model would then be considerably larger than the spaCy model! + +Assessing and comparing the precision and recall of anaphor resolution algorithms is notoriously difficult. For one thing, two human annotators of the same data will not always agree (and, indeed, there are some cases where Coreferee and a training annotator disagree where Coreferee's interpretation seems the more plausible!) 
And the same algorithm may perform with wildly different accuracies with different test documents depending on how clearly the documents are written and how often there are competing interpretations of individual anaphors. + +Because Coreferee decides where there are anaphors to resolve (as opposed to what to resolve them to) in a purely rule-based fashion and because there is not necessarily a perfect correspondence between the types of anaphor these rules are aiming to capture and the types of anaphor covered by any given training corpus, a recall measure would not be meaningful. Instead, we compare the performance between spaCy models — and, during tuning, between different hyperparameter values — by counting the total **number of anaphors** that the rules find within the test documents as parsed by the spaCy model being used and that are also annotated with a coreference within the training data. The **accuracy** then expresses the percentage of these anaphors for which the coreference annotated by the corpus author is part of the chain(s) suggested by Coreferee. In situations where the training data specifies a chain C->B->A and B is a type of coreference that Coreferee is not aiming to capture, C->A is used as a valid training reference. + +Assessing the performance of a model requires test data that was not used for training. At the same time, however, Coreferee is explicitly designed for use in situations where training data is at a premium, and it seems a shame to waste the learning opportunity offered by specific training documents just to assess a model a single time. To enable valid testing and at the same time to maximize the use of training data, each model is trained twice. On the first run, around 80% of the data is used for training and the remaining 20% for testing. (In practice, these percentages can vary somewhat because individual documents cannot be split between the two groups.) 
This first model is then discarded and a second training run is carried out with the available data in its entirety. The assumption is that, because it is based on more training data, the performance of this second model can be presumed to be at least as good as the measured performance of the first model. The obvious drawback, however, is that there is no way of verifying this. + +Since coreference between noun phrases is restricted to a small number of cases captured by [simple rules](#noun-pair-detection), the model assessment figures presented here refer solely to [anaphor resolution](#anaphor-pair-analysis). When anaphor resolution accuracy is being assessed for a test document, noun pairs are detected and [added to chains](#building-the-chains) according to the standard rules, but they do not feature in the accuracy figures. On some rare occasions, however, they may have an indirect effect on accuracy by affecting the semantic considerations that determine which anaphors can be added to which chains. + +Note that **Total words in training corpora** in the table above refers to 100% of the available data for each language, while the **Anaphors in 20%** columns specify the number of anaphors found in the roughly 20% of this data that is used for model assessment. + + +### 2 Interacting with the data model + +Coreferee generates **Chain** objects where each chain is an ordered collection of **Mention** objects that have been analysed as referring to the same entity. Each mention holds references to one or more spaCy token indexes; a chain can have a maximum of one mention with more than one token (most often its leftmost mention). A given token index occurs in a maximum of two mentions; if it belongs to two mentions the mentions will belong to different chains and one of the mentions will contain multiple tokens. All chains that refer to a given `Doc` or `Token` object are managed on a `ChainHolder` object which is accessed via `._.coref_chains`.
Reproducing part of the example from the [introduction](#getting-started-en): + +``` +>>> doc = nlp("Although he was very busy with his work, Peter had had enough of it. He and his wife decided they needed a holiday. They travelled to Spain because they loved the country very much.") +>>> +>>> doc._.coref_chains.print() +0: he(1), his(6), Peter(9), He(16), his(18) +1: work(7), it(14) +2: [He(16); wife(19)], they(21), They(26), they(31) +3: Spain(29), country(34) +>>> +>>> doc[16]._.coref_chains.print() +0: he(1), his(6), Peter(9), He(16), his(18) +2: [He(16); wife(19)], they(21), They(26), they(31) +>>> +``` + +Chains and mentions can be navigated much as if they were lists: + +``` +>>> len(doc._.coref_chains) +4 +>>> doc._.coref_chains[1].pretty_representation +'1: work(7), it(14)' +>>> len(doc._.coref_chains[1]) +2 +>>> doc._.coref_chains[1][1] +[14] +>>> len(doc._.coref_chains[1][1]) +1 +>>> doc._.coref_chains[1][1][0] +14 +>>> +>>> for chain in doc._.coref_chains: +... for mention in chain: +... print(mention) +... +[1] +[6] +[9] +[16] +[18] +[7] +[14] +[16, 19] +[21] +[26] +[31] +[29] +[34] +>>> +``` + +A document with Coreferee annotations can be saved and loaded using the normal spaCy methods: the annotations survive the serialization and deserialization. To facilitate this, Coreferee does not store references to spaCy objects, but merely to token indexes. However, each class has a pretty representation designed for human consumption that contains information from the spaCy document and that is generated eagerly when the object is first instantiated. 
Additionally, the `ChainHolder` object has a `print()` method that prints its chains' pretty representations with one chain on each line: + +``` +>>> doc._.coref_chains +[0: [1], [6], [9], [16], [18], 1: [7], [14], 2: [16, 19], [21], [26], [31], 3: [29], [34]] +>>> doc._.coref_chains.pretty_representation +'0: he(1), his(6), Peter(9), He(16), his(18); 1: work(7), it(14); 2: [He(16); wife(19)], they(21), They(26), they(31); 3: Spain(29), country(34)' +>>> doc._.coref_chains.print() +0: he(1), his(6), Peter(9), He(16), his(18) +1: work(7), it(14) +2: [He(16); wife(19)], they(21), They(26), they(31) +3: Spain(29), country(34) +>>> +>>> doc._.coref_chains[0] +0: [1], [6], [9], [16], [18] +>>> doc._.coref_chains[0].pretty_representation +'0: he(1), his(6), Peter(9), He(16), his(18)' +>>> +>>> doc._.coref_chains[0][0] +[1] +>>> doc._.coref_chains[0][0].pretty_representation +'he(1)' +>>> +``` +Each chain has an index number that is unique within the document. It is displayed in the representations of `Chain` and `ChainHolder` and can also be accessed directly: + +``` +>>> doc._.coref_chains[2].index +2 +``` + +Each chain can also return the index number of the mention within it that is **most specific**: noun phrases are more specific than anaphors and proper names more specific than common nouns: + +``` +>>> doc = nlp("He went to Spain. He loved the country. He often told his friends about it.") +>>> doc._.coref_chains.print() +0: He(0), He(5), He(10), his(13) +1: Spain(3), country(8), it(16) +>>> +>>> doc._.coref_chains[1].most_specific_mention_index +0 +>>> doc._.coref_chains[1][doc._.coref_chains[1].most_specific_mention_index].pretty_representation +'Spain(3)' +``` + +This information is used as the basis for the `resolve()` method shown in the [initial example](#getting-started-en): the method traverses multiple chains to find the most specific mention or mentions within the text that describe a given anaphor or noun phrase head. 
+ + +### 3 How it works + + +#### 3.1 General operation and rules + + +##### 3.1.1 Anaphor pair analysis + +For each language, methods are implemented that determine: + +- for each token, its dependent siblings, e.g. *Jane* is a dependent sibling of *Peter* in the phrase *Peter and Jane*; +- for each token, whether the token is an anaphor (broadly speaking for English: a third-person pronoun); +- for each token, whether the token heads an independent noun phrase that an anaphor could refer to; +- for any independent-noun/anaphor or anaphor/anaphor pair within a text, whether or not semantic and syntactic constraints would permit coreference between the members of the pair. For example, there are no circumstances in which `they` and `her` could ever corefer within a text. When an entity has dependent siblings, the method is called twice, once with and once without the siblings. Possible coreferents are considered up to five sentences away from each anaphor looking backwards through the text. The method returns `2` (coreference permitted), `1` (coreference unlikely but possible) or `0` (coreference impossible). Alongside the language-specific rules, there are a number of language-independent rules which can lead to a `1` rather than a `2` analysis. + +Each anaphor in a document emerges from an analysis using these methods with a list of elements to which it could conceivably refer. The list for each anaphor is scored using the [neural ensemble](#the-neural-ensemble) and the possible referents are ordered by decreasing likelihood. Regardless of their neural ensemble score, any pairs with the rules analysis `1` (coreference unlikely but possible) are ordered behind pairs with the rules analysis `2` (coreference permitted). + +Note that anaphora is understood in a broad sense that includes cataphora, i.e. pronouns that refer forwards rather than backwards like the initial pronoun in the English example in the [introduction](#getting-started-en). 
Language-independent rules are used to determine situations in which the syntactic relationship between two elements within the same sentence permits cataphora. + +Replacing the neural ensemble scoring with a naive algorithm that always selects the closest potential referent for each anaphor with rules analysis `2` (or `1` if there is no `2`) yields an accuracy of around 60% as opposed to the 84% reported [above](#model-performance). This demonstrates the respective contribution of each processing strategy to the overall result and provides a useful benchmark for any further machine learning experiments. + + +##### 3.1.2 Noun pair detection + +For each language the following are implemented: + +- a method that determines whether a noun phrase is indefinite, or, in languages that do not mark indefiniteness, whether it could be interpreted as being indefinite; +- a method that determines whether a noun phrase is definite, or, in languages that do not mark definiteness, whether it could be interpreted as being definite; +- a dictionary from named entity labels to common nouns that refer to members of each named entity class. For example, the English named entity class `ORG` maps to the nouns `['company', 'firm', 'organisation']`. + +This information is used in a purely rule-based fashion to determine probable coreference between pairs of noun phrases: broadly, definite noun phrases that do not contain additional new information refer back to indefinite or definite noun phrases with the same head word, and named entities are referred back to by the common nouns that describe their classes. Noun pairs can be a maximum of two sentences apart as opposed to the five sentences that apply to anaphoric references. + + +##### 3.1.3 Building the chains + +Coreferee goes through each document in natural reading order from left to right building up chains of anaphors and independent noun phrases. 
For each anaphor, the highest scoring interpretation as suggested by the neural ensemble is preferred. However, because the semantic (but not the syntactic) restrictions on anaphoric reference apply between all pairs formed by members of a chain rather than merely between adjacent members, it may turn out that the highest scoring interpretation is not permissible because it would lead to a semantically inconsistent chain. The interpretation with the next highest score is then tried, and so on until no interpretations remain. + +In the unusual situation that all suggested interpretations of a given anaphor have been found to be semantically impossible, it is likely that one of the interpretations of the preceding anaphors in the text was incorrect: authors do not normally use anaphors that do not refer to anything. Reading the text: + + ``` + The woman looked down and saw Lesley. She stood up and greeted him. + ``` + + most readers will initially understand `she` as referring to `Lesley`. Only when one reaches the end of the sentence does it become clear that Lesley must be a man and that `she` actually refers to `the woman`. A quick test shows that Coreferee is capable of handling such ambiguity: + +``` +>>> doc = nlp('The woman looked down and saw Lesley. She stood up and greeted her.') +>>> doc._.coref_chains.print() +0: woman(1), her(13) +1: Lesley(6), She(8) +>>> +>>> doc = nlp('The woman looked down and saw Lesley. She stood up and greeted him.') +>>> doc._.coref_chains.print() +0: woman(1), She(8) +1: Lesley(6), him(13) +``` + +This is achieved using a **rewind**: at a point in a text where no suitable interpretation can be found for an anaphor, alternative interpretations of preceding anaphors are investigated in an attempt to find an overall interpretation that fits. 
+ + +#### 3.2 The neural ensemble + +The likelihood scores for [anaphoric pairs](#anaphor-pair-analysis) are calculated using an ensemble of five identical multilayer perceptrons using a rectified linear activation in the input and hidden layers and a sigmoid activation in the output layer. Each of the five networks outputs a probability between 0 and 1 for a given potential anaphoric pair and the mean of the five probabilities is used as the score for that pair. + +The inputs to each of the five networks consist of: + +1) A **feature map** for each member of the pair. As the first step in training, Coreferee goes through the entire training corpus and notes all the relevant morphological and syntactic information that relevant tokens, their syntactic head tokens and their syntactic children can have. This information is stored with the neural ensemble for each model as a **feature table**. The feature map for a given token (or list of tokens) is a one-hot representation with respect to the feature table. + +2) A **position map** for each member of the pair capturing such information as its position within its sentence and its depth within the dependency tree generated for its sentence. + +3) **Vector squeezers** for each member of the pair and, where existent, for the syntactic head of each member of the pair. The input to a vector squeezer is the [vector or context-sensitive tensor](#model-performance) for the spaCy token in question. A vector squeezer consists of three neural layers and outputs a representation that is only three neurons wide and that is fed into the rest of the network within the same layer as the other, non-vector inputs. + +4) A **compatibility map** capturing the relationship between the members of the pair. Alongside the distance separating them in words and in sentences, this includes the number of common features in their feature maps and the cosine similarity between their syntactic heads.
+ +Using a vector squeezer has been consistently found to offer slightly better results either than feeding the full-width vectors into the network directly or than omitting them entirely. Possible intuitions that might explain this behaviour are: the reduced width forces the network to learn and attend to a constrained number of specific semantic features relevant to coreference resolution; and the reduced width limits the attention of the network on the raw vectors in a situation where the training data is insufficient to make effective use of them. + +Perhaps somewhat unusually, when a vector is required to represent a coordinated phrase, the mean of the vectors of the individual coordinated tokens is used rather than the mean of the vectors of all the tokens in the coordinated span. + +The structure shared by each of the five networks in the ensemble is shown in the attached diagram: + +![Structure of an ensemble member](https://github.com/msg-systems/coreferee/blob/master/docs/nn_structure.png) + +Cross-linguistically, four training epochs were found to offer the best results; adding more training epochs caused the accuracy to start to tail off again owing to overfitting. Training for all relevant spaCy models for a given language takes between one and two hours on a high-end laptop. + + +### 4. Adding support for a new language + +One of the main design goals of Coreferee was to make it easy to add support for further languages. The prerequisites are: + +- you will need to know the grammar of the language you are adding well enough to make detailed decisions about which coreferences are normal, which are marginally possible and which are impossible; +- you will need to be able to program in Python. + +You should **not** need to get involved in the details of the neural ensemble; Coreferee should do that for you. 
+ +The steps involved are: + +1) Create a directory under `coreferee/lang/` with the same structure as the existing language-specific directories; it is probably easiest to copy one of them. + +2) The file `config.cfg` lists the spaCy models for which you wish to generate Coreferee models. You will need to specify a [separate vectors model](#model-performance) for any of the spaCy models that lack vectors or context-dependent tensors of their own — see the English `config.cfg` for an example. Each config entry specifies a minimum (`from_version`) and maximum (`to_version`) spaCy model version number that the generated Coreferee model will support. During development, both numbers will normally refer to a single version number. Later, when an updated spaCy model version is brought out, testing will be required to see whether the existing Coreferee model still supports the new spaCy model version. If so, the maximum version number can be increased; if not, a new config entry will be necessary to accommodate the new Coreferee model that will then be required. + +3) The file `rules.py` in the main code directory contains an abstract class `RulesAnalyzer` that must be implemented by a class `LanguageSpecificRulesAnalyzer` within a file called `language_specific_rules.py` in each language-specific directory. The abstract class `RulesAnalyzer` contains docstrings that specify for each abstract property and method the contract to which implementing classes should adhere. Looking at the existing language-specific rules is also likely to be helpful. The method `is_potential_anaphor()` is normally the most work to create: here it is probably worth looking at the existing English method for languages with natural gender or at the existing German method for languages with grammatical gender. (Polish has an unusually complex gender system, so the Polish example is unlikely to be helpful even as a basis for working with other Slavonic languages.) 
+ +4) There are some situations where word lists can be helpful. If a list is placed in a file `<name>.dat` within the `data` directory under a language-specific directory, the contents will be automatically made available within the `LanguageSpecificRulesAnalyzer` for the language in question as a variable `self.<name>` that contains a list where each entry corresponds to a line from the file; comments with `#` are supported. If you use a word list, please ensure it can be published under the Apache 2 license and give appropriate attribution within the language-specific directory in the `LICENSE` and, where appropriate, in a `COPYING` file. + +5) Male and female names are managed on a cross-linguistic basis because there is no reason why one would not want e.g. a German female name to be recognised within an English text. Names are automatically made available to all `RulesAnalyzer` implementations as properties `self.male_names`, `self.female_names`, `self.exclusively_male_names` and `self.exclusively_female_names`. If you can locate a suitable names list for the language you are working on that is available under a suitable license, add the attribution to the `LICENSE` file under `common/` and merge your names into the two files. Please tidy up the result so that the files are free of duplicates and in alphabetical order. + +6) Create a language-specific directory under `tests/` with a file `test_rules_<lang>.py` (where `<lang>` is the ISO 639-1 code of the language) to test the rules you have written in steps 3) to 5). Although one of the corresponding files for one of the existing languages is likely to be the best starting point, you should also be sure to test any extra features specific to the language you are working on. The test tooling is designed to run each test against all spaCy models specified in `config.cfg`. At this stage in development, you will need to temporarily add a parameter `add_coreferee=False` to the call to `get_nlps()` in the `setUp()` method.
Otherwise, all tests will fail because the test tooling will attempt to add the as yet non-existent Coreferee model to the pipe. + +7) Some tests may fail with one of the smaller spaCy models because it produces incorrect syntactic representations rather than because of any issue with your rule code. For such cases, a parameter `excluded_nlps` can be specified within a test method to prevent it from being executed with specific spaCy models. + +8) Locate a training corpus or corpora. Again, you should make sure that the resulting models can be published under the Apache 2 license. Add new loader class(es) for the corpus or corpora to the existing loader classes in the `train/loaders.py` file. Loader classes must implement the `GenericLoader` abstract class that is located at the top of this file. The job of a loader is to read a specific training corpus format and to create and annotate spaCy documents with coreferences marked within corpora of that format. All the data for a single training run should be placed in a single directory; if there are multiple types of training data loaded by different loaders, each loader will need to be able to recognise the data it is required to read by examining the names of the files within the directory. It is worth spending some time checking with `print()` statements that the loaders annotate as expected, otherwise the training step that follows has little chance of success! + +9) You are now ready to begin training. The training command must be issued from the `coreferee/` root directory. Coreferee will place a zip file into the log directory `<log_dir>`. Alongside the accuracy for each model, the files in the zip file show the coreference chains produced for each test document as well as a list of incorrect annotations where the Coreferee interpretation differed from the one specified by the training corpus author — information that is invaluable for debugging and rules improvement.
As an example, the training command for English is: + +``` +python3 -m coreferee train --lang en --loader_classes ParCorLoader,LitBankANNLoader --data_dir <data_dir> --log_dir <log_dir> +``` + +10) Once you are happy with your models, install them. The command must be issued from the `coreferee/` root directory, otherwise Coreferee will attempt to download the models from GitHub where they are not yet present: + +``` +python3 -m coreferee install <lang> +``` + +11) Before you attempt any regression tests that involve running Coreferee as part of the spaCy pipe, you must remove the `add_coreferee=False` parameter you added above. A setup where the parameter is present in one test file but absent in the other test file will not work because the spaCy models are loaded once per test run. + +12) Again using one of the existing languages as a starting point, create a `test_smoke_tests_<lang>.py` file in your test directory. The smoke tests are designed to make sure that the basic features of Coreferee are working properly for the language in question and should also cover any features that have posed a particular challenge while developing the rules. + +13) Run `pylint` on your `language_specific_rules.py`. Obviously there is no need to achieve a perfect score, but issues that can be easily remedied like overlong lines should be addressed. + +14) Go through the documentation (`README.md` and `SHORTREADME.md`) adding information about the new language wherever the supported languages are listed in some way. + +15) Issue a pull request. We ask that you supply us with the zip file generated during training. Because this will contain a considerable amount of raw information from the training corpora, it will normally be preferable from a licensing viewpoint to send it out of band rather than attaching it to the pull request. + + +### 5.
Open issues / requests for assistance + +1) At present Coreferee uses Keras with TensorFlow, which leads to the limitation that `nlp.pipe()` cannot be called with `n_process > 1` with forked processes. It would be greatly preferable if Coreferee could be converted to use Thinc instead: this would get rid of this limitation and generally fit much better into the spaCy ecosystem. + +2) Because optimising parsing speed was not a priority in the [project within which Coreferee came into being](#background-information), Coreferee is written purely in Python; it would be helpful if somebody could convert it to Cython. + +3) There are almost certainly changes to the inputs and structure of the neural ensemble that would lead to improvements in accuracy, both cross-linguistically and for specific languages. The only caveat to bear in mind when trying out changes is that it should be possible for someone who does not understand neural networks to write rules for a new language. This means that Coreferee should detect necessary differences in the neural network behaviour between languages automatically rather than requiring the trainer to configure them. + +4) It is unclear at present why the accuracy for English is better than for German and why the accuracy for German is better than for Polish. One wholly speculative possibility is that the contents of the [compatibility map](#the-neural-ensemble) are better suited to pronominal than to verbal anaphora. This looks to be a promising avenue of research; understanding why the difference is occurring may well reveal a means of improving accuracy across the board. + +5) It would be useful if somebody could find a way of benchmarking Coreferee against other coreference resolution solutions, especially for English. 
One problem this would probably present is that using a benchmark necessitates a normative scope where a system aims to find exactly those types of coreference marked within the benchmark corpus, whereas the scope of Coreferee was determined by project requirements. diff --git a/transformations/gender_randomizer/coreferee/SHORTREADME.md b/transformations/gender_randomizer/coreferee/SHORTREADME.md new file mode 100644 index 000000000..b61a02756 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/SHORTREADME.md @@ -0,0 +1,5 @@ +Coreferences are situations where two or more words within a text refer to the same entity, e.g. *__John__ went home because __he__ was tired*. Resolving coreferences is an important general task within the natural language processing field. + +Coreferee is a Python 3 library (tested with version 3.8.7) that is used together with [spaCy](https://spacy.io/) (tested with version 3.0.5) to resolve coreferences within English, German and Polish texts. It is designed so that it is easy to add support for new languages. It uses a mixture of neural networks and programmed rules. + +For more information, please see the [main documentation on GitHub](https://github.com/msg-systems/coreferee). diff --git a/transformations/gender_randomizer/coreferee/coreferee/__init__.py b/transformations/gender_randomizer/coreferee/coreferee/__init__.py new file mode 100644 index 000000000..ebac557e3 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2021 msg systems ag + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# Copyright 2021 msg systems ag

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Command-line entry point: ``python -m coreferee train ...`` trains models,
# ``python -m coreferee install <lang>`` installs them via pip, and any other
# invocation prints the usage text.

import argparse
import os
import sys

import pkg_resources
from spacy.util import run_command

from .manager import COMMON_MODELS_PACKAGE_NAMEPART
from .training.train import TrainingManager

DOWNLOAD_URL = "https://github.com/msg-systems/coreferee/raw/master/models"

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command")

# --- "train" sub-command -----------------------------------------------------
train_parser = subparsers.add_parser(
    "train",
    help="Train models for a language. Must be executed from the root directory of the checked-out repository. Type *python -m coreferee train -h* for more information.",
)
required_named = train_parser.add_argument_group("required arguments")
required_named.add_argument(
    "--lang",
    dest="lang",
    required=True,
    help="The ISO 639-1 code for the language to train",
)
required_named.add_argument(
    "--loader_classes",
    dest="loader_classes",
    required=True,
    help="The class name(s) of the training data loader within *coreferee.training.loaders*. Multiple class names should be comma-separated.",
)
required_named.add_argument(
    "--data_dir",
    dest="data_dir",
    required=True,
    help="The path of the directory that contains the training data",
)
required_named.add_argument(
    "--log_dir",
    dest="log_dir",
    required=True,
    help="The path of the directory to which to write log files",
)

# --- "install" sub-command ---------------------------------------------------
install_parser = subparsers.add_parser(
    "install",
    help="Install models for a language. Type *python -m coreferee install -h* for more information.",
)

install_parser.add_argument(
    "--force-reinstall",
    default=False,
    action="store_true",
    help="Forces a reinstall when models are downloaded from Github (when models are being installed from the local filesystem, a reinstall always takes place)",
)
install_parser.add_argument(
    "lang", help="The ISO 639-1 code for the language to install"
)

args = parser.parse_args()
if args.command == "train":
    TrainingManager(
        __name__, args.lang, args.loader_classes, args.data_dir, args.log_dir
    ).train_models()
elif args.command == "install":
    file_system_root = pkg_resources.resource_filename(__name__, "")
    models_dirname = os.path.join(
        file_system_root, "..", "models", args.lang
    )
    # Commands are passed as argument lists rather than one joined string so
    # that an interpreter or model path containing spaces is not split apart.
    if "site-packages" not in models_dirname and os.path.isdir(models_dirname):
        # Running from a checkout that ships the models: install them from
        # the local directory, always forcing a reinstall.
        run_command(
            [
                sys.executable,
                "-m",
                "pip",
                "install",
                "--force-reinstall",
                models_dirname,
            ]
        )
    else:
        # Otherwise download the zipped model package for the language.
        url = "".join(
            (
                DOWNLOAD_URL,
                "/",
                COMMON_MODELS_PACKAGE_NAMEPART,
                args.lang,
                ".zip",
            )
        )
        pip_command = [sys.executable, "-m", "pip", "install"]
        if args.force_reinstall:
            pip_command.append("--force-reinstall")
        pip_command.append(url)
        run_command(pip_command)
else:
    parser.print_help()
from collections import deque

from spacy.language import Language
from spacy.tokens import Doc, Token

from .data_model import Chain, FeatureTable, Mention
from .rules import RulesAnalyzerFactory
from .tendencies import TendenciesAnalyzer


class Annotator:
    """Builds coreference chains for a document and stores them on
    *doc._.coref_chains* and on each token's *token._.coref_chains*.

    Chains are accumulated in two dictionaries that map token indexes to
    shared sets of *Mention* objects; every public-facing helper below receives
    and mutates those dictionaries in place.
    """

    # Maximum number of alternative referents tried per anaphor, and the size
    # of the window of recently resolved anaphors kept for rewinding.
    RETRY_DEPTH = 5

    def __init__(
        self,
        nlp: Language,
        vectors_nlp: Language,
        feature_table: FeatureTable,
        keras_ensemble,
    ):
        self.keras_ensemble = keras_ensemble
        self.rules_analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)
        self.tendencies_analyzer = TendenciesAnalyzer(
            self.rules_analyzer, vectors_nlp, feature_table
        )

    @staticmethod
    def _remove_temp_entries(obj) -> None:
        """Deletes every attribute of *obj* whose name starts with 'temp_'."""
        # Collect the names first: the dictionary must not change size while
        # it is being iterated.
        for name in [n for n in obj.__dict__ if n.startswith("temp_")]:
            obj.__dict__.pop(name)

    @staticmethod
    def record_mention(
        preceding_mention: Mention,
        token: Token,
        token_indexes_without_coordination_to_mention_sets: dict,
        token_indexes_with_coordination_to_mention_sets: dict,
    ) -> None:
        """Links *token* into the chain that contains *preceding_mention*,
        creating the chain if it does not exist yet.

        *token_indexes_without_coordination_to_mention_sets* is the main means of
        generating and tracking chains.

        *token_indexes_with_coordination_to_mention_sets* tracks the ends of chains that
        end in a mention with coordination. It is necessary for the case where two anaphors
        both refer to a mention with coordination. It has to be kept separate from the main
        dictionary to cover the case where a mention with coordination itself contains an
        anaphor that belongs to a separate chain.
        """
        if len(preceding_mention.token_indexes) > 1:
            # Mention with coordination: tracked in the separate dictionary.
            if (
                preceding_mention.root_index
                in token_indexes_with_coordination_to_mention_sets
            ):
                mention_set = token_indexes_with_coordination_to_mention_sets[
                    preceding_mention.root_index
                ]
            else:
                mention_set = {preceding_mention}
                for token_index in preceding_mention.token_indexes:
                    token_indexes_with_coordination_to_mention_sets[
                        token_index
                    ] = mention_set
        else:
            preceding_token = token.doc[preceding_mention.root_index]
            if (
                preceding_token.i
                in token_indexes_without_coordination_to_mention_sets
            ):
                mention_set = (
                    token_indexes_without_coordination_to_mention_sets[
                        preceding_token.i
                    ]
                )
            else:
                mention_set = {preceding_mention}
                token_indexes_without_coordination_to_mention_sets[
                    preceding_token.i
                ] = mention_set
        mention_set.add(Mention(token, False))
        if token.i in token_indexes_without_coordination_to_mention_sets:
            # *token* already belongs to a chain: merge that chain into this
            # one and repoint all of its members at the merged set.
            mention_set.update(
                token_indexes_without_coordination_to_mention_sets[token.i]
            )
            for mention in token_indexes_without_coordination_to_mention_sets[
                token.i
            ]:
                token_indexes_without_coordination_to_mention_sets[
                    mention.root_index
                ] = mention_set
        else:
            token_indexes_without_coordination_to_mention_sets[
                token.i
            ] = mention_set

    def get_compatibility(self, token: Token, mention_set: set) -> int:
        """Checks the compatibility of *token* with the possible chain represented by *mention_set*
        and expresses it with the semantics of *RuleAnalyzer.is_potential_anaphoric_pair()*.

        The result is the lowest value obtained for any mention in the set; a
        result of 2 is downgraded to 1 when the set contains no independent
        noun.
        """

        result = 2
        mention_set_contains_referring_mention = False
        for mention in mention_set:
            if self.rules_analyzer.is_independent_noun(
                token.doc[mention.root_index]
            ):
                mention_set_contains_referring_mention = True
            working_result = self.rules_analyzer.is_potential_anaphoric_pair(
                mention, token, False
            )
            if working_result < result:
                result = working_result
            if result == 0:
                # Cannot get any lower, so stop looking.
                break
        if result == 2 and not mention_set_contains_referring_mention:
            result = 1
        return result

    def temp_annotate_any_coreferring_noun_link(
        self,
        token: Token,
        sentence_deque: deque,
        token_indexes_without_coordination_to_mention_sets: dict,
        token_indexes_with_coordination_to_mention_sets: dict,
    ) -> None:
        """Searches backwards through the sentences in *sentence_deque* for a
        token with which *token* can form a coreferring noun pair and records
        the first link found. Does nothing if *token* is not marked as
        potentially referring.
        """
        doc = token.doc
        if not token._.coref_chains.temp_potentially_referring:
            return
        for sent in sentence_deque:
            # Walk the sentence right-to-left, considering only tokens that
            # precede *token*.
            for preceding_token in (
                doc[index]
                for index in range(sent.end, sent.start - 1, -1)
                if index < token.i
            ):
                if (
                    preceding_token._.coref_chains.temp_potentially_referring
                    and self.rules_analyzer.is_potential_coreferring_noun_pair(
                        preceding_token, token
                    )
                ):
                    self.record_mention(
                        Mention(preceding_token, False),
                        token,
                        token_indexes_without_coordination_to_mention_sets,
                        token_indexes_with_coordination_to_mention_sets,
                    )
                    return
                if (
                    preceding_token.i
                    in token_indexes_without_coordination_to_mention_sets
                ):
                    # existing chain; *preceding_token* may be an anaphor linked to a noun
                    # that can form a noun pair with *token*
                    mention_set = (
                        token_indexes_without_coordination_to_mention_sets[
                            preceding_token.i
                        ]
                    )
                    for mention in (
                        mention
                        for mention in mention_set
                        if len(mention.token_indexes) == 1
                    ):
                        if self.rules_analyzer.is_potential_coreferring_noun_pair(
                            token.doc[mention.root_index], token
                        ):
                            # Returning straight away matters: recording the
                            # mention mutates the set being iterated.
                            self.record_mention(
                                Mention(preceding_token, False),
                                token,
                                token_indexes_without_coordination_to_mention_sets,
                                token_indexes_with_coordination_to_mention_sets,
                            )
                            return

    def temp_annotate_any_anaphoric_link(
        self,
        token: Token,
        token_indexes_without_coordination_to_mention_sets: dict,
        token_indexes_with_coordination_to_mention_sets: dict,
        permitted_start_index: int = 0,
    ) -> bool:
        """Tries to attach the anaphor *token* to one of its potential
        referents, skipping candidates whose position is below
        *permitted_start_index* or at/after *RETRY_DEPTH*. A first pass rejects
        candidates with uncertain compatibility (1); a second pass accepts
        them. Returns *True* if an annotation occurred.
        """

        def check_mention_sets_for_reflexive_relationships(
            mention: Mention, index_to_mention_set_dict: dict
        ) -> bool:
            for token_index in mention.token_indexes:
                if token_index in index_to_mention_set_dict:
                    for working_mention in index_to_mention_set_dict[
                        token_index
                    ]:
                        if self.rules_analyzer.is_potential_reflexive_pair(
                            working_mention, token
                        ):
                            return True
            return False

        def intern_temp_annotate_any_anaphoric_link(
            allow_uncertainty: bool,
        ) -> bool:
            for index, potential_referred in enumerate(
                token._.coref_chains.temp_potential_referreds
            ):
                if index < permitted_start_index or index >= self.RETRY_DEPTH:
                    continue
                if len(potential_referred.token_indexes) == 1:
                    if (
                        potential_referred.root_index
                        in token_indexes_without_coordination_to_mention_sets
                    ):
                        # The candidate is already in a chain: *token* has to
                        # be compatible with that whole chain.
                        mention_set = (
                            token_indexes_without_coordination_to_mention_sets[
                                potential_referred.root_index
                            ]
                        )
                        compatibility = self.get_compatibility(
                            token, mention_set
                        )
                        if compatibility == 0 or (
                            compatibility == 1 and not allow_uncertainty
                        ):
                            continue
                if self.rules_analyzer.is_reflexive_anaphor(token) == 0 and (
                    check_mention_sets_for_reflexive_relationships(
                        potential_referred,
                        token_indexes_without_coordination_to_mention_sets,
                    )
                    or check_mention_sets_for_reflexive_relationships(
                        potential_referred,
                        token_indexes_with_coordination_to_mention_sets,
                    )
                ):
                    continue
                self.record_mention(
                    potential_referred,
                    token,
                    token_indexes_without_coordination_to_mention_sets,
                    token_indexes_with_coordination_to_mention_sets,
                )
                return True
            return False

        if intern_temp_annotate_any_anaphoric_link(False):
            return True
        return intern_temp_annotate_any_anaphoric_link(True)

    def delete_from_collections_for_rewind(
        self,
        previous_token: Token,
        token: Token,
        token_indexes_without_coordination_to_mention_sets: dict,
        token_indexes_with_coordination_to_mention_sets: dict,
    ) -> None:
        """Removes from both dictionaries everything recorded for the tokens
        from *previous_token* to *token* inclusive, so that this stretch can be
        annotated again with a different interpretation.
        """

        def intern_delete_from_collections_for_rewind(
            dictionary: dict, working_token: Token
        ):
            if working_token.i in dictionary:
                mention_set = dictionary[working_token.i]
                working_mention = Mention(working_token, False)
                if working_mention in mention_set:
                    mention_set.remove(working_mention)
                del dictionary[working_token.i]
                if len(mention_set) == 1:
                    # A chain with a single member is no chain at all.
                    remaining_mention = list(mention_set)[0]
                    if remaining_mention.root_index in dictionary:
                        # is not the case where *remaining_mention* involves coordination
                        del dictionary[remaining_mention.root_index]

        doc = token.doc
        for working_token in doc[previous_token.i : token.i + 1]:
            intern_delete_from_collections_for_rewind(
                token_indexes_without_coordination_to_mention_sets,
                working_token,
            )
            intern_delete_from_collections_for_rewind(
                token_indexes_with_coordination_to_mention_sets, working_token
            )

    def attempt_rewind_with_previous_token_and_retry_index(
        self,
        retry_index: int,
        previous_token: Token,
        token: Token,
        sentence_deque: deque,
        token_indexes_without_coordination_to_mention_sets: dict,
        token_indexes_with_coordination_to_mention_sets: dict,
    ) -> bool:
        """Re-annotates *previous_token* starting from candidate *retry_index*
        and then replays every token after it up to and including *token*.
        Returns *True* if the rewind attempt succeeded."""
        doc = token.doc
        if self.temp_annotate_any_anaphoric_link(
            previous_token,
            token_indexes_without_coordination_to_mention_sets,
            token_indexes_with_coordination_to_mention_sets,
            retry_index,
        ):
            for working_token in doc[previous_token.i + 1 : token.i + 1]:
                self.temp_annotate_any_coreferring_noun_link(
                    working_token,
                    sentence_deque,
                    token_indexes_without_coordination_to_mention_sets,
                    token_indexes_with_coordination_to_mention_sets,
                )
                if hasattr(
                    working_token._.coref_chains, "temp_potential_referreds"
                ):
                    if not self.temp_annotate_any_anaphoric_link(
                        working_token,
                        token_indexes_without_coordination_to_mention_sets,
                        token_indexes_with_coordination_to_mention_sets,
                    ):
                        return False
            return True
        return False

    def attempt_retry(
        self,
        token: Token,
        coreferring_deque: deque,
        sentence_deque: deque,
        token_indexes_without_coordination_to_mention_sets: dict,
        token_indexes_with_coordination_to_mention_sets: dict,
    ):
        """Called when an anaphor could not be assigned to a chain; attempts alternative
        interpretations of the preceding anaphors to see whether any allow all anaphors to be
        assigned. Returns *True* if the rewind attempt succeeded."""
        previous_token = None
        for retry_index in range(
            1,
            min(
                self.RETRY_DEPTH,
                len(token._.coref_chains.temp_potential_referreds) + 1,
            ),
        ):
            # we only need start with *previous_token* because any different interpretations of
            # *token* have already been tried out unsuccessfully
            for previous_token in (
                t
                for t in coreferring_deque
                if token._.coref_chains.temp_sent_index
                - t._.coref_chains.temp_sent_index
                <= self.rules_analyzer.maximum_anaphora_sentence_referential_distance
            ):
                self.delete_from_collections_for_rewind(
                    previous_token,
                    token,
                    token_indexes_without_coordination_to_mention_sets,
                    token_indexes_with_coordination_to_mention_sets,
                )
                if self.attempt_rewind_with_previous_token_and_retry_index(
                    retry_index,
                    previous_token,
                    token,
                    sentence_deque,
                    token_indexes_without_coordination_to_mention_sets,
                    token_indexes_with_coordination_to_mention_sets,
                ):
                    return True
        if previous_token is not None:
            # All attempts have failed, so return to the original interpretation
            self.delete_from_collections_for_rewind(
                previous_token,
                token,
                token_indexes_without_coordination_to_mention_sets,
                token_indexes_with_coordination_to_mention_sets,
            )
            self.attempt_rewind_with_previous_token_and_retry_index(
                0,
                previous_token,
                token,
                sentence_deque,
                token_indexes_without_coordination_to_mention_sets,
                token_indexes_with_coordination_to_mention_sets,
            )
        return False

    def get_most_specific_mention(self, mention_set: list, doc: Doc):
        """Returns the most specific mention in the chain, where names > nouns > pronouns.

        *mention_set* is iterated in the order given (the caller passes a list
        sorted by root index); the first mention with coordination wins
        outright. Returns *None* for an empty collection.
        """
        stored_mention = None
        for mention in mention_set:
            if len(mention.token_indexes) > 1:
                return mention
            if stored_mention is None:
                stored_mention = mention
                continue
            stored_mention_root_token = doc[stored_mention.root_index]
            this_mention_root_token = doc[mention.root_index]
            if self.rules_analyzer.is_independent_noun(
                this_mention_root_token
            ) and not self.rules_analyzer.is_independent_noun(
                stored_mention_root_token
            ):
                stored_mention = mention
            # A proper noun that carries an entity type beats a stored mention
            # that is not a proper noun or that has no entity type. The last
            # test used to inspect *this* mention's entity type, which the
            # preceding clause already guarantees is non-empty, so it could
            # never be true.
            if (
                this_mention_root_token.pos_ in self.rules_analyzer.propn_pos
                and this_mention_root_token.ent_type_ != ""
                and (
                    stored_mention_root_token.pos_
                    not in self.rules_analyzer.propn_pos
                    or stored_mention_root_token.ent_type_ == ""
                )
            ):
                stored_mention = mention
        return stored_mention

    def annotate(self, doc: Doc, used_in_training=False):
        """Annotates *doc* with coreference chains and returns it.

        Unless *used_in_training* is set, the rules analyzer is initialized and
        the tendencies analyzer scores the document first, and all 'temp_'
        working attributes are stripped again at the end.
        """
        if not used_in_training:
            self.rules_analyzer.initialize(doc)
            self.tendencies_analyzer.score(doc, self.keras_ensemble)
        token_indexes_without_coordination_to_mention_sets = {}
        token_indexes_with_coordination_to_mention_sets = {}
        sentence_deque = deque(
            maxlen=self.rules_analyzer.maximum_coreferring_nouns_sentence_referential_distance
            + 1
        )
        coreferring_deque = deque(maxlen=self.RETRY_DEPTH)
        for sent in doc.sents:
            sentence_deque.appendleft(sent)
            for token in sent:
                self.temp_annotate_any_coreferring_noun_link(
                    token,
                    sentence_deque,
                    token_indexes_without_coordination_to_mention_sets,
                    token_indexes_with_coordination_to_mention_sets,
                )
                if hasattr(token._.coref_chains, "temp_potential_referreds"):
                    # Remember successfully resolved anaphors so that later
                    # failures can rewind to them.
                    if self.temp_annotate_any_anaphoric_link(
                        token,
                        token_indexes_without_coordination_to_mention_sets,
                        token_indexes_with_coordination_to_mention_sets,
                    ) or self.attempt_retry(
                        token,
                        coreferring_deque,
                        sentence_deque,
                        token_indexes_without_coordination_to_mention_sets,
                        token_indexes_with_coordination_to_mention_sets,
                    ):
                        coreferring_deque.appendleft(token)

        # Several token indexes point at the same mention set; build one chain
        # per set.
        visited_token_indexes = set()
        chains = []
        for (
            token_index,
            mention_set,
        ) in token_indexes_without_coordination_to_mention_sets.items():
            if token_index in visited_token_indexes:
                continue
            mention_list = sorted(
                mention_set, key=lambda mention: mention.root_index
            )
            most_specific_mention = self.get_most_specific_mention(
                mention_list, doc
            )
            chain = Chain(
                mention_list, mention_list.index(most_specific_mention)
            )
            chains.append(chain)
            for mention in chain.mentions:
                if len(mention.token_indexes) == 1:
                    visited_token_indexes.add(mention.root_index)

        chains.sort(key=lambda chain: chain.mentions[0].root_index)

        for index, chain in enumerate(chains):
            chain.index = index
            for mention in chain.mentions:
                for token in (
                    doc[token_index] for token_index in mention.token_indexes
                ):
                    token._.coref_chains.chains.append(chain)

        doc._.coref_chains.chains = chains

        if not used_in_training:
            # get rid of the *temp_* properties on the various objects
            self._remove_temp_entries(doc._.coref_chains)
            for token in doc:
                self._remove_temp_entries(token._.coref_chains)
                for chain in token._.coref_chains:
                    for mention in chain:
                        self._remove_temp_entries(mention)

        return doc
# Copyright 2021 msg systems ag

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from os import linesep

import srsly
from spacy.tokens import Token


class ChainHolder:
    """The object returned by *token._.coref_chains*."""

    def __init__(self):
        self.chains = []

        # 'temp*' properties will be removed before processing ends
        self.temp_governing_sibling = None
        self.temp_has_or_coordination = False

    def __str__(self) -> str:
        return str(self.chains)

    def __repr__(self) -> str:
        return str(self)

    def print(self) -> None:
        """Prints the pretty representation of each chain on its own line."""
        print(
            linesep.join(chain.pretty_representation for chain in self.chains)
        )

    def __iter__(self):
        # Iterate over a copy so that callers may change *self.chains* while
        # looping.
        return iter(self.chains.copy())

    def __len__(self) -> int:
        return len(self.chains)

    def __getitem__(self, key):
        return self.chains[key]

    @property
    def pretty_representation(self):
        return "; ".join(chain.pretty_representation for chain in self.chains)

    @staticmethod
    def resolve(token: Token) -> list:
        """If *token* is an anaphor, returns a list of tokens to which *token* points;
        otherwise returns *None*.
        """

        def resolve_recursively(token: Token) -> set:
            tokens_to_return = set()
            for chain in token._.coref_chains.chains:
                for mention in (
                    mention
                    for mention in chain.mentions
                    if len(mention.token_indexes) > 1
                    and token.i not in mention.token_indexes
                ):
                    # Mention contains multiple tokens, some of which may be anaphors and
                    # belong to further chains.
                    for contained_token in (
                        token.doc[index]
                        for index in mention.token_indexes
                        if index != token.i
                    ):
                        tokens_to_return.update(
                            resolve_recursively(contained_token)
                        )
                    return tokens_to_return
            for chain in token._.coref_chains.chains:
                if any(
                    len(mention.token_indexes) > 1
                    and token.i in mention.token_indexes
                    for mention in chain.mentions
                ):
                    # This token is pointing back to a multiple-token mention which should
                    # already have been dealt with further up the recursion stack
                    continue
                return {
                    token.doc[
                        chain.mentions[
                            chain.most_specific_mention_index
                        ].root_index
                    ]
                }
            return {token}

        resolved_set = resolve_recursively(token)
        if len(resolved_set) == 1 and token in resolved_set:
            # The token only resolves to itself, i.e. it is not an anaphor.
            return None
        return sorted(resolved_set)

    @srsly.msgpack_encoders("coreferee_chain_holder")
    def serialize_obj(obj, chain=None):
        """Msgpack encoder: turns a *ChainHolder* into plain lists and tuples;
        any other object is handed to the next encoder *chain*, if there is one.
        """
        if isinstance(obj, ChainHolder):
            serialized_chain_holder = []
            for working_chain in obj.chains:
                serialized_chain_holder.append(
                    (
                        [
                            (
                                mention.token_indexes,
                                mention.pretty_representation,
                            )
                            for mention in working_chain.mentions
                        ],
                        working_chain.most_specific_mention_index,
                    )
                )
            return {"__coreferee_chain_holder__": serialized_chain_holder}
        return obj if chain is None else chain(obj)

    @srsly.msgpack_decoders("coreferee_chain_holder")
    def deserialize_obj(obj, chain=None):
        """Msgpack decoder: rebuilds a *ChainHolder* from the structure written
        by the encoder; any other object is handed to the next decoder *chain*.
        """
        if "__coreferee_chain_holder__" in obj:
            chain_holder = ChainHolder()
            chain_holder.chains = []
            for index, (
                chain_representation,
                most_specific_mention_index,
            ) in enumerate(obj["__coreferee_chain_holder__"]):
                mentions = []
                for (
                    token_indexes,
                    pretty_representation,
                ) in chain_representation:
                    # No token is available here, so the mention is filled in
                    # by hand; the first stored index is taken as the root.
                    mention = Mention()
                    mention.token_indexes = token_indexes
                    mention.pretty_representation = pretty_representation
                    mention.root_index = token_indexes[0]
                    mentions.append(mention)
                working_chain = Chain(mentions, most_specific_mention_index)
                working_chain.index = index
                chain_holder.chains.append(working_chain)
            return chain_holder
        return obj if chain is None else chain(obj)


class Chain:
    """An ordered list of mentions plus the position within that list of the
    most specific mention."""

    def __init__(self, mentions, most_specific_mention_index):
        self.mentions = mentions
        self.most_specific_mention_index = most_specific_mention_index
        # Assigned by whoever builds the complete list of chains. Initialised
        # here so that printing a chain before then does not raise
        # AttributeError.
        self.index = None

    def __str__(self):
        return ": ".join(
            (
                str(self.index),
                ", ".join(str(mention) for mention in self.mentions),
            )
        )

    def __repr__(self):
        return str(self)

    def __iter__(self):
        # Iterate over a copy so that callers may change *self.mentions* while
        # looping.
        return iter(self.mentions.copy())

    def __len__(self) -> int:
        return len(self.mentions)

    def __getitem__(self, key):
        return self.mentions[key]

    @property
    def pretty_representation(self):
        return ": ".join(
            (
                str(self.index),
                ", ".join(
                    mention.pretty_representation for mention in self.mentions
                ),
            )
        )


class Mention:
    """One or more token indexes that are referred to as a unit. Equality and
    hashing depend on the token indexes only."""

    def __init__(
        self, root: Token = None, include_dependent_siblings: bool = False
    ):
        if (
            root is not None
        ):  # root==None during deserialization, never otherwise
            doc = root.doc
            self.root_index = root.i
            self.token_indexes = [root.i]
            if include_dependent_siblings:
                self.token_indexes.extend(
                    [t.i for t in root._.coref_chains.temp_dependent_siblings]
                )
            if len(self.token_indexes) > 1:
                # e.g. "[Peter(0); Jane(2)]"
                self.pretty_representation = "".join(
                    (
                        "[",
                        "; ".join(
                            "".join(
                                (
                                    doc[token_index].text,
                                    "(",
                                    str(token_index),
                                    ")",
                                )
                            )
                            for token_index in self.token_indexes
                        ),
                        "]",
                    )
                )
            else:
                # e.g. "Peter(0)"
                self.pretty_representation = "".join(
                    (doc[self.root_index].text, "(", str(self.root_index), ")")
                )

    def __eq__(self, other):
        return (
            isinstance(other, Mention)
            and self.token_indexes == other.token_indexes
        )

    def __hash__(self):
        return hash(tuple(self.token_indexes))

    def __str__(self):
        return str(self.token_indexes)

    def __repr__(self):
        return str(self.token_indexes)

    def __len__(self) -> int:
        return len(self.token_indexes)

    def __getitem__(self, key) -> int:
        return self.token_indexes[key]


class FeatureTable:
    """Captures the possible values of the various Spacy annotations that are observed
    to occur during a training corpus. These are then used as the basis for a oneshot
    representation of individual tokens.
    """

    def __init__(
        self,
        *,
        tags: list,
        morphs: list,
        ent_types: list,
        lefthand_deps_to_children: list,
        righthand_deps_to_children: list,
        lefthand_deps_to_parents: list,
        righthand_deps_to_parents: list,
        parent_tags: list,
        parent_morphs: list,
        parent_lefthand_deps_to_children: list,
        parent_righthand_deps_to_children: list
    ):

        # In the notes that follow, 'referred token' means the token at the head of a
        # referred-to mention.

        # Tags a referring or referred token can have
        self.tags = tags

        # Morphological features a referring or referred token can have
        self.morphs = morphs

        # Entity types a referring or referred token can have
        self.ent_types = ent_types

        # Dependencies where a referring or referred token is the head and where the child
        # is to its left
        self.lefthand_deps_to_children = lefthand_deps_to_children

        # Dependencies where a referring or referred token is the head and where the child
        # is to its right
        self.righthand_deps_to_children = righthand_deps_to_children

        # Dependencies where a referring or referred token is the child and where it is to the
        # left of the parent
        self.lefthand_deps_to_parents = lefthand_deps_to_parents

        # Dependencies where a referring or referred token is the child and where it is to the
        # right of the parent
        self.righthand_deps_to_parents = righthand_deps_to_parents

        # Tags the parent of a referring or referred token can have
        self.parent_tags = parent_tags

        # Morphological features the parent of a referring or referred token can have
        self.parent_morphs = parent_morphs

        # Dependencies where the parent of a referring or referred token is the head and where the
        # child is to its left
        self.parent_lefthand_deps_to_children = (
            parent_lefthand_deps_to_children
        )

        # Dependencies where the parent of a referring or referred token is the head and where the
        # child is to its right
        self.parent_righthand_deps_to_children = (
            parent_righthand_deps_to_children
        )

    def __len__(self) -> int:
        # Total number of entries over all the lists stored on the instance.
        return sum(len(values) for values in self.__dict__.values())
class CorefereeError(Exception):
    """Base class for all errors raised by coreferee.

    The message is available as *text*, is returned by *str()*, and is also
    passed to *Exception* so that it shows up in *args* and *repr()*.
    """

    def __init__(self, text: str = ""):
        # Hand the message to the base class as well; leaving it out made
        # *args* empty, so repr() and generic exception reporting lost it.
        super().__init__(text)
        self.text = text

    def __str__(self) -> str:
        return self.text


class LanguageNotSupportedError(CorefereeError):
    pass


class ModelNotSupportedError(CorefereeError):
    pass


class VectorsModelNotInstalledError(CorefereeError):
    pass


class VectorsModelHasWrongVersionError(CorefereeError):
    pass


class MultiprocessingParsingNotSupportedError(CorefereeError):
    pass
Social Security Administration (https://www.ssa.gov/oact/babynames/background.html) curated under https://github.com/aruljohn/popular-baby-names + +2) Offene Daten Köln (https://offenedaten-koeln.de/dataset/vornamen) released under the CC BY 3.0 DE 'Namensnennung 3.0 Deutschland' license (https://creativecommons.org/licenses/by/3.0/de/legalcode) + +3) Polish government data 'Lista imion występujących w rejestrze PESEL z uwzględnieniem imion osób zmarłych' (https://dane.gov.pl/en/dataset/1501,lista-imion-wystepujacych-w-rejestrze-pesel) published with no restrictions on reuse diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/common/data/__init__.py b/transformations/gender_randomizer/coreferee/coreferee/lang/common/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/common/data/female_names.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/common/data/female_names.dat new file mode 100644 index 000000000..c66624730 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/common/data/female_names.dat @@ -0,0 +1,4341 @@ +Aadhya +Aaliya-aliza +Aaliyah +Aanya +Aava +Aayat +Abagail +Abbey +Abbie +Abbigail +Abby +Abdoulie +Abeeha +Abeer +Abera-lara +Abigail +Abigale +Abigayle +Abrielle +Abril +Achieng +Ada +Adaeze +Adalee +Adaline +Adalyn +Adalynn +Adamaris +Adamina +Adanna +Addie +Addilyn +Addilynn +Addison +Addisyn +Addyson +Adela +Adelaide +Adelajda +Adele +Adelheid +Adelin +Adelina +Adeline +Adelyn +Adelynn +Adepa +Aderike +Adi +Adilynn +Adina +Adison +Adjera +Adley +Adolfa +Adolfina +Adrian +Adriana +Adrianna +Adrienne +Adyson +Afia +Afonso +Afra +Afrah +Afriyie +Afroditi +Afrodyta +Agata +Agatha +Agathe +Agedea +Aglaia +Agnes +Agnieszka +Agrippina +Agrypina +Ahmad +Ahsen +Ai +Aida +Aika +Aila +Ailani +Aileen +Ailin +Aimee +Aimie +Aimée +Ainhoa +Ainoa +Ainsley +Aisha +Aishani +Aislinn +Aissata +Aitana +Aiyana +Aiyanna +Aja 
+Ajana +Ajla +Ajona +Ajsela +Akara +Akari +Akeelah +Akhmedovna +Akina +Akira +Aksana +Akua +Akumu +Ala +Alaa +Alaia +Alaina +Alaja +Alana +Alani +Alanna +Alannah +Alara +Alaya +Alayah +Alayna +Alaysia +Alba +Alberta +Albertyna +Albina +Aldona +Alea +Aleah +Aleandra +Aleena +Alegria +Aleigha +Alejandra +Alejna +Aleksa +Aleksandra +Aleksandrovna +Aleksia +Alen +Alena +Alenia +Alesandra +Alesia +Alessandra +Alessia +Alessia-stefania +Alesya-dailin +Aletheia +Alev +Alex +Alexa +Alexandra +Alexandrea +Alexandria +Alexia +Alexis +Alexsandra +Alexus +Alexys +Aleya +Aleyna +Aleyna-emilia +Alfreda +Ali +Alia +Aliaksandra +Aliana +Alianna +Alica +Alice +Alicia +Alicja +Alida +Aliea +Alija +Alimatou +Alin +Alina +Aline +Aliona +Alisa +Alisha +Alisha-elisabeth +Alisia +Alison +Alissa +Alisson +Alitta +Alivia +Alix +Aliya +Aliyah +Aliye +Aliza +Alize +Alja +Alla +Allen +Allie +Allison +Allisson +Alliya +Ally +Allyson +Allyssa +Alma +Almas +Almedina +Almendra +Almila +Almina +Almira +Alodia +Aloisia +Alojza +Alojzja +Alona +Alondra +Alora +Alva +Alya +Alycia +Alysa +Alysha +Alysia +Alyson +Alyssa +Alyvia +Alâ +Amaia +Amaira +Amal +Amali +Amalia +Amanawit +Amanda +Amanfo +Amani +Amara +Amarachi +Amargi +Amari +Amaris +Amaya +Amayah +Amba +Amber +Amea +Amedeea +Amelia +Amelie +Amely +Ameni +America +Amia +Amiah +Amila +Amilia +Amina +Aminah +Aminata +Amine +Amira +Amirah +Amiya +Amiyah +Amora +Amosi +Amrei +Amy +Amya +Amàlia +Amália +Amélie +Ana +Ana-valentina +Anabel +Anabela +Anabella +Anabelle +Anaeli +Anahi +Anahí +Anais +Analia +Ananda +Ananya +Anas +Anastasia +Anastasia-elena +Anastasiia +Anastasija +Anastasiya +Anastazja +Anat +Anatola +Anatolia +Anaya +Anaïs +Andi +Andjela +Andora +Andra +Andrea +Andriana +Andrijana +Andrika +Andrzelika +Anduta +Andżelika +Andżelina +Anelia +Anelija +Anelise +Aneta +Anetta +Anette +Angel +Angela +Angeles +Angelica +Angelika +Angeliki +Angelina +Angeline +Angelique +Anges +Angie +Anh +Anhelina +Ania +Aniceta +Anida +Aniela +Anika +Anila 
+Animwaa +Anisa +Anisha +Anissa +Anita +Anitha +Aniya +Aniyah +Anja +Anjali +Ann +Anna +Anna-maria +Anna-sophie +Annabel +Annabell +Annabella +Annabelle +Annais +Annalee +Annalena +Annalina +Annalisa +Annalise +Annamaria +Annamarie +Anne +Annegret +Anneli +Annelie +Annelies +Anneliese +Annemarie +Annette +Anni +Annie +Annika +Anniston +Anouk +Anoush +Ansley +Anta +Antigona +Antoinette +Antonella +Antonia +Antonie +Antonietta +Antonina +Anuk +Anuuk +Anvi +Anya +Anyango +Anzelma +Anzhela +Anzhelika +Apollina +Apollonia +Apolonia +April +Arabella +Araceli +Aracely +Araliya +Aranza +Arden +Areli +Arely +Arha +Ari +Aria +Aria-atlana +Ariadna +Ariadne +Ariah +Ariana +Arianna +Ariel +Ariella +Arielle +Arife +Arilea +Arin +Arina +Arisa +Arisara +Ariya +Ariyah +Arleen +Arlena +Arlene +Arleta +Arleth +Arletta +Arlette +Arly +Armani +Armaya +Armita +Arona +Arpine +Arsema +Arslanovna +Artemis +Arwen +Arya +Aryana +Aryanna +Aryna +Arîn +Asanova +Asel +Aselya-sara +Asemina +Asena +Asenova +Asha +Ashanti +Ashlee +Ashleigh +Ashley +Ashly +Ashlyn +Ashlynn +Ashton +Ashtyn +Asia +Asinat +Asiya +Asiye +Asja +Asli +Aslı +Asmaa +Asmaou +Asmin +Aspen +Asra +Assia +Assibe +Assil +Asta +Astrid +Asya +Asye +Atanasova +Athena +Atinoukê +Aubree +Aubrey +Aubri +Aubriana +Aubrianna +Aubrie +Aubriella +Aubrielle +Audra +Audrey +Audriana +Audrianna +Audrina +August +Augusta +Auguste +Augustyna +Aura +Aurela +Aurelia +Aurelie +Aurelie-angel +Aurelija +Auri +Aurora +Aurélie +Austyn +Autumn +Ava +Avah +Avalie +Avalyn +Avalynn +Aveline +Averi +Averie +Avery +Avesta +Aviana +Avianna +Avienna +Avissa +Avzem +Awa +Awan +Awin +Aya +Ayah +Ayana +Ayanah +Ayanna +Aycan +Ayda +Aydan +Ayela +Ayla +Ayla-ezelda +Ayla-marie +Ayleen +Aylen +Aylin +Ayna +Aysa +Aysegül +Aysel +Aysha +Aysima +Aysin +Ayva +Ayza +Ayşe +Azalea +Azania +Azaria +Azariah +Azra +Azranur +Azul +Azura +Açelya +Ałła +Baaba +Baby +Bachata +Bahri +Baibun +Bailee +Bailey +Balahatun +Balbina +Bali +Bao +Baran +Barbara +Bargret +Barin +Basmala 
+Batu +Bayan +Baylee +Bayleigh +Bea +Beata +Beatrice +Beatriz +Bee +Begüm +Belen +Belgin +Belinay +Belinda +Beliz +Bella +Bellamy +Belle +Belén +Benedicta +Benedykta +Benigna +Benita +Bennet +Bensu +Bentje +Bentley +Beraet +Berenice +Berenika +Berin +Berivan +Berkley +Bernadeta +Bernadetta +Bernadette +Bernadyna +Bernarda +Bernardeta +Bernardyna +Bernhardina +Berry +Berta +Bertha +Berçem +Berîya +Besuah +Bethany +Bethel +Bethzy +Betina +Betsy +Bettina +Betttina +Betül +Beverly +Bexley +Beyonce +Beyza +Beyzanur +Beyzat +Bezawit +Bianca +Bianka +Bibianna +Biene +Bihter +Bijelle +Bilge +Billie +Bircan +Birdie +Birgit +Birka +Birte +Biruta +Bisan +Biserka +Bitanya +Bithania +Blair +Blaire +Blake +Blakely +Blanca +Blanche +Blandyna +Blanka +Blessing +Bo +Bogda +Bogdana +Bogna +Bogumiła +Bogusława +Bohdana +Bolesława +Bonnie +Bora +Borislavova +Bożena +Bożenna +Braelyn +Braelynn +Brandi +Brandy +Braylee +Breana +Breanna +Breanne +Bree +Brenda +Brenna +Breonna +Bria +Briana +Brianna +Brianne +Briar +Bridget +Bridgette +Briella +Brielle +Brigitte +Briley +Brinley +Brionna +Brisa +Bristol +Britney +Brittany +Brittney +Bronisława +Brook +Brooke +Brooklyn +Brooklynn +Brunhilda +Bryana +Bryanna +Brygida +Brylee +Bryleigh +Bryn +Brynlee +Brynleigh +Brynn +Buket +Buğlem +Bélle +Büşra +Bảo +Cadence +Cailyn +Caitlin +Caitlyn +Caitlynn +Calea +Caleigh +Cali +Calista +Calleigh +Callie +Calliope +Calypso +Cambria +Cameron +Camila +Camilla +Camille +Campbell +Camryn +Cana +Candace +Candice +Cansu +Cara +Carina +Carissa +Carla +Carlee +Carleigh +Carley +Carli +Carlie +Carlina +Carlota +Carlotta +Carly +Carmela +Carmen +Carol +Carola +Carolin +Carolina +Caroline +Carolyn +Carrie +Carson +Carter +Casandra +Casey +Cassandra +Cassidy +Cassie +Cassiopeia +Cataleya +Catalina +Caterina +Catharina +Catherine +Cathy +Caydence +Cayla +Caylee +Cecelia +Cecilia +Cecilie +Cecily +Cecylia +Celeste +Celestyna +Celia +Celin +Celina +Celine +Cennet +Cerasela +Ceren +Ceyda +Ceyda-gülten +Ceylan +Ceylin 
+Cezaria +Chaltu +Chana +Chandler +Chanel +Chang-mei +Chantal +Chantana +Chantel +Chariklia +Charity +Charlee +Charleigh +Charley +Charli +Charlie +Charlize +Charlotta +Charlotte +Charlotte-mathilda +Chasity +Chaya +Chelsea +Chelsey +Cherish +Cherry +Cheyanne +Cheyenne +Chi +Chiara +Chiara-lia +Chidima +Chidinma +Chinyere +Chioma +Chisom +Chleo +Chloe +Chloé +Christa +Christel +Christian +Christiana +Christin +Christina +Christine +Christy +Chun +Chuqin +Chyna +Ciara +Cielo +Ciera +Cierra +Cina +Cinderella +Cindy +Citlali +Citlalli +Claire +Clara +Clara-marie +Clare +Clarissa +Claudia +Clea +Clea-malie +Clementine +Clementyn +Cleo +Cloe +Cléa +Coco +Colette +Colleen +Collins +Conni +Constance +Constanze +Conway +Cora +Coraline +Cordelia +Corinne +Cornelia +Corona +Corry +Cortney +Cosima +Courtney +Cristal +Cristina +Crocetta +Crystal +Cydney +Cynthia +Cyra +Cyrine +Cyryla +Czesława +Cäcilia +Cäcilie +Céleste +Céline +Da-eun +Dadze +Dafne +Dagmara +Dagna +Dahab +Dahlia +Daija +Daisha +Daisy +Dajana +Dajla +Dakota +Dalaa +Dalary +Daleyza +Dalia +Dalia-jasmin +Dalijah-yael +Dalilah +Dalin +Daliya +Dallas +Damaris +Damiana +Damla +Dana +Dani +Dania +Danica +Daniela +Daniella +Danielle +Danika +Danja +Danna +Danuta +Danyah +Daphne +Darby +Daria +Darian +Dariana +Dariia +Darin +Darlene +Darlis +Darya +Daryna +Dasia +Davina +Dawa +Dawn +Dayami +Dayana +Dayanara +Dayna +Deanna +Deasia +Debora +Deborah +Deena +Deetje +Deewa +Defne +Deidre +Deja +Dejah +Dela +Delaney +Delfina +Delhin +Delia +Deliha +Delilah +Delina +Della +Delsos +Delvida +Delvin +Delwin +Demi +Denis +Denisa +Denise +Denislav +Denisse +Deniz +Derin +Derin-lina +Desdina +Desirae +Desiree +Desislava +Destany +Deste +Destinee +Destiney +Destini +Destiny +Devi +Devin +Devon +Devyn +Deyanova +Diamond +Diana +Diane +Dianna +Diara +Diegui +Dila +Dilara +Dilay +Dilyana +Dimitra +Dimitrichkeva +Dimitrova +Dina +Diona +Dioni +Dioniza +Dior +Diren +Divine +Dixie +Diya +Diệu +Djaliyah +Djenabou +Djiaty +Doa +Dobrawa 
+Dobrochna +Dobromiła +Dobrosława +Docia +Dolores +Domenica +Domicela +Dominika +Dominique +Donata +Donatella +Donja +Donna +Dora +Doris +Dorota +Dorotea +Dorothea +Dorothy +Dorra +Dream +Drew +Dua +Duaa +Dulana +Dulce +Dunya +Duru +Duruve +Dylan +Dyoniza +Dyuthi +Dziyana +Désirée +Dąbrówka +Dżesika +Eara +Ebba +Ebony +Ebrar +Ecaterina +Ece +Ecem +Ecrin +Eda +Edda +Edea +Edeltraud +Edeltrauda +Edeltraut +Eden +Edessa +Edi +Edita +Edith +Edmunda +Edna +Edwarda +Edwina +Edyta +Effi +Effie +Efna +Efraim +Efsun +Efua +Ege +Egypt +Eila +Eileen +Ekaterina +Ela +Elaida +Elaina +Elaine +Elana +Elanor +Elanur +Elara +Elasu +Eldana +Elea +Eleanor +Electra +Elein +Elen +Elena +Eleni +Elenor +Elenora +Eleonor +Eleonora +Eleonore +Eleya +Eleyna +Elfie +Elfryda +Eliana +Eliane +Elianna +Elide +Elies +Elif +Elifnur +Elifsu +Eligia +Elikya-marie +Eliliana +Elin +Elina +Eliora +Elis +Elisa +Elisabeta +Elisabeth +Elisabetha +Elise +Elissa +Eliyana +Eliz +Eliza +Elizabeth +Elizan +Elizaveta +Elizya +Elke +Ella +Elle +Ellen +Elli +Elliana +Ellianna +Ellie +Elliot +Elliott +Ellis +Ellison +Elly +Elma +Elmedina +Elmina +Elmira +Elodie +Elody +Eloise +Elora +Elordia +Elorie +Elsa +Else +Else-linde +Elsie +Elvana +Elvira +Elwira +Elya +Elyse +Elyssa +Elza +Eléanore +Elżbieta +Ema +Emaan +Emanuela +Ember +Emberly +Emeli +Emelia +Emelie +Emely +Emerald +Emeraude +Emerie +Emerson +Emersyn +Emery +Emila +Emilee +Emilia +Emilia-leora +Emiliana +Emilie +Emiliia +Emilija +Emiliya +Emily +Emilya +Emin +Emly +Emma +Emma-sophie +Emmah +Emmalee +Emmaline +Emmalyn +Emmalynn +Emmanuelle +Emmarie +Emmelina +Emmeline +Emmi +Emmi-claire +Emmie +Emmilou +Emmy +Emory +Emotion +Emra +Enea +Engin +Enguun +Eni +Enie +Enisa +Enise +Enissa +Enna +Enni +Ennie +Enny +Ensley +Enya +Era +Erdita +Erica +Ericka +Erika +Erin +Erina +Erna +Ernestyna +Errong +Erva +Erwina +Eryka +Eryn +Esamoela +Ese +Eshaal +Esila +Esin +Eske +Eslem +Eslim +Esma +Esmail +Esmanur +Esmay +Esme +Esmeralda +Esmée +Esperanza +Esra +Essayas 
+Essence +Estefani +Estefania +Estefany +Estella +Estelle +Ester +Estera +Esther +Estrella +Etta +Eudokia +Eufemia +Eufrozyna +Eugenia +Eugenie +Eulalia +Eunika +Euphemia +Europa +Euzebia +Eva +Evalyn +Evangelia +Evangeline +Eve +Evelin +Evelina +Evelyn +Evelyne +Evelynn +Everlee +Everleigh +Everly +Evgenia +Evi +Evie +Evin +Evlyn +Evîn +Ewa +Ewelina +Eylül +Ezinne +Ezra +Faa +Fabia +Fabienne +Fabiola +Fadila +Faida +Faith +Faiza +Falak +Fanny +Farha +Farhan +Farina +Farrah +Fatima +Fatime +Fatma +Fatma-nisa +Fatou +Fatoumata +Faustyna +Favour +Faye +Fayola +Fayza +Fazilet +Faé +Fearne +Febronia +Fechi +Federica +Fee +Feenja +Feli +Felia +Felice +Felicia +Felicitas +Felicity +Felicja +Felicyta +Feliksa +Felina +Feline +Femi +Femke +Fendi +Fenja +Fenna +Ferdynanda +Ferida +Feriha +Fern +Fernanda +Fernella +Feyza +Fil +Filicity +Filipa +Filipina +Filippa +Filiz +Filomena +Fina +Finchen +Fine +Finja +Finja-sophie +Finley +Finya +Fiona +Firdaous +Firdevs +Flavie +Fleur +Flor +Flora +Florence +Florentine +Florentyna +Florianna +Folashade +Fortuna +Fouda +Frances +Francesca +Franceska +Franciszka +Franka +Frankie +Franzis +Franziska +Frederike +Fredrikke +Freja +Freshta +Freya +Freyja +Fria +Frida +Frieda +Friederike +Fritzi +Fryda +Fryderyka +Fynja +Gabriel +Gabriela +Gabriele +Gabriella +Gabriella-aurelia +Gabrielle +Gabryela +Gada +Gaia +Gaja +Galilea +Galina +Galyna +Ganem +Ganna +Gaowa +Gauhar +Gelila +Gemma +Genesa +Genesis +Genevieve +Genowefa +Georgia +Georgina +Geraldine +Gerda +Gertrud +Gertruda +Gia +Giada +Giana +Gianna +Giavanna +Gillian +Gina +Ginevra +Gioia +Giorgia +Giovanna +Gisela +Giselle +Gisselle +Giulia +Giuliana +Giuliano +Giulietta +Giusi +Gizela +Glenda +Gloria +Glory +Gordana +Gospava +Grace +Gracelyn +Gracelynn +Gracia +Gracie +Graciela +Gracja +Gracjana +Graziana +Grazyna +Grażyna +Greta +Greta-sophie +Gretchen +Grete +Gréta +Guadalupe +Guendalina +Gufronovna +Guiliana +Gulshen +Gurman +Gustawa +Gwen +Gwendolin +Gwendolyn +Gwyneth +Gyunay 
+Gyurdzanova +Gönül +Gözde +Gül +Gülay +Güneş +Hacer +Hadas +Hadassah +Hadeel +Hadlee +Hadleigh +Hadley +Hae-in +Hafeza +Haffsa-arij +Hafize +Hafsa +Hailee +Hailey +Hailie +Haisley +Halbast +Haleigh +Haley +Halie +Halima +Halime +Halina +Halisa +Halle +Hallie +Halszka +Halyna +Hamza +Han +Hana +Handa +Hania +Hanife +Haniya +Hanka +Hanna +Hannah +Hanne +Hannelore +Hanni +Haomiao +Hariklia +Harlee +Harleigh +Harley +Harlow +Harmoni +Harmony +Harnimat +Harper +Hasan +Hasibe +Hasret +Hatice +Hattie +Haven +Havin +Hawa +Haya +Hayat +Hayden +Hayes +Haylee +Hayleigh +Hayley +Haylie +Hazel +Hazra +Heather +Heaven +Heavenly +Hedda +Hedi +Hedwig +Heide +Heidemarie +Heidi +Heidrun +Heidy +Helen +Helena +Helene +Helga +Helia +Helin +Hella +Hema +Hena +Henley +Henni +Henrieta +Henrietta +Henriette +Henrike +Henryka +Hera +Hermela +Hermenegilda +Hermina +Hermine +Heronima +Herta +Hevi +Hevin +Hiacynta +Hiba +Hidajeta +Hieronima +Hifa +Hila +Hilal +Hilaria +Hilda +Hilde +Hildegard +Hildegarda +Hillary +Hilma +Hina +Hira +Hiranur +Hiyabel +Hiyori +Hoda +Holland +Holly +Honorata +Hooriya +Hope +Hoyam +Hoàng +Hristova +Huda +Humayra +Humeyra +Hunter +Husna +Hussein +Hêja +Hêvîya +Hüma +Hổng +Iana +Ida +Idalia +Ievgeniia +Ifunanya +Iga +Ilaria +Ilayda +Ilda +Ildiko +Ilef +Ilenia +Iliana +Ilinca +Ilira +Ilisha +Iljana +Ilma +Ilona +Ilsa +Ilsu +Ilvie +Ilwa +Ilza +Iman +Imane +Imani +Imilia +Ina +Inaaya +Inaya +Inci +Inda +India +Ines +Inesa +Inessa +Inez +Ineza +Inga +Inge +Ingeborg +Ingeborga +Ingrid +Ingrida +Ingryda +Inha +Inka +Inna +Insa +Inés +Ioana +Ioanna +Iolanda +Ione +Iraida +Ireland +Irena +Irene +Irina +Iris +Irma +Irmgard +Irmgarda +Irmina +Iryna +Isa +Isabel +Isabela +Isabell +Isabella +Isabella-sophie +Isabelle +Isalie +Isi +Iside +Isilay +Isis +Isla +Ismena +Ismini +Issa +Issra +Issraa +Itohan +Itzayana +Itzel +Iulia +Iuliia +Iva +Ivana +Ivanka +Ivanna +Ivette +Ivory +Ivy +Iweta +Iwetta +Iwona +Iwonka +Iwonna +Iyana +Iyanna +Iza +Izabel +Izabela +Izabella +Izabelle 
+Izolda +Izydora +Jaane +Jacey +Jackeline +Jacklyn +Jaclyn +Jacqueline +Jacquelyn +Jada +Jade +Jaden +Jadwiga +Jadyn +Jaeda +Jaelle +Jaelyn +Jaelynn +Jagienka +Jagna +Jagoda +Jahya +Jaida +Jaiden +Jaidyn +Jailyn +Jaime +Jakayla +Jaklina +Jale +Jalea +Jalina +Jaliyah +Jalyn +Jalynn +Jamie +Jamila +Jamilah +Jamiya +Jamya +Jana +Janae +Jane +Janelle +Janessa +Janet +Janiah +Janice +Janie +Janina +Janiya +Janiyah +Janna +Jannat +Janne +Jannyn +Janthis +Jaquelin +Jaqueline +Jara +Jarosława +Jasemí +Jaslene +Jaslyn +Jasmin +Jasmina +Jasmine +Jasmyn +Jaycee +Jayda +Jayda-eylem +Jayde +Jayden +Jayla +Jaylah +Jaylee +Jayleen +Jaylen +Jaylene +Jaylin +Jaylyn +Jaylynn +Jaziba +Jazlene +Jazlyn +Jazlynn +Jazmin +Jazmine +Jazmyn +Jazmyne +Jazzlyn +Jaëlle +Jaśmina +Jeanette +Jedida +Jele +Jelena +Jelisaveta +Jella +Jemma +Jena +Jenesis +Jenifer +Jenna +Jennifer +Jenny +Jesica +Jesika +Jessa +Jesse +Jessica +Jessie +Jessika +Jette +Jewel +Jiang +Jil +Jill +Jillian +Jimena +Jinda +Jiyan +Joa +Joan +Joana +Joanna +Joanne +Jocelyn +Jocelynn +Joda +Joelina +Joelle +Johana +Johanna +Johanne +Joia +Jola +Jolanta +Joleen +Jolene +Jolenta +Jolette +Jolie +Jolin +Jolina +Jolina-marie +Joline +Joni +Jonna +Jordan +Jordin +Jordyn +Jordynn +Jorja +Josann +Josefin +Josefina +Josefine +Josefrine +Joselin +Joseline +Joselyn +Josepha +Josephin +Josephine +Josha +Josie +Joslyn +Joulie +Jouline +Journee +Journey +Journi +Jowita +Joy +Joyce +Juana +Juanita +Judith +Judy +Judyta +Jule +Juli +Julia +Juliana +Juliane +Julianna +Julianne +Julie +Julie-margó +Julienne +Juliet +Julieta +Julietta +Juliette +Julika +Julina +Julissa +Julita +Julitta +Juna +June +Juni +Junia +Juniper +Juno +Juriana +Jurnee +Justice +Justina +Justine +Justyna +Juta +Jutta +Jyoti +Józefa +Józefina +Kacey +Kaci +Kacie +Kadence +Kaela +Kaelyn +Kaelynn +Kai +Kaia +Kaidence +Kaila +Kailani +Kailee +Kailey +Kailyn +Kailynn +Kaira +Kairi +Kaitlin +Kaitlyn +Kaitlynn +Kaiya +Kaja +Kalani +Kalea +Kaleigh +Kaley +Kali +Kalina +Kalinka 
+Kaliyah +Kallie +Kalotta +Kalyn +Kamari +Kamelia +Kameron +Kamila +Kamilah +Kamilia +Kamilla +Kamille +Kamiyah +Kamora +Kamryn +Kamya +Kani +Kara +Karen +Kari +Karim +Karin +Karina +Karine +Karis +Karissa +Karla +Karla-sophie +Karlee +Karley +Karli +Karlie +Karline +Karlotta +Karly +Karma +Karmen +Karol +Karola +Karolina +Karoline +Karsyn +Karter +Karyme +Karyna +Kasandra +Kasey +Kasjana +Kassandra +Kassidy +Kataleya +Katalina +Katarina +Katarzyna +Kate +Katelin +Katelyn +Katelynn +Katerina +Kateryna +Kathaleya +Katharina +Katharine +Katherine +Kathleen +Kathrin +Kathryn +Kathy +Kati +Katia +Katie +Katina +Katinka +Katja +Katlyn +Katlynn +Katrin +Katrina +Katsiaryna +Katy +Kaur +Kaya +Kayden +Kaydence +Kayla +Kaylah +Kaylan +Kaylani +Kaylee +Kayleen +Kayleigh +Kaylen +Kayley +Kayli +Kaylie +Kaylin +Kaylyn +Kaylynn +Kazimiera +Kea +Keara +Keeley +Keely +Kehinde +Kehlani +Keila +Keilani +Keily +Keira +Keke +Kekeli +Kelis +Kelli +Kellie +Kelly +Kelsey +Kelsi +Kelsie +Kenaya +Kendal +Kendall +Kendra +Kendyl +Kenia +Kenley +Kenna +Kennedi +Kennedy +Kensington +Kensley +Kenya +Kenza +Kenzie +Kesja +Kethi +Keyla +Kezia +Khadija +Khairi +Khaleesi +Khallat +Khava +Khloe +Khrystyna +Kiana +Kianna +Kiara +Kiarra +Kiera +Kierra +Kiersten +Kijana +Kiki +Kiley +Kiliane +Kim +Kimber +Kimberly +Kimeta +Kimi +Kimora +Kinenla +Kinga +Kinley +Kinsey +Kinslee +Kinsley +Kira +Kirah +Kiriaki +Kirsten +Kirstin +Kiya +Kiyana +Kiyara +Klara +Klaudia +Klaudyna +Klementyna +Kleo +Kloe +Klotylda +Koleta +Konrada +Konstancja +Konstantinova +Konstantyna +Kook-ja +Kora +Kordula +Kori +Kornela +Kornelia +Koryna +Kosma +Kourtney +Krasimirova +Krista +Kristen +Kristijana +Kristin +Kristina +Kristine +Kristy +Kryspina +Krystal +Krystiana +Krystsina +Krystyna +Krzysztofa +Ksawera +Ksenia +Kseniia +Kseniya +Ksymena +Kumru +Kunegunda +Kwiryna +Kya +Kyla +Kylah +Kylee +Kyleigh +Kylie +Kyndal +Kyndall +Kynlee +Kyra +Käthe +Käthemarie +Kékéli +Kübra +Kısmet +Laarnie +Labiba +Lacey +Laci +Lacie +Lacy 
+Ladisha +Laelia +Laetitia +Lahja +Laila +Lailah +Lainey +Laisa +Laisha +Lale +Lamar +Lamina +Lana +Landry +Lanea +Laney +Lani +Laniyah +Lara +Lara-milena +Laraib +Larin +Larina +Larisa +Larissa +Larysa +Latifa +Laura +Laurel +Lauren +Laurena +Laurencja +Lauryn +Lava +Laven +Lavienne +Lavya +Lawin +Laya +Layan +Layla +Laylah +Lea +Leah +Leahna +Leana +Leanda +Leander +Leandra +Leann +Leanna +Lee +Leeloo +Leen +Leeya +Legacy +Leia +Leigha +Leighton +Leila +Leilani +Lela +Lemis +Lena +Lene +Leni +Lenia +Lenja +Lennja +Lennon +Lennox +Leokadia +Leona +Leonarda +Leoni +Leonia +Leonida +Leonie +Leonor +Leonora +Leontine +Leontyna +Leopolda +Leopoldyna +Lesia +Lesley +Leslie +Lesly +Lesya +Lesława +Leticia +Letizia +Letizia-maria +Letycja +Levina +Levita +Lexi +Lexie +Lexus +Leya +Leyla +Leyna +Li +Lia +Lia-marie +Liah +Lian +Liana +Liandra +Liara +Libby +Liberty +Lida +Lidia +Lidiia +Lidija +Lidiya +Liel +Lielle +Lien +Lienne +Liese +Liesel +Lieselotte +Liev +Ligia +Lijana +Lika +Lil +Lila +Lilaf +Lilah +Lilav +Lili +Lilia +Lilian +Liliana +Lilianna +Lilibeth +Liliia +Lilija +Lilith +Liliya +Lilja +Liljana +Lilla +Lille +Lilli +Lilli-amalia +Lillian +Lilliana +Lillianna +Lillie +Lilly +Lillyan +Lillyana +Lilo +Lilou +Lily +Lilya +Lilyana +Lilyanna +Lina +Linda +Lindsay +Lindsey +Line +Linea +Linh +Linn +Linna +Linnea +Lioba +Liona +Liora +Lisa +Lisa-marie +Lisanna +Lisanne +Lisbeth +Lise +Liselotte +Lisenka +Lisette +Liss +Lissy +Litzy +Liubov +Liudmila +Liudmyla +Liv +Liva +Livia +Livie +Liwia +Liya +Liya-dua +Liyah +Liyah-hayat +Liyana +Liz +Liza +Lizaveta +Lizbeth +Lizeth +Lizette +Loa +Logan +Lois +Lojayn +Loki +Lola +Lolita +London +Londyn +Lone +Longina +Loni +Lonia +Lonni +Loona +Lora +Lordina +Lore +Loredana +Loreen +Lorelai +Lorelei +Loreley +Loren +Lorena +Lori +Lorien +Lorin +Lorita +Lorîn +Lotta +Lotte +Lottje +Lou +Lou-ann +Louane +Loui +Louisa +Louise +Loujaine +Loulouda +Lourdes +Lov +Lova +Lovis +Loya +Lu +Luana +Luanne +Luba +Lubomira +Lubomiła +Lubow 
+Luca +Lucero +Lucia +Luciana +Lucie +Lucienne +Lucilla +Lucille +Lucja +Lucjana +Lucjanna +Lucrezia +Lucy +Lucy-lou +Lucyna +Lucía +Ludgarda +Ludmila +Ludmiła +Ludomira +Ludomiła +Ludwika +Ludwina +Luella +Luisa +Luise +Luiza +Lujain +Luján +Luka +Lukrecja +Lulembe +Lulou +Lumi +Luna +Luna-marielle +Luvina +Luz +Luzia +Luzie +Luzina +Lya +Lyanna +Lydia +Lyla +Lylah +Lyn +Lyna +Lyndsey +Lynn +Lyra +Lyric +Lyubomir +Lyubov +Lyudmyla +Léa +Léana +Léanne +Lönna +Maarit +Mabana +Mabel +Mabelle +Macey +Maci +Macie +Mackenzie +Macy +Madalena +Madalyn +Madalynn +Maddison +Madelaine +Madeleine +Madeliene +Madeline +Madelyn +Madelynn +Madiha +Madilyn +Madilynn +Madina +Madisen +Madison +Madisyn +Madita +Madleine +Madlen +Madlyn +Madyson +Mae +Maedot +Maegan +Maeva +Maeve +Magalie +Magda +Magdalena +Magdalene +Maggie +Magneta +Magnolia +Maha +Mahana +Maherzia +Mahi +Mahina +Mahya +Mai +Maia +Maiara +Maiia +Maike +Maila +Mailin +Maisie +Maisy +Maiya +Maja +Majka +Makaila +Makayla +Makena +Makenna +Makenzie +Maknuna +Mala +Malaika +Malak +Malani +Malas +Malaya +Malaysia +Malea +Maleah +Maleen +Malena +Malene +Mali +Malia +Maliah +Malika +Malin +Malina +Maliya +Maliyah +Mallory +Malou +Maloucina +Malu +Malvina +Malwina +Maly +Mana +Mandy +Manifest +Manisa +Manissa +Manuela +Mara +Maralmaa +Maram +Maramawit +Maranata +Maranda +Marcela +Marcelina +Marcella +Marcjanna +Marcy +Mare +Mareli +Marely +Maren +Margaret +Margareta +Margarete +Margaretha +Margarethe +Margarita +Margaryta +Margit +Margo +Margot +Margota +Margret +Marharyta +Mari +Maria +Maria-luisa +Mariah +Mariam +Marian +Mariana +Marianna +Marianne +Maribel +Marie +Marie-louise +Marie-luise +Marie-thérèse +Marieke +Mariela +Mariella +Marielle +Marieta +Marietta +Mariia +Marija +Marika +Marike +Marilena +Marilou +Marilyn +Marilú +Marin +Marina +Marine +Marinella +Mariola +Marion +Mariova +Marisa +Marisol +Marissa +Marit +Marita +Maritza +Mariya +Mariyah +Marja +Marjorie +Marla +Marlee +Marleen +Marleigh +Marlen +Marlena 
+Marlene +Marley +Marli +Marli-lynn +Marlie +Marliese +Marlo +Marlow +Marlowe +Marlén +Maron +Marta +Martha +Martina +Martyna +Maru +Mary +Maryam +Maryana +Maryia +Maryjane +Maryla +Maryna +Marysol +Marzanna +Marzena +Marzenna +Masal +Masamutu +Mascha +Maseda +Masha +Matea +Mathea +Mathilda +Mathilde +Matilda +Matilde +Matrona +Mattea +Mattie +Matylda +Maura +Mavi +Mavie +Mavis +Mawiga +Mawunyo +Maxi +Maxie +Maxim +Maxima +Maximilia +Maxin +Maxine +May +Maya +Maya-kiara +Mayala +Mayari +Maybelle +Mayla +Maylee +Mayleen +Maylin +Maylin-shari +Mayra +Mazal +Maze +Maëlla +Maëlle +Maëlys +Maïrame +Małgorzata +Mckayla +Mckenna +Mckenzie +Mckinley +Meadow +Meagan +Meaghan +Medea +Medeea +Medina +Medine +Medis +Megan +Meghan +Mehar +Mehmedova +Mehri +Mehriban +Mehriya +Mei +Mei-zhen +Meilani +Mejrem +Mela +Melania +Melanie +Melany +Melat +Melek +Melek-aggeliki +Melia +Meliha +Melika +Melike +Melin +Melina +Melinda +Melis +Melisa +Melissa +Meliya +Melodi +Melodie +Melody +Meltem +Menese +Meral +Mercedes +Mercy +Meredith +Merete +Mergube +Merida +Merijem +Merilla +Merjem +Merle +Merve +Meryem +Meryem-liya +Meta +Metin +Meva +Mevanur +Mevlana +Mey +Meya +Meyra +Mi +Mia +Mia-jolie +Mia-sophia +Mia-sophie +Miah +Micaela +Micah +Micah-gradie +Michaela +Michal +Michalina +Michele +Michelle +Midia +Mieczysława +Mihail +Mihova +Mihra +Mihriban +Mihrimah +Mika +Mikaela +Mikaila +Mikalah +Mikayla +Mila +Mila-joline +Milagros +Milan +Milana +Milani +Milania +Milay +Milele +Milena +Milenova +Miley +Mileya +Mileyna +Milia +Milinda +Milla +Millie +Milou +Mina +Mine +Minel +Minerva +Minh +Minna +Minou +Minu +Mira +Mira-sara +Mirabel +Miracle +Miral +Miranda +Miray +Mirela +Mirella +Mireya +Miriam +Mirijam +Mirja +Mirjam +Mirona +Miroslavova +Mirosława +Miss +Misty +Mitra +Miya +Miyase +Miyu +Mizgeen +Miłosława +Mjimoseoluwa +Moana +Modesta +Moena +Mohammed +Moira +Mollie +Molly +Mona +Monalisa +Monica +Monika +Monique +Monroe +Monserrat +Montana +Montserrat +Moon +Morena +Morgan +Morgana 
+Moriah +Moriella +Moriko +Moris +Mouhsina +Mrs. +Ms. +Muco +Muhammed +Muna +Munachimso +Mustafa +Mustafova +My +Mya +Myah +Myla +Mylah +Mylee +Mylie +Myong +Myra +Myroslava +Mía +Müberra +Münüre +Mădălina +Mỹ +Naala +Nada +Nadejda +Nadezhda +Nadia +Nadieżda +Nadiia +Nadija +Nadin +Nadine +Nadir +Nadiya +Nadja +Nadzeya +Nadzieja +Naelle +Naemi +Nafas +Nafissatou +Nagihan +Nahla +Naika +Naila +Nailah +Naima +Nairobi +Nala +Nala-kamaly +Nalani +Nalin +Nallely +Namat +Name +Namika +Nana +Nancy +Nanda +Nantenin +Naoko +Naomi +Narcisa +Narcyza +Nari +Narin +Naré +Nas +Nasara +Nasrin +Nastassia +Nastazja +Nastia +Natalee +Natalia +Natalie +Nataliia +Natalija +Natalina +Nataliya +Natallia +Nataly +Natalya +Natasha +Natasza +Nathalia +Nathalie +Nathaly +Natja +Nauzad +Navishka +Naya +Nayeli +Nayely +Nayla +Naz +Nazitera +Nazli +Nazlı +Nazmije +Nazret +Naïma +Nea +Neaza +Neda +Neela +Neele +Neema +Nefeli +Nefertari +Nefes +Neha +Nehuén +Nejla +Nektaria +Nel +Nela +Nele +Nele-luzia +Neli +Nelia +Nell +Nella +Nelli +Nelly +Nena +Neonila +Neriah +Nerim +Nerina +Nermin +Nermine +Nervana +Neslihan +Nesrin +Nesrine +Neva +Nevaeh +Neval +Neveah +Nevin +Nevrija +Neyla +Nezhe +Ngako +Ngokazi +Nhat +Nhi +Nia +Nian +Nica +Nichole +Nicki +Nicol +Nicola +Nicole +Nicoletta +Nicolette +Nicolina +Nicoll +Nicolle +Nieke +Nihal +Nihayet +Nihira +Nika +Nike +Niki +Nikita +Nikki +Niklas +Nikol +Nikola +Nikolay +Nikoleta +Nikoletta +Nikolina +Nil +Nila +Nilay +Nina +Ninelle +Nino +Nira +Nisa +Nisan +Nisha +Nisreen +Nissrine +Niya +Niyati +Nneamaka +Noa +Noelani +Noelia +Noella +Noelle +Noemi +Nofya +Nola +Nomi +Nonna +Noor +Nooriya +Nora +Nora-maxime +Norah +Noran +Norhan +Norina +Noriza +Norma +Noubissie +Nour +Noura +Nouria +Nova +Novah +Novalee +Noé +Noémi +Noémie +Noëllie +Nujin +Numi +Numidia +Nur +Nura +Nural +Nuram +Nurcan +Nya +Nyah +Nyasia +Nyla +Nylah +Oaklee +Oakley +Oaklyn +Oaklynn +Oana +Ocean +Octavia +Odalys +Odilia +Oghenetejiri +Ohemaa +Oi +Oksana +Oktawia +Ola +Olamide +Olaya 
+Oleksandra +Olena +Olesia +Olesya +Olga +Olha +Olimpia +Olive +Olivia +Oliwia +Omer +Omobolale +Opal +Ophelia +Orhanova +Oriana +Ornela +Orysia +Osaivbie +Osarugue +Osasere +Osman +Osose +Otolia +Ottilia +Ottilie +Otylia +Ouarda +Oumaryero +Oumou +Oxana +Paige +Paislee +Paisleigh +Paisley +Paityn +Palina +Palmer +Paloma +Pamela +Pandora +Paola +Paolina +Paraska +Paraskewia +Paraskiewa +Paraskiewia +Pareeshay +Paris +Pariya +Parker +Patience +Patricia +Patrizia +Patrycja +Paula +Paulin +Paulina +Pauline +Payten +Payton +Peaches +Pearl +Pelagia +Penda-rose +Penelope +Penny +Peppa +Peri +Perihan +Perla +Petra +Petronela +Petronella +Peyton +Pheline +Philia-benice +Philin +Philina +Philine +Philippa +Philou +Phoebe +Phoenix +Phuong +Phylis +Pia +Pia-sophie +Piadora +Pina +Pinar +Piper +Pippa +Plamedi +Plamena +Pola +Polina +Polly +Poppy +Prakseda +Precious +Prema +Prenses +Presley +Princess +Prisca +Priscila +Priscilla +Prisha +Promise +Przemysława +Puja +Pyper +Queen +Quetzali +Quinn +Rabia +Rachael +Rachel +Rachela +Rachelle +Radkova +Radomirova +Radoslavova +Radosława +Rae +Raegan +Raelyn +Raelynn +Rafaela +Raffaela +Rahaf +Rahel +Rahil +Raina +Raisa +Rajmunda +Rakeb +Rala +Rama +Ramona +Ramy +Rana +Randi +Raneem +Rani +Rania +Raphaela +Raquel +Rasema +Rasheed +Rasimova +Raven +Ravza +Raya +Raylee +Rayna +Rayne +Reagan +Reanna +Rebeca +Rebecca +Rebeka +Rebekah +Rebekka +Reece +Reese +Reeya +Regan +Regiline +Regina +Reign +Reilly +Reina +Rejoice +Remi +Remington +Remy +Rena +Renata +Renee +Rengin +Rengîn +Reni +Renée +Resa +Resi +Rey +Reyes +Reyhan +Reyna +Reyyan +Rhea +Rhianna +Rhiannon +Rhode +Rhonda +Ria +Riana +Rianna +Rida +Rieke +Riem +Rihanna +Rika +Riley +Rim +Rimona +Rina +Rita +Ritaaj +Ritel +Riva +River +Rivka +Riya +Roberta +Robin +Robyn +Rocio +Rojin +Roksana +Roksolana +Roma +Romaissa +Romana +Romi +Romie +Romina +Romualda +Romy +Ronahi +Roni +Ronja +Ronya +Rory +Rosa +Rosalee +Rosali +Rosalia +Rosalie +Rosalina +Rosalinda +Rosalyn +Rosalía +Rose 
+Rosel +Roselin +Rosella +Roselyn +Rosemarie +Rosemary +Rosie +Rosina +Rosleen +Roslin +Roukaya +Rovan +Rowan +Roxana +Roxanne +Roya +Royal +Royalty +Roza +Rozalia +Rozwita +Rubi +Rubie +Rubina +Ruby +Rufaida +Rufina +Rugaya-lina +Rukiye +Runa +Ruslana +Rut +Ruta +Ruth +Ruya +Ryan +Ryann +Rylan +Rylee +Ryleigh +Rylie +Ryszarda +Ryta +Róża +Rüjan +Rümeysa +Rüya +Saadet +Saanvi +Saanvika +Sabina +Sabine +Sable +Sabrina +Sadat +Sade +Sadia +Sadie +Sae-hee +Saeed +Safa +Safia +Safiya +Sage +Sahak +Sahar +Saheba +Sahra +Saida +Saide +Saige +Saja +Sakina +Saleem +Salematou +Sali +Salihya +Salimah +Sally +Salma +Salome +Salomea +Salomeja +Salsabil +Salwina +Saly +Sam +Samanta +Samantha +Samar +Samara +Samaya +Samet +Samira +Samisha +Samiya +Samiyah +Samra +Sanaa +Sanai +Sanam +Sandra +Sandra-anisia +Sandy +Sanelija +Sanem +Saniya +Saniyah +Sanjana +Sanna +Sannah +Santiago +Sanya +Saoirse +Saood +Saphira +Sara +Sarah +Sarahi +Sarai +Sare +Sargun +Sariah +Sarina +Sariyah +Sarra +Sarya +Sasha +Satira +Saturnina +Savana +Savanah +Savanna +Savannah +Sawin +Sawyer +Saya +Sayeh +Saylor +Sayra +Scarlet +Scarlett +Scarlette +Scholastyka +Scout +Sebahat +Sedra +Sefa +Sejla +Selah +Selam +Selena +Selene +Selenia +Selin +Selina +Selma +Selnaz +Sema +Semina +Sena +Sened +Senna +Seny +Seon-mi +Sephora +Sera +Serafina +Seraiah +Serapsu +Serena +Serenity +Serin +Sevdalinova +Sevgi +Seweryna +Seyedeh +Seyla +Seyma +Shabana +Shadin +Shadya +Shahed +Shaili +Shaina +Shakira +Shakti +Sham +Shana +Shanaya +Shania +Shaniya +Shannon +Shanyar +Shara +Sharon +Shavin +Shawna +Shaya +Shayen +Shayesteh +Shayla +Shaylee +Shayna +Shea +Sheila +Shelby +Sheridan +Sherlyn +Sheyla +Shiloh +Shira +Shirangi +Shirin +Shirley +Shiva +Shona-marie +Shreya +Shyann +Shyanne +Shyla +Sia +Sian +Sibela +Siber +Sidney +Siena +Sienna +Siepsa +Sierra +Sika +Sila +Silke +Silvana +Silvia +Silvina +Simav +Simay +Simeonov +Simina +Simona +Simone +Sina +Sinja +Siripat +Siya +Sky +Skye +Skyla +Skylar +Skyler +Slava +Sloan 
+Sloane +Sloth +Smilla +Snizhana +Sofia +Sofie +Sofiia +Sofija +Sofiya +Sofía +Sohan +Sohia +Sol +Sola +Sole +Solea +Solin +Solomiia +Solveig +Soléa +Sonali +Songül +Sonia +Sonja +Sonya +Sophia +Sophia-marie +Sophie +Soraya +Sorina +Sotiria +Soufiya +Soya +Spasova +Spencer +Stacey +Stacy +Stanislava +Stanislavovna +Stanisława +Stefania +Stefanie +Stefanova +Steffie +Stela +Stella +Stellina +Stephanie +Stephany +Stevie +Stine +Su +Suana +Subaia +Sude +Sue +Suet +Sujanaa +Suki +Sulaika +Sultan +Sumeja +Sumejja +Summer +Suna +Sunamita +Sunna +Sunny +Sura +Surafeal +Suraya +Suri +Susa +Susan +Susana +Susanna +Susanne +Sutton +Suvi +Suzana +Suzanna +Suzanne +Svea +Svea-marie +Svetla +Svetlana +Sviatlana +Svitlana +Swaranshi +Swietłana +Switłana +Sybilla +Sybille +Sydnee +Sydney +Sydni +Sydnie +Sydonia +Sylvaine +Sylvia +Sylvie +Sylwestra +Sylwia +Sylwina +Syntia +Szarlota +Szonja +Sélène +Sümeyye +Süveyda +Sıla +Sława +Sławomira +Tabea +Tabitha +Tacjana +Tadeusza +Taha +Tahia +Taida +Taina +Taisa +Taisiia +Taiwo +Tal +Tala +Talea +Tali +Talia +Talin +Talina +Taliyah +Talu +Talya +Talyia +Tamar +Tamara +Tamia +Tamila +Tamina +Tamya +Tania +Taniya +Taniyah +Tanya +Tara +Taraji +Taryn +Tasmanjit +Tasnim +Tasnuva +Tatiana +Tatianna +Tatjana +Tatsiana +Tatum +Tatyana +Tauriel +Taya +Tayla +Tayler +Taylor +Tchawa +Tea +Teagan +Tegan +Tekie +Tekla +Teliliane +Temperance +Tenley +Teodora +Teodozja +Teofila +Teresa +Tereza +Tess +Tessa +Tessina +Testimony +Tetiana +Tetyana +Thaarisha +Thalia +Thalia-karina +Thea +Theda +Theresa +Therese +Theresia +Thi +Thilda +Thiên +Thora +Thị +Tia +Tiana +Tianna +Tiara +Tierra +Tiffany +Tijana +Tiju +Tilda +Tilla +Tillie +Timea +Timurovna +Tina +Tinka +Tinley +Tinsley +Tipoko +Tiril +Tola +Tolien +Tolina +Tomira +Tomova +Toni +Tonie +Tori +Torvi +Tracy +Treasure +Trenessa +Trinity +Trisha +Trista +Tristan +Tristen +Trudi +Tsampika +Tsvetelina +Tuana +Tuuli +Tyler +Tynisha +Tyra +Türkan +Uliana +Ulrike +Ulyana +Uma +Una +Unique +Ursula +Urszula 
+Ute +Uyiosa +Vaani +Vada +Vahibe +Vahida +Vaiana +Valencia +Valentina +Valentyna +Valeria +Valerie +Valeriia +Valeriya +Valery +Valeryia +Valeska +Valezka +Valiantsina +Valide +Vanellope +Vanesa +Vanessa +Vanya +Varvara +Vasileva +Vasiliki +Vasilivna +Vasylyna +Veda +Vega +Venezuela +Vera +Veranika +Verena +Veronica +Veronika +Veronique +Vezira +Victoire +Victoria +Victory +Vida +Vienna +Vika +Viktoria +Viktoriia +Viktoriya +Viktoryia +Viktória +Vilma +Vina +Vincenza +Viola +Violae +Violet +Violeta +Violetta +Viona +Vira +Virginia +Vita +Vitaliia +Vitalija +Vitalina +Vittoria +Vittòria +Vitória +Viva +Vivian +Viviana +Vivien +Vivienne +Viyan +Vlada +Vladimirova +Vladyslava +Volha +Vu +Vy +Wacława +Walburga +Walentyna +Waleria +Waleska +Waltraud +Waltrauda +Waltraut +Wambui +Wanda +Wanesa +Wanessa +Wang +Warvin +Warwara +Wasan +Waverly +Wega +Weiming +Wendy +Wera +Weronika +Whitley +Whitney +Wielisława +Wiera +Wiesława +Wieńczysława +Wiktoria +Wilhelmina +Wilhelmine +Willa +Willow +Wilma +Wina +Wincenta +Wincentyna +Winnie +Winter +Wioleta +Wioletta +Wirginia +Wisława +Wita +Witolda +Wren +Wynter +Władysława +Włodzimiera +Xenia +Xiangxu +Xiara +Ximena +Xinran +Xiomara +Yaas +Yade +Yadira +Yael +Yaelle +Yahaira +Yajaira +Yakub +Yalah +Yamilet +Yamileth +Yana +Yanara +Yanet +Yang +Yanina +Yankova +Yara +Yareli +Yaren +Yaretzi +Yaritza +Yaroslava +Yaryna +Yasemin +Yasina +Yasmeen +Yasmin +Yasmina +Yasmine +Yauheniya +Yawira +Yazmin +Yaël +Yağmur +Yeabsra +Yedda +Yelda +Yelitza +Yeliz +Yelizaveta +Yelyzaveta +Yemaya +Yesenia +Yessenia +Yeva +Yevheniia +Yichen +Ylana +Ylenia +Ylvi +Ylvie +Yoanna +Yoko +Yolanda +Yonas +Yordanova +Yoselin +Yostina +Youhne +Youssra +Youzixin +Yozlem +Yue +Yui +Yuki +Yulia +Yuliana +Yuliia +Yuliya +Yuliyanova +Yuna +Yuno +Yuridia +Yusra +Yvette +Yvonne +Yüsra +Zahra +Zainab +Zaineb +Zala +Zana +Zaniyah +Zara +Zaria +Zariah +Zariyah +Zaya +Zaylee +Zaynab +Zbigniewa +Zdzisława +Zefiryna +Zehra +Zeinab +Zejnep +Zekimyuren +Zelda +Zeliha 
+Zelisia +Zelma +Zenaida +Zendaya +Zenobia +Zenona +Zerya +Zeyna +Zeyneb +Zeynep +Zeyyan +Zhanna +Zhavia +Zhuolan +Zhuri +Ziba +Zilan +Zilfije +Ziling +Zina +Zinaida +Zineta +Zion +Ziva +Zlata +Zoe +Zoe-elaine +Zoey +Zofia +Zoi +Zoia +Zoie +Zoja +Zola +Zolara +Zora +Zoriana +Zosia +Zoya +Zoé +Zoé-victoria +Zoë +Zoí +Zuri +Zury +Zusan +Zuzana +Zuzanna +Zygfryda +Zygmunta +Zyta +Zülal +Züleyha +Zümra +Änne +Änni +Çağla +Élise +Éléa +Émilie +Öykü +Özge +Ülkü +İkra +İlayda +İrem +Łarysa +Łucja +Şoşvin +Żaklin +Żaklina +Żaneta +Żanetta +Żanna diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/common/data/male_names.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/common/data/male_names.dat new file mode 100644 index 000000000..a5710179f --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/common/data/male_names.dat @@ -0,0 +1,4253 @@ +Aaden +Aaman +Aarav +Aaron +Aaron-karl +Aarush +Aaryansh +Aayan +Aayansh +Abbas +Abdallah +Abdalmalk +Abdelkarim +Abdesamad +Abdessamad +Abdiel +Abdoul +Abdoul-djawad +Abdourahman +Abdul +Abdulahi +Abdulaziz +Abdullah +Abdulmuqeet +Abdulrahman +Abdulrasaq +Abdurahman +Abdurrahim +Abdurrahman +Abel +Abenieser +Abid +Abiodun +Abiyan +Abkhaz +Abraham +Abram +Ace +Achilles +Achraf +Adam +Adan +Adar +Addison +Adeday +Adel +Adem +Aden +Adhithya +Adian +Adib +Adil +Adin +Adis +Aditya +Adnan +Adolf +Adolfo +Adonis +Adrian +Adriano +Adriel +Adriell +Adrien +Adrijano +Adrin +Adriyan +Adrián +Aedan +Aeneas +Aeon +Aggelos +Agim +Agir +Agustin +Agyemang +Ahad +Ahil +Ahimsa +Ahmad +Ahmed +Ahmet +Ahmir +Aid +Aidan +Aiden +Aidin +Aidl +Aidyn +Ailele +Ajan +Akim +Akin +Akiva +Akmar +Akın +Al-amin +Al-hassan +Alan +Aland +Alaric +Alaz +Albert +Alberto +Albertov +Albin +Albion +Alden +Aldo +Alec +Aleh +Alejandro +Alek +Aleks +Aleksandar +Aleksander +Aleksandr +Aleksandre +Aleksandrov +Aleksei +Aleksej +Aleksy +Alemu +Alen +Alend +Aleph +Alessandro +Alessio +Alex +Alexander +Alexandr +Alexandre 
+Alexandro +Alexandros +Alexandru +Alexei +Alexey +Alexis +Alexzander +Aleyan +Alfa +Alfie +Alfio +Alfons +Alfonso +Alfred +Alfredo +Ali +Alia +Aliaksandr +Aliaksei +Alias +Alifeli +Alijah +Alin +Alisson +Alistair +Aliyar +Alkan +Allah +Allaith +Allan +Allen +Allessio +Almamy +Almir +Almoustafa +Alois +Alojzy +Alon +Alonso +Alonzo +Aloys +Alp +Alparslan +Alpay +Alper +Alperen +Alphonse +Alptekin +Altan +Altay +Alton +Alvar +Alvaro +Alvin +Alwin +Amadeus +Amadeusz +Amadou +Amani +Amanuel +Amar +Amare +Amari +Amarion +Amaro +Amaru +Ambroży +Amedeo +Ameer +Amelius +Amen +Amend +Ametbaş +Amilio +Amin +Amine +Amir +Amirali +Amit +Ammar +Amon +Amor +Amos +Amr +An +Anakin +Anas +Anastazy +Anatol +Anatoli +Anatolie +Anatolii +Anatoliusz +Anatoliy +Anbesa +Anders +Anderson +Ando +Andon +Andre +Andrea +Andreas +Andrei +Andreis +Andrej +Andres +Andrew +Andrey +Andrian +Andrii +Andrija +Andrijowytsch +Andris +Andriy +Andrzej +André +Andy +Anetov +Angel +Angelo +Angelos +Anh +Anh-minh +Anil +Anis +Anointed +Anouar +Anse +Anson +Anthony +Antoine +Antoinebonzolo +Anton +Antoni +Antonia +Antonio +Antonios +Antonius +Antony +Antwan +António +Anwit +Anwuli +Anzelm +Aous +Apolinary +Apollinaris +Apollo +Apoloniusz +Arad +Aram +Aran +Aras +Arat +Arcan +Arcangelo +Archer +Archie +Arda +Arel +Ares +Arhan +Ari +Aria +Arian +Aric +Arie +Ariel +Aries +Arif +Arijan +Arin +Aris +Ariyan +Ariyo +Arjan +Arjen +Arjun +Arkadi +Arkadii +Arkadiusz +Arlo +Arman +Armand +Armando +Armang +Armani +Armen +Armin +Arnav +Arne +Arnis +Arno +Arnold +Aro +Aron +Arsen +Arsenii +Arseniusz +Arslan +Artem +Arthur +Artin +Artiom +Arto +Artsem +Artsiom +Artur +Arturo +Arvid +Arwan +Arwid +Arwin +Aryan +Aryas +Asa +Asad +Asaf +Asen +Asenov +Ashanti +Asher +Ashton +Ashur +Asif +Asim +Aslan +Astor +Asyan +Ata +Atacan +Atahan +Atakan +Ateş +Athanasios +Atilla +Atlas +Atticus +Aubrey +August +Augustin +Augustine +Augustus +Augustyn +Aui +Aurelian +Aurelio +Aurelius +Aureliusz +Austen +Austin +Austyn +Avery +Avi +Avid 
+Aviel +Avir +Awaab +Awras +Axel +Axl +Axton +Ayaan +Ayad +Ayan +Ayas +Ayaz +Ayaz-deniz +Aybars +Aydan +Ayden +Aydin +Aydoan +Aydın +Ayhan +Aykan +Aykut +Aylan +Ayman +Aymen +Ayo +Ayoub +Ayub +Ayyed +Azad +Azar +Azariah +Azer +Azis +Aziz +Baby +Bahez +Bahri +Bailey +Bajwa +Baker +Bakr +Balaaga +Balaj +Baltazar +Bandhu +Baptista +Barack +Baran +Barley +Barnaba +Baron +Barrett +Barry +Bartek +Bartholomäus +Bartosz +Bartłomiej +Barış +Bas +Basire +Basit +Basri +Bastian +Batu +Batuhan +Baturou +Bavin +Bayan +Baybars +Baylor +Bazlael-levent +Bazyl +Bazyli +Başar +Beau +Beckett +Beckham +Beddy +Beene +Behnam +Beka +Bekham +Bekir +Bektash +Bela +Belal +Belden +Ben +Bence +Bene +Benedek +Benedict +Benedikt +Benedito +Benedykt +Beniamin +Benicio +Benito +Benjamin +Bennet +Bennett +Benni +Benno +Benny +Benon +Benoît +Benson +Bent +Bentlee +Bentley +Bently +Benton +Benyamin +Benz +Berat +Berk +Berkay +Berken +Bernado +Bernard +Bernardo +Bernd +Bernhard +Bero +Bert +Bert-josef +Berthold +Bertil +Bertold +Besnik +Beyazit +Biday +Biko +Bilal +Billy +Birhat +Birhayat +Birk +Bishop +Bjarne +Bjorn +Björn +Blago +Blagoev +Blaine +Blaise +Blake +Blaze +Bo +Bobby +Bode +Boden +Bodhi +Bodie +Bodo +Bogdan +Bogumił +Bogusz +Bogusław +Bohdan +Bojan +Bolesław +Bonifacy +Boone +Bora +Boris +Bory +Borys +Bosse +Boston +Boutni +Bowen +Bowie +Bożydar +Brad +Braden +Bradley +Bradly +Brady +Bradyn +Braeden +Braedon +Brahim +Braian +Braiden +Brajan +Brandan +Branden +Brandon +Brandt +Brandyn +Branimirov +Branko +Branson +Brant +Brantlee +Brantley +Braulio +Braxton +Brayan +Brayden +Braydon +Braylen +Braylin +Braylon +Brayson +Brecken +Brendan +Brenden +Brendon +Brennan +Brennen +Brennon +Brent +Brentley +Brenton +Bret +Brett +Brian +Briar +Brice +Bridger +Briggs +Brixton +Brock +Broderick +Brodie +Brody +Brogan +Bronisław +Bronson +Brooklyn +Brooks +Bru +Bruce +Bruno +Brunon +Bryan +Bryant +Bryce +Brycen +Brysen +Bryson +Burak +Burkay +Byron +Bâki +Béla +Bêjan +Błażej +Cade +Caden +Cael +Caiden 
+Cain +Caio +Cairo +Cale +Caleb +Calixte +Callan +Callen +Callum +Calogero +Calvin +Camaro +Camden +Camdyn +Cameron +Camillo +Camilo +Campbell +Camren +Camron +Camryn +Can +Canaan +Canluca +Cannon +Carl +Carl-benjamin +Carlo +Carlos +Carlton +Carmelo +Carmine +Carol +Carsen +Carson +Carter +Case +Casen +Casey +Cash +Casi +Casimir +Cason +Caspar +Casper +Caspian +Cassian +Cassian-mihai +Cassius +Castiel +Cay +Cayden +Cayleb +Cayson +Ceaser +Cebrail +Cedi +Cedric +Cedrick +Celasun +Celestyn +Cem +Cemal +Cemil +Cenk +Cesar +Cesur +Cevat +Cezar +Cezariusz +Cezary +Chace +Chad +Chaim +Cham +Chance +Chandler +Channing +Charbel +Charl +Charles +Charlie +Charly +Chase +Chaz +Cheikh-mohamed +Chengo +Chester +Chevy +Chris +Christian +Christian-massimo +Christoher +Christoph +Christophe +Christopher +Christos +Chrystian +Chukwuebuka +Ciel +Cinar +Claas +Clarence +Clark +Claude +Claudio +Claudius +Claus +Clay +Clayton +Clemens +Cleo +Clifford +Clifton +Clint +Clinton +Clyde +Clément +Coby +Cody +Coen +Cohen +Colby +Cole +Coleman +Colin +Collin +Colm +Colonius +Colson +Colt +Colten +Colton +Conner +Connor +Conor +Conrad +Constantin +Cooper +Corban +Corbin +Cordell +Corey +Corleone +Cornelius +Cortez +Corvin +Corwin +Cory +Cosimo +Cosmo +Costa +Courtney +Coy +Craig +Creed +Crew +Cristian +Cristiano +Cristobal +Cristofer +Cristopher +Crosby +Cruz +Cuinn +Cullen +Curt +Curtis +Cyprian +Cyrus +Cyryl +Czesław +César +Cüneyt +Daan +Dacjan +Daiki +Dailan +Dakari +Dakota +Dalawar +Dale +Dallas +Dallin +Dalton +Damari +Damarion +Damazy +Damian +Damiano +Damien +Damion +Damir +Damon +Dan +Dandre +Dane +Dangelo +Dani +Daniel +Daniele +Daniil +Danil +Danila +Danilo +Danis +Danny +Dante +Danyil +Danylo +Dapaah +Daquan +Darian +Dariel +Darien +Darin +Dario +Darion +Daris +Darius +Dariusz +Darnell +Daron +Darosław +Darrell +Darren +Darrin +Darrion +Darrius +Darryl +Darwin +Daryan-rocco +Daryl +Dash +Dashawn +Dastin +Daud +Dauld +Daunte +Davi +Davian +David +Davide +Davin +Davion +Davis +Davit 
+Davon +Davonte +Davood +Davud +Davut +Davy +Davyd +Dawid +Dawood +Dawson +Dax +Daxton +Daylen +Daylon +Dayne +Dayton +Dayyan +Deacon +Dean +Deandre +Deangelo +Decker +Declan +Deegan +Deion +Deivit +Dejan +Dejen +Dejn +Delhat +Delian +Delschad +Demarco +Demarcus +Demarion +Demetrius +Demian +Demiran +Demirhan +Demirov +Denchov +Denian +Denis +Deniz +Dennis +Denny +Dennys +Denver +Denys +Denzel +Deon +Deondre +Deontae +Deonte +Derek +Dereon +Derick +Derrick +Deshaun +Deshawn +Desire +Desislavov +Desmond +Destin +Dev +Devan +Devansh +Devante +Deven +Devin +Devine +Devon +Devonte +Devran +Devrim +Devyn +Dewayne +Dewen +Dexter +Deyvid +Dezyderiusz +Diamady +Dian +Dibea +Diegio +Diego +Dieter +Dietmar +Dieu +Dilan +Diljan +Dillan +Dillion +Dillon +Dilovarovič +Dilyar +Dima +Dimitar +Dimitri +Dimitrios +Dimitris +Dimitrov +Dimo +Dimov +Din +Dinis +Dino +Dion +Dionizy +Dionysis +Dior +Dirk +Diter +Ditmar +Diyan +Diyar +Diyo +Django +Djimon +Djon +Dlsher +Dmitri +Dmitrii +Dmitrij +Dmitry +Dmytrii +Dmytro +Dobiesław +Dobromir +Dobrosław +Dohan +Domenic +Domenico +Domingos +Dominic +Dominick +Dominik +Dominique +Don +Donald +Donat +Donato +Donavan +Donavon +Donnell +Donnie +Donovan +Dontae +Donte +Dorian +Doruk +Douglas +Drago +Drake +Draven +Drew +Duane +Duc +Duke +Dumitru +Duncan +Dustin +Dux +Dwayne +Dwight +Dylan +Dylane +Dyllan +Dylon +Dymitr +Dyon +Dyonizy +Dzhansu +Dzhuneit +Dzianis +Dzmitry +Ean +Earl +Eason +Easton +Ebubekir +Ecevit +Ecrin +Eddie +Eddy +Ede +Eden +Eden-amari +Edenn +Edgar +Edi +Ediagbonya +Edian +Edis +Edison +Ediz +Edmund +Edon +Edotohan +Eduard +Eduardo +Edvard +Edvin +Edward +Edwin +Efe +Efedzhan +Efekaan +Efekan +Efrain +Efren +Efstathios +Ege +Egecan +Egon +Ehaan +Eidan +Ekamjot +Ekfador +Ekin +Ekki +Ekrem +Ekuran +Elan +Elchai +Eldar +Eldi +Eldin +Eleon +Eleonorov +Elessar +Elham +Eli +Elia +Eliah +Eliam +Elian +Eliano +Elias +Eliasz +Elie +Eliel +Eliezer +Eligiusz +Elija +Elijah +Elijas +Elikya +Elio +Elion +Elios +Eliot +Elisei-ionatan 
+Eliseo +Elisha +Eliyas +Eljano +Elliot +Elliott +Ellis +Elmedin +Elmer +Eloan +Eloghosa +Elon +Elsayed +Elvin +Elvis +Elyas +Elyaz +Elyes +Emad +Eman +Emanuel +Emanuele +Emerson +Emery +Emil +Emile +Emilian +Emiliano +Emilijan +Emilio +Emilio-ramon +Emilion +Emilió +Emiljan +Emillian +Emily +Emin +Emir +Emirhan +Emmanoel +Emmanuel +Emmet +Emmett +Emmitt +Emory +Emran +Emre +Emwiomwan +Emyl +Emíl +Enam +Enea +Eneas +Enes +Engelbert +Engin +Enis +Ennio +Enno +Enoch +Enrico +Enrik +Enrique +Ensar +Entoni +Enzo +Eoin +Ephraim +Eppa +Eran +Erasmo +Eray +Erazm +Ercan +Erden +Erdet +Erdinch +Erdoğan +Erel +Eren +Ergün +Erhan +Erhard +Erian +Eric +Erich +Erick +Erik +Erim +Erjon +Erkan +Ermin +Ermo +Ermuun +Ernest +Ernesto +Ernriko +Ernst +Erol +Ersin +Ertuğrul +Ervin +Erwin +Eryk +Esaia +Eslam +Esminov +Esra +Essar +Esteban +Estevan +Ethan +Ethen +Etienne +Etonam +Eugen +Eugene +Eugenio +Eugeniusz +Eugène +Eun-mok +Eustachiusz +Eustachy +Euzebiusz +Evan +Even +Everett +Evgenii +Evgeny +Evran +Evren +Ewald +Ewaryst +Eyad +Eyal +Eymen +Eyob +Eyram +Eyyub +Eyüp +Ezekiel +Ezequiel +Ezra +Faber +Fabian +Fabio +Fabius +Fabrice +Fabricio +Fabrizio +Fadhil +Faelan +Fahad +Fahd +Fahhed +Fahrettin +Falk +Faraja +Fares +Farez +Farid +Faris +Faruk +Fatehveer +Fatih +Fatjon +Faustin +Fausto +Faustyn +Fazlı +Federico +Fedir +Fedor +Felicjan +Feliks +Felipe +Felipé +Felix +Ferat +Ferdi +Ferdinand +Ferdynand +Ferhat +Ferid +Ferman +Fernando +Ferran +Ferris +Ferry +Feyyaz +Fidel +Fidelius +Fiete +Fietje +Fikret +Filian +Filimon +Filip +Filipe +Filippo +Filippos +Fill +Fin +Finias +Finjas +Finlay +Finley +Finley-maxim +Finn +Finn-emilio +Finn-luca +Finneas +Finnegan +Finnley +Finnr +Fiodor +Fiorenzo +Fisher +Fitus +Fitz +Fletcher +Flinn +Florentin +Florian +Florin +Floris +Flynn +Flóki +Ford +Forest +Forrest +Foster +Fox +Francesco +Francis +Francisco +Franciszek +Franco +Francois +Franek +Franjo +Frank +Frank-dieter +Frankie +Franklin +Franko +Franz +François +Fred +Freddie +Freddy 
+Frederic +Frederick +Frederico +Frederik +Fredi +Fredrick +Fredrik +Fredy +Freedom +Frey +Frido +Frieder +Friederich +Friedolin +Friedrich +Friis +Fritz +Front +Fryderyk +Frédéric +Fuat +Furat +Fynan +Fynn +Fábio +Félix +Gabriel +Gabriel-ciro +Gabriele +Gabryel +Gael +Gaetano +Gage +Gaige +Gal +Galin +Galinov +Gannon +Garett +Garret +Garrett +Garrison +Garry +Gary +Gatlin +Gauge +Gaven +Gavin +Gavyn +Gaweł +Gazi +Gela +Genaro +Genesis +Gennadii +Gennadiy +Gennaro +Geoffrey +Georg +George +Georgi +Georgiev +Georgios +Geovanni +Geovany +Gerald +Gerard +Gerardo +Gerd +Gereon +Gerhard +German +Gero +Gerwazy +Gessesse +Ghenadie +Gheorge +Gheorghe +Giacomo +Gian +Giancarlo +Gianfranco +Gianluca +Gianni +Gibson +Gideon +Gilbert +Gilberto +Gino +Ginter +Gioacchino +Gioele +Giordano +Giorgi +Giorgio +Giosué-elia +Giovani +Giovanni +Giovanny +Giuliano +Giulien +Giulio +Giuseppe +Gjon +Glauk +Gleb +Glen +Glenn +Gniewko +Gniewomir +Gniewosz +Gocha +Godsent +Gonzalo +Goran +Gordan +Gordon +Gotfryd +Goud +Gowtham +Gracjan +Grady +Graeme +Graham +Grant +Graysen +Grayson +Great +Greatness +Gregor +Gregory +Grey +Greysen +Greyson +Griffin +Grigore +Grisu +Grzegorz +Guadalupe +Guefack +Guido +Guillaume +Guillermo +Gunnar +Gunner +Guntaj +Gunter +Gurveer +Gus +Gustaf +Gustav +Gustavo +Gustaw +Gustel +Gustl +Guy +Gwidon +Gylve +Gérard +Gökalp +Gökay +Göktuğ +Güney +Günter +Güray +Gűnter +Habib +Haden +Hadi +Haiden +Haiman +Hajo +Hakan +Hakar +Hakeem +Halid +Halil +Halilov +Halit +Hamid +Hamidou +Hamitalp +Hamno +Hamza +Han +Hank +Hannes +Hannibal +Hanno +Hano +Hans +Hanzala +Haoyang +Harald +Haran +Haris +Hariton +Harjas +Harlan +Harlem +Harley +Harlyn +Harman +Harold +Haroun +Harper +Harris +Harrison +Harry +Hartmut +Harvey +Hasan +Hassan +Hauke +Hawi +Hayden +Hayes +Hayko +Hayrullah +Haytam +Hayyan +Hazrat +Heath +Hebil +Hector +Heinrich +Heinz +Hektor +Heliodor +Heliodoro +Helmut +Hemen +Henadzi +Hendrik +Hendrix +Henje +Hennadii +Hennes +Henning +Henos +Henri +Henrik +Henry 
+Henryk +Heorhii +Herbert +Heriberto +Herman +Hermann +Hernan +Heronim +Herri +Hezekiah +Hieronim +Hikmat +Hilary +Hipolit +Hiro +Hiwa +Hlib +Holden +Holger +Honer +Hong-phuoc +Horst +Hossam +Hossein +Houman +Houston +Howard +Hozan +Hristov +Hryhorii +Hubab +Hubert +Hubertus +Hudson +Hugh +Hugo +Hugon +Hulusi +Humberto +Hunter +Huntley +Husna +Hussain +Hussein +Huxley +Huy +Hyeong-jun +Hüseyin +Iacob +Iakob +Ian +Iaroslav +Ibragim +Ibrahim +Ibrahima +Ibuchim +Ibuki +Idaet +Idan +Idhanth +Ido +Idris +Idzi +Ievgen +Ievgenii +Ignacio +Ignacy +Ignat +Ignjat +Igor +Ihab +Ihar +Ihor +Ihsan +Iker +Ikponmwosa +Ilai +Ilan +Ilario +Ilay +Ilia +Ilias +Ilija +Ilijas +Ilja +Illia +Illian +Ilya +Ilyas +Imaan +Imanol +Imilio +Imko +Immanuel +Imran +Infant +Ingo +Innocent +Ino +Ioannis +Ion +Ionnis +Ira +Irakli +Ireneusz +Irvin +Irving +Isa +Isaac +Isaak +Isah +Isai +Isaiah +Isaias +Isak +Ishaan +Ishaq +Ishema +Isiah +Isidor +Isidro +Islam +Ismael +Ismail +Ismayil +Ismet +Israel +Issac +Issam +Istref +István +Itay +Iurie +Iurii +Ivan +Ivanov +Ivar +Ivica +Ivica-maximilian +Ivo +Iwan +Iwindsa +Iwinosa +Iwo +Iyan +Iyas +Izaiah +Izan +Izayah +Izydor +Jabari +Jace +Jacek +Jacenty +Jack +Jackie +Jackob +Jackson +Jacob +Jacoby +Jacopo +Jacques +Jad +Jade +Jaden +Jadiel +Jadon +Jadyn +Jaeden +Jael +Jagger +Jago +Jaheem +Jaheim +Jahiem +Jahir +Jahmane +Jai +Jaiden +Jaidyn +Jaime +Jair +Jairo +Jake +Jakob +Jakobe +Jakub +Jaleel +Jalen +Jalon +Jamal +Jamar +Jamarcus +Jamari +Jamarion +Jamaro +Jamel +James +Jameson +Jamie +Jamil +Jamir +Jamiro +Jamison +Jammeh +Jamye +Jamyl +Jan +Janar +Janik +Janis +Janisław +Janne +Jannes +Janney +Jannick +Jannik +Jannis +Janno +Jano +Janoah +Janosch +Janosch-peter +Janosh +Janto +Januariusz +January +Janus +Janusch +Janusz +Jaoudate +Jaquan +Jaquez +Jardel +Jared +Jarema +Jaren +Jari +Jarle +Jarne +Jarno +Jaro +Jarod +Jaromil +Jaromir +Jaron +Jaroslav +Jarosław +Jarred +Jarrett +Jarrod +Jarvis +Jase +Jasiah +Jasin +Jasman +Jason +Jasper +Javen +Javier 
+Javion +Javon +Javonte +Jawad +Jax +Jaxen +Jaxon +Jaxson +Jaxton +Jaxtyn +Jaxx +Jaxxon +Jay +Jayce +Jayceon +Jaycob +Jaydan +Jayden +Jaydin +Jaydon +Jaylan +Jaylen +Jaylin +Jaylon +Jayse +Jaysen +Jayson +Jayvion +Jayvon +Jaziel +Jean +Jean-luc +Jean-philippe +Jedidiah +Jeff +Jefferson +Jeffery +Jeffrey +Jelle +Jelte +Jencarlos +Jendrik +Jenke +Jens +Jensen +Jeppe +Jeramiah +Jeremi +Jeremiah +Jeremias +Jeremiasz +Jeremy +Jericho +Jerimiah +Jermain +Jermaine +Jerome +Jerry +Jerzy +Jesaja +Jesiah +Jesper +Jesraiel +Jesse +Jessie +Jesus +Jett +Jevon +Jiaming +Jibrael +Jibril +Jie +Jim +Jimin +Jimmie +Jimmy +Jionni +Jitzhak +Jiyan +Jiři +Jo +Joachim +Joan +Joaquin +Jochem +Jochen +Joe +Joel +Joel-ricardo +Joey +Joffy +Johan +Johann +Johannes +John +Johnathan +Johnathon +Johnnie +Johnny +Johnpaul +Jojo +Jon +Jona +Jonael +Jonah +Jonas +Jonasz +Jonatan +Jonathan +Jonathon +Jondo +Jonne +Jonnie +Jonny +Jonte +Joon +Joona +Jooris +Jordan +Jorden +Jordon +Jordy +Jordyn +Jore +Jorel +Jorge +Jori +Jorin +Joris +Jos +Josch +Joscha +Joschua +Jose +José +Josef +Joseph +Josephina +Josh +Josha +Joshua +Joshũa +Josia +Josiah +Joss +Jost +Josua +Josue +Josué +José +Joud +Jounis +Jovan +Jovani +Jovanni +Jovanny +Jovany +Jovin +Jozef +Joziah +João +Juan +Judah +Jude +Judson +Juelz +Jul +Jules +Julian +Juliano +Julien +Julio +Julius +Juliusz +Juls +Junayd +Junes +Junior +Junis +Juno +Junus +Jurand +Jurek +Juri +Jurij +Jurin +Jusef +Justice +Justin +Justus +Justyn +Jusuf +János +Jérémy +Jérôme +Jóhann +Józef +Józefat +Jörg +Jürgen +Jędrzej +Kaamel +Kaan +Kabiel +Kabir +Kace +Kacper +Kade +Kaden +Kadin +Kadir +Kadr +Kadyn +Kaeden +Kael +Kagan +Kai +Kaiden +Kairo +Kais +Kaiser +Kaison +Kaito +Kaivalya +Kaj +Kajetan +Kalani +Kale +Kaleb +Kalel +Kalikst +Kalle +Kalro +Kama +Kamal +Kamari +Kamau +Kamden +Kamdyn +Kameron +Kamil +Kamren +Kamron +Kamryn +Kane +Kannon +Kanstantsin +Kanye +Kaon +Karam +Karamveer +Karan +Karanfil +Kare +Kareem +Karen +Karim +Karl +Karl-heinz +Karlheinz +Karlo 
+Karlo-alexander +Karmel +Karol +Karsen +Karson +Karsten +Karsyn +Karter +Karthigan +Karun +Kase +Kasen +Kasey +Kash +Kashton +Kasimir +Kasjan +Kason +Kasongo +Kaspar +Kasper +Kaspian +Kassem +Kasım +Katambala +Katja +Kavi +Kavon +Kay +Kaya +Kayahan +Kayden +Kayo +Kayra +Kaysen +Kayson +Kazbekovic +Kazimierz +Kazuki +Keagan +Kean +Keandre +Keanu +Keaton +Keegan +Keenan +Keev +Keis +Keith +Kelan +Kellan +Kellen +Kelly +Kelton +Kelvin +Kemal +Kemsi +Kenan +Kenan-can +Kendall +Kendrick +Kennedy +Kenneth +Kenny +Keno +Kent +Kenyon +Kenzo +Keon +Kerem +Kerim +Kerry +Keshaun +Keshawn +Keven +Kevin +Kevon +Kewin +Keyan +Keyon +Keyshawn +Khalat +Khaled +Khalid +Khalil +Khang +Khari +Khudeda +Khudhur +Kiaan +Kian +Kiano +Kieran +Kilani +Kilian +Killian +Kinan +King +Kingsley +Kingston +Kiran +Kiril +Kirill +Kirk +Kiron +Kiryl +Kivanc +Kiyan +Klaas +Klaudiusz +Klaus +Klemens +Knox +Knud +Knut +Koa +Kobe +Koby +Koda +Kody +Koen +Kofi +Kohen +Kolby +Kole +Kolja +Kolten +Kolton +Konner +Konnor +Konny +Konrad +Konstantin +Konstantinos +Konstanty +Konstantyn +Koos +Koray +Korbin +Korbyn +Kordian +Korel +Korey +Kornel +Korneliusz +Kory +Kosma +Kostantin +Kostas +Kostiantyn +Kostyantyn +Kotsi +Kousay +Krasimirov +Krew +Kris +Krish +Krishna +Kristian +Kristofer +Kristopher +Kriton +Kryspin +Krystian +Krystyn +Krzesimir +Krzysiek +Krzysztof +Ksawery +Ksaweryn +Ksawier +Kuba +Kubilay +Kumar +Kuno +Kurosch +Kurt +Kurt-egon +Kurtis +Kutay +Kuzey +Kuzma +Kwabena +Kweku +Kyan +Kye +Kylan +Kyle +Kylen +Kyler +Kylian +Kylo +Kymani +Kyng +Kyree +Kyrie +Kyron +Kyryl +Kyrylo +Kyson +Kürşad +Lachlan +Ladarius +Laert +Laith +Lamar +Lamarana +Lambros +Lamin +Lamont +Lance +Landan +Landen +Landin +Lando +Landon +Landry +Landyn +Lane +Langston +Larry +Lars +Lars-erik +Lasha +Laslo +Lasse +Lathan +Latif +Latrell +Lauan +Lauand +Laurens +Laurent +Laurenz +Laurin +Lauris +Lavand +Lawrence +Lawson +Layne +Layton +Lazar +Lazlo +Lean +Leander +Leandro +Leanid +Leano +Leant +Leańo +Lech +Lechosław +Ledger 
+Lee +Leevi +Legend +Lehat +Leif +Leighton +Lejs +Leland +Lemar +Lemmy +Len +Lenn +Lennard +Lennart +Lennet +Lenni +Lennik +Lennix +Lenno +Lennon +Lennox +Lenny +Leno +Lenox +Leo +Leon +Leonard +Leonardo +Leoncjusz +Leone +Leonel +Leonhard +Leonhardt +Leonid +Leonidas +Leonie +Leonigio +Leonik +Leonte +Leonídas +Leopold +Leopoldo +Leovani +Leroy +Leroy-lyon +Lerry +Leshan +Leszek +Lesław +Leto +Lev +Levan +Levani +Leven +Levent +Levente +Levi +Levijon +Levin +Levio +Levon +Lew +Lewi +Lewin +Lewis +Leyan +Leyan-deniz +León +Li +Liam +Lian +Liano +Lias +Liko +Limeon +Linas +Lincoln +Lino +Linus +Lio +Lion +Lionel +Lior +Liou +Lisandro +Liubomyr +Liyan +Lión +Lloyd +Lochlan +Logan +Loki +London +Longin +Lonnie +Lordon +Lorens +Lorenz +Lorenzo +Loris +Lotar +Lothar +Lou +Louan +Louay +Loui +Louie +Louis +Louis-emanuel +Louka +Lounes +Lovre +Lovro +Luai +Luan +Luano +Lubomir +Lububu +Luc +Luca +Lucas +Lucca +Lucian +Luciano +Lucien +Lucius +Lucjan +Lucjusz +Ludger +Ludo +Ludomir +Ludvig +Ludwig +Ludwik +Luel +Lui +Luigi +Luis +Luitwin +Luiz +Luk +Luka +Lukas +Luke +Lunis +Luong +Lutfi +Lutz +Luuk +Lyam +Lyan +Lyes-amin +Lyle +Lyon +Lyric +Lázaro +Léo +Léon +Lê +Maciej +Mack +Mackenzie +Madden +Maddox +Maddux +Madieu +Madison +Mads +Mady +Mael +Magnus +Magomed +Mahamadou +Mahan +Mahdi +Mahir +Mahmoud +Maik +Maikel +Mailo +Maina +Maison +Major +Makai +Makan +Makar +Makary +Makhi +Makiel +Maks +Maksim +Maksym +Maksymilian +Malachi +Malakai +Malaki +Malcolm +Malek +Mali +Malick +Malik +Malik-aboubakar +Malik-jamal +Malin +Malio +Malo +Malte +Malu +Mamadou +Mamoo +Mamuka +Manfred +Mani +Manraj +Manuel +Manya +Mara +Marc +Marcel +Marceli +Marcelino +Marcellino +Marcello +Marcellus +Marcelo +Marcin +Marcjan +Marco +Marcos +Marcus +Mardan +Marek +Mares +Maria +Marian +Mariano +Marie +Mariev +Marijan +Marin +Marino +Marinov +Marinus +Mario +Marius +Mariusz +Mariyanov +Marjan +Mark +Markel +Markell +Marko +Markus +Marley +Marlo +Marlon +Marlow +Marouan +Marques +Marquez +Marquis 
+Marquise +Marsel +Marshall +Mart +Marten +Martin +Martino +Marty +Martyn +Maru +Marvellous +Marvin +Marwan +Maryan +Masen +Masih +Mason +Massimiliano +Massimo +Matei +Mateo +Mateu +Mateus +Mateusz +Matheo +Mathew +Mathi +Mathias +Mathieu +Mathis +Matias +Matondo +Mats +Matteo +Matteo-elias +Mattes +Mattheo +Mattheus +Matthew +Matthews +Matthias +Matthis +Matthäus +Matti +Mattia +Mattis +Matts +Matty +Matvii +Matviy +Matz +Maurice +Mauricio +Mauro +Maurycy +Maverick +Mavrik +Max +Maxim +Maxime +Maximiliam +Maximilian +Maximiliano +Maximilien +Maximillian +Maximo +Maximus +Maxton +Maxwell +Maxx +Maxymilian +Mayar +Mayson +Mazen +Mazin +Maèl +Maël +Mcgregor +Medard +Megaw +Mehmed +Mehmet +Mehrab +Meikel +Mekhi +Melchior +Melih +Melvin +Memphis +Menachem +Mendes +Menno +Meo +Meral +Meran +Merlin +Mero +Merrick +Mert +Mert-efe +Mescal +Messi +Messiah +Mete +Metehan +Micah +Michael +Michaell +Michal +Michał +Micheal +Michel +Michele +Michiel +Mick +Mico +Mieczysław +Mielab +Mieszko +Miguel +Mihai +Mihail +Mihailov +Mihajlo +Mijo +Mik +Mika +Mikael +Mikail +Mikalai +Mikan +Mike +Mikel +Mikhail +Mikheil +Mikita +Mikiyas +Mikkel +Miko +Mikolaj +Mikołaj +Mila +Milad +Milan +Milan-julianus +Milano +Milas +Miles +Milian +Miljan +Miller +Milo +Milon +Milow +Milton +Milú +Min-jun +Minh +Minkail +Mio +Mirali +Miran +Miras +Miraç +Mirek +Mirko +Miro +Miron +Miroslav +Miroslavov +Miroslaw +Mirosław +Mirwas +Mirza +Mirzan +Miró +Misael +Mischa +Mitan +Mitchel +Mitchell +Mitja +Mitkov +Miłosz +Miłosław +Mladenov +Mo +Modest +Modou +Mohamad +Mohamed +Mohammad +Mohammed +Mohan +Moheb +Mohib +Moises +Mordechai +Moreno +Morgan +Moritz +Morris +Morten +Moses +Moshe +Mounir +Moussa +Mpiasa +Mr. 
+Muaad +Muaz +Mubarak +Muhamad +Muhamed +Muhammad +Muhammad-ali +Muhammed +Muhammet +Muhammet-baran +Muhsin +Murat +Musa +Musab +Musap +Mustafa +Mykhailo +Mykhaylo +Mykola +Mykyta +Myles +Mylo +Myron +Myroslav +Márk +Naael +Nadav +Naeem +Nael +Naim +Nakoa +Naksh +Nali +Nalu +Nam +Name +Namik +Nanuk +Napoleon +Narcyz +Nare +Narinder +Naser +Nash +Nasir +Naskov +Natan +Natanael +Nataniel +Nate +Nathan +Nathanael +Nathanaël +Nathanaël-yoram +Nathanial +Nathaniel +Nathen +Navid +Nazar +Nazarii +Naël +Neal +Necati +Nechelson +Necip +Neco +Nedzhibov +Nehemiah +Neil +Neitas +Neko +Neldi +Nelian +Nelio +Nelo +Nelson +Nemisio +Nemo +Neo +Nermin +Nery +Nestor +Nethan +Neuville +Nevfel +Nevio +Neymar +Nezir +Ngao +Ngoc +Niam +Nias +Nic +Niccolò +Nicholas +Nick +Nicklaus +Nickolas +Niclas +Nico +Nicola +Nicolae +Nicolai +Nicolas +Nicolasn +Nicolaus +Nicolay +Nicolás +Nicoló +Niels +Niema +Nigel +Nihaal +Nihad +Nihat +Nik +Nikan +Nikhil +Nikias +Nikita +Niklas +Niko +Nikodem +Nikola +Nikolaev +Nikolai +Nikolaj +Nikolaos +Nikolas +Nikolaus +Nikolay +Nikos +Nil +Nilas +Nilay +Nilo +Nils +Nima +Nino +Nio +Nioh +Nir +Nischan +Niwar +Nixon +Niyam +Nkendem +Noa +Noah +Noam +Noan +Noar +Noctis +Noe +Noel +Noke +Nolan +Nor +Norbert +Norman +Norrin +Nosa +Nosazemen +Nouri +Nova +Nowes +Noyan +Noé +Noél +Noël +Nuh +Numan +Nuno +Nurbin +Nuri +Oakley +Obiri +Obripong +Octavian +Octavio +Odai +Odin +Odis +Oghenekobiruo +Ognyan +Ojie +Oke +Oktawian +Oktawiusz +Olaf +Olamilekan +Ole +Olech +Oleg +Oleh +Olek +Oleksander +Oleksandr +Oleksandrovych +Oleksii +Oleksiy +Olgierd +Olindo +Oliseh +Oliver +Olivier +Oliwer +Oliwier +Oluwaferanmi +Omar +Omar-farouk +Omari +Omarion +Omer +Omoniyi +Omose +Omri +Onel +Onno +Onofrio +Onufry +Onuoha +Onur +Onurcan +Onyx +Oracle +Orest +Orestis +Orhan +Ori +Orion +Oriyomi +Orlando +Oro +Osagie +Osahuname +Osama +Osbaldo +Oscar +Osewe +Oseyemenre +Oskar +Osman +Osmanov +Ossbill +Ostap +Osvaldo +Oswald +Oswaldo +Otis +Oton +Otto +Ottokar +Otton +Ousmane +Owen 
+Ozan +Oğuz +Paata +Pablo +Paisios +Panagiotis +Panos +Paolo +Paris +Parker +Parlindungan +Pars +Pascal +Paskal +Pathé +Patrice +Patrick +Patrik +Patrycjusz +Patryk +Paul +Paulin +Paulo +Paulus +Pavel +Pavlo +Pavo +Pawel +Paweł +Paxton +Payton +Pedro +Pepe +Perfect +Periklis +Perry +Peter +Petr +Petro +Petros +Petrov +Petru +Peyton +Pharell +Phil +Phileas +Philip +Philipp +Philippe +Phillip +Phineas +Phoenix +Phynix +Pico +Pieer +Pierce +Pierre +Piet +Pieter +Pietro +Pio +Piotr +Pius +Polikarp +Porsch +Porter +Poyraz +Praise +Pranav +Preston +Priam +Prince +Princeton +Prinz +Prodige +Przemysław +Puspa +Qais +Qhraman +Quentin +Quincy +Quinn +Quinten +Quintin +Quinton +Quintus +Quirin +Quốc +Radek +Radomir +Radosław +Radu +Radzisław +Rafa +Rafael +Rafaell +Rafał +Raffael +Raffaele +Ragnar +Raheem +Rahman +Rahmen +Rahul +Raiden +Raik +Rainer +Rajmund +Rajnard +Rajnhold +Rajnold +Ralf +Ralf-maria +Ralph +Raman +Rami +Ramin +Ramiro +Ramiz +Ramo +Ramon +Ramos +Ramuel +Ramunas +Ramón +Randall +Randy +Raphael +Raphaël +Rashad +Rashawn +Rasheed +Rashid +Rasmus +Raul +Raven +Ravi +Ray +Rayan +Rayden +Rayen +Rayk +Raylan +Raymond +Raymundo +Raúl +Reagan +Rebaz +Rebin +Redzep +Reece +Reed +Reese +Reginald +Reid +Reign +Reilly +Reinhard +Reinhold +Reis +Rem +Remi +Remiel +Remigiusz +Remington +Remo +Remus +Remy +Renaldo +Renard +Renas +Renato +Rene +Renis +Renisław +Reno +René +Reto +Reuben +Rex +Rey +Reyaan +Reyansh +Reynaldo +Rezan +Rhett +Rhys +Riaan +Riad +Rian +Ricardo +Riccardo +Rich +Richard +Richie +Rick +Rickey +Ricky +Rico +Ridge +Rigoberto +Riley +Rio +Rios +Rishi +River +Rivus +Riyan +Roan +Robel +Robert +Roberto +Robertov +Robin +Rocco +Roch +Rocket +Rocky +Roderick +Rodi +Rodion +Rodney +Rodolfo +Rodrigo +Roen +Rogash +Rogelio +Roger +Rohaan +Roham +Rohan +Rohat +Roj +Roko +Roland +Rolando +Rolf +Romain +Roman +Romano +Romeo +Romuald +Romy +Ron +Ronald +Ronaldo +Ronan +Ronel +Roni +Ronin +Ronnie +Ronî +Rory +Ros +Ross +Rostyslav +Rowan +Rowen +Roy +Royal +Royce 
+Rościsław +Ruan +Ruben +Rubin +Rudi +Rudolf +Rudra +Rudy +Rufin +Rumenov +Rumi +Rune +Rupert +Ruska +Ruslan +Russell +Rustam +Ryaan +Ryan +Ryder +Rydham +Ryker +Rylan +Ryland +Rylee +Rylen +Ryley +Ryszard +Rémi +Rémy +Rêzan +Saad +Saam +Sabahattin +Sabastian +Saber +Sabri +Safouan +Safvan +Sage +Sahel +Sahin +Said +Saint +Sajmon +Salah +Salam +Salem +Saliev +Salih +Salim +Saliou +Salman +Salomon +Salvador +Salvatore +Sam +Sambor +Samet +Sami +Samir +Samiyar +Sammi +Sammy +Samra +Samson +Samu +Samuel +Samuel-milan +Samuele +Samy +San-tiago +Sancar +Sander +Sandiego +Sandro +Sang +Sanist +Santana +Santeo +Santiago +Santino +Santos +Sarmad +Sascha +Sashov +Saturnin +Saul +Saveen +Savion +Savvas +Sawyer +Sayin +Saúl +Schahin +Scott +Seaad +Seamus +Sean +Sebastian +Sebastiań +Sefa +Seginus +Sehajbir +Sejad +Sejdo +Selem +Selim +Selman +Semaj +Semen +Semi +Semih +Semir +Semra +Sennai +Sequoia +Serafin +Seraj +Sercan +Sercihan +Serdar +Sergei +Sergey +Serghei +Sergii +Sergio +Sergiu +Sergiusz +Sergiy +Serhat +Serhii +Serhildan +Serhiy +Serif +Serik +Serkan +Sertório +Seth +Severin +Severinov +Sevket +Seweryn +Seyed +Seyit +Seyithan +Seymen +Seyyid +Sezgin +Shaddai +Shahab +Shahan +Shahid +Shamar +Shane +Shannon +Sharbel +Sharvin +Shaun +Shaurya +Shawn +Shayan +Shayne +Shea +Sheldon +Shemar +Shepard +Shepherd +Sher +Shervin +Sherzad +Shiloh +Shina-ayomi +Shiraz +Shivan +Shmuel +Shrish +Siarhei +Siciid +Sidar +Sidney +Siegfried +Siergiej +Siggi +Sihan +Siho +Sihuar +Silas +Silvain +Silvester +Silvio +Simeon +Simo +Simon +Simone +Simão +Sinan +Sinanov +Sincere +Singh +Sirac +Siraj +Sirius +Siung-cheng +Siwar +Siyabovich +Siyami +Sjun +Skender +Skipper +Skylar +Skyler +Slade +Slava +Soa +Soan +Sobhan +Sobiesław +Solaiman +Solomon +Sonny +Sora +Soren +Soufian +Souhail +Souleyman +Spencer +Sri +Stanislau +Stanislav +Stanislaw +Stanisław +Stanley +Stefan +Stefano +Steffen +Stepan +Stephan +Stephen +Stephon +Sterling +Stetson +Steve +Steven +Stian +Stije +Stivan +Stone +Stuart 
+Subeedei +Sufjan +Sulaiman +Suliman +Sullivan +Sultan +Suphi +Suprit +Surafel +Sutton +Sven +Svetoslav +Svetoslavov +Sviatoslav +Syed +Sylas +Sylvester +Sylweriusz +Sylwester +Sylwin +Sylwiusz +Symeon +Szczepan +Szymon +Sławek +Sławoj +Sławomir +Sławosz +Tabio +Tadeo +Tadeusz +Taeryn +Taha +Tahir +Taim +Tal +Talal +Talan +Talat +Talen +Talha +Talis +Talon +Tam +Tamer +Tami +Tamino +Tamir +Tamjid +Tamme +Tammo +Tamo +Tanner +Taras +Tarek +Tarik +Tariq +Taro +Tasnim +Tate +Tatum +Tavion +Taxiarchis +Taylan +Tayler +Taylo +Taylor +Tayo +Tayshaun +Tchapmi +Tchoté +Teagan +Ted +Tedros +Tegan +Telesfor +Tenebrae +Teo +Teodor +Teofil +Teoman +Terence +Terrance +Terrell +Terrence +Terry +Tevin +Thabo +Thaddeus +Thaddäus +Tham +Thang +Thassilo +Thatcher +Thees +Theo +Theobald +Theodor +Theodore +Theophil +Theopilus +Thiago +Thibaud +Thien +Thierry +Thies +Thijs +Thilo +Thiên +Thodor +Thomas +Thommy +Thorben +Thore +Thorin +Tiago +Tiam +Tian +Tiano +Tibet +Tidde +Tijan +Till +Tillman +Tilman +Tilo +Tim +Timo +Timofej +Timon +Timotheus +Timothy +Timoti-keoni +Timur +Timurhan +Tino +Tio +Titan +Titus +Tiwaz +Tiyam +Tizian +Tjelle +Tjorben +Toan +Tobias +Tobiasz +Toby +Todd +Todorov +Tolga +Tom +Toma +Tomas +Tomasso +Tomasz +Tomer +Tommaso +Tommi +Tommy +Tomte +Tomáš +Tonatiuh +Toni +Tony +Toprak +Torin +Torkel +Tornike +Torsten +Trace +Travis +Travon +Tre +Trent +Trenten +Trenton +Trever +Trevin +Trevion +Trevon +Trevor +Trey +Treyton +Treyvon +Tripp +Tristan +Tristan-henry +Tristen +Tristian +Tristin +Triston +Troy +Truman +Trystan +Tucker +Tuco +Tudor +Turan +Turhan +Turky +Turner +Tuvia +Tuğra +Ty +Tyberiusz +Tycjan +Tyler +Tylor +Tymofii +Tymon +Tymoteusz +Tymur +Tyquan +Tyr +Tyree +Tyreek +Tyrek +Tyrell +Tyrese +Tyrique +Tyron +Tyrone +Tyshawn +Tyson +Tytus +Tâm +Udo +Ugur +Uladzimir +Uladzislau +Ulises +Ulrich +Ulysses +Umaru +Umay +Umeyr +Umut +Unai +Unik +Urban +Uri +Uriah +Uriel +Urijah +Uroš +Urs +Uthman +Utku +Uve +Uwe +Vadim +Vadym +Vadzim +Vaios +Valencio 
+Valentin +Valentin-liam +Valentino +Valentyn +Valeri +Valerian +Valerii +Valerio +Valeriu +Valeriy +Valery +Valiantsin +Valon +Van +Vance +Varun +Vasco +Vasil +Vasil'ovič +Vasile +Vasilev +Vasili +Vasilije +Vassilios +Vasyl +Vaughn +Vayu +Veaceslav +Vefa +Veit +Veli +Veneciano +Veniamin +Vernon +Veselinov +Viaansh +Viachaslau +Viacheslav +Vicente +Victor +Vigo +Vihaan +Viktar +Viktor +Vince +Vincent +Vincenzo +Vinz +Vinzent +Viorel +Vishan +Vitale +Vitali +Vitalie +Vitalii +Vitaliy +Vito +Vitus +Vivaan +Viyan +Vladimir +Vladislav +Vladislavovič +Vladyslav +Volker +Volodymyr +Vukasin +Vyacheslav +Wacław +Wade +Wadim +Waldemar +Waleed +Walenty +Walerian +Walery +Walid +Walker +Wallace +Walter +Ward +Warren +Wasim +Wasyl +Watson +Wawrzyn +Wawrzyniec +Waylon +Wayne +Waël +Wedigo +Wells +Wenancjusz +Wendelin +Werner +Wesley +Wesson +Westin +Westley +Weston +Wiaczesław +Wieczysław +Wieland +Wielisław +Wiesław +Wieńczysław +Wiklo +Wiktor +Wilder +Wilfried +Wilhelm +Wiliam +Wilibald +Will +Willem +Willi +Willi-ananda +William +Willian +Willie +Willy +Wilson +Wim +Win +Wincent +Wincenty +Winfred +Winicjusz +Winston +Wirgiliusz +Wit +Witalij +Witalis +Witold +Witosław +Wojciech +Wojtek +Wolf +Wolfgang +Wotan +Wyatt +Władimir +Władysław +Włodzimierz +Włodzisław +Xander +Xaver +Xavi +Xavier +Xawery +Xhenet +Xhesijan +Xiaoyi +Xzavier +Yaakov +Yacqub +Yadiel +Yael +Yagiz +Yahir +Yahor +Yahya +Yair +Yakiv +Yakub +Yakup +Yaman +Yamen +Yan +Yandel +Yanis +Yankov +Yanneck +Yannick +Yannik +Yannis +Yanu +Yaraslau +Yared +Yari +Yaro +Yaroslav +Yasan +Yasar +Yasiel +Yasin +Yasir +Yasser +Yassin +Yasuo +Yauhen +Yauheni +Yavuz +Yaw +Yazan +Yazid +Yağız +Yaşar +Yehor +Yehuda +Yeicop +Yeon-oh +Yevgen +Yevhen +Yevhenii +Yi +Yicheng +Yigit +Ying +Yisroel +Yiğit +Yiğit-kamil +Yiğiter +Ylli +Yoav +Yoel +Yohannes +Yonah +Yonas +Yoner +Yonnel +Yordan +Yosef +Yosifov +Yosyp +Yotam +Younes +Yousef +Youssef +Youssuf +Yousuf +Yowani +Yozdzhan +Yuan +Yuhan +Yuki +Yuliyanov +Yuma +Yunho +Yunior 
+Yunis +Yunus +Yurem +Yuri +Yurii +Yuriy +Yury +Yusof +Yussef +Yusuf +Yuval +Yuvraj +Yuzdzhan +Yuşa +Yves +Yücel +Zachariah +Zacharias +Zachariasz +Zachary +Zachery +Zack +Zackary +Zackery +Zafer +Zahid +Zahir +Zaid +Zaiden +Zain +Zaire +Zakai +Zakaria +Zakariya +Zakary +Zakhar +Zaman +Zander +Zane +Zanyar +Zavier +Zavin +Zavion +Zawer +Zayan +Zayd +Zayden +Zayn +Zayne +Zaza +Zbigniew +Zbisław +Zbyszek +Zbyszko +Zbysław +Zdzisław +Zechariah +Zefiryn +Zeke +Zekeriya +Zenobiusz +Zenon +Zeonhei +Zeus +Zev +Zeyad +Zeyd +Zeyn +Zhivkov +Zhiyang +Zia +Ziad +Ziam +Zidane +Ziemowit +Zinovii +Zinédine +Zion +Ziya +Ziyad +Ziyan +Zlatanov +Zlatko +Zoel +Zoev +Zurab +Zyaire +Zygfryd +Zygmunt +Zülkarneyn +Álvaro +Ân +Çan +Çaner +Çağatay +Çağrı +Çınar +Émile +Étienne +Ömer +Önder +Özgür +İbrahim +İlyas +İsa +İsmet +Ładysław +Łucjan +Łukasz +Şahin +Şenol diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/__init__.py b/transformations/gender_randomizer/coreferee/coreferee/lang/de/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/config.cfg b/transformations/gender_randomizer/coreferee/coreferee/lang/de/config.cfg new file mode 100644 index 000000000..c8b877e09 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/de/config.cfg @@ -0,0 +1,14 @@ +[sm_3_0_0] +model: core_news_sm +from_version: 3.0.0 +to_version: 3.0.0 + +[md_3_0_0] +model: core_news_md +from_version: 3.0.0 +to_version: 3.0.0 + +[lg_3_0_0] +model: core_news_lg +from_version: 3.0.0 +to_version: 3.0.0 diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/LICENSE b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/LICENSE new file mode 100644 index 000000000..1612c31ab --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/LICENSE @@ -0,0 +1,3 @@ +1) Training took place using the ParCor corpus 
(https://opus.nlpl.eu/ParCor/). For further details, see the following article: + +Liane Guillou, Christian Hardmeier, Aaron Smith, Jörg Tiedemann and Bonnie Webber (2014): ParCor 1.0: A Parallel Pronoun-Coreference Corpus to Support Statistical MT, In Proceedings of LREC 2014, Reykjavik, Iceland diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/__init__.py b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/avalent_verbs.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/avalent_verbs.dat new file mode 100644 index 000000000..0b1bc7014 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/avalent_verbs.dat @@ -0,0 +1,5 @@ +schneien +regnen +hageln +donnern +menscheln diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/blacklisted_phrases.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/blacklisted_phrases.dat new file mode 100644 index 000000000..ed98ec057 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/blacklisted_phrases.dat @@ -0,0 +1,9 @@ +zum Beispiel +kein Wunder +im Großen und Ganzen +zum Schluss +Mit anderen Worten +meines Erachtens +meiner Meinung nach +nach Hause +zu Hause diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/neuter_female_words.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/neuter_female_words.dat new file mode 100644 index 000000000..eabd42fd6 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/neuter_female_words.dat @@ -0,0 +1,5 @@ +Weibsbild +Mädel +Mädchen +Girl +Frauenzimmer diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/neuter_male_words.dat 
b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/neuter_male_words.dat new file mode 100644 index 000000000..93a403395 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/neuter_male_words.dat @@ -0,0 +1 @@ +Mannsbild diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/neuter_person_words.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/neuter_person_words.dat new file mode 100644 index 000000000..f5bd2c911 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/neuter_person_words.dat @@ -0,0 +1,6 @@ +Kind +Mitglied +Baby +Beispiel +Ergebnis +Arschloch diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/verbs_with_personal_subject.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/verbs_with_personal_subject.dat new file mode 100644 index 000000000..e88266f83 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/de/data/verbs_with_personal_subject.dat @@ -0,0 +1,12 @@ +denken +meinen +sagen +sprechen +betonen +behaupten +annehmen +glauben +behaupten +wissen +kennen +hoffen diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/de/language_specific_rules.py b/transformations/gender_randomizer/coreferee/coreferee/lang/de/language_specific_rules.py new file mode 100644 index 000000000..87b180c15 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/de/language_specific_rules.py @@ -0,0 +1,482 @@ +# Copyright 2021 msg systems ag + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from string import punctuation

from spacy.tokens import Token

from ...data_model import Mention
from ...rules import RulesAnalyzer


class LanguageSpecificRulesAnalyzer(RulesAnalyzer):
    """German-specific rules deciding which tokens can corefer.

    The methods consult spaCy token attributes (``pos_``, ``tag_``, ``dep_``,
    ``lemma_``, morphology) together with word lists and helpers that are not
    defined in this class (e.g. ``self.noun_pos``, ``self.male_names``,
    ``self.has_morph``); presumably these are supplied by ``RulesAnalyzer``.

    NOTE(review): this class was reconstructed from a whitespace-collapsed
    copy of the file, so block nesting should be confirmed against the
    upstream coreferee source, especially in ``get_gender_number_info``.
    """

    # Not referenced in this class; presumably consumed by RulesAnalyzer.
    random_word = "Freude"

    # Dependency labels marking a token as a dependent sibling within a
    # coordination. This MUST be a tuple: the attribute is used with ``in``,
    # and ``x in "cj"`` would be a substring test (matching "", "c", "j").
    dependent_sibling_deps = ("cj",)

    # Dependency labels that link the members of a coordination.
    conjunction_deps = ("app", "cd", "punct")

    # Not referenced in this class; presumably consumed by RulesAnalyzer.
    adverbial_clause_deps = ("mo", "oc")

    # Lemmas signalling an 'or' coordination. This MUST be a tuple: as a bare
    # string, ``lemma_ in "oder"`` also matched the lemmas "der" and "er".
    or_lemmas = ("oder",)

    # Not referenced in this class; presumably consumed by RulesAnalyzer.
    entity_noun_dictionary = {
        "PER": ["Person", "Mensch", "Mann", "Frau"],
        "LOC": ["Ort", "Platz", "Punkt", "Stelle", "Land", "Stadt"],
        "ORG": [
            "Firma",
            "Geschäft",
            "Gesellschaft",
            "Organisation",
            "Unternehmen",
        ],
    }

    # Opening/closing quotation mark pairs; not referenced in this class.
    quote_tuples = [
        ("'", "'"),
        ('"', '"'),
        ("„", "“"),
        ("‚", "‘"),
        ("«", "»"),
        ("‹", "›"),
    ]

    # Parts of speech of children inspected for (in)definiteness markers.
    term_operator_pos = ("DET", "ADJ")

    # Not referenced in this class; presumably consumed by RulesAnalyzer.
    clause_root_pos = ("VERB", "AUX")

    def get_dependent_siblings(self, token: Token) -> list:
        """Return the sorted dependent siblings coordinated with *token*.

        The subtree below *token* is followed through children whose
        dependency is in ``dependent_sibling_deps`` or ``conjunction_deps``;
        visited tokens whose dependency is in ``dependent_sibling_deps`` are
        collected. As a side effect,
        ``token._.coref_chains.temp_has_or_coordination`` is set to ``True``
        when any visited token has a lemma in ``or_lemmas``.

        An empty list is returned when *token* itself is attached via one of
        the sibling/conjunction dependencies.
        """

        def add_siblings_recursively(
            recursed_token: Token, visited_set: set
        ) -> set:
            # Collect the siblings at and below recursed_token; visited_set
            # is shared across the recursion so no token is visited twice.
            visited_set.add(recursed_token)
            siblings_set = set()
            if recursed_token.lemma_ in self.or_lemmas:
                token._.coref_chains.temp_has_or_coordination = True
            if recursed_token.dep_ in self.dependent_sibling_deps:
                siblings_set.add(recursed_token)
            for child in (
                child
                for child in recursed_token.children
                if child not in visited_set and
                # hyphenated siblings, e.g. 'Kindes- und Jugendzentrum'
                child.tag_ != "TRUNC"
                and (
                    child.dep_ in self.dependent_sibling_deps
                    or child.dep_ in self.conjunction_deps
                )
            ):
                child_siblings_set = add_siblings_recursively(
                    child, visited_set
                )
                siblings_set |= child_siblings_set
            return siblings_set

        if (
            token.dep_ not in self.conjunction_deps
            and token.dep_ not in self.dependent_sibling_deps
        ):
            siblings_set = add_siblings_recursively(token, set())
        else:
            siblings_set = set()
        return sorted(siblings_set)

    def is_independent_noun(self, token: Token) -> bool:
        """Return whether *token* is a noun that can head its own mention.

        The token must have a part of speech in ``self.noun_pos``, or the tag
        ``PIS``, or be a ``PRON`` tagged ``NN``. It must not have the
        dependency ``pnc``, its text must not occur within
        ``string.punctuation``, and it must not be part of one of
        ``self.blacklisted_phrases``.
        """
        if (
            not (
                token.pos_ in self.noun_pos
                or token.tag_ == "PIS"
                or (token.pos_ == "PRON" and token.tag_ == "NN")
            )
            or token.dep_ == "pnc"
            or token.text in punctuation
        ):
            return False
        return not self.is_token_in_one_of_phrases(
            token, self.blacklisted_phrases
        )

    def is_potential_anaphor(self, token: Token) -> bool:
        """Return whether *token* is a pronoun-like word that may refer back.

        Accepted are ``PRON`` tokens tagged ``PPER``/``PDS``/``PRF``, ``DET``
        tokens tagged ``PPOSAT`` and ``ADV`` tokens tagged ``PROAV``, minus
        the exclusions handled below (first/second person, possessives other
        than 'sein*'/'ihr*', certain pronominal adverbs, pleonastic 'es',
        'das', and subjects of avalent verbs).
        """
        if not (
            (token.pos_ == "PRON" and token.tag_ in ("PPER", "PDS", "PRF"))
            or (token.pos_ == "DET" and token.tag_ == "PPOSAT")
            or (token.pos_ == "ADV" and token.tag_ == "PROAV")
        ):
            return False
        if self.has_morph(token, "Person", "1") or self.has_morph(
            token, "Person", "2"
        ):
            return False

        if (
            token.tag_ == "PPOSAT"
            and not token.text.lower().startswith("sein")
            and not token.text.lower().startswith("ihr")
        ):
            return False

        if token.tag_ == "PROAV":
            # 'damit' etc. in sentence-initial position refers to the
            # preceding clause
            if (
                token.i
                == token.doc._.coref_chains.temp_sent_starts[
                    token._.coref_chains.temp_sent_index
                ]
            ):
                return False
            # 'sie machte es damit, dass ...'
            if any(child.pos_ == "VERB" for child in token.children):
                return False
            if not token.lemma_.lower().startswith("da"):
                return False
            if token.lemma_.lower() in ("daher", "dahin"):
                return False

        # pleonastic 'es'
        if token.dep_ == "ep":
            return False

        # 'das'
        if token.text.lower() == "das":
            return False

        # avalent verbs
        if (
            token.dep_ != self.root_dep
            and token.head.pos_ in ("AUX", "VERB")
            and any(
                child.lemma_ in self.avalent_verbs
                for child in token.head.subtree
            )
        ):
            return False
        return True

    def is_potential_anaphoric_pair(
        self, referred: Mention, referring: Token, directly: bool
    ) -> int:
        """Rate whether *referring* can be an anaphor of *referred*.

        Returns ``0`` when the pair is ruled out (gender/number mismatch,
        coordination constraints, pronominal-adverb or reflexivity
        constraints). Returns ``1`` when *referring* (or its governing
        sibling) is the ``sb`` dependent of a verb in
        ``self.verbs_with_personal_subject`` and no token of *referred* is a
        proper noun or a ``PER`` entity. Returns ``2`` otherwise.

        :param referred: the candidate antecedent mention.
        :param referring: the candidate anaphor.
        :param directly: when true, the reflexivity and possessive checks are
            applied as well; when false, proper nouns are treated as
            compatible with every gender and number.
        """

        def lemma_ends_with_word_in_list(token, word_list):
            # NOTE(review): despite the name, this tests whether a list word
            # ends with the token lemma, not the reverse. Left unchanged;
            # confirm the intended direction.
            lower_lemma = token.lemma_.lower()
            for word in word_list:
                if word.lower().endswith(lower_lemma):
                    return True
            return False

        def get_gender_number_info(token):
            # Return (masc, fem, neut, plur) flags naming every gender and
            # number reading the token is compatible with.
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source; confirm against upstream.
            masc = fem = neut = plur = False
            if token.tag_ != "PPOSAT":
                if self.has_morph(token, "Number", "Sing"):
                    if self.has_morph(token, "Gender", "Masc"):
                        masc = True
                    if self.has_morph(token, "Gender", "Fem"):
                        fem = True
                    if self.has_morph(token, "Gender", "Neut"):
                        neut = True
                    if lemma_ends_with_word_in_list(
                        token, self.neuter_person_words
                    ):
                        masc = True
                        fem = True
                    if lemma_ends_with_word_in_list(
                        token, self.neuter_male_words
                    ):
                        masc = True
                    if lemma_ends_with_word_in_list(
                        token, self.neuter_female_words
                    ):
                        fem = True
                    # 'and' binds tighter than 'or': the length check only
                    # applies to the 'lein' ending.
                    if (
                        not masc
                        and not fem
                        and (
                            token.lemma_.lower().endswith("chen")
                            or token.lemma_.lower().endswith("lein")
                            and len(token.lemma_) > 6
                        )
                    ):
                        masc = True
                        fem = True
                    if token.pos_ == "PROPN":
                        if token.lemma_ in self.male_names:
                            masc = True
                        if token.lemma_ in self.female_names:
                            fem = True
                        if (
                            token.lemma_ not in self.male_names
                            and token.lemma_ not in self.female_names
                        ):
                            masc = fem = neut = True
                if self.has_morph(token, "Number", "Plur"):
                    plur = True
                if token.pos_ == "PROPN" and not directly:
                    # common noun and proper noun in same chain may have
                    # different genders
                    masc = fem = neut = plur = True
            if self.is_potential_anaphor(token):
                if token.tag_ in ("PROAV", "PRF"):
                    masc = True
                    fem = True
                    neut = True
                    plur = True
                elif token.tag_ == "PPOSAT":
                    if token.text.lower().startswith("sein"):
                        masc = True
                        neut = True
                    elif token.text.lower().startswith("ihr"):
                        fem = True
                        plur = True
                else:
                    if (
                        self.has_morph(token, "Number", "Sing")
                        and self.has_morph(token, "Gender", "Masc")
                        and (
                            self.has_morph(token, "Case", "Dat")
                            or self.has_morph(token, "Case", "Gen")
                        )
                    ):
                        neut = True
                    elif (
                        self.has_morph(token, "Number", "Sing")
                        and self.has_morph(token, "Gender", "Fem")
                        and (
                            self.has_morph(token, "Case", "Acc")
                            or self.has_morph(token, "Case", "Gen")
                        )
                    ):
                        plur = True
            return masc, fem, neut, plur

        doc = referring.doc
        referred_root = doc[referred.root_index]

        (
            referring_masc,
            referring_fem,
            referring_neut,
            referring_plur,
        ) = get_gender_number_info(referring)

        # e.g. 'die Männer und die Frauen' ... 'sie': 'sie' cannot refer only
        # to 'die Männer' or 'die Frauen'
        if (
            len(referred.token_indexes) == 1
            and referring_plur
            and self.is_involved_in_non_or_conjunction(referred_root)
        ):
            return 0

        referred_masc = referred_fem = referred_neut = referred_plur = False

        if len(
            referred.token_indexes
        ) > 1 and self.is_involved_in_non_or_conjunction(referred_root):
            referred_plur = True
            if not referring_plur:
                return 0

        # The mention is compatible with a reading if any of its tokens is.
        for working_token in (doc[index] for index in referred.token_indexes):
            (
                working_masc,
                working_fem,
                working_neut,
                working_plur,
            ) = get_gender_number_info(working_token)
            referred_masc = referred_masc or working_masc
            referred_fem = referred_fem or working_fem
            referred_neut = referred_neut or working_neut
            referred_plur = referred_plur or working_plur

        # Both sides must share at least one gender/number reading.
        if (
            not (referred_masc and referring_masc)
            and not (referred_fem and referring_fem)
            and not (referred_neut and referring_neut)
            and not (referred_plur and referring_plur)
        ):
            return 0

        if referring.tag_ == "PROAV":

            # 'damit' etc. does not refer to nouns over several sentences
            if (
                referring._.coref_chains.temp_sent_index
                - referred_root._.coref_chains.temp_sent_index
                > 1
            ):
                return 0

            # 'damit' etc. cannot refer to people, places or organisations
            # or to male or female anaphors
            for working_token in (
                doc[index] for index in referred.token_indexes
            ):
                if (
                    working_token.lemma_ in self.male_names
                    or working_token.lemma_ in self.female_names
                    or working_token.ent_type_ in ("PER", "LOC", "ORG")
                ):
                    return 0
                if self.is_potential_anaphor(working_token) and (
                    referred_masc or referred_fem
                ):
                    return 0

            # 'damit' cannot refer forward to a noun
            if referring.i < referred.root_index:
                return 0

        if directly:
            # A reflexive anaphor requires a reflexive configuration and a
            # non-reflexive anaphor forbids one.
            if self.is_potential_reflexive_pair(referred, referring) != (
                self.is_reflexive_anaphor(referring) == 2
            ):
                return 0

            if referring.tag_ == "PPOSAT":
                # possessive pronouns cannot refer back to the head within a
                # genitive phrase. This functionality is under 'directly' to
                # improve performance.
                working_token = referring
                while working_token.dep_ != self.root_dep:
                    if (
                        working_token.head.i in referred.token_indexes
                        and working_token.dep_ not in self.conjunction_deps
                    ):
                        return 0
                    if (
                        working_token.dep_ not in self.conjunction_deps
                        and working_token.dep_
                        not in self.dependent_sibling_deps
                        and working_token.dep_ != "ag"
                        and working_token.tag_ != "PPOSAT"
                    ):
                        break
                    working_token = working_token.head

        referring_governing_sibling = referring
        if referring._.coref_chains.temp_governing_sibling is not None:
            referring_governing_sibling = (
                referring._.coref_chains.temp_governing_sibling
            )
        if (
            referring_governing_sibling.dep_ == "sb"
            and referring_governing_sibling.head.lemma_
            in self.verbs_with_personal_subject
        ):
            for working_token in (
                doc[index] for index in referred.token_indexes
            ):
                if (
                    working_token.pos_ == self.propn_pos
                    or working_token.ent_type_ == "PER"
                ):
                    return 2
            return 1

        return 2

    def is_potentially_indefinite(self, token: Token) -> bool:
        """Return whether a determiner/adjective child marks indefiniteness.

        True when any child with a part of speech in ``term_operator_pos``
        has a lemma starting with 'ein' or 'irgendein'.
        """
        return any(
            child.lemma_.lower().startswith(("ein", "irgendein"))
            for child in token.children
            if child.pos_ in self.term_operator_pos
        )

    def is_potentially_definite(self, token: Token) -> bool:
        """Return whether a determiner/adjective child marks definiteness.

        True when any child with a part of speech in ``term_operator_pos``
        has a lemma starting with 'der', 'dies' or 'jen'.
        """
        return any(
            child.lemma_.lower().startswith(("der", "dies", "jen"))
            for child in token.children
            if child.pos_ in self.term_operator_pos
        )

    def is_reflexive_anaphor(self, token: Token) -> int:
        """Return ``2`` for tokens tagged ``PRF`` and ``0`` otherwise."""
        if token.tag_ == "PRF":
            return 2
        else:
            return 0

    @staticmethod
    def get_ancestor_spanning_any_preposition(token: Token) -> Token:
        """Return the head of *token*, skipping over a governing preposition.

        When the head is an ``ADP`` to which *token* is attached as ``nk``,
        the preposition's own head is returned instead. ``None`` is returned
        when *token*, or the skipped preposition, has the dependency
        ``ROOT``.
        """
        if token.dep_ == "ROOT":
            return None
        head = token.head
        if head.pos_ == "ADP" and token.dep_ == "nk":
            if head.dep_ == "ROOT":
                return None
            head = head.head
        return head

    def is_potential_reflexive_pair(
        self, referred: Mention, referring: Token
    ) -> bool:
        """Return whether *referring* stands in a reflexive configuration
        with *referred*.

        Only ``PRON`` tokens qualify. Both sides are first replaced by their
        governing sibling where one is recorded. If the antecedent root is a
        subject (``sb``), the pronoun's ancestors are climbed until one has
        the antecedent as a child (true) or has a different subject (false).
        Otherwise the two must share the same preposition-spanning ancestor,
        or the pronoun's ancestor must itself belong to the mention.
        """

        if referring.pos_ != "PRON":
            return False

        referred_root = referring.doc[referred.root_index]

        if referred_root._.coref_chains.temp_governing_sibling is not None:
            referred_root = referred_root._.coref_chains.temp_governing_sibling

        if referring._.coref_chains.temp_governing_sibling is not None:
            referring = referring._.coref_chains.temp_governing_sibling

        if referred_root.dep_ == "sb":
            for referring_ancestor in referring.ancestors:
                # Loop up through the verb ancestors of the pronoun

                if referred_root in referring_ancestor.children:
                    return True

                # The ancestor has its own subject, so stop here
                if any(
                    t.dep_ == "sb" and t != referred_root
                    for t in referring_ancestor.children
                ):
                    return False
            return False

        referring_ancestor = self.get_ancestor_spanning_any_preposition(
            referring
        )
        referred_ancestor = self.get_ancestor_spanning_any_preposition(
            referred_root
        )
        return referring_ancestor is not None and (
            referring_ancestor == referred_ancestor
            or referring_ancestor.i in referred.token_indexes
        )
diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/__init__.py b/transformations/gender_randomizer/coreferee/coreferee/lang/en/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/config.cfg b/transformations/gender_randomizer/coreferee/coreferee/lang/en/config.cfg new file mode 100644 index 000000000..f62fb16f7 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/config.cfg @@ -0,0 +1,22 @@ +[sm_3_0_0] +model: core_web_sm +from_version: 3.0.0 +to_version: 3.0.0 + +[md_3_0_0] +model: core_web_md +from_version: 3.0.0 +to_version: 3.0.0 + +[lg_3_0_0] +model: core_web_lg +from_version: 3.0.0 +to_version: 3.0.0 + +[trf_3_0_0] +model: core_web_trf +from_version: 3.0.0 +to_version: 3.0.0 +vectors_model: core_web_lg +vectors_from_version: 3.0.0 +vectors_to_version: 3.0.0 diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/COPYING b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/COPYING new file mode 100644 index 000000000..378a8aa84 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/COPYING @@ -0,0 +1,86 @@ +Creative Commons Attribution 4.0 International Public License +By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. + +Section 1 – Definitions. 
+ +Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. +Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. +Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. +Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. +Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. +Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. +Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. 
+Licensor means the individual(s) or entity(ies) granting rights under this Public License. +Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. +Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. +You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. +Section 2 – Scope. + +License grant. +Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: +reproduce and Share the Licensed Material, in whole or in part; and +produce, reproduce, and Share Adapted Material. +Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. +Term. The term of this Public License is specified in Section 6(a). +Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. 
The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. +Downstream recipients. +Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. +No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. +No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). +Other rights. + +Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. +Patent and trademark rights are not licensed under this Public License. +To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. 
In all other cases the Licensor expressly reserves any right to collect such royalties. +Section 3 – License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the following conditions. + +Attribution. + +If You Share the Licensed Material (including in modified form), You must: + +retain the following if it is supplied by the Licensor with the Licensed Material: +identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); +a copyright notice; +a notice that refers to this Public License; +a notice that refers to the disclaimer of warranties; +a URI or hyperlink to the Licensed Material to the extent reasonably practicable; +indicate if You modified the Licensed Material and retain an indication of any previous modifications; and +indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. +You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. +If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. +If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License. +Section 4 – Sui Generis Database Rights. 
+ +Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: + +for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database; +if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and +You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. +For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. +Section 5 – Disclaimer of Warranties and Limitation of Liability. + +Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You. 
+To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You. +The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. +Section 6 – Term and Termination. + +This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. +Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: + +automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or +upon express reinstatement by the Licensor. +For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. +For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. +Sections 1, 5, 6, 7, and 8 survive termination of this Public License. +Section 7 – Other Terms and Conditions. + +The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. 
+Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. +Section 8 – Interpretation. + +For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. +To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. +No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. +Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/LICENSE b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/LICENSE new file mode 100644 index 000000000..d14b9a502 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/LICENSE @@ -0,0 +1,11 @@ +1) Training took place using: + +a) the LitBank corpus (https://github.com/dbamman/litbank) licensed under a Creative Commons Attribution 4.0 International License (https://creativecommons.org/licenses/by/4.0/; reproduced in the COPYING file within this directory). + +b) the ParCor corpus (https://opus.nlpl.eu/ParCor/). 
For further details, see the following article: + +Liane Guillou, Christian Hardmeier, Aaron Smith, Jörg Tiedemann and Bonnie Webber (2014): ParCor 1.0: A Parallel Pronoun-Coreference Corpus to Support Statistical MT, In Proceedings of LREC 2014, Reykjavik, Iceland + +2) The files *words.dat in this directory are derived from Wordnet (https://wordnet.princeton.edu/) and are distributed under the following license: + +WordNet Release 3.0 This software and database is being provided to you, the LICENSEE, by Princeton University under the following license. By obtaining, using and/or copying this software and database, you agree that you have read, understood, and will comply with these terms and conditions.: Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used in advertising or publicity pertaining to distribution of the software and/or database. Title to copyright in this software, database and any associated documentation shall at all times remain with Princeton University and LICENSEE agrees to preserve same. 
diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/__init__.py b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/animal_words.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/animal_words.dat new file mode 100644 index 000000000..573e7b438 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/animal_words.dat @@ -0,0 +1,2690 @@ +Abyssinian +Accipitriformes +Adelie +Aegina +Aegypiidae +Aegyptopithecus +Afghan +Africander +Airedale +Amniota +Angora +Angus +Appaloosa +Appenzeller +Arab +Arabian +Arenaria-Melanocephala +Argonaut +Ayrshire +Blackburn +Brahma +Brahman +Brahmin +Cardigan +Charolais +Cheviot +Chihuahua +Clydesdale +Cornish +Cotswold +Cro-magnon +Cryptoprocta +Devon +Diapsida +Doberman +Dominick +Dominique +Dorking +Duplicidentata +Durham +Edmontonia +EntleBucher +Exmoor +Fissipedia +Friesian +Galloway +Guernsey +Hampshire +Hereford +Holstein +Holstein-Friesian +Ibero-mesornis +Ichyostega +Javanthropus +Jersey +Kodiak +Leonberg +Lhasa +Lincoln +Lipizzan +Lippizan +Lippizaner +Maltese +Manx +Merostomata +Morgan +Muscivora-forficata +Mutillidae +Neandertal +Neanderthal +Newfoundland +Orpington +Ostariophysi +Paranthropus +Peke +Pekinese +Pekingese +Pembroke +Percheron +Pithecanthropus +Plectophera +Pomeranian +Rambouillet +Reynard +Rottweiler +Saluki +Samoyed +Samoyede +Scottie +Sealyham +Sennenhunde +Shetland +Shih-Tzu +Siamese +Sinanthropus +Sinornis +Sivapithecus +Unguiculata +Ungulata +Weimaraner +Welsh +Zinjanthropus +aardvark +aardwolf +abalone +abortus +abrocome +acanthocephalan +acanthopterygian +acarid +acarine +acarus +accentor +acridid +acrodont +actinia +actinian +actiniarian +actinozoan +addax +adder +adelgid +adjutant +admiral +adult +aepyornis +affenpinscher +afropavo +agama +agamid +agnathan +agouti 
+agua +ai +albacore +albatross +alderfly +alewife +allice +alligator +alligatorfish +allis +allmouth +allosaur +allosaurus +alpaca +alsatian +amadavat +amazon +amberfish +amberjack +ambystomid +amniote +amphibian +amphioxus +amphipod +amphiuma +anaconda +anapsid +anaspid +anatotitan +anchovy +andrena +andrenid +anemone +angelfish +angler +anglerfish +anglewing +angleworm +angwantibo +anhinga +ani +ankylosaur +ankylosaurus +annelid +anoa +anole +anomalops +anomalopteryx +anopheline +ant +antbird +anteater +antelope +anthozoan +anthropoid +antlion +anuran +aoudad +apar +apatosaur +apatosaurus +ape +aperea +aphid +aplacophoran +apolemia +appendicularia +apteryx +arachnid +arachnoid +archaeopteryx +archaeornis +archeopteryx +archerfish +archiannelid +architeuthis +archosaur +archosaurian +arctiid +argal +argali +argasid +argentine +argentinosaur +argus +armadillo +armyworm +arrowworm +arthropod +artiodactyl +arui +ascidian +asp +ass +audad +auk +auklet +aurochs +australopithecine +avadavat +avocet +axolotl +aye-aye +baa-lamb +babbler +babiroussa +babirusa +babirussa +baboon +baby +backswimmer +badger +baldpate +balloonfish +bandicoot +bandtail +bangtail +bantam +banteng +banting +baranduki +barbet +barbu +bark-louse +barker +barnacle +baronduki +barosaur +barosaurus +barracouta +barracuda +barrelfish +barunduki +basenji +basilisk +bass +bassarisk +basset +bat +batfish +batrachian +bay +baya +beagle +bear +bearcat +beaugregory +beaver +bedbug +bee +beef +beefalo +beetle +bellbird +bellwether +beluga +bergall +beroe +bettong +biddy +bigeye +bighorn +bilby +billfish +billy +binturong +biped +bird +bison +bitch +bittern +bivalve +blackbeetle +blackbird +blackbuck +blackcap +blackcock +blackfish +blackfly +blackpoll +blacksnake +blacktail +bladdernose +blanquillo +blastocyst +blastosphere +blastula +blennioid +blenny +blindworm +bloodhound +bloodsucker +bloodworm +blower +blowfish +blowfly +blue +blue-belly +bluebill +bluebird +bluebottle +bluefin +bluefish +bluegill 
+bluehead +bluepoint +bluethroat +bluetick +bluewing +boa +boar +boarfish +boarhound +boatbill +bobcat +bobolink +bobtail +bobwhite +bollworm +bolti +bombycid +bonefish +bongo +bonito +bonnethead +bonobo +bonxie +booby +booklouse +borer +borzoi +bot +botfly +bottlenose +bottom-dweller +bottom-feeder +bovid +bovine +bow-wow +bowerbird +bowfin +bowhead +boxer +boxfish +brachiopod +brachyuran +brambling +branchiopod +branchiopodan +brant +bream +brent +briard +brill +brisling +bristletail +brit +britt +brittle-star +broadbill +broadtail +brocket +bronc +broncho +bronco +brontosaur +brontosaurus +broodmare +broody +brotula +browntail +bruin +bryozoan +buck +buckskin +budgereegah +budgerigar +budgerygah +budgie +buffalo +buffalofish +bufflehead +bufo +bug +bulbul +bull +bull-snake +bullbat +bulldog +bullfinch +bullfrog +bullhead +bullock +bullterrier +bumblebee +bunny +bunting +burbot +burrfish +burro +burunduki +bushbaby +bushbuck +bushtit +bustard +butcherbird +buteonine +butterball +butterfish +butterfly +butterflyfish +button-quail +buzzard +by-catch +bycatch +cabassous +cabbageworm +cachalot +cacique +cackler +cacomistle +cacomixle +caddice-fly +caddis-fly +caddisworm +caecilian +caiman +cairn +calf +calosoma +camel +camelopard +canary +canecutter +canid +canine +cankerworm +canvasback +capelan +capelin +capercaillie +capercailzie +capibara +capiz +caplin +capon +caprimulgid +capsid +captive +capuchin +capybara +carabao +caracal +caracara +caracul +carancha +carangid +carcajou +cardinal +cardinalfish +caribe +caribou +carinate +carnivore +carnosaur +carp +carthorse +caseworm +cassowary +cat +catalufa +catamount +catamountain +catarrhine +catbird +caterpillar +catfish +cathartid +catostomid +cattalo +cattle +caudate +cavalla +cavy +cayman +cayuse +cazique +cecropia +cedarbird +centipede +centrarchid +cephalaspid +cephalochordate +cephalopod +cerastes +ceratodus +ceratopsian +ceratosaur +ceratosaurus +cercaria +cero +cervid +cestode +cetacean +chachalaca +chacma 
+chaetodon +chaetognath +chaffinch +chaja +chalcid +chalcidfly +chamaeleon +chameleon +chamois +char +characid +characin +charger +charr +chat +chatterer +cheetah +cheewink +chelonian +chenfish +cherrystone +chestnut +chetah +chevrotain +chewink +chick +chickadee +chicken +chickeree +chigetai +chigger +chigoe +chimaera +chimp +chimpanzee +chinch +chinchilla +chinchillon +chinook +chipmunk +chiropteran +chiton +chlamyphore +chondrichthian +chordate +chough +chow +chrysalis +chrysomelid +chrysopid +chub +chuck-will's-widow +chuckwalla +chum +chunga +cicada +cicala +cichlid +cigarfish +cimarron +cinnabar +cirriped +cirripede +cisco +civet +clam +cleg +clegg +clerid +clingfish +clinid +clumber +clupeid +cnidarian +coachwhip +coati +coati-mondi +coati-mundi +cob +cobia +cobra +cochin +cochineal +cock +cockateel +cockatiel +cockatoo +cockchafer +cocker +cockerel +cockle +cockroach +cod +codfish +codling +coelacanth +coelenterate +coelophysis +coho +cohoe +collembolan +collie +colobus +colt +colubrid +colugo +comatulid +comma +compsognathus +conceptus +conch +conchfish +condor +conenose +coney +conger +conodont +constrictor +convictfish +cony +coon +coondog +coonhound +coot +cooter +cootie +copepod +copper +copperhead +coral +corbina +coreid +corgi +cormorant +corncrake +cornetfish +corythosaur +corythosaurus +coscoroba +cotinga +cottonmouth +cottontail +cottonwick +coucal +cougar +courlan +courser +cow +cowbird +cowfish +cowrie +cowry +cows +coydog +coyote +coypu +crab +crake +crampfish +crane +craniate +crapaud +crappie +crawdad +crawdaddy +crawfish +crawler +crayfish +creeper +creepy-crawly +creole-fish +cricket +crinoid +critter +croaker +crocodile +crocodilian +crossbill +crossopterygian +crotonbug +crow +crow-bait +crowbait +crustacean +ctenophore +cub +cuckoo +cuckoo-bumblebee +cunner +cur +curassow +curlew +currawong +cuscus +cushat +cusk +cusk-eel +cutlassfish +cuttle +cuttlefish +cutworm +cyclops +cyclostome +cygnet +cynodont +cyprinid +cyprinodont +dabbler 
+dabchick +dace +dachshund +dachsie +dalmatian +dam +damselfish +damselfly +danaid +daphnia +darter +das +dassie +dasyure +dasyurid +daw +dayfly +dealfish +deathwatch +decapod +deer +deerhound +defoliator +deinocheirus +deinonychus +demoiselle +devilfish +dhole +diamondback +diapheromera +diapsid +dibranch +dibranchiate +dicamptodon +dicamptodontid +dickey-bird +dickeybird +dicky-bird +dickybird +dicynodont +digitigrade +dik-dik +dimetrodon +dingo +dinoceras +dinocerate +dinosaur +diplodocus +dipper +dipteran +dipteron +diver +dobbin +dobson +dobsonfly +doctor-fish +doctorfish +dodo +doe +dog +dogfish +doggie +doggy +dogie +dogy +doliolum +dollarfish +dolphin +dolphinfish +donkey +doodlebug +dorbeetle +dormouse +dory +dotrel +dotterel +douroucouli +dove +dovekie +dowitcher +dragon +dragonet +dragonfly +drake +drayhorse +driftfish +drill +dromaeosaur +dromedary +drone +drosophila +drum +drumfish +dryopithecine +duck +duckbill +duckling +dugong +dun +dunlin +dunnock +dziggetai +eagle +eaglet +ear-shell +earthworm +earwig +echidna +echinococcus +echinoderm +ecrevisse +ectoproct +ectotherm +edaphosaurus +edentate +edmontosaurus +eel +eelblenny +eelpout +eelworm +eft +eggar +egger +egret +eider +eland +elaphure +elapid +elasmobranch +elater +elaterid +elephant +elk +elkhound +elver +embryo +emmet +emperor +emu +entellus +entire +entoproct +eohippus +eoraptor +ephemeral +ephemerid +ephemeron +ephemeropteran +eptatretus +equid +equine +ermine +ern +erne +escallop +escolar +eurypterid +eutherian +ewe +eyas +eyra +falcon +falcon-gentil +falcon-gentle +fanaloka +fawn +feeder +feist +felid +feline +female +fer-de-lance +ferret +fetus +fice +fieldfare +fieldmouse +fig-bird +figeater +filaria +filefish +filly +finback +finch +fingerling +finisher +firebird +firebrat +firebug +firefly +fish +fish-fly +fisher +fishworm +fissiped +fitch +flagfish +flamefish +flamingo +flatfish +flathead +flatworm +flea +fledgeling +fledgling +flicker +flickertail +flinthead +flounder +fluke +fly 
+flycatcher +foal +foetus +fossa +foulmart +foumart +fowl +fox +foxhound +freetail +fritillary +frog +frogfish +froghopper +frogmouth +fulmar +gadfly +gadoid +galago +gallfly +gallina +gallinacean +gallinule +game +gamecock +gander +gannet +ganoid +gar +garfish +garganey +garpike +gastropod +gastrula +gator +gaur +gavial +gayal +gazelle +gecko +gee-gee +gelding +gelechiid +gempylid +gemsbok +gemsbuck +genet +geoduck +geometrid +gerbil +gerbille +gerenuk +gerfalcon +ghostfish +giant +gib +gibbon +giraffe +glareole +globefish +glossina +glowworm +glutton +gnat +gnatcatcher +gnathostome +gnawer +gnu +goat +goatfish +goatsucker +gobbler +goby +godwit +goggle-eye +goldcrest +goldeneye +goldfinch +goldfish +gomphothere +gooney +goonie +goony +goosander +goose +goosefish +gopher +goral +gorgonian +gorilla +goshawk +gosling +goujon +gracilariid +grackle +grade +grampus +grassfinch +grasshopper +gray +grayback +grayhen +graylag +grebe +greenbottle +greeneye +greenfly +greenling +greenshank +greenwing +grenadier +grey +greyback +greyhen +greyhound +greylag +griffon +grindle +grison +grivet +grizzly +groenendael +grosbeak +grossbeak +ground-shaker +groundfish +groundhog +grouper +grouse +grub +grubby +grunt +grunter +guacharo +guan +guanaco +gudgeon +guenon +guereza +guib +guillemot +guinea +guitarfish +gull +gunnel +guppy +gurnard +gyrfalcon +hack +hackee +hackney +haddock +hadrosaur +hadrosaurus +hag +hagfish +hairstreak +hairtail +hake +halfbeak +halibut +hamadryad +hammerhead +hamster +hangbird +hanuman +hare +harlequin-snake +harpy +harrier +hart +hartebeest +harvestfish +harvestman +hatchling +hausen +hawfinch +hawk +hawkbill +hawkmoth +hawksbill +he-goat +head +headfish +heathfowl +hedgehog +heifer +hellbender +helleri +hellgrammiate +helminth +hemerobiid +hemipode +hemipteran +hemipteron +hen +herbivore +heron +herrerasaur +herrerasaurus +herring +heterostracan +hexapod +hind +hinny +hippo +hippoboscid +hippopotamus +hirudinean +hoactzin +hoatzin +hobby +hog 
+hogchoker +hogfish +hogg +hogget +holibut +holocephalan +holocephalian +holometabola +holothurian +homeotherm +homer +hominid +hominoid +homo +homoiotherm +homopteran +homotherm +honeybee +honeycreeper +honeysucker +honker +hookworm +hoopoe +hoopoo +hooter +hopper +hornbill +hornet +hornpout +horse +horse-head +horsefish +horsefly +horsehead +horseleech +hound +housedog +housefly +howler +human +humanity +humankind +humans +humblebee +hummingbird +humpback +husky +hyaena +hydra +hydroid +hydrozoan +hyena +hymenopter +hymenopteran +hymenopteron +hyrax +ibex +ibis +ichneumon +ichthyosaur +ichthyosaurus +ictodosaur +iguana +iguanid +iguanodon +imago +impala +inchworm +indri +indris +insect +insectivore +instar +invertebrate +isopod +ivorybill +ixodid +jabiru +jacamar +jack +jackal +jackass +jackdaw +jackknife-fish +jackrabbit +jacksmelt +jacksnipe +jade +jaeger +jaguar +jaguarondi +jaguarundi +jassid +javelina +jawfish +jay +jaybird +jellyfish +jennet +jenny +jerboa +jewfish +jigger +jird +jointworm +jument +junco +kaki +kanchil +kangaroo +karakul +katydid +kea +keeshond +kelpie +kestrel +kiang +kid +kildeer +killdeer +killer +killifish +kine +kingbird +kingfish +kingfisher +kinglet +kingsnake +kinkajou +kit +kite +kitten +kittiwake +kitty +kitty-cat +kiwi +knife-handle +knot +koala +kob +komondor +koodoo +kookaburra +koudou +krait +krill +kudu +kuvasz +labyrinthodont +lacertid +lacewing +ladybeetle +ladybird +ladybug +ladyfish +lagomorph +lamb +lambkin +lamellibranch +lammergeier +lammergeyer +lamprey +lampshell +lancelet +lancetfish +langouste +langur +lantern-fly +lanternfish +lapdog +lapin +lappet +lapwing +largemouth +larid +lark +larva +larvacean +lasiocampid +launce +layer +leaf-cutter +leaf-miner +leaf-roller +leafhopper +leatherback +leatherfish +leatherjack +leatherjacket +lechwe +leech +leipoa +lemming +lemur +leoncita +leopard +leopardess +lepidopteran +lepidopteron +leporid +leporide +leppy +leptocephalus +leptodactylid +lerot +leveret +liger +limpet 
+limpkin +ling +lingcod +linnet +lintwhite +lion +lioness +lionet +lionfish +littleneck +live-bearer +livestock +liza +lizard +lizardfish +llama +loach +lobefin +lobster +lobworm +locust +loggerhead +loir +loligo +long-legs +longhorn +longicorn +longlegs +longwool +lookdown +loon +looper +lorikeet +lory +lotte +louse +louvar +lovebird +lowan +lug +lugworm +lumpfish +lumpsucker +lungfish +lycaenid +lygaeid +lymantriid +lynx +lyrebird +macaque +macaw +machilid +mackerel +macrotus +mademoiselle +madrepore +maggot +magpie +mahimahi +maiger +maigre +mako +malacopterygian +malamute +male +malemute +maleo +malinois +mallard +mamba +mammal +mammalian +mammoth +mamo +man +man-eater +man-of-war +manakin +manatee +mandrill +mangabey +maniraptor +mankind +manta +mantid +mantis +mantispid +manul +mapinguari +mara +marabou +marabout +mare +margate +margay +markhoor +markhor +marlin +marmoset +marmot +marsupial +marten +martin +massasauga +mastiff +mastodon +mastodont +mate +maverick +mavis +mayfish +mayfly +meadowlark +mealworm +mealybug +mecopteran +medfly +medusa +medusan +medusoid +meerkat +megabat +megalosaur +megalosaurus +megapode +megathere +megatherian +megatheriid +meloid +menhaden +merganser +merino +merl +merle +merlin +mesohippus +metabola +metatherian +metazoan +mew +microbat +midge +mierkat +migrator +milcher +milker +millepede +miller +miller's-thumb +milliped +millipede +mina +minah +mink +minnow +mirid +mite +mithan +moa +mocker +mockingbird +mojarra +moke +mola +mole +mollie +mollusc +mollusk +molly +mollymawk +moloch +molter +momot +monal +monarch +monaul +mongoose +mongrel +monitor +monkey +monkfish +monotreme +monster +moo-cow +moonfish +moonshell +moor-bird +moorbird +moorcock +moorfowl +moorgame +moorhen +moose +moray +morula +mosquito +mosquitofish +moth +motmot +moufflon +mouflon +moulter +mound-bird +mount +mouse +mouser +mouthbreeder +mudcat +mudder +mudskipper +mudspringer +muishond +mule +mullet +mulloway +mummichog +muntjac +murine +murre 
+muskellunge +muskrat +musquash +mussel +mustang +mustelid +musteline +mutant +mutt +muttonfish +mylodon +mylodontid +myna +mynah +myriapod +mytilid +nag +nandu +nanny +nanny-goat +nanomia +napu +narwal +narwhal +narwhale +natterjack +nautilus +needlefish +nematode +nemertean +nemertine +nerita +neritid +neritina +nester +nestling +neuropteran +neuropteron +newt +nightcrawler +nighthawk +nightingale +nightjar +nightwalker +nilgai +no-see-um +noctuid +nonstarter +nothosaur +notornis +nudibranch +numbat +numbfish +nutcracker +nuthatch +nutria +nyala +nylghai +nylghau +nymph +nymphalid +oarfish +ocelot +octopod +octopus +odonate +offspring +oilbird +oilfish +okapi +oldwench +oldwife +oligochaete +olm +ommastrephes +omnivore +onager +onychophoran +opah +openbill +ophidian +opossum +orang +orangutan +orangutang +orca +oriole +ormer +ornithischian +ornithomimid +ornithopod +orphan +orthopteran +orthopteron +ortolan +ortygan +oryx +oscine +osprey +osteostracan +ostracod +ostracoderm +ostrich +otter +otterhound +ounce +ousel +ouzel +ovenbird +oviraptorid +owl +owlet +ox +oxen +oyster +oyster-fish +oystercatcher +oysterfish +paca +pacemaker +pacer +pacesetter +pachycephalosaur +pachycephalosaurus +pachyderm +packhorse +packrat +paddlefish +paddymelon +pademelon +painter +palfrey +palometa +palomino +panda +pangolin +panther +papillon +parakeet +paraquet +parazoan +paroquet +parr +parrakeet +parroket +parroquet +parrot +parrotfish +partridge +pasang +passerine +patas +pea-chick +peachick +peacock +peafowl +peahen +pearl-fish +pearlfish +peba +peccary +pecker +peckerwood +peeper +peewee +peewit +pekan +pelecypod +pelican +peludo +pelycosaur +pen +pen-tail +penguin +pentail +pentastomid +perch +percoid +percoidean +peregrine +peripatus +perissodactyl +periwinkle +permit +pest +pet +petchary +petrel +pewee +pewit +phalanger +phalarope +phasianid +phasmid +pheasant +phenacomys +phoebe +phoronid +physa +pichiciago +pichiciego +pickerel +piculet +piddock +pie-dog +pierid +pig 
+pigeon +pigfish +piggy +piglet +pika +pike +pike-perch +pikeblenny +pilchard +pilotfish +pinche +pinfish +pinnatiped +pinniped +pinscher +pintado +pintail +pinto +pinworm +pipefish +pipistrel +pipistrelle +pipit +pirana +piranha +pisanosaur +pisanosaurus +pismire +pitta +placental +placoderm +plaice +planaria +planarian +planthopper +plantigrade +planula +platy +platyctenean +platyhelminth +platypus +platyrrhine +platyrrhinian +plecopteran +plectognath +plesiosaur +plesiosaurus +plethodont +pleurodont +plover +plug +poacher +pochard +poeciliid +pogge +pogonophoran +poikilotherm +pointer +polecat +poler +poll +pollack +pollard +pollinator +polliwog +pollock +pollyfish +pollywog +polychaete +polychete +polyp +polyplacophore +polyzoan +pomfret +pompano +pompon +pond-skater +pongid +pony +pooch +poodle +poorwill +popinjay +porbeagle +porcupine +porcupinefish +porgy +poriferan +pork-fish +porker +porkfish +porpoise +possum +post-horse +poster +potamogale +potoroo +potto +poultry +pout +pouter +poyou +prancer +pratincole +prawn +praya +predator +prey +pricket +prickleback +primate +proboscidean +proboscidian +proconsul +procyonid +prongbuck +pronghorn +prosimian +protoavis +protoceratops +protohippus +protomammal +prototherian +proturan +pseudoscorpion +psittacosaur +psittacosaurus +psocid +psychodid +psylla +psyllid +ptarmigan +pterodactyl +pterosaur +pudding-wife +puddingwife +puffbird +puffer +pufferfish +puffin +pug +pug-dog +puku +pullet +puma +pumpkinseed +punkey +punkie +punky +pup +pupa +puppy +pureblood +purebred +puss +pussy +pussycat +pycnogonid +pye-dog +pyralid +pyrrhuloxia +python +quack-quack +quadruped +quagga +quahaug +quahog +quail +quarry +queen +queenfish +quetzal +quiaquia +rabbit +rabbitfish +raccoon +racehorse +racer +racerunner +racoon +rail +ram +ranid +raptor +rasher +rat +ratel +ratite +rattail +ratter +rattler +rattlesnake +raven +ray +razor-fish +razorback +razorbill +redbelly +redbird +redbone +redbreast +redbug +redfish +redhead +redhorse 
+redpoll +redshank +redstart +redtail +reduviid +redwing +reedbird +reeve +reindeer +remora +remount +reptile +reptilian +retriever +rhea +rhesus +rhino +rhinoceros +ribbonfish +ricebird +ridgel +ridgeling +ridgil +ridgling +ridley +riflebird +ringdove +ringhals +ringlet +ringtail +rinkhals +rivulus +roach +roadrunner +roan +robalo +robin +rockchuck +rockfish +rodent +roebuck +roller +rook +rooster +rorqual +rosefish +rotifer +roughleg +roundworm +royal +rudapithecus +rudd +rudderfish +ruff +ruminant +runner +sabertooth +sable +saddlebill +sagitta +saiga +sailfish +sailor's-choice +saki +salamander +salientian +salmon +salmonid +salp +salpa +sambar +sambur +sanderling +sandfish +sandfly +sandgrouse +sandhopper +sandpiper +sapsucker +sarcoptid +sardine +sassaby +saturniid +saurel +saurian +saurischian +sauropod +saury +sawbill +sawfish +sawfly +sawyer +scad +scallop +scaphopod +scarab +scarabaean +scarabaeid +scarabaeus +scaup +scavenger +schipperke +schistosome +schnauzer +schoolmaster +schrod +sciaenid +sciara +sciarid +scincid +scissortail +scollop +scombroid +scooter +scorpaenid +scorpaenoid +scorpion +scorpionfish +scoter +screamer +scrod +scrub-bird +scrubbird +sculpin +scup +scyphozoan +sea-ear +seabird +seafowl +seagull +seahorse +seal +searcher +searobin +seasnail +seismosaur +selachian +seriema +serin +serotine +serow +serpent +serranid +sertularian +serval +setter +sewellel +shad +shadfly +shanny +shark +sharksucker +she-goat +shearwater +sheatfish +sheep +sheep-tick +sheepdog +sheepshead +sheldrake +shelduck +shellfish +shiner +shipworm +shire +shoat +shoebill +shoebird +shorebird +shorthorn +shote +shoveler +shovelhead +shoveller +shrew +shrewmouse +shrike +shrimp +shrimp-fish +shrimpfish +siamang +sidewinder +sierra +sika +sild +silkworm +silurid +silver-tip +silverback +silverfish +silverside +silversides +silverspot +silvertip +simian +siphonophore +sipunculid +sire +siren +sirenian +siskin +sitter +skate +skilletfish +skimmer +skink +skipjack +skua 
+skunk +skylark +slater +slave-maker +sleeper +sleuthhound +slider +sloth +slowworm +slug +smallmouth +smelt +smew +smoothhound +snail +snailfish +snake +snake-fish +snakebird +snakeblenny +snakefish +snakefly +snapper +snipe +snipefish +snoek +snook +snowbird +snowflake +soapfish +sockeye +solan +soldier +soldier-fish +soldierfish +sole +solenogaster +solitaire +songbird +songster +sorrel +soup-fin +soupfin +souslik +sow +spadefish +spadefoot +spaniel +sparid +sparling +sparrow +spat +spawner +spearfish +spermophile +sphecoid +sphingid +spider +spirula +spittlebug +spitz +splitworm +sponge +spongefly +spongillafly +spoonbill +spouter +sprat +sprigtail +springbok +springbuck +springer +springtail +squab +squaretail +squealer +squid +squilla +squirrel +squirrelfish +stablemate +stag +staghound +stalking-horse +stallion +starfish +stargazer +starling +staurikosaur +staurikosaurus +stayer +steamer +steed +steenbok +steeplechaser +steer +stegosaur +stegosaurus +steinbok +stenopterygius +stepper +stickleback +sticktight +stilt +stiltbird +stingray +stinkbird +stinkpot +stint +stirk +stoat +stock +stocker +stomatopod +stonechat +stonefish +stonefly +stork +strawworm +stray +striper +stromateid +struthiomimus +stud +studhorse +stumpknocker +stunt +sturgeon +styracosaur +styracosaurus +sucker +suckerfish +suckling +sumpter +sunfish +superbug +superslasher +surfbird +surffish +surfperch +surgeonfish +suricate +surmullet +survivor +suslik +swallow +swamphen +swan +sweeper +swift +swiftlet +swine +swordfish +swordtail +symphilid +synapsid +tabby +tadpole +taenia +taguan +tailorbird +taipan +taira +takahe +takin +talapoin +tamandu +tamandua +tamanoir +tamarao +tamarau +tamarin +tanager +tapeworm +tapir +tarantula +tardigrade +tarpan +tarpon +tarsier +tatou +tatouay +tattler +tatu +tautog +tayra +teal +teg +teiid +teju +teleost +teleostan +telsontail +tench +tendrac +tenebrionid +tenpounder +tenrec +teras +tercel +tercelet +teredinid +teredo +termite +tern +terrapin +terrier 
+tetra +tetranychid +tetrapod +tettigoniid +thecodont +therapsid +theridiid +theropod +thick-knee +thickhead +thornbill +thoroughbred +thrasher +thread-fish +threadfin +threadfish +threadworm +thresher +thrip +thripid +thrips +throstle +thrush +thylacine +thysanopter +thysanopteron +thysanuron +tichodrome +tick +tiercel +tiger +tiglon +tigon +tigress +tilefish +timucu +tinamou +tineid +tineoid +tinker +tit +titanosaur +titanosaurian +titi +titlark +titmouse +toad +toadfish +tody +tom +tomcat +tomtate +tomtit +tongue-fish +tonguefish +topi +topminnow +torpedo +torsk +tortoise +tortoiseshell +tortoiseshell-cat +tortricid +tortrix +toucan +toucanet +touraco +towhee +toy +trachodon +trachodont +tragopan +tree-frog +treehopper +trematode +trepang +triceratops +trichina +trichopteran +trichopteron +triggerfish +trilobite +tripletail +triton +trogon +trombiculid +trombidiid +tropicbird +trotter +trout +trumpeter +trumpetfish +trunkfish +tsetse +tsine +tuatara +tumblebug +tumbler +tuna +tunicate +tunny +tup +turaco +turacou +turakoo +turbot +turkey +turnstone +turtle +turtledove +tusker +tussah +tusseh +tusser +tussore +tussur +twitterer +tyrannid +tyrannosaur +tyrannosaurus +tzetze +uakari +uintathere +unai +unau +underwing +unguiculate +ungulate +univalve +urial +urochord +urochordate +urodele +urus +utahraptor +vaquita +varan +varment +varmint +vedalia +veery +velociraptor +verdin +vermin +vertebrate +vervet +vespertilionid +vespid +vetchworm +viceroy +vicuna +vinegarroon +viper +vireo +viscacha +viverrine +vixen +vizsla +vole +vulture +wader +wagtail +wahoo +walkingstick +wallaby +walleye +walrus +wapiti +warbler +warhorse +warragal +warrigal +warthog +wasp +watchdog +waterbird +waterbuck +waterdog +waterfowl +waxwing +weakfish +weasel +weaver +weaverbird +webworm +weevil +weka +wether +whale +whalesucker +wheatear +wheatworm +wheeler +whelk +whelp +whidah +whiff +whinchat +whip-scorpion +whip-snake +whippet +whippoorwill +whipsnake +whiptail +whistler +whitebait 
+whiteface +whitefish +whitefly +whitetail +whitethroat +whiting +whooper +whydah +widgeon +wigeon +wiggler +wildcat +wildebeest +willet +windowpane +winkle +wirehair +wireworm +wisent +wolf +wolffish +wolfhound +wolverine +wombat +wood-creeper +wood-frog +wood-rat +woodborer +woodchuck +woodcock +woodcreeper +woodhewer +woodlouse +woodpecker +woodworm +worker +workhorse +world +worm +wrasse +wreckfish +wren +wren-tit +wriggler +wrymouth +wryneck +yak +yearling +yellowbird +yellowfin +yellowhammer +yellowlegs +yellowtail +yellowthroat +young +zebra +zebu +zoophyte +zooplankton +zoril diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/avalent_verbs.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/avalent_verbs.dat new file mode 100644 index 000000000..26b14df6a --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/avalent_verbs.dat @@ -0,0 +1,4 @@ +drizzle +rain +sleet +snow diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/blacklisted_phrases.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/blacklisted_phrases.dat new file mode 100644 index 000000000..b6ebb344e --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/blacklisted_phrases.dat @@ -0,0 +1,6 @@ +by the way +in fact +for example +no wonder +everyone +everybody diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/exclusively_female_words.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/exclusively_female_words.dat new file mode 100644 index 000000000..0b1f12a7e --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/exclusively_female_words.dat @@ -0,0 +1,182 @@ +B-girl +Brownie +Cinderella +Delilah +Lolita +Scout +Wac +Wave +amah +amazon +babe +baby +bachelorette +baggage +ball-breaker +ball-buster +battle-ax +battle-axe +bawd +beauty +begum +beldam 
+beldame +belle +bimbo +bird +bluestocking +bobby-socker +bobbysoxer +bridesmaid +broad +cat +chachka +chick +chit +cocotte +colleen +concubine +coquette +courtesan +crone +cyprian +dame +damoiselle +damosel +damozel +damsel +daughter +deb +debutante +demimondaine +demoiselle +dish +divorcee +doll +dominatrix +donna +dowager +doxy +enchantress +ex +ex-wife +eyeful +fille +flapper +flirt +floozie +floozy +foster-sister +gal +gamine +geisha +gentlewoman +girl +girlfriend +granny +gravida +hag +harlot +heroine +homemaker +hooker +houri +housewife +hoyden +hustler +ianfu +inamorata +jezebel +jilt +knockout +lady +lass +lassie +looker +lulu +ma'am +madam +madame +maenad +maid +maiden +mammy +mantrap +marchioness +materfamilias +matriarch +matron +mayoress +mestiza +mill-girl +minx +miss +missis +missus +missy +mistress +moppet +mother +nanny +nullipara +nurse +nursemaid +nymph +nymphet +odalisque +paramour +peach +peri +prickteaser +primigravida +prostitute +ravisher +romp +rosebud +rover +schoolgirl +scouter +secundigravida +sexpot +sheika +sheikha +shiksa +shikse +signora +signorina +siren +sister +skirt +slattern +smasher +soubrette +spinster +streetwalker +stunner +sweetheart +sylph +tart +tchotchke +tchotchkeleh +tease +temptress +tertigravida +tomboy +tsatske +tshatshke +ux. 
+uxor +vamp +vamper +vestal +vicereine +virago +viscountess +wench +wet-nurse +wetnurse +whore +widow +wife +witch +woman diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/exclusively_male_words.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/exclusively_male_words.dat new file mode 100644 index 000000000..a61fcb3cd --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/exclusively_male_words.dat @@ -0,0 +1,115 @@ +Brownie +Casanova +Don +Esq +Esquire +Fauntleroy +Herr +Jnr +Jr +Junior +Lothario +Methuselah +Monsieur +Samson +Scout +Senhor +Tarzan +adonis +antique +baboo +babu +bachelor +beau +bey +blighter +bloke +boy +boyfriend +bozo +bruiser +bull +buster +castrate +cat +catamite +chap +clotheshorse +cockscomb +codger +coxcomb +cub +cuss +dandy +dog +dude +ejaculator +eunuch +ex +ex-boyfriend +ex-husband +father-figure +fella +feller +fellow +fop +foster-brother +foster-father +gaffer +gallant +galoot +geezer +gent +gentleman +gentleman-at-arms +graybeard +greybeard +guy +he-man +hombre +housefather +hunk +inamorato +ironman +ironside +lad +laddie +macaroni +macho +macho-man +man +man-child +masher +old-timer +oldtimer +paterfamilias +patriarch +philanderer +ploughboy +plowboy +ponce +posseman +rover +schoolboy +scouter +shaver +sheik +signior +signor +signore +sir +sirrah +sod +son +sonny +stepfather +stiff +strapper +stud +swain +swell +widower +widowman +wolf +womaniser +womanizer diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/exclusively_person_words.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/exclusively_person_words.dat new file mode 100644 index 000000000..59175012f --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/exclusively_person_words.dat @@ -0,0 +1,5987 @@ +AWOL +Abenaki +Abo +Aboriginal +Aborigine +Acadian +Achaian +Adventist +Afghanistani +African +African-American 
+Afrikander +Afrikaner +Afro-American +Alabaman +Alabamian +Alaskan +Aleutian +Alexandrian +Algerian +Almoravid +Amerindian +Amhara +Amish +Anabaptist +Anasazi +Andorran +Angevin +Angevine +Anglican +Anglo-American +Anglo-Indian +Angolan +Angolese +Anguillan +Antiguan +Anzac +Apostle +Appalachian +Arabist +Aramaean +Aramean +Arcadian +Areopagite +Argentinian +Argive +Aristotelean +Aristotelian +Arizonan +Arizonian +Arkansan +Arkansawyer +Arminian +Aryan +Ashkenazi +Asian +Asiatic +Athenian +Augustinian +Aussie +Austrian +Aztec +B-girl +Bahai +Bahamian +Bahraini +Bahreini +Balkan +Bangladeshi +Baptist +Barbadian +Bart +Basotho +Batswana +Bavarian +Bechuana +Bedouin +Beduin +Belgian +Belorussian +Beninese +Berliner +Bermudan +Bermudian +Bhutanese +Bhutani +Bisayan +Blackfriar +Blackshirt +Boche +Boer +Bohemian +Bolivian +Bolshevik +Bolshevist +Bornean +Bostonian +Brazilian +Britisher +Briton +Brother +Brownshirt +Brule +Brummie +Brummy +Bruneian +Buddhist +Burundian +Bushman +Byzantine +CEO +CFO +CPA +Cabalist +Cairene +Cajun +Calapooya +Calapuya +Californian +Calvinist +Cambodian +Cameroonian +Canarese +Cantabrigian +Canuck +Capetian +Carlovingian +Carmelite +Carolingian +Carolinian +Cartesian +Carthaginian +Carthusian +Castillian +Catholic +Catholicos +Cavalier +Celt +Cewa +Chadian +Chaldaean +Chaldean +Chaldee +Chancellor +Charon +Chartist +Chasid +Chassid +Cheops +Chewa +Chicano +Chilean +Chiluba +Chimakum +Christian +Circe +Cistercian +Colombian +Coloradan +Colossian +Communist +Comrade +Confederate +Confucian +Confucianist +Congolese +Congregationalist +Connecticuter +Conoy +Conservative +Contra +Converso +Copt +Corinthian +Cornhusker +Cornishman +Cornishwoman +Cossack +Coue +Cretan +Croat +Croatian +Crusader +Cuban +Cymry +Cynic +Cyprian +Cypriot +Cypriote +Cyril +Czechoslovak +Czechoslovakian +DA +DCI +DP +Dane +Dardan +Dardanian +Darwinian +Delawarean +Delawarian +Democrat +Djiboutian +Dominican +Donatist +Dr. 
+Druid +Druse +Druze +Dubliner +Dunkard +Dunker +Dutchman +East-sider +Ebionite +Ecuadoran +Ecuadorian +Edwardian +Egyptologist +Elector +Elizabethan +Englishman +Englishwoman +Eolian +Ephesian +Episcopalian +Eritrean +Esq +Esquire +Essene +Ethiopian +Etonian +Etruscan +Eurafrican +Eurasian +European +Evangelist +Evenk +Eyeish +FO +Fabian +Fauntleroy +Fauve +Federal +Federalist +Fellata +Finn +Florentine +Floridian +Franciscan +Franco-American +Francophil +Francophile +Francophobe +Freemason +Frenchman +Frenchwoman +Freudian +Friend +Fulah +Fulbe +G-man +GP +Gabonese +Gael +Galatian +Galilaean +Gambian +Genoese +Germanist +Ghanian +Gibraltarian +Gipsy +Girondin +Girondist +Glaswegian +Gnostic +Gond +Gongorist +Goth +Graecophile +Grecian +Grenadian +Guatemalan +Guinean +Gurkha +Guru +Guyanese +Haitian +Hakham +Halchidhoma +Hanoverian +Harijan +Hasid +Hassid +Hebraist +Hegelian +Hellene +Highlander +Hinayanist +Hindoo +Hindu +Hispanic +Hollander +Honduran +Hoosier +Huguenot +Hun +Hussite +Hutu +Iberian +Icelander +Idahoan +Igbo +Illinoisan +Incan +Indianan +Inger +Ingerman +Injun +Inquisitor +Inuit +Iowan +Iraki +Irani +Iraqi +Irelander +Irishman +Irishwoman +Islamist +Ismaili +Ismailian +Israeli +Israelite +Jack-tar +Jacksonian +Jacobean +Jacobin +Jacobite +Jainist +Jamaican +Janissary +Jansenist +Jap +Jat +Javan +Jeffersonian +Jerry +Jesuit +Jew +Jew-baiter +Jewess +Jihadist +Jnr +Johnny +Jordanian +Jr +Jugoslav +Jugoslavian +Jungian +Junior +Junker +KP +Kabbalist +Kafir +Kaiser +Kalapooia +Kalapuya +Kampuchean +Kansan +Katari +Kazakhstani +Kelt +Kennan +Kentuckian +Kenyan +Keynesian +Khedive +Khufu +Kichai +Klansman +Kluxer +Kolam +Kraut +Krauthead +Kshatriya +Kurd +Kuwaiti +LPN +Labourite +Laconian +Lady +Lakota +Lamaist +Lamarckian +Lancastrian +Langobard +Laotian +Lapplander +Latinist +Lebanese +Legionnaire +Lesbian +Levantine +Levite +Liberian +Libyan +Liechtensteiner +Liverpudlian +Lolita +Lombard +Londoner +Lothario +Louisianan +Louisianian +Lowlander 
+Lubavitcher +Luddite +Lutheran +Luxembourger +Luxemburger +MVP +Machiavellian +Mackem +Madagascan +Maha +Mahayanist +Mahdist +Mahratta +Mainer +Malawian +Malayan +Maldivan +Maldivian +Malian +Malthusian +Mancunian +Manichaean +Manichean +Manichee +Maoist +Maquisard +Maratha +Marine +Marrano +Marxist +Marylander +Masorete +Masorite +Massorete +Mauritanian +Mauritian +Melchite +Melkite +Mendelian +Mennonite +Menshevik +Mesoamerican +Methodist +Metis +Mexican +Mexican-American +Mexicano +Michigander +Mick +Mickey +Mikmaq +Milady +Milanese +Milquetoast +Miniconju +Minnesotan +Minoan +Missourian +Mithraist +Moghul +Mohammedan +Mollah +Monacan +Monegasque +Mongol +Mongoloid +Monophysite +Monsieur +Monsignor +Montanan +Montserratian +Moonie +Moro +Moroccan +Moslem +Mountie +Mozambican +Mugwump +Muhammadan +Muhammedan +Mulla +Mullah +Muslim +Muslimah +Mycenaen +NIMBY +NOC +Namibian +Nauruan +Nazarene +Nazi +Neapolitan +Nebraskan +Negress +Negro +Negroid +Neoplatonist +Neopolitan +Nepalese +Nestorian +Netherlander +Nevadan +Newtonian +Nicaraguan +Nigerian +Nigerien +Nipponese +Nisei +Nobelist +Nonconformist +Norseman +Northerner +Northman +Nubian +Numidian +Ohioan +Oklahoman +Olmec +Omani +Orangeman +Oregonian +Oriental +Orleanist +Osmanli +Ostrogoth +Oxonian +P.O. 
+POW +Padre +Pakistani +Paleo-American +Paleo-Amerind +Paleo-Indian +Palestinian +Panamanian +Paraguayan +Parisian +Parisienne +Parliamentarian +Parsee +Parsi +Pashtoon +Passamaquody +Pentecostal +Pentecostalist +Peripatetic +Peruvian +Pharaoh +Pharisee +Philippian +Philistine +Pigmy +Pilate +Pilgrim +Piute +Platonist +Polycarp +Post-impressionist +Postimpressionist +Potemkin +Potyokin +Praetorian +Pre-Raphaelite +Presbyterian +Pretender +Prussian +Puritan +Pushtun +Pygmy +Qatari +Quaker +Quebecois +Rajpoot +Rajput +Ranger +Rasta +Realtor +Reb +Rebel +Redskin +Rex +Riffian +Romani +Romeo +Rommany +Rosicrucian +Rotarian +Roundhead +Royalist +Rwandan +SACEUR +SACLANT +SCPO +SMSgt +Sabahan +Sabbatarian +Sadducee +Sahaptino +Salian +Salvadoran +Salvadorean +Salvadorian +Samaritan +Samnite +Samoan +Saracen +Sarawakian +Sassenach +Satanist +Saudi +Sauk +Savoyard +Saxon +Scholastic +Schoolman +Scot +Scotchman +Scotchwoman +Scotsman +Scotswoman +Scouser +Scout +Semite +Senegalese +Senhor +Sephardi +Serb +Serbian +Seychellois +Shah +Shakespearean +Shakespearian +Shaktist +Shavian +Sherpa +Shi'ite +Shiite +Shintoist +Shivaist +Shoshoni +Siberian +Sicilian +Sihasapa +Sikh +Simeon +Singaporean +Sinologist +Sioux +Sir +Sister +Skinnerian +Slav +Slovenian +Socinian +Somalian +Sooner +Sophist +Southerner +Spaniard +Spartan +Stalinist +Stoic +Sudanese +Sufi +Sumatran +Sumerian +Sunnite +Syrian +T-man +Tantrist +Tanzanian +Taoist +Taracahitian +Tarahumara +Tarheel +Tartufe +Tartuffe +Ted +Templar +Tennessean +Teton +Teuton +Teutonist +Texan +Thatcherite +Theban +Thessalian +Thessalonian +Timorese +Tobagonian +Togolese +Toltec +Tory +Townes +Tractarian +Trappist +Trinidadian +Trinitarian +Trotskyist +Trotskyite +Tunisian +Tunker +Turk +Tutsi +Tyke +Ubermensch +Ugandan +Ukranian +Uniat +Uniate +Unitarian +Uriah +Uruguayan +Utahan +Utopian +V.P. 
+VIP +Vaishnava +Vandal +Vedist +Venetian +Venezuelan +Vermonter +Victorian +Viking +Virginian +Visayan +Visigoth +Volunteer +Wagnerian +Wahabi +Wahhabi +Wampanoag +Washingtonian +Watusi +Watutsi +Welshman +Wesleyan +West-sider +Whig +Wiccan +Wisconsinite +Wobbly +Wykehamist +Wyomingite +Yank +Yankee +Yankee-Doodle +Yemeni +Yugoslav +Yugoslavian +Zairean +Zairese +Zambian +Zealander +Zealot +Zimbabwean +Zionist +Zoroastrian +Zuni +abator +abbe +abbess +abbot +abbreviator +abdicator +aberrant +abetter +abettor +abhorrer +abiogenist +abjurer +abnegator +abolitionist +abominator +aboriginal +aborigine +abortionist +abridger +abrogator +absconder +abseiler +absentee +absolutist +absolver +abstainer +abstinent +abstracter +abstractionist +abstractor +abuser +abutter +academic +academician +accessary +accommodator +accompanist +accompanyist +accomplice +accordionist +accoucheur +accoucheuse +accountant +accused +accuser +achiever +acolyte +acoustician +acrobat +activist +actor +actress +actuary +addict +addle-head +addlehead +addressee +adducer +adept +adherent +adjudicator +adjuster +adjustor +adman +administrator +admirer +admonisher +adolescent +adoptee +adopter +adorer +adulator +adulterer +adulteress +advancer +adventurer +adventuress +adversary +advertiser +advertizer +advisee +adviser +advisor +advocate +advocator +aerialist +aeronaut +aerophile +aesthete +aesthetician +aetiologist +affiant +affine +affirmer +aficionado +agent-in-place +aggravator +aggregator +aggressor +agitator +agnate +agnostic +agriculturalist +agriculturist +agronomist +aide +aide-de-camp +aircraftman +aircraftsman +aircrewman +airman +airwoman +alarmist +albino +alcalde +alchemist +alcoholic +alderman +alexic +algebraist +alienator +alienee +alienist +alienor +aliterate +alky +all-rounder +allayer +allegoriser +allegorizer +allergist +alliterator +allocator +almoner +almsgiver +alphabetiser +alphabetizer +alpinist +also-ran +alternate +altoist +altruist +alumna +alumnus +amah +amalgamator 
+amanuensis +amateur +ambassador +ambassadress +ambler +ambusher +ameer +amigo +amir +amnesiac +amnesic +amora +amoralist +amorist +amputator +amputee +anaesthetist +anagnost +analogist +analphabet +analphabetic +analysand +analyst +anarchist +anatomist +ancestor +ancestress +anchorite +anchorman +anchorperson +ancient +androgyne +anecdotist +anesthesiologist +anesthetist +angiologist +anglophil +anglophile +anglophobe +animator +animist +annalist +annihilator +annotator +announcer +annoyer +annuitant +anointer +anomalist +anorectic +anorexic +answerer +antediluvian +anthologist +anthropologist +anthropophagite +anthropophagus +anti +anti-American +anti-Semite +anti-intellectual +anticipant +anticipator +antifeminist +antinomian +antipope +antiquarian +antiquary +ape-man +aper +aphakic +aphasic +aphorist +apiarist +apiculturist +apologist +apostate +apostle +apothecary +apparatchik +appeaser +appellant +applauder +applicant +appointee +appraiser +appreciator +apprehender +apprentice +appropriator +approver +aquanaut +arb +arbiter +arbitrager +arbitrageur +arbitrator +arboriculturist +arborist +archaeologist +archaist +archbishop +archdeacon +archduchess +archduke +archeologist +archimandrite +architect +archivist +archpriest +arguer +arianist +aristocrat +arithmetician +armiger +armor-bearer +arms-runner +arouser +arranger +arriver +arriviste +arrogator +arrowsmith +arsonist +arthritic +artificer +artilleryman +artisan +artist +artiste +ascetic +asker +aspirant +aspirer +ass-kisser +assailant +assassin +assassinator +assaulter +assayer +assemblyman +assemblywoman +assenter +asserter +assessee +assessor +asseverator +assignee +assignor +assimilator +assistant +asthmatic +astrogator +astrologer +astrologist +astronaut +astronomer +astrophysicist +atheist +athlete +attacker +attempter +attendee +attender +attestant +attestator +attester +attestor +attorney +auctioneer +audile +auditor +augur +aunt +auntie +aunty +auspex +auteur +authenticator +author +authoress 
+authoriser +authoritarian +authorizer +auto-mechanic +autobiographer +autochthon +autocrat +autodidact +auxiliary +avenger +aviator +aviatress +aviatrix +avower +ayah +ayatollah +babe +baboo +babu +baby-sitter +babyminder +babysitter +bacchant +bacchante +bachelor +bachelor-at-arms +bachelorette +back-number +backbencher +backbiter +backer +backpacker +backslapper +backslider +backstroker +backwoodsman +bacteriologist +badgerer +baggageman +bagman +bagpiper +bailee +bailiff +bailor +bairn +baker +baldhead +baldy +balker +balladeer +ballerina +balletomane +balloonist +ballplayer +bambino +banderillero +bandit +bandleader +bandmaster +bandsman +banker +bankrupt +banneret +bantamweight +barbarian +bargainer +bargee +bargeman +barkeep +barkeeper +barmaid +barman +barnstormer +baron +baroness +baronet +barrater +barrator +barrister +barrow-boy +barrow-man +bartender +barterer +barytone +basileus +basketeer +basketmaker +basketweaver +bassist +bassoonist +bather +batman +batsman +battler +baulker +bawd +bawler +beachcomber +beadsman +bearer +beatnik +beau +beautician +bedesman +bedfellow +bedlamite +bedwetter +beefeater +beekeeper +begetter +beggar +beggarman +beggarwoman +beginner +beguiler +begum +behaviorist +behaviourist +behemoth +beholder +beldam +beldame +believer +bellboy +belle +bellhop +belligerent +bellman +bellower +bellyacher +beloved +benedick +benefactor +benefactress +bereaved +berk +berserk +berserker +besieger +bestower +betrayer +betrothed +bettor +bey +bibliographer +bibliophile +bibliopole +bibliopolist +bibliothec +bibliotist +bicycler +bicyclist +bidder +bigamist +bigot +bigwig +bilingual +bilingualist +billionaire +bimbo +bimetallist +binger +biochemist +biographer +biologist +biophysicist +birdbrain +birder +bisexual +biter +blabber +blabbermouth +blackamoor +blackguard +blackleg +blackmailer +blacksmith +blasphemer +blaster +bleacher +bleeder +blighter +blockhead +blogger +bloke +blowhard +blubberer +bludgeoner +bluecoat +bluejacket +bluenose 
+bluestocking +bluffer +blunderer +blusterer +bo's'n +bo'sun +boarder +boaster +boatbuilder +boatman +boatswain +bobby +bobby-socker +bobbysoxer +bodybuilder +boffin +bohemian +bolshie +bolshy +bombardier +bondholder +bondmaid +bondman +bondsman +bondswoman +bondwoman +bonehead +bonesetter +bookbinder +bookdealer +booker +bookie +bookkeeper +booklover +bookmaker +bookman +bookseller +bookworm +boomer +boor +bootblack +bootlegger +bootlicker +bootmaker +boozer +borderer +borrower +bos'n +bosun +botanist +botcher +boulevardier +bouncer +bounder +bourgeois +bowdleriser +bowdlerizer +bowman +boy +boyfriend +bozo +bracero +brachycephalic +braggart +bragger +brain-worker +brainiac +brainworker +brakeman +brawler +breadwinner +breaststroker +breeder +brewer +briber +bricklayer +bride-to-be +bridegroom +bridesmaid +brigadier +brigand +broad +broker +broker-dealer +broncobuster +brother +brother-in-law +bruiser +brunet +brunette +buccaneer +buckaroo +buckeroo +buddy +buffoon +bug-hunter +bugger +bugler +bugologist +bulimic +bullfighter +bully +bullyboy +bumbler +bumpkin +bungler +bunkmate +bunter +bureaucrat +burgher +burglar +burgomaster +burgrave +bursar +busboy +bushman +bushwhacker +businessman +businessperson +businesswoman +busker +buster +busybody +butch +butcher +butterfingers +buttinsky +buyer +by-blow +bystander +cabalist +cabinetmaker +caddie +cadet +cadger +caffer +caffre +cager +caitiff +calif +caliph +caller +caller-out +caller-up +calligrapher +calligraphist +cameraman +campaigner +campmate +candidate +candlemaker +candymaker +cannibal +cannoneer +canoeist +canonist +cantor +canvasser +capitalist +capo +captain +captor +capturer +car-mechanic +carabineer +carabinier +carbineer +cardholder +cardiologist +cardsharp +cardsharper +careerist +caregiver +caretaker +carhop +caricaturist +carillonneur +caroler +caroller +carouser +carpenter +carper +carpetbagger +carrottop +cartographer +cartoonist +caseworker +cashier +castaway +castrate +castrato +casuist 
+cataleptic +cataloger +cataloguer +catamite +catechist +catechumen +caterer +cattleman +cavalier +cavalryman +caveman +caviler +caviller +celebrant +celebrater +celebrator +celibate +cellist +cenobite +censor +centenarian +centerfielder +centrist +centurion +ceramicist +ceramist +chain-smoker +chairman +chairperson +chairwoman +challenger +chambermaid +champ +champion +chancellor +changeling +chapelgoer +chaperon +chaperone +chaplain +chargeman +charlatan +charmer +chartist +charwoman +chased +chauvinist +chawbacon +cheap-jack +cheapjack +cheapskate +cheater +chebab +cheerer +cheerleader +cheesemonger +chef +chemist +chewer +chief +chieftain +child +chiliast +chimneysweep +chimneysweeper +chiromancer +chiropodist +chiropractor +chiseler +chiseller +choirboy +choirmaster +chooser +choragus +choreographer +chorine +chorister +chronicler +chucker-out +chump +churchgoer +churchman +churchwarden +churl +chutzpanik +cicerone +cinematographer +citizen +civilian +claimant +clairvoyant +clansman +clanswoman +clarinetist +clarinettist +classicist +classmate +claustrophobe +clergyman +cleric +clericalist +clerk +climatologist +clinician +cloakmaker +clockmaker +clocksmith +closer +clothier +clown +co-beneficiary +co-defendant +co-discoverer +co-ed +co-pilot +co-respondent +co-star +co-worker +coachbuilder +coachman +coadjutor +coalman +coastguardsman +coauthor +coaxer +cobber +cocksucker +coconspirator +coddler +codefendant +coder +codetalker +codger +coenobite +coeval +cofounder +cognoscente +coiffeur +coiffeuse +coiner +collaborationist +collaborator +colleague +collectivist +colleen +collegian +collier +colonel +colonial +colonialist +coloniser +colonist +colonizer +colored +colorist +colossus +columnist +combatant +comedian +comedienne +comer +comic +commandant +commander +commentator +commie +commissar +commissionaire +commissioner +committeeman +committeewoman +commodore +commoner +communicant +communicator +communist +companion +compatriot +compeer +compere 
+competitor +complainant +complainer +complexifier +composer +compositor +compromiser +comptroller +compulsive +comrade +conceiver +concert-goer +concessionaire +concessioner +conchologist +concierge +conciliator +concubine +conductress +confectioner +confederate +conferee +conferrer +confessor +confidant +confidante +conformist +confrere +confuter +congregant +congressman +congresswoman +conjurer +conjuror +connoisseur +conqueror +conquistador +conscript +conservationist +conservative +conservativist +conservator +consignee +consigner +consignor +conspirator +constitutionalist +constructivist +constructor +consul +consultant +consumer +consumptive +contadino +contemplative +contemporary +contender +contestant +contestee +contester +contortionist +contrabandist +contrapuntist +contrarian +contributor +contriver +controversialist +convalescent +convener +conventioneer +conversationalist +conversationist +convert +conveyancer +convict +coolie +cooly +cooperator +coordinator +cop +copartner +copilot +coppersmith +copycat +copyist +copyreader +copywriter +coquette +coreligionist +corespondent +cornerback +cornetist +cornhusker +coroner +corporal +corporatist +correspondent +cosignatory +cosigner +cosmetician +cosmetologist +cosmographer +cosmographist +cosmologist +cosmonaut +cosmopolitan +cosmopolite +costermonger +costumer +costumier +cotenant +cottager +cottier +councillor +councilman +councilwoman +counsellor +counselor +counselor-at-law +counter-revolutionist +counterdemonstrator +counterfeiter +counterman +counterperson +counterrevolutionary +counterrevolutionist +counterspy +counterterrorist +counterwoman +countess +countryman +countrywoman +courier +courtesan +courtier +cousin +cousin-german +couturier +cowboy +cowgirl +cowhand +cowherd +cowman +cowpoke +cowpuncher +coxswain +crackpot +cracksman +crafter +craftsman +cragsman +craniologist +crap-shooter +crapshooter +crasher +craven +crazy +creditor +cretin +crewman +cricketer +crier +criminal +criminologist 
+cripple +critic +crofter +crone +crony +crookback +crooner +cropper +cross-dresser +cross-examiner +cross-questioner +crossbencher +crosspatch +croupier +cruiserweight +crusader +crybaby +cryptanalyst +cryptographer +cryptologist +crystallographer +cubist +cuckold +cuirassier +culprit +cultist +cunctator +cupbearer +curandera +curandero +curate +curator +curmudgeon +custodian +customer +cut-up +cutler +cutpurse +cutthroat +cyber-terrorist +cybernaut +cyborg +cyclist +cymbalist +cynic +cyprian +cytogeneticist +cytologist +czar +czarina +czaritza +dacoit +dad +daddy +dago +dairymaid +dairyman +dakoit +dalesman +dallier +dame +damoiselle +damosel +damozel +damsel +dancer +dancing-master +danseur +danseuse +daredevil +darkey +darkie +darky +darner +dastard +dauber +daughter +daughter-in-law +dauphin +dawdler +dayboy +daydreamer +daygirl +deacon +deaconess +deadbeat +deaf-mute +dear +dearest +dearie +deary +deb +debaser +debater +debauchee +debaucher +debitor +debtor +debutante +decadent +deceased +decedent +deceiver +decipherer +deckhand +declarer +decorator +defalcator +defamer +defaulter +defeatist +defecator +defector +defendant +defender +defiler +defrauder +degenerate +degrader +deipnosophist +deist +delayer +delegate +delinquent +deliveryman +demagog +demagogue +demander +demimondaine +democrat +demographer +demographist +demoniac +demonstrator +dentist +denturist +departed +departer +dependant +dependent +deponent +deportee +deposer +depositor +depreciator +depressive +deputy +dermatologist +dervish +descendant +descendent +deserter +designer +deskman +desperado +desperate +despoiler +despot +detainee +detective +determinist +detractor +deviant +deviate +deviationist +devisee +deviser +devisor +devotee +devourer +diabetic +diabolist +diagnostician +dialectician +diarist +dichromat +dickhead +dictator +die-sinker +diehard +diemaker +diesinker +dieter +dietician +dietitian +differentiator +dignitary +dilettante +dilly-dallier +dillydallier +dimwit +dingbat 
+diocesan +diplomat +diplomate +diplomatist +dipsomaniac +director +disarmer +disbeliever +disburser +disciple +disciplinarian +discoverer +discriminator +discussant +disentangler +disparager +dispatcher +disprover +disputant +dissembler +disseminator +dissenter +dissident +dissimulator +distiller +distortionist +disturber +diva +diversionist +diviner +divorcee +dj +do-gooder +do-nothing +docent +dock-walloper +docker +dockhand +dockworker +doctrinaire +dodderer +doer +doge +dogfighter +dogmatist +dogsbody +dolichocephalic +dolt +domestic +dominatrix +domine +dominee +dominie +dominus +don't-know +donee +donna +donor +doofus +doorkeeper +doorman +dork +dosser +dotard +double-crosser +double-dealer +doubter +doula +dowager +down-and-out +doxy +doyen +doyenne +draftee +drafter +draftsman +draftsperson +dragoman +dragoon +dramatist +draper +draughtsman +drawee +drawler +dreamer +dressmaker +dribbler +drifter +drinker +driveller +drooler +dropkicker +dropout +drover +drudge +druggist +drumbeater +drummer +drunk +drunk-and-disorderly +drunkard +dry +dualist +duce +duchess +ducky +dude +dueler +duelist +dueller +duellist +duenna +duffer +duke +dulcinea +dullard +dumbass +dunce +dunderhead +dunker +dupe +dustman +dweeb +dweller +dyer +dynamiter +dynamitist +dynast +dyslectic +dyspeptic +earl +earner +earthling +earthman +easterner +eavesdropper +eccentric +ecclesiastic +ecdysiast +eclectic +eclecticist +ecologist +econometrician +econometrist +economiser +economist +economizer +ectomorph +editorialist +educatee +educationalist +educationist +educator +effecter +effendi +egalitarian +egghead +egocentric +egoist +egomaniac +egotist +ejaculator +eldest +elector +electrician +electrocutioner +electrologist +electroplater +electrotherapist +elegist +elitist +elocutionist +emancipationist +emancipator +embalmer +embassador +embezzler +embroiderer +embroideress +embryologist +emcee +emeer +emeritus +emigrant +emigre +emigree +emir +emissary +empiricist +employable +employee 
+employer +empress +emptor +emulator +enate +enchanter +enchantress +encroacher +encyclopaedist +encyclopedist +endocrinologist +endodontist +endomorph +endorser +enforcer +engraver +enjoyer +enlistee +enologist +enophile +enquirer +enrollee +enterpriser +entertainer +enthusiast +entomologist +entrepreneur +enumerator +environmentalist +enzymologist +eparch +epicene +epicure +epicurean +epidemiologist +epigon +epigone +epileptic +epistemologist +equal +equalitarian +equerry +equestrian +equivocator +eradicator +eremite +erotic +escalader +escapee +escapist +escapologist +eschatologist +esquire +essayer +essayist +esthete +esthetician +estimator +etcher +ethician +ethicist +ethnarch +ethnic +ethnographer +ethnologist +ethologist +etiologist +etymologist +eulogist +eunuch +evacuee +evaluator +evangelist +everybody +everyman +everyone +evildoer +evolutionist +ex-boyfriend +ex-gambler +ex-husband +ex-mayor +ex-president +ex-serviceman +ex-spouse +ex-wife +examinee +examiner +exarch +exchanger +exciseman +excogitator +excursionist +excuser +executant +executioner +executor +executrix +exegete +exhibitioner +exhibitionist +exhibitor +existentialist +exodontist +exorciser +expat +expatriate +expender +experimenter +expert +exploiter +exporter +expositor +expounder +expressionist +expurgator +exterminator +extern +extoller +extortioner +extortionist +extravert +extremist +extrovert +eyewitness +fabricator +fabulist +facilitator +factotum +faddist +fakeer +faker +fakir +falangist +falconer +faller +falsifier +famulus +fanatic +fancier +fantasist +fantast +faqir +faquir +farmerette +farmhand +farrier +fascist +fascista +fashionmonger +fatalist +fathead +father-figure +father-in-law +fatso +fatty +faultfinder +fauvist +fawner +featherweight +federalist +fella +fellah +feller +feminist +fence-sitter +fencer +fencesitter +fermentologist +ferryman +fetishist +feudatory +fiance +fiancee +fibber +fiddler +fiduciary +fielder +fieldhand +fieldsman +fieldworker +figurer +filer 
+filibusterer +fill-in +fille +filmmaker +finagler +finalist +financier +fink +fire-eater +fire-swallower +firefighter +first-nighter +first-rater +firstborn +fisherman +fishmonger +fishwife +fitter +flag-waver +flagellant +flamen +flanker +flapper +flatmate +flatterer +flautist +flibbertigibbet +flogger +floorwalker +floozie +floozy +flouter +flunkey +flunky +flutist +fly-by-night +flyweight +foe +foeman +fogey +fogy +follower +fomenter +fondler +foodie +fool +footballer +footman +footpad +footslogger +fop +forager +forbear +forebear +forecaster +forefather +foreigner +forelady +foreman +foremother +foreperson +forewoman +forger +forgiver +fornicator +fornicatress +fortuneteller +forty-niner +fossilist +foster-brother +foster-child +foster-daughter +foster-father +foster-mother +foster-nurse +foster-parent +foster-sister +foster-son +fosterling +foundling +foundress +four-flusher +framer +franc-tireur +free-lance +free-liver +freebooter +freedman +freedwoman +freeholder +freelance +freelancer +freeloader +freeman +freethinker +freewheeler +freewoman +frequenter +fresher +freshman +friar +friend +frogman +front-runner +frontbencher +frontiersman +frontierswoman +frotteur +fruiterer +frump +fucker +fuckhead +fuddy-duddy +fugitive +fugleman +fumbler +funambulist +functionalist +functionary +fundamentalist +furrier +fusilier +fuss-budget +fusspot +futurist +gadabout +gadgeteer +gaffer +gagman +gagster +gagwriter +gallant +galoot +galvaniser +galvanizer +gambist +gambler +gamekeeper +games-master +games-mistress +gamin +gamine +ganef +ganger +gangsta +gangster +ganof +gaolbird +gaoler +garbageman +gardener +garment-worker +garmentmaker +garnishee +garroter +garrotter +gasman +gastroenterologist +gastronome +gatecrasher +gatekeeper +gatherer +gaucho +gawk +gawker +gay +geek +geezer +geisha +gendarme +genealogist +generalissimo +generalist +geneticist +genitor +gentile +gentleman +gentleman-at-arms +gentlewoman +geographer +geologist +geomancer +geometer +geometrician 
+geophysicist +geriatrician +gerontologist +ghostwriter +giggler +gigolo +gilder +ginzo +gipsy +girl +girlfriend +git +gitana +gitano +giver +gladiator +glass-cutter +glassblower +glassmaker +glassworker +glazer +glazier +gleaner +globetrotter +glossarist +go-between +go-getter +goatherd +godchild +goddaughter +godfather +godmother +godparent +godson +goer +gofer +gold-beater +gold-worker +goldbeater +goldworker +golfer +goliard +gondolier +gondoliere +goner +gonif +goniff +good-for-naught +good-for-nothing +goody-goody +goof +goof-off +goofball +goon +gorger +gospeler +gospeller +gossiper +gossipmonger +gouger +gourmand +gourmandizer +gourmet +governess +goy +grabber +grader +grammarian +gramps +gran +grandad +grandaunt +grandchild +granddad +granddaddy +granddaughter +grandee +grandfather +grandma +grandmaster +grandmother +grandnephew +grandniece +grandpa +grandparent +grandson +grandstander +granduncle +granger +grannie +grantee +granter +grantor +graphologist +gravedigger +graverobber +graybeard +grazier +greaseball +greaser +great +great-aunt +great-nephew +great-niece +great-uncle +greengrocer +greenhorn +greenskeeper +greeter +griever +grifter +gringo +grinner +griot +groaner +grocer +groom +groom-to-be +groomsman +grouch +groundbreaker +groundkeeper +groundling +groundskeeper +groundsman +groupie +groveler +groveller +grower +grownup +grumbler +grump +guarantor +guardian +guardsman +guerilla +guerrilla +guesser +guestworker +guitarist +gulper +gunman +gunner +gunrunner +gunslinger +gunsmith +guru +guttersnipe +guvnor +guzzler +gymnast +gymnosophist +gynaecologist +gynandromorph +gynecologist +haberdasher +habitant +habitue +hacker +hadji +haematologist +haemophile +haemophiliac +haggler +hagiographer +hagiographist +hagiologist +hairdresser +hairsplitter +hairstylist +haji +hajji +hakeem +hakim +halberdier +half-breed +half-brother +half-caste +half-pint +half-sister +half-wit +handicapper +handler +handyman +hanger-on +hangman +haranguer +harasser 
+hardliner +hardwareman +harlequin +harlot +harmoniser +harmonizer +harper +harpist +harpooneer +harpooner +harpsichordist +harridan +harum-scarum +has-been +hatemonger +hater +hatmaker +hatter +hauler +haulier +have +have-not +hawker +hawkshaw +hayseed +hazan +he-man +head-shrinker +headcounter +headhunter +headliner +headman +headmaster +headmistress +headsman +headwaiter +healer +hearer +heartthrob +heathen +heavyweight +heckler +hedger +hedonist +heir +heir-at-law +heiress +hell-kite +hell-rooster +hellcat +hellion +helmsman +helot +helper +helpmate +helpmeet +hematologist +hemiplegic +hemophile +hemophiliac +henchman +herbalist +herdsman +heretic +heritor +hermaphrodite +hermit +herpetologist +hesitater +hesitator +heterosexual +hewer +hick +hierarch +high-muck-a-muck +high-up +highbinder +highbrow +higher-up +highflier +highflyer +highjacker +highwayman +hijacker +hiker +hillbilly +hippie +hippy +hipster +hireling +hirer +hisser +histologist +historian +historiographer +histrion +hitchhiker +hitman +hitter +hoarder +hoaxer +hobbledehoy +hobbler +hobbyist +hobo +hodman +holidaymaker +hombre +home-builder +homebody +homeboy +homebuilder +homegirl +homemaker +homeopath +homeowner +homesteader +homoeopath +homophile +homophobe +homosexual +homunculus +honcho +honeymooner +honkey +honkie +honky +honoree +hoodlum +hoofer +hooligan +hopeful +hoper +hornist +horologer +horologist +horseman +horseshoer +horsewoman +horticulturist +hosier +hostage +hosteller +hostess +hostler +hotelier +hotelkeeper +hotelman +hothead +hotshot +house-builder +housebreaker +housebuilder +housefather +houseguest +householder +househusband +housekeeper +housemaid +houseman +housemaster +housemate +housemother +housewife +housewrecker +hoyden +hubby +huckster +huddler +hugger +humanist +humanitarian +humdinger +humorist +humourist +hundred-percenter +hunter-gatherer +huntress +huntsman +hurdler +hurler +husband +husbandman +hussar +hussy +hustler +hydrologist +hydromancer +hygienist +hymie 
+hyperope +hypertensive +hypnotiser +hypnotist +hypnotizer +hypochondriac +hypocrite +hypotensive +hysteric +ianfu +ice-skater +iceman +ichthyologist +iconoclast +idealist +idealogue +ideologist +ideologue +idiot +idler +idolater +idolatress +idoliser +idolizer +ignoramus +illegitimate +illiterate +illusionist +illustrator +imam +imaum +imbecile +imbiber +imitator +immigrant +immune +immunologist +imperialist +impersonator +importee +importer +imposter +impostor +impresario +impressionist +in-law +inamorata +inamorato +inciter +incompetent +incumbent +incurable +independent +indexer +indigen +indigene +individualist +indorser +inductee +industrialist +inebriate +infant +infantryman +infernal +infidel +infielder +infiltrator +informant +informer +ingrate +inhabitant +inheritor +inheritress +inheritrix +initiator +inmate +innkeeper +innocent +innovator +inoculator +inpatient +inquirer +inquisitor +insider +insolvent +insomniac +inspector +inspirer +instigant +instigator +instructor +instructress +instrumentalist +insured +insurgent +insurrectionist +intellectual +intercessor +interlocutor +interloper +intermediary +intermediator +intern +internationalist +interne +internee +internist +internuncio +interrogator +intersex +intervenor +interviewee +interviewer +intimate +intriguer +introvert +intruder +invader +invalid +invalidator +inventor +investigator +investor +invigilator +invitee +ironist +ironman +ironside +ironworker +irredentist +irreligionist +irridentist +island-dweller +islander +isolationist +itinerant +jabberer +jackanapes +jailbird +jailer +jailor +janissary +janitor +jawan +jaywalker +jazzman +jeerer +jerk-off +jerker +jerry-builder +jester +jeweler +jeweller +jigaboo +jilt +jingo +jingoist +jobber +jobholder +jogger +joiner +jokester +jongleur +journalist +journeyer +journeyman +judge +juggler +junior +junkie +junky +jurist +juror +juryman +jurywoman +justiciar +justifier +juvenile +kabbalist +kafir +kalif +kaliph +keeper +keyboardist +khalif +khalifah 
+kibbutznik +kibitzer +kicker +kiddy +kidnaper +kidnapper +kike +killjoy +kindergartener +kindergartner +kinsman +kinsperson +kinswoman +kleptomaniac +klutz +knacker +knight-errant +knitter +know-all +know-it-all +knower +knucklehead +kolkhoznik +kook +laborer +labourer +lacer +lackey +lad +laddie +lady +lady-in-waiting +ladylove +laggard +lagger +laird +lamenter +laminator +lamplighter +lampooner +lancer +landgrave +landholder +landlady +landlord +landlubber +landman +landowner +landscaper +landscapist +landsman +langlaufer +languisher +lapidarist +lapidary +lapidator +lapidist +larcener +larcenist +lasher +lass +lassie +latecomer +latitudinarian +laudator +laughingstock +laundress +laundryman +laundrywoman +laureate +lawbreaker +lawgiver +lawmaker +lawman +lawyer +layabout +layman +layperson +lazar +lazybones +leaker +leaper +learner +leaseholder +leatherneck +leaver +lech +lecher +lector +lecturer +ledgeman +left-hander +left-winger +lefthander +leftist +lefty +legate +legatee +legionary +legionnaire +legislator +lender +lensman +leper +lepidopterist +lepidopterologist +lesbian +lessee +lessor +letch +letterer +letterman +leveler +leveller +lexicographer +lexicologist +liar +libber +libeler +liberal +liberalist +liberator +libertarian +libertine +librarian +librettist +licensee +licenser +licentiate +lie-abed +liegeman +lieutenant +lifeguard +lifer +lifter +light-o'-love +light-of-love +lighterman +lightweight +limey +limner +limnologist +limper +line-shooter +linendraper +linesman +lingerer +linguist +linkboy +linkman +linksman +lion-hunter +liquidator +lisper +listener +literate +lithographer +lithomancer +litigant +litigator +litter-bearer +litterateur +litterbug +litterer +liturgist +liveryman +loader +loather +lobbyist +lobsterback +lobsterman +locater +locator +lockkeeper +lockman +lockmaster +locksmith +locum +lodger +logger +logician +logistician +logomach +logomachist +loiterer +loner +longbowman +longer +longshoreman +look-alike +looker +looker-on 
+looney +loony +looter +loser +lotus-eater +loudmouth +lout +lovely +lover +lowbrow +lowerclassman +lowlife +loyalist +lubber +lulu +lumberman +luminary +lummox +lumper +lunatic +luncher +lunger +lunkhead +lurcher +lurker +lush +lutanist +lutenist +luthier +lutist +lyricist +lyrist +ma'am +macebearer +macer +machinator +machinist +macho +macho-man +macroeconomist +macushla +madam +madame +madcap +madman +madrigalist +madwoman +maenad +maestro +mafioso +magdalen +magician +magistrate +magnate +magnifico +magus +maharaja +maharajah +maharanee +maharani +mahatma +mahout +maid +maidservant +mailman +maimer +maintainer +major-domo +major-general +majorette +make-peace +malacologist +malahini +malcontent +malefactor +malfeasant +maligner +malik +malingerer +maltman +maltreater +maltster +mammalogist +mammy +man-about-town +man-at-arms +man-child +manager +manageress +mandatary +mandator +maneuverer +mangler +maniac +manic-depressive +manicurist +manoeuvrer +manservant +manslayer +manumitter +map-reader +mapper +marathoner +marauder +marcher +marchioness +margrave +marine +mariner +marketer +marksman +marquess +married +marshal +martinet +martyr +marveller +masker +masochist +masquer +masquerader +massager +masseur +masseuse +master-at-arms +mastermind +masturbator +matador +matcher +matchmaker +mater +materfamilias +materialist +mathematician +matman +matriarch +matriculate +matrikin +matrisib +matron +mauler +maven +mavin +mayor +mayoress +meanie +measurer +meatman +mechanic +mechanist +medalist +medallist +meddler +mediator +mediatrix +medico +meeter +megalomaniac +melancholiac +melancholic +meliorist +melter +memoriser +memorizer +memsahib +mender +mendicant +menial +mensch +mensh +mentioner +mentor +mercenary +merchandiser +merchant +merchant-venturer +merrymaker +meshuggeneh +meshuggener +mesmerist +mesmerizer +mesomorph +messenger +messmate +mestiza +mestizo +metalhead +metallurgist +metalworker +meteorologist +metic +metropolitan +microbiologist +microeconomist 
+microscopist +middlebrow +middleman +middleweight +midget +midinette +midshipman +midwife +migrant +mikado +militant +militarist +militiaman +milkmaid +milkman +milksop +mill-girl +mill-hand +millenarian +millenarist +milliner +millionaire +millionairess +millwright +milord +mimer +mimic +mimicker +minder +miner +mineralogist +mineworker +miniaturist +minimalist +minion +ministrant +minor +minstrel +minter +minx +misanthrope +misanthropist +misbeliever +mischief-maker +miscreant +miser +misfit +misleader +misogamist +misogynist +missionary +missioner +missis +missus +missy +mistress +mixed-blood +mixologist +mnemonist +moaner +mobster +mod +modeler +modeller +moderate +moderationist +modernist +modiste +molester +moll +mollycoddle +mollycoddler +mom +momma +mommy +monarchist +monastic +monetarist +moneyer +moneygrubber +moneylender +moneyman +monger +mongoloid +monitrice +monochromat +monogamist +monogynist +monolingual +monologist +monomaniac +monopoliser +monopolist +monopolizer +monotheist +mooch +moocher +moon-curser +moonlighter +moonshiner +mope +mopper +moppet +moralist +morosoph +mortgagee +mortgager +mortgagor +mortician +moss-trooper +mossback +mother-in-law +motherfucker +motile +motorcyclist +motormouth +moujik +mountaineer +mountebank +mounter +mourner +moviegoer +muadhdhin +muazzin +muckraker +mudslinger +muezzin +muggee +mugger +muggins +mugwump +mujahid +mujik +mujtihad +mulatto +muleteer +multi-billionaire +mumbler +mummer +muncher +muralist +murderee +murderer +murderess +murmurer +muscle-builder +musclebuilder +muscleman +muser +musher +musician +musicologist +musketeer +mutilator +mutineer +mutterer +muttonhead +muzhik +muzjik +muzzler +mycologist +mycophage +mycophagist +myope +mystic +mythologist +nabob +nagger +naif +nailer +namby-pamby +namer +namesake +nance +nanus +naprapath +narc +narcissist +narcist +nark +narrator +natator +national +nationalist +nativist +naturalist +naturist +naturopath +navigator +navvy +nawab +naysayer +nazi 
+ne'er-do-well +nebbech +nebbish +necessitarian +necker +necromancer +needer +needlewoman +needleworker +negativist +neglecter +negotiant +negotiator +negotiatress +negotiatrix +neoclassicist +neocon +neoconservative +neoliberal +neologist +neonate +nephew +nepotist +nerd +netminder +neurasthenic +neurobiologist +neurolinguist +neurologist +neuroscientist +neurosurgeon +neurotic +neutral +neutralist +newbie +newborn +newcomer +newlywed +newsagent +newsboy +newscaster +newsdealer +newsman +newsmonger +newspaperman +newspaperwoman +newsperson +newsreader +newsvendor +newswoman +newswriter +nibbler +niece +nigga +niggard +nigger +niggler +nightbird +nightrider +nigra +nihilist +nincompoop +ninny +niqaabi +nitpicker +nitwit +no-account +no-show +nob +noble +nobleman +noblewoman +nobody +noctambulist +nomad +nominalist +nominator +nominee +non-Catholic +non-Jew +non-resistant +nonachiever +nonagenarian +nonattender +nonbeliever +noncandidate +noncitizen +noncom +noncombatant +noncompliant +nonconformist +nondescript +nondrinker +nondriver +nonesuch +nonmember +nonparticipant +nonpartisan +nonpartizan +nonperson +nonreader +nonresident +nonsuch +nonworker +normaliser +normalizer +nosey-parker +nosher +nosy-parker +notability +notable +notary +noticer +nouveau-riche +novelist +novice +novillero +nudger +nudist +nudnick +nudnik +nullifier +nullipara +numerologist +numismatist +numismatologist +numskull +nuncio +nurse +nurse-midwife +nurseling +nursemaid +nurser +nurseryman +nursling +nutcase +nutritionist +nutter +nymphet +nympho +nympholept +nymphomaniac +oaf +oarsman +oarswoman +objector +oblate +obliger +oboist +obscurantist +observer +obsessive +obsessive-compulsive +obstetrician +obstructionist +occultist +occupant +occupier +oceanaut +oceanographer +octogenarian +octoroon +oculist +odalisque +oddball +odds-maker +odist +oenologist +oenophile +offender +offerer +offeror +office-bearer +officeholder +officer +official +officiant +ogler +oilman +old-timer +oldster 
+oldtimer +oligarch +ombudsman +onanist +oncologist +oneiromancer +onlooker +onomancer +operagoer +operative +ophthalmologist +opponent +opportunist +opposer +oppressor +optician +optimist +optometrist +orator +orchestrator +ordainer +orderer +orderly +ordinand +organ-grinder +organist +orientalist +originator +ornamentalist +ornithologist +orthodontist +orthoepist +orthopaedist +orthopedist +orthoptist +osculator +osteologer +osteologist +osteopath +osteopathist +ostiarius +ostiary +ostler +otolaryngologist +otologist +otorhinolaryngologist +out-and-outer +outcast +outcaste +outdoorsman +outdoorswoman +outfielder +outgoer +outlander +outlaw +outpatient +outrider +outsider +overachiever +overcomer +overlord +overseer +owner +owner-occupier +oyabun +pacha +pachuco +pacificist +pacifist +packer +packman +padder +paddler +padre +padrone +paederast +paediatrician +paedophile +pagan +pal +paladin +palaeontologist +paleface +paleographer +paleographist +paleontologist +pallbearer +palmist +palmister +palooka +palsgrave +pamperer +pamphleteer +pandar +pander +panderer +panegyrist +panelist +panellist +panhandler +panjandrum +pansexual +pantheist +pantomimer +pantomimist +pantryman +pantywaist +papa +paparazzo +paper-pusher +paperboy +paperer +paperhanger +papist +papoose +pappa +pappoose +parachuter +parachutist +parader +paragrapher +paralegal +paralytic +paramedic +paramedical +paramour +paranoiac +paranoid +paraplegic +paraprofessional +parapsychologist +paratrooper +pardner +pardoner +paretic +pariah +parishioner +parliamentarian +parlormaid +parlourmaid +parodist +parolee +parson +part-owner +part-timer +partaker +participant +partitionist +partner +partygoer +parvenu +pasha +passenger +passer-by +passerby +patentee +pater +paterfamilias +pathfinder +pathologist +patrial +patriarch +patrician +patrikin +patriot +patrioteer +patrisib +patroller +patrolman +patron +patroness +patronne +patsy +patternmaker +patzer +pauper +pawer +pawnbroker +payee +payer +paymaster 
+paynim +peacenik +pearler +peasant +peculator +pedagog +pedagogue +pedaler +pedaller +pedant +peddler +pederast +pedestrian +pediatrician +pediatrist +pedlar +pedodontist +pedophile +peer +peeress +pen-friend +pendragon +penetrator +penitent +penman +penologist +penpusher +pensionary +pensioner +pentathlete +peon +perceiver +percipient +percussionist +perfecter +perfectionist +performer +perfumer +perinatologist +periodontist +peripatetic +perisher +perjurer +perpetrator +persecutor +person +personage +perspirer +persuader +pervert +peshmerga +pessimist +pesterer +petitioner +petter +pettifogger +phalangist +pharisee +pharmacist +pharmacologist +philanderer +philanthropist +philatelist +philhellene +philhellenist +philistine +philologist +philologue +philomath +philosopher +philosophiser +philosophizer +phlebotomist +phoner +phonetician +phoney +phonologist +phony +photographer +photojournalist +photometrician +photometrist +phrenologist +physician +physicist +physiologist +physiotherapist +phytochemist +phytologist +pianist +picador +picaninny +piccaninny +pickaninny +picker +picklepuss +picknicker +pickpocket +picnicker +pigman +pigmy +pilferer +pilgrim +pillager +pillock +pimp +pin-up +pinchgut +pinko +pioneer +pip-squeak +pistoleer +pitchman +place-kicker +placekicker +placeman +placeseeker +plagiariser +plagiarist +plagiarizer +plainclothesman +plainsman +plaintiff +plaiter +plantsman +plasterer +platelayer +plater +platitudinarian +play-actor +playactor +playboy +player +playfellow +playgoer +playmaker +playmate +playwright +pleader +pleaser +pleb +plebe +plebeian +pledgee +pledger +plenipotentiary +plier +plodder +ploughboy +ploughman +ploughwright +plowboy +plower +plowman +plowwright +plug-ugly +plugger +plumber +plunderer +pluralist +plutocrat +plyer +podiatrist +poet +poet-singer +poetess +poetiser +poetizer +pointillist +pointsman +poisoner +pol +polack +polemicist +polemist +policeman +policewoman +policyholder +politician +politico +pollster 
+polluter +poltroon +polyandrist +polygamist +polyglot +polygynist +polymath +polytheist +pom +pommy +pomologist +ponce +ponderer +pontifex +pontiff +poof +pooh-bah +pooler +poove +populariser +popularizer +populist +pornographer +portraitist +portrayer +portwatcher +poseur +poseuse +positivist +posseman +possessor +postgraduate +postilion +postillion +postman +postmaster +postmistress +postponer +postulant +postulator +posturer +potboy +potentate +pothead +potholer +pothunter +potman +potter +potterer +poulterer +poultryman +powderer +powerbroker +practician +practitioner +praetor +pragmatist +prankster +prater +prattler +pre-Socratic +pre-emptor +preacher +prebendary +precentor +preceptor +predestinarian +predestinationist +preemie +preemptor +prefect +prelate +premie +premier +prentice +presbyope +presbyter +preschooler +presenter +presentist +preservationist +pressman +prestidigitator +preteen +preteenager +pretender +preterist +pretor +prevaricator +prexy +prickteaser +priest +priest-doctor +priestess +prig +primigravida +primipara +primogenitor +prince +princeling +princess +printmaker +prior +prioress +prisoner +private +privateersman +prizefighter +pro-lifer +probable +probationer +process-server +procrastinator +proctologist +proctor +procurator +procurer +procuress +prodigal +prof +professional +professor +profiteer +profligate +progenitor +progeny +prognosticator +programmer +prohibitionist +projectionist +prole +proletarian +promisee +promiser +promisor +promoter +promulgator +proofreader +propagandist +propagator +prophesier +prophet +prophetess +propman +proponent +proposer +propositus +proprietor +proprietress +prosecutor +proselyte +prospector +prosthetist +prosthodontist +prostitute +protectionist +protector +protege +protegee +protester +protozoologist +provider +provincial +provisioner +provocateur +provoker +provost +prowler +prude +psalmist +psephologist +pseud +pseudo +pseudohermaphrodite +psychiatrist +psychic +psycho +psychoanalyst 
+psycholinguist +psychologist +psychoneurotic +psychopath +psychophysicist +psychotherapist +psychotic +pteridologist +publican +publiciser +publicist +publicizer +pudden-head +puddler +pudge +puerpera +pugilist +puller +pundit +punster +punter +puppeteer +purchaser +purist +puritan +purser +pursued +pursuer +purveyor +putterer +pygmy +pyrographer +pyromancer +pyromaniac +qadi +quadripara +quadriplegic +quadroon +quaestor +quaffer +quaker +quarreler +quarreller +quarrier +quarryman +quartermaster +queer +querier +quester +questioner +quibbler +quidnunc +quietist +quin +quintipara +quisling +quitter +quizmaster +quizzer +quoter +rabble-rouser +racialist +racist +racker +racketeer +raconteur +radiobiologist +radiochemist +radiographer +radiologist +radiotherapist +raftman +raftsman +ragamuffin +ragpicker +ragsorter +rail-splitter +railbird +railroader +railwayman +rainmaker +raiser +rajah +rakehell +rambler +rancher +ranee +ranger +rani +ranker +ranter +raper +rapist +rappeller +rapporteur +rapscallion +rascal +rat-catcher +ratepayer +ratifier +ratiocinator +rationalist +raver +ravisher +reactionary +realist +reasoner +rebel +rebuker +receptionist +recidivist +recitalist +reciter +recluse +reconciler +record-breaker +record-holder +record-keeper +recoverer +recreant +recruit +recruiter +recruiting-sergeant +rector +recusant +red-header +redact +redactor +redcap +redcoat +redheader +redneck +reeler +reenactor +ref +referee +refiner +refinisher +reformist +refugee +refuter +regent +registrant +registrar +relative-in-law +reliever +religionist +religious +remunerator +renegade +renovator +renter +rentier +repairer +repairman +repatriate +reporter +reproacher +reprobate +reprover +requester +rescuer +researcher +reservist +resident +resister +respecter +respondent +responder +restauranter +restaurateur +rester +restorer +retailer +retaliator +retard +retiree +retreatant +reveler +reveller +revenuer +reversioner +reversionist +reviewer +reviser +revisionist +revivalist 
+revolutionary +revolutionist +rewriter +rhabdomancer +rhetorician +rheumatic +rheumatologist +rhinolaryngologist +rhymer +rhymester +ribald +ridiculer +rifleman +right-hander +right-winger +righthander +rightist +ringleader +ringmaster +rioter +ripper +ritualist +rival +roadman +roamer +roarer +robber +rogue +roisterer +roller-skater +rollerblader +romantic +romanticist +roofer +rookie +roomer +roomie +roommate +roomy +rooter +rope-maker +ropedancer +ropemaker +roper +ropewalker +rotter +roue +roughneck +roughrider +roundhead +roundsman +rouser +roustabout +rover +rowdy +rower +royalist +rubberneck +rubbernecker +rube +ruffian +ruiner +ruminator +rumormonger +rumourmonger +rumrunner +runner-up +runt +ruralist +rusher +rustic +rustler +saboteur +sabra +sachem +sacrificer +sacristan +saddhu +saddler +sadhu +sadist +sadomasochist +safebreaker +safecracker +sagamore +sahib +sailmaker +salesclerk +salesgirl +saleslady +salesman +salesperson +saleswoman +salter +salutatorian +saluter +salvager +salvor +sandbagger +sandboy +sandwichman +sangoma +sannup +sannyasi +sannyasin +sanyasi +saphead +sapper +sartor +satirist +satrap +saunterer +savage +savant +saver +sawbones +saxist +saxophonist +scalawag +scallywag +scalper +scammer +scamp +scandalmonger +scapegoat +scapegrace +scaremonger +scatterbrain +scattergood +scenarist +scene-stealer +sceneshifter +sceptic +schemer +schizophrenic +schlemiel +schlepper +schlimazel +schlockmeister +schmo +schmoozer +schmuck +schnook +schnorrer +scholar +scholastic +scholiast +schoolboy +schoolchild +schoolfellow +schoolfriend +schoolgirl +schoolma'am +schoolman +schoolmarm +schoolmate +schoolmistress +schoolteacher +scientist +sciolist +scion +scoffer +scofflaw +scold +scolder +scorekeeper +scorer +scorner +scoundrel +scourer +scourger +scout +scouter +scoutmaster +scrapper +scrawler +screecher +screener +screenwriter +screwballer +scribbler +scrimshanker +scriptwriter +scrivener +scrooge +scrounger +scrutineer +scrutiniser +scrutinizer 
+sculler +scullion +sculptress +sculpturer +seafarer +seamster +seamstress +seasonal +secessionist +second-in-command +second-rater +second-stringer +seconder +sectarian +sectarist +sectary +secular +secularist +secundigravida +securer +seducer +seductress +seedman +seedsman +seer +segregate +segregationist +segregator +seigneur +seignior +seismologist +seizer +selectman +selectwoman +self-seeker +seller +semanticist +semifinalist +seminarian +seminarist +semiotician +semipro +semiprofessional +sempstress +senator +sendee +seneschal +senior +sensationalist +sensitive +sensualist +sentimentalist +sentinel +sentry +separationist +separatist +septuagenarian +serf +sergeant +sergeant-at-law +sericulturist +serjeant +serjeant-at-arms +serjeant-at-law +sermoniser +sermonizer +serologist +serviceman +servitor +settler +settlor +sexagenarian +sexist +sexpot +shadower +shaheed +shaman +shammer +shamus +shanghaier +sharecropper +shareholder +shareowner +sharer +sharper +sharpy +she-devil +shedder +sheeny +sheepherder +sheepman +shegetz +sheik +sheika +sheikh +sheikha +sheller +shelver +shepherd +shepherdess +sheriff +sherlock +shielder +shiksa +shikse +shill +shingler +ship-breaker +shipmate +shipowner +shipwright +shirker +shirtlifter +shirtmaker +shithead +shitter +shlemiel +shlepper +shlimazel +shlockmeister +shmo +shmuck +shnook +shnorrer +shoeblack +shoemaker +shogun +shopaholic +shopkeeper +shoplifter +shopper +shopwalker +shouter +shover +show-off +showgirl +showman +shrink +shuffler +shut-in +shutterbug +shyster +sib +sibling +sibyl +sidekick +sidesman +sightreader +sightseer +signaler +signaller +signalman +signatory +signer +signior +signor +silly +silver-worker +silversmith +silverworker +simperer +simpleton +sinner +sipper +sir +sirdar +sirrah +sissy +sister +sister-in-law +six-footer +sixth-former +skateboarder +skater +skeptic +skier +skin-diver +skinflint +skinhead +skinny-dipper +skipper +skirmisher +skivvy +skulker +skycap +skydiver +slacker +slanderer 
+slant-eye +slapper +slattern +slaughterer +slave +slaveholder +slaver +slavey +slayer +sledder +sleepwalker +sleepyhead +sleuth +slinger +slob +slobberer +sloganeer +slogger +slop-seller +slopseller +sloucher +sloven +slowcoach +slowpoke +slugabed +sluggard +slugger +slumberer +slut +slyboots +smallholder +smarta +smirker +smoothy +smotherer +smuggler +snacker +snarer +snatcher +sneak +sneerer +sneezer +sniffer +sniffler +sniper +snitch +snitcher +sniveler +sniveller +snob +snoop +snooper +snorer +snowboarder +snuffler +so-and-so +sobersides +socialiser +socialist +socialite +socializer +sociobiologist +sociolinguist +sociologist +sociopath +sodalist +sodbuster +sodomist +sodomite +softie +softy +sojourner +solderer +solicitor +solitudinarian +soloist +solon +solver +somebody +someone +sommelier +somnambulist +somniloquist +son-in-law +songstress +songwriter +sonneteer +sonny +soothsayer +soph +sophist +sophisticate +sophomore +sorcerer +sorceress +sorehead +sorrower +sot +soundman +sourpuss +southpaw +sovereign +sower +spaceman +spacewalker +spammer +sparer +spastic +specialiser +specialist +specializer +specifier +speculator +speechifier +speechmaker +speechwriter +speedskater +spelaeologist +speleologist +spellbinder +spelunker +spend-all +spendthrift +spewer +spic +spick +spik +spinmeister +spinster +spiritualist +spitfire +spiv +splitter +spoilsport +spokesman +spokesperson +spokeswoman +sponger +sponsor +sportscaster +sportsman +sportswoman +sportswriter +spot-welder +spotter +spouse +sprawler +sprigger +sprinter +sprog +spurner +spy +spymaster +squabbler +squanderer +squatter +squaw +squint-eye +squinter +squire +squirmer +stabber +stableboy +stableman +stacker +staffer +stagehand +stager +staggerer +stainer +stakeholder +stalker +stalwart +stammerer +stand-in +standard-bearer +standardiser +standardizer +starer +starets +starveling +statesman +stateswoman +stationer +stationmaster +statistician +stay-at-home +steady +stealer +steamfitter +steelmaker 
+steelman +steelworker +steeplejack +steerer +steersman +stenographer +stepbrother +stepchild +stepdaughter +stepfather +stepmother +stepparent +stepsister +stepson +stevedore +steward +stewardess +stick-in-the-mud +stickler +stifler +stigmatic +stigmatist +stinter +stipendiary +stippler +stitcher +stock-taker +stockbroker +stockholder +stockist +stockjobber +stockman +stocktaker +stoic +stomper +stonecutter +stonemason +stoner +stonewaller +stooge +stoolie +stoolpigeon +stooper +storekeeper +storyteller +stowaway +strafer +straggler +stranger +straphanger +strapper +strategian +strategist +strayer +streaker +streetwalker +stretcher-bearer +strider +strikebreaker +stripling +stripteaser +striver +strong-armer +strongman +struggler +strumpet +student +stumblebum +stumbler +stupe +stupid +stutterer +styler +stylist +stylite +subaltern +subcontractor +subdeacon +subdivider +subduer +subeditor +subjectivist +subjugator +sublieutenant +submariner +submitter +subnormal +suborner +subscriber +subsidiser +subsidizer +subsister +suburbanite +subversive +subverter +subvocaliser +subvocalizer +succeeder +succorer +succourer +suer +sufferer +suffragan +suffragette +suffragist +suggester +suitor +sultan +summercater +summercaters +sunbather +super +supercargo +supergrass +superintendent +supermarketeer +supermarketer +supermodel +supermom +supernumerary +superstar +supplanter +suppliant +supplicant +supplier +supremacist +suprematist +supremo +surfboarder +surfer +surgeon +surmounter +surpriser +surrealist +surrenderer +surrogate +surveyor +survivalist +suspect +sustainer +sutler +swaggerer +swaggie +swagman +swain +swami +swashbuckler +swayer +swearer +sweetheart +sweetie +swellhead +swimmer +swindler +swineherd +swinger +switch-hitter +switcher +swordsman +swot +sybarite +sycophant +syllogiser +syllogist +syllogizer +symboliser +symbolist +symbolizer +sympathiser +sympathizer +symphonist +symposiarch +symposiast +syncopator +syndic +syndicalist +syndicator +synonymist 
+syntactician +synthesist +syphilitic +systematiser +systematist +systematizer +systemiser +systemizer +tablemate +tackler +tactician +tagalong +tailor +taker +talebearer +taleteller +talker +tallyman +tamer +tantaliser +tantalizer +taoiseach +tapper +tapster +taskmaster +taskmistress +taste-maker +taste-tester +taster +tatterdemalion +tattletale +taxer +taxidermist +taxman +taxonomer +taxonomist +taxpayer +tchotchkeleh +teammate +tearaway +tec +techie +technician +technocrat +technologist +technophile +technophobe +teen +teenager +teetotaler +teetotalist +teetotaller +tekki +telecaster +teleologist +telepathist +telephoner +televangelist +telltale +tellurian +temp +temporary +temporiser +temporizer +tempter +temptress +tenant +tenderfoot +tenno +tenorist +tentmaker +tergiversator +termagant +termer +terminator +terpsichorean +terrorist +tertigravida +testate +testator +testatrix +testee +testifier +thane +thaumaturge +thaumaturgist +theatergoer +theatregoer +theist +theologian +theologiser +theologist +theologizer +theoretician +theoriser +theorist +theorizer +theosophist +therapist +thespian +thief +thinker +third-rater +thirster +thought-reader +throttler +thrower +throwster +thug +thurifer +thwarter +tiddler +tightwad +tike +tiler +timberman +timeserver +timpanist +tinkerer +tinner +tinsmith +tinter +tippler +tipster +tiro +tither +title-holder +titterer +toady +toastmaster +tobogganist +toddler +toff +toiler +toller +tollgatherer +tollkeeper +tollman +tomboy +tomfool +toolmaker +toper +torchbearer +toreador +torero +tort-feasor +tortfeasor +torturer +tosser +totalitarian +totemist +toter +totterer +touch-typist +toucher +tough +tourist +tout +touter +tovarich +tovarisch +towhead +townee +towner +townie +townsman +towny +toxicologist +tracker +tracklayer +trader +tradesman +traditionalist +traducer +trafficker +tragedian +tragedienne +trailblazer +trainbandsman +trainbearer +trainee +trainman +trainmaster +traitor +traitress +tramper +trampler +transactor 
+transalpine +transcendentalist +transcriber +transexual +transferee +transferer +transferor +transferrer +transgressor +transmigrante +transplanter +transsexual +transvestite +trapper +trapshooter +traveler +traveller +traverser +treasonist +treasurer +treater +trekker +trembler +trencherman +trend-setter +trespasser +tribade +tribesman +tribologist +tricker +trier +trifler +triggerman +trigonometrician +tritheist +triumvir +troglodyte +troller +trollop +trombonist +trooper +troubadour +troublemaker +troubler +troubleshooter +trouper +truant +truckler +trudger +truelove +trustbuster +trustee +truster +trustor +trusty +tsar +tsarina +tsaritsa +tub-thumper +tubercular +tugger +turncoat +turnkey +turtler +tutee +tutor +twaddler +twerp +twiddler +twiner +twirler +twirp +two-timer +tycoon +tyke +tympanist +typesetter +typist +typographer +tyrant +tyro +tzar +tzarina +ultraconservative +ultramontane +ump +umpire +unbeliever +uncle +underachiever +underboss +underclassman +underdog +undergrad +undergraduate +underling +undersecretary +underseller +understudy +undertaker +undesirable +undoer +unfastener +unfortunate +unicyclist +unilateralist +unionist +unperson +unraveler +unraveller +untier +untouchable +upbraider +upholder +upholsterer +uprooter +upsetter +upstager +uranologist +urchin +urinator +urologist +user +usherette +usufructuary +usurer +usurper +utiliser +utilitarian +utilizer +ux. 
+uxor +vacationer +vacationist +vaccinator +vaccinee +vacillator +vagrant +valedictorian +valet +valetudinarian +valuator +valuer +vamper +vandal +vanisher +vanquisher +vaquero +varlet +varnisher +vassal +vaticinator +vaudevillian +vaulter +vaunter +vegan +vegetarian +vendee +vender +vendor +venerator +ventriloquist +venturer +verbaliser +verbalizer +verger +verifier +versifier +vestal +vestryman +vestrywoman +vet +veteran +veterinarian +veterinary +vexer +vibist +vibraphonist +vicar +vicar-general +vice-regent +vicegerent +vicereine +victim +victimiser +victimizer +victor +victualer +victualler +vigilante +vilifier +villager +villainess +villein +vindicator +vintager +vintner +violator +violinist +violist +violoncellist +virago +virologist +virtuoso +viscount +viscountess +visionary +visitant +visitor +visualiser +visualizer +vitaliser +vitalist +vitalizer +viticulturist +vivisectionist +vizier +vocalist +vociferator +voicer +volunteer +voluptuary +vomiter +votary +voter +vouchee +vower +voyager +voyeur +vulcaniser +vulcanizer +vulgarian +vulgariser +vulgarizer +wacko +waddler +waffler +wagerer +waggoner +waggonwright +wagoner +wagonwright +waif +wailer +wainwright +waiter +waitress +waker +walk-on +wall-paperer +wallah +wallpaperer +wally +waltzer +wangler +wanker +wannabe +wannabee +wanter +wanton +ward-heeler +warden +warder +wardress +warehouseman +warehouser +warlord +warmonger +warranter +warrantor +warrener +warrior +washerman +washerwoman +washwoman +wassailer +waster +wastrel +watcher +watchmaker +watchman +watercolorist +watercolourist +waterer +waterman +waverer +wax-chandler +wayfarer +weakling +wearer +weatherman +webmaster +weeper +weigher +weightlifter +weirdie +weirdo +weirdy +weisenheimer +welcher +welcomer +welder +well-wisher +welsher +wench +wencher +westerner +wet-nurse +wetback +wetnurse +whacko +wheedler +wheelwright +whiffer +whiner +whipper +whipper-in +whippersnapper +whisperer +whistle-blower +whistleblower +whitey +whittler +whiz-kid 
+whizz +whizz-kid +wholesaler +whore +whoremaster +whoremonger +whoreson +wicket-keeper +widow +widower +widowman +wife +wigmaker +wildcatter +windbag +windtalker +winemaker +winger +wingman +winner +wino +wire-puller +wireman +wirer +wiretapper +wiseacre +wisenheimer +witch-hunter +withdrawer +withholder +withstander +witnesser +wittol +wiz +wizard +wog +womaniser +womanizer +wonderer +wonk +woodcarver +woodcutter +woodman +woodsman +woodworker +wooer +woolgatherer +woolsorter +wop +word-painter +wordmonger +wordsmith +workaholic +workfellow +workingman +workman +workmate +world-beater +worldling +worrier +worrywart +worshiper +worshipper +worthy +wrangler +wrester +wrestler +wretch +writer +wrongdoer +wuss +xylophonist +yachtsman +yachtswoman +yanker +yardbird +yardie +yardman +yardmaster +yawner +yearner +yeller +yenta +yeoman +yes-man +yid +yielder +yob +yobbo +yobo +yodeller +yokel +youngster +younker +yuppie +zany +zealot +zoologist diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/person_words.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/person_words.dat new file mode 100644 index 000000000..c1daaade9 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/data/person_words.dat @@ -0,0 +1,7869 @@ +AWOL +Abenaki +Abkhas +Abkhasian +Abkhaz +Abkhazian +Abnaki +Abo +Aboriginal +Aborigine +Acadian +Achaean +Achaian +Achomawi +Adventist +Aeolian +Afghan +Afghanistani +African +African-American +Afrikander +Afrikaner +Afro-American +Akwa'ala +Alabama +Alabaman +Alabamian +Alaskan +Albanian +Aleut +Aleutian +Alexandrian +Algerian +Algonkian +Algonkin +Algonquian +Algonquin +Almoravid +Alsatian +Altaic +American +Amerindian +Amhara +Amish +Anabaptist +Anasazi +Andorran +Angevin +Angevine +Angle +Anglican +Anglo-American +Anglo-Indian +Anglo-Saxon +Angolan +Angolese +Anguillan +Annamese +Antiguan +Anzac +Apache +Apostle +Appalachian +Aquarius +Arab +Arabian +Arabist +Aramaean +Aramean 
+Arapaho +Arapahoe +Arawak +Arawakan +Arcadian +Archer +Areopagite +Argentinian +Argive +Aricara +Aries +Arikara +Aristotelean +Aristotelian +Arizonan +Arizonian +Arkansan +Arkansawyer +Armenian +Arminian +Aryan +Ashkenazi +Asian +Asiatic +Assamese +Assyrian +Atakapa +Athabascan +Athabaskan +Athapascan +Athapaskan +Athenian +Atsugewi +Attacapan +Augustinian +Aussie +Australian +Austrian +Austronesian +Azerbaijani +Aztec +B-girl +Babylonian +Badaga +Badger +Bahai +Bahamian +Bahraini +Bahreini +Balance +Balkan +Bangladeshi +Bantu +Baptist +Barbadian +Bart +Basotho +Basque +Batswana +Bavarian +Beaver +Bechuana +Bedouin +Beduin +Beguine +Belgian +Belorussian +Benedictine +Bengali +Beninese +Berber +Berliner +Bermudan +Bermudian +Bhutanese +Bhutani +Biloxi +Bisayan +Black +Blackfoot +Blackfriar +Blackshirt +Blimp +Blucher +Boche +Boer +Bohemian +Bolivian +Bolshevik +Bolshevist +Bornean +Bostonian +Boswell +Bourbon +Boxer +Brahui +Brazilian +Breton +Brit +Britisher +Briton +Brother +Brownie +Brownshirt +Brule +Brummie +Brummy +Bruneian +Buckeye +Buddha +Buddhist +Bulgarian +Bull +Burmese +Burundian +Bushman +Byelorussian +Byzantine +CEO +CFO +CO +CPA +Cabalist +Caddo +Cahita +Cairene +Cajun +Cakchiquel +Calapooya +Calapuya +Californian +Calvinist +Cambodian +Cambrian +Cameroonian +Canaanite +Canadian +Canarese +Cancer +Cantabrigian +Canuck +Capetian +Capricorn +Carelian +Carib +Carioca +Carlovingian +Carmelite +Carolingian +Carolinian +Cartesian +Carthaginian +Carthusian +Casanova +Cassite +Castillian +Catalan +Catawba +Catholic +Catholicos +Caucasian +Cavalier +Cayuga +Cebuan +Celt +Cewa +Chadian +Chaldaean +Chaldean +Chaldee +Chancellor +Charon +Chartist +Chasid +Chassid +Chechen +Cheops +Cheremis +Cheremiss +Cherokee +Chewa +Cheyenne +Chicano +Chichewa +Chickasaw +Chilean +Chiluba +Chimakum +Chimariko +Chinaman +Chinese +Chinook +Chipewyan +Chippewa +Choctaw +Christian +Chukchi +Chuvash +Cinderella +Circassian +Circe +Cistercian +Cochimi +Cockney +Cocopa +Cocopah 
+Colombian +Coloradan +Colossian +Comanche +Communist +Comrade +Confederate +Confucian +Confucianist +Congolese +Congregationalist +Connecticuter +Conoy +Conservative +Contra +Converso +Copt +Corinthian +Cornhusker +Cornishman +Cornishwoman +Cossack +Costanoan +Coue +Crab +Cree +Creek +Creole +Cretan +Croat +Croatian +Croesus +Crow +Crusader +Cuban +Cymry +Cynic +Cyprian +Cypriot +Cypriote +Cyril +Czech +Czechoslovak +Czechoslovakian +DA +DCI +DP +Dakota +Dalmatian +Damascene +Dane +Daniel +Dardan +Dardanian +Darwinian +Delaware +Delawarean +Delawarian +Delilah +Democrat +Dhegiha +Diegueno +Djiboutian +Doctor +Dominican +Don +Donatist +Dorian +Dr. +Dravidian +Druid +Druse +Druze +Dubliner +Dunkard +Dunker +Dutchman +East-sider +Ebionite +Ecuadoran +Ecuadorian +Edo +Edwardian +Egyptian +Egyptologist +Einstein +Elamite +Elector +Elizabethan +Englishman +Englishwoman +Eolian +Ephesian +Episcopalian +Erie +Eritrean +Eskimo +Esq +Esquimau +Esquire +Esselen +Essene +Ethiopian +Etonian +Etruscan +Eurafrican +Eurasian +European +Evangelist +Evenk +Evenki +Ewe +Ewenki +Excellency +Eyeish +FO +Fabian +Farsi +Father +Fauntleroy +Fauve +Fed +Federal +Federalist +Fellata +Fijian +Filipino +Finn +Fish +Fleming +Florentine +Floridian +Fox +Franciscan +Franco-American +Francophil +Francophile +Francophobe +Frank +Freemason +Frenchman +Frenchwoman +Freudian +Friend +Fula +Fulah +Fulani +Fulbe +G-man +GP +Gabonese +Gadaba +Gael +Galatian +Galilaean +Galilean +Gambian +Gaul +Gemini +Genevan +Genoese +Geordie +Georgian +German +Germanist +Ghanian +Gibraltarian +Gipsy +Girondin +Girondist +Glaswegian +Gnostic +Goat +Gond +Gongorist +Gopher +Goth +Graecophile +Gray +Grecian +Greek +Green +Grenadian +Guarani +Guatemalan +Guinea +Guinean +Gujarati +Gujerati +Gurkha +Guru +Guyanese +Gypsy +Haida +Haitian +Hakham +Hakka +Halchidhoma +Hanoverian +Harijan +Hasid +Hassid +Hausa +Haussa +Havasupai +Hawaiian +Hebraist +Hebrew +Hegelian +Hellene +Herero +Herr +Hidatsa +Highlander +Highness 
+Hinayanist +Hindoo +Hindu +Hindustani +Hispanic +Hitchiti +Hittite +Hmong +Hoka +Hokan +Hollander +Honduran +Hoosier +Hopi +Hualapai +Hualpai +Huguenot +Hun +Hungarian +Hunkpapa +Hupa +Hussite +Hutu +Iberian +Icelander +Idahoan +Igbo +Illinois +Illinoisan +Inca +Incan +Indian +Indiana +Indianan +Indo-European +Indonesian +Inger +Ingerman +Ingrian +Injun +Inka +Inquisitor +Inuit +Ionian +Iowa +Iowan +Ioway +Iraki +Irani +Iranian +Iraqi +Irelander +Irishman +Irishwoman +Iroquois +Ishmael +Islamist +Ismaili +Ismailian +Israeli +Israelite +Italian +Jack +Jack-tar +Jacksonian +Jacob +Jacobean +Jacobin +Jacobite +Jainist +Jamaican +Janissary +Jansenist +Jap +Japanese +Jat +Javan +Javanese +Jeffersonian +Jerry +Jesuit +Jew +Jew-baiter +Jewess +Jihadist +Jnr +Job +Johnny +Jordanian +Jr +Judas +Jugoslav +Jugoslavian +Jungian +Junior +Junker +Jute +KP +Kabbalist +Kafir +Kaiser +Kalapooia +Kalapuya +Kalka +Kamia +Kampuchean +Kanarese +Kansa +Kansan +Kansas +Karakalpak +Karelian +Karok +Kashmiri +Kassite +Katari +Kazak +Kazakh +Kazakhstani +Kechua +Kekchi +Kelt +Kennan +Kentuckian +Kenyan +Keynesian +Khalka +Khalkha +Khanty +Khedive +Khirghiz +Khmer +Khufu +Kichai +Kickapoo +Kiliwa +Kiliwi +Kiowa +Kirghiz +Kirgiz +Kiwi +Klansman +Kluxer +Koasati +Kolam +Komi +Korean +Kota +Kotar +Kraut +Krauthead +Kshatriya +Kui +Kurd +Kusan +Kuwaiti +Kwakiutl +LPN +Labourite +Laconian +Lady +Lakota +Lamaist +Lamarckian +Lancastrian +Langobard +Lao +Laotian +Lapp +Lapplander +Latin +Latinist +Latino +Latvian +Lebanese +Legionnaire +Leo +Lesbian +Levantine +Levite +Liberian +Libra +Libyan +Liechtensteiner +Lion +Lithuanian +Liverpudlian +Livonian +Lolita +Lombard +Londoner +Lord +Lothario +Louisianan +Louisianian +Lowlander +Luba +Lubavitcher +Luddite +Lutheran +Luxembourger +Luxemburger +MD +MP +MVP +Macedonian +Machiavellian +Mackem +Madagascan +Magyar +Maha +Mahayanist +Mahdist +Mahican +Mahratta +Maidu +Mainer +Malawian +Malay +Malayan +Malaysian +Maldivan +Maldivian +Malecite +Malian 
+Maltese +Malthusian +Malto +Mam +Manchu +Mancunian +Mandaean +Mandean +Manichaean +Manichean +Manichee +Mansi +Maoist +Maquis +Maquisard +Maraco +Maratha +Mari +Maricopa +Marine +Marrano +Marxist +Marylander +Mason +Masorete +Masorite +Massachuset +Massachusetts +Massorete +Mattole +Mauritanian +Mauritian +Maya +Mayan +Melchite +Melkite +Mendelian +Mennonite +Menominee +Menomini +Menshevik +Merovingian +Mesoamerican +Methodist +Methuselah +Metis +Mexican +Mexican-American +Mexicano +Miami +Miao +Michigander +Mick +Mickey +Micmac +Mikmaq +Milady +Milanese +Milquetoast +Miniconju +Minnesotan +Minoan +Minuteman +Mississippian +Missouri +Missourian +Mithraist +Miwok +Moghul +Mogul +Mohammedan +Mohave +Mohawk +Mohican +Mojave +Mollah +Mon +Monacan +Monegasque +Mongol +Mongolian +Mongoloid +Monophysite +Monsieur +Monsignor +Montanan +Montserratian +Moonie +Moor +Mordva +Mordvin +Mordvinian +Mormon +Moro +Moroccan +Moslem +Mountie +Mozambican +Mugwump +Muhammadan +Muhammedan +Mulla +Mullah +Muscovite +Muskhogean +Muskogean +Muskogee +Muslim +Muslimah +Mycenaen +NIMBY +NOC +NP +Nahuatl +Namibian +Nanticoke +Nauruan +Navaho +Navajo +Nazarene +Nazi +Neapolitan +Nebraskan +Negress +Negro +Negroid +Neoplatonist +Neopolitan +Nepalese +Nepali +Nestorian +Netherlander +Nevadan +Newtonian +Nganasan +Nicaraguan +Nigerian +Nigerien +Nip +Nipponese +Nisei +Nobelist +Nonconformist +Nootka +Norman +Norse +Norseman +Northerner +Northman +Norwegian +Nubian +Numidian +Occidental +Ofo +Ogalala +Oglala +Ohioan +Ojibwa +Ojibway +Oklahoman +Olmec +Olympian +Omaha +Omani +Oneida +Onondaga +Orangeman +Oregonian +Oriental +Oriya +Orleanist +Osage +Oscan +Osmanli +Ostrogoth +Ostyak +Ostyak-Samoyed +Oto +Otoe +Ottawa +Ottoman +Oxonian +P.O. 
+PCP +PI +PM +PO +POW +Paddy +Padre +Paiute +Pakistani +Paleo-American +Paleo-Amerind +Paleo-Indian +Palestinian +Pamlico +Panamanian +Panjabi +Papuan +Paraguayan +Parisian +Parisienne +Parliamentarian +Parsee +Parsi +Parthian +Pashtoon +Pashtun +Passamaquody +Pathan +Patwin +Pawnee +Pennsylvanian +Penobscot +Pentecostal +Pentecostalist +Penutian +Peripatetic +Persian +Peruvian +Pharaoh +Pharisee +Philippian +Philistine +Phoenician +Phrygian +Pigmy +Pilate +Pilgrim +Pima +Pisces +Piute +Platonist +Pole +Polycarp +Polynesian +Pomo +Ponca +Ponka +Popper +Portuguese +Post-impressionist +Postimpressionist +Potawatomi +Potemkin +Potyokin +Powhatan +Praetorian +Pre-Raphaelite +Presbyterian +President +Pretender +Protestant +Prussian +Pueblo +Punjabi +Puritan +Pushtun +Pygmy +Qatari +Quaker +Quapaw +Quebecois +Quechua +Quiche +RN +Rajpoot +Rajput +Ram +Ranger +Rasta +Rastafarian +Realtor +Reb +Rebel +Redskin +Republican +Rex +Riff +Riffian +Roma +Roman +Romani +Romanian +Romanoff +Romanov +Romany +Romeo +Rommany +Rosicrucian +Rotarian +Roundhead +Royalist +Rumanian +Russian +Rwandan +Ryukyuan +SACEUR +SACLANT +SCPO +SEAL +SMSgt +SOB +Saame +Saami +Sabahan +Sabbatarian +Sabine +Sac +Sadducee +Sagittarius +Sahaptin +Sahaptino +Salian +Salish +Salvadoran +Salvadorean +Salvadorian +Samaritan +Same +Sami +Samnite +Samoan +Samoyed +Samson +Santee +Saracen +Sarawakian +Sardinian +Sassenach +Satanist +Saudi +Sauk +Savara +Savoyard +Saxon +Scandinavian +Scholastic +Schoolman +Scorpio +Scorpion +Scot +Scotchman +Scotchwoman +Scotsman +Scotswoman +Scouser +Scout +Scythian +Selkup +Seminole +Semite +Seneca +Senegalese +Senhor +Sephardi +Serb +Serbian +Seychellois +Shah +Shahaptian +Shaker +Shakespearean +Shakespearian +Shaktist +Shasta +Shavian +Shawnee +Sherpa +Shi'ite +Shiite +Shintoist +Shivaist +Shona +Shoshone +Shoshoni +Shudra +Siamese +Siberian +Sicilian +Sihasapa +Sikh +Simeon +Sindhi +Singaporean +Singhalese +Sinhalese +Sinologist +Siouan +Sioux +Sir +Sister +Skagit 
+Skinnerian +Slav +Slovak +Slovene +Slovenian +Socinian +Somali +Somalian +Sooner +Sophist +Sorbian +Sotho +Southerner +Spaniard +Spartan +Speaker +Stalinist +Stoic +Stuart +Sudanese +Sudra +Sufi +Sumatran +Sumerian +Sunni +Sunnite +Svengali +Swazi +Swede +Syrian +T-man +Tadzhik +Tagalog +Tahitian +Tai +Taiwanese +Tajik +Takelma +Tamil +Tantrist +Tanzanian +Tao +Taoist +Taos +Taracahitian +Tarahumara +Tarheel +Tartar +Tartufe +Tartuffe +Tarzan +Tatar +Taurus +Ted +Telugu +Templar +Tennessean +Teton +Teuton +Teutonist +Texan +Thai +Thatcherite +Theban +Thessalian +Thessalonian +Thracian +Tibetan +Timorese +Tlingit +Tobagonian +Toda +Togolese +Toltec +Tom +Tongan +Tory +Town +Townes +Tractarian +Trappist +Treasury +Trinidadian +Trinitarian +Trojan +Trot +Trotskyist +Trotskyite +Tsimshian +Tswana +Tuareg +Tudor +Tulu +Tungus +Tungusic +Tunisian +Tunker +Tupi +Turcoman +Turk +Turki +Turkmen +Turkoman +Tuscan +Tuscarora +Tutelo +Tutsi +Twin +Tyke +Tyrolean +Ubermensch +Udmurt +Ugandan +Uighur +Uigur +Ukranian +Uniat +Uniate +Unitarian +Uriah +Uruguayan +Usbeg +Usbek +Utahan +Ute +Utopian +Uygur +Uzbak +Uzbeg +Uzbek +V.P. 
+VIP +Vaishnava +Vaisya +Vandal +Vedist +Venetian +Venezuelan +Veps +Vepse +Vepsian +Vermonter +Victorian +Vietnamese +Viking +Virgin +Virginian +Virgo +Visayan +Visigoth +Vogul +Volunteer +Votyak +WASP +Wac +Wagnerian +Wahabi +Wahhabi +Wakashan +Walapai +Walloon +Wampanoag +Washingtonian +Watusi +Watutsi +Wave +Welsh +Welshman +Wesleyan +West-sider +Whig +White +Wiccan +Wichita +Winnebago +Wintun +Wisconsinite +Wobbly +Wolverine +Wykehamist +Wyomingite +Xhosa +Yahi +Yakut +Yana +Yank +Yankee +Yankee-Doodle +Yavapai +Yemeni +Yeniseian +Yokuts +Yoruba +Yucatec +Yucateco +Yugoslav +Yugoslavian +Yuma +Zairean +Zairese +Zambian +Zapotec +Zapotecan +Zealander +Zealot +Zimbabwean +Zionist +Zoroastrian +Zulu +Zuni +abator +abbe +abbess +abbot +abbreviator +abdicator +abductor +abecedarian +aberrant +abetter +abettor +abhorrer +abiogenist +abjurer +abnegator +abolitionist +abomination +abominator +aboriginal +aborigine +abortionist +abridger +abrogator +absconder +abseiler +absentee +absolutist +absolver +abstainer +abstinent +abstracter +abstractionist +abstractor +abuser +abutter +academic +academician +acceptor +accessary +accessory +accommodator +accompanist +accompanyist +accomplice +accordionist +accoucheur +accoucheuse +accountant +accumulator +accused +accuser +ace +achiever +acolyte +acoustician +acquaintance +acquirer +acrobat +active +activist +actor +actress +actuary +adapter +adder +addict +addle-head +addlehead +addressee +adducer +adept +adherent +adjudicator +adjunct +adjuster +adjustor +adjutant +adman +administrator +admiral +admirer +admonisher +adolescent +adonis +adoptee +adopter +adorer +adulator +adult +adulterator +adulterer +adulteress +advancer +adventurer +adventuress +adversary +advertiser +advertizer +advisee +adviser +advisor +advocate +advocator +aerialist +aeronaut +aerophile +aesthete +aesthetician +aetiologist +affiant +affiliate +affine +affirmer +affluent +aficionado +agent +agent-in-place +aggravator +aggregator +aggressor +agitator 
+agnate +agnostic +agonist +agriculturalist +agriculturist +agronomist +aide +aide-de-camp +aircraftman +aircraftsman +aircrewman +airhead +airman +airwoman +alarmist +albino +alcalde +alchemist +alcoholic +alderman +alexic +algebraist +alien +alienator +alienee +alienist +alienor +aliterate +alky +all-rounder +allayer +allegoriser +allegorizer +allergist +alleviator +alliterator +allocator +ally +almoner +almsgiver +alphabetiser +alphabetizer +alpinist +also-ran +alternate +alto +altoist +altruist +alum +alumna +alumnus +amah +amalgamator +amanuensis +amateur +amazon +ambassador +ambassadress +ambler +ambusher +ameer +amigo +amir +amnesiac +amnesic +amora +amoralist +amorist +amputator +amputee +anachronism +anaesthetist +anagnost +analogist +analphabet +analphabetic +analysand +analyst +anarchist +anathema +anatomist +ancestor +ancestress +anchor +anchorite +anchorman +anchorperson +ancient +androgyne +anecdotist +anesthesiologist +anesthetist +angel +angiologist +angler +anglophil +anglophile +anglophobe +animator +animist +annalist +annihilator +annotator +announcer +annoyance +annoyer +annuitant +anointer +anomalist +anomaly +anorectic +anorexic +answerer +antagonist +antecedent +antediluvian +anthologist +anthropoid +anthropologist +anthropophagite +anthropophagus +anti +anti-American +anti-Semite +anti-intellectual +anticipant +anticipator +antifeminist +antinomian +antipope +antiquarian +antiquary +antique +apache +ape +ape-man +aper +aphakic +aphasic +aphorist +apiarist +apiculturist +apologist +apostate +apostle +apothecary +apotheosis +apparatchik +appeaser +appellant +applauder +applicant +applier +appointee +appointment +appraiser +appreciator +apprehender +apprentice +appropriator +approver +aquanaut +arb +arbiter +arbitrager +arbitrageur +arbitrator +arboriculturist +arborist +archaeologist +archaist +archbishop +archdeacon +archduchess +archduke +archeologist +archer +archimandrite +architect +archivist +archpriest +argonaut +arguer +arianist 
+aristocrat +arithmetician +armiger +armor-bearer +armorer +armourer +arms-runner +arouser +arranger +arrival +arriver +arriviste +arrogator +arrowsmith +arsonist +arthritic +articulator +artificer +artilleryman +artisan +artist +artiste +ascendant +ascendent +ascender +ascetic +asker +aspirant +aspirer +ass +ass-kisser +assailant +assassin +assassinator +assaulter +assayer +assemblyman +assemblywoman +assenter +asserter +assessee +assessor +asseverator +asshole +assignee +assignor +assimilator +assistant +associate +asthmatic +astrogator +astrologer +astrologist +astronaut +astronomer +astrophysicist +atheist +athlete +attache +attacker +attempter +attendant +attendee +attender +attestant +attestator +attester +attestor +attorney +attracter +attraction +attractor +auctioneer +audile +auditor +augur +aunt +auntie +aunty +auspex +auteur +authenticator +author +authoress +authoriser +authoritarian +authority +authorizer +auto-mechanic +autobiographer +autochthon +autocrat +autodidact +automaton +auxiliary +avatar +avenger +aviator +aviatress +aviatrix +avower +ayah +ayatollah +baas +babbler +babe +baboo +babu +baby +baby-sitter +babyminder +babysitter +bacchanal +bacchant +bacchante +bachelor +bachelor-at-arms +bachelorette +back +back-number +backbencher +backbiter +backer +backpacker +backscratcher +backslapper +backslider +backstop +backstroker +backup +backwoodsman +bacteriologist +badgerer +bag +baggage +baggageman +bagger +bagman +bagpiper +bailee +bailiff +bailor +bairn +baker +balancer +baldhead +baldpate +baldy +balker +ball-breaker +ball-buster +balladeer +ballerina +balletomane +balloonist +ballplayer +bambino +banderillero +bandit +bandleader +bandmaster +bandsman +banker +bankrupt +banneret +bantamweight +barbarian +barber +bard +bargainer +bargee +bargeman +baritone +barkeep +barkeeper +barker +barmaid +barman +barnburner +barnstormer +baron +baroness +baronet +barrater +barrator +barrister +barrow-boy +barrow-man +bartender +barterer +barytone 
+basileus +basketeer +basketmaker +basketweaver +bass +bassist +basso +bassoonist +bastard +baster +bather +batman +batsman +batter +battle-ax +battle-axe +battler +baulker +bawd +bawler +beachcomber +beadle +beadsman +bear +beard +bearer +beast +beat +beater +beatnik +beau +beautician +beauty +bedesman +bedfellow +bedlamite +bedwetter +beefeater +beekeeper +begetter +beggar +beggarman +beggarwoman +beginner +beguiler +begum +behaviorist +behaviourist +behemoth +beholder +beldam +beldame +believer +bellboy +belle +bellhop +belligerent +bellman +bellower +bellwether +bellyacher +beloved +benedick +benedict +benefactor +benefactress +beneficiary +bereaved +berk +berserk +berserker +besieger +best +bestower +betrayer +betrothed +better +bettor +bey +bibliographer +bibliophile +bibliopole +bibliopolist +bibliothec +bibliotist +bicycler +bicyclist +bidder +bigamist +bigot +bigwig +bilingual +bilingualist +billionaire +bimbo +bimetallist +binger +biochemist +biographer +biologist +biophysicist +bird +birdbrain +birder +birth +bisexual +bishop +bitch +biter +blabber +blabbermouth +blackamoor +blackguard +blackleg +blackmailer +blacksmith +blade +blasphemer +blaster +bleacher +bleeder +blighter +blocker +blockhead +blogger +bloke +blond +blonde +blood +blowhard +blubberer +bludgeoner +bluecoat +bluejacket +bluenose +bluestocking +bluffer +blunderer +blusterer +bo's'n +bo'sun +boarder +boaster +boatbuilder +boater +boatman +boatswain +bobby +bobby-socker +bobbysoxer +bodybuilder +bodyguard +boffin +bohemian +bolshie +bolshy +bombardier +bomber +bombshell +bondholder +bondmaid +bondman +bondsman +bondswoman +bondwoman +bonehead +bonesetter +boniface +boob +booby +bookbinder +bookdealer +booker +bookie +bookkeeper +booklover +bookmaker +bookman +bookseller +bookworm +boomer +boor +booster +bootblack +bootlegger +bootlicker +bootmaker +boozer +borderer +bore +borrower +bos'n +boss +bosun +botanist +botcher +boulevardier +bouncer +bounder +bourgeois +bowdleriser +bowdlerizer 
+bowler +bowman +boxer +boy +boyfriend +bozo +bracero +brachycephalic +braggart +bragger +brahman +brahmin +brain +brain-worker +brainiac +brainworker +brakeman +brat +brave +bravo +brawler +breadwinner +breaker +breaststroker +breeder +brewer +briber +brick +bricklayer +bride +bride-to-be +bridegroom +bridesmaid +brigadier +brigand +broad +broadcaster +broker +broker-dealer +broncobuster +brother +brother-in-law +browser +bruiser +brunet +brunette +brute +buccaneer +buckaroo +buckeroo +bucolic +buddy +buff +buffoon +bug-hunter +bugger +bugler +bugologist +builder +bulimic +bull +bullfighter +bully +bullyboy +bum +bumbler +bumpkin +bungler +bunkmate +bunny +bunter +bureaucrat +burgess +burgher +burglar +burgomaster +burgrave +bursar +busboy +bushman +bushwhacker +businessman +businessperson +businesswoman +busker +buster +busybody +butch +butcher +butler +butt +butter +butterball +butterfingers +buttinsky +buyer +by-blow +bystander +cabalist +cabinetmaker +cad +caddie +cadet +cadger +caffer +caffre +cager +caitiff +calculator +calif +caliph +caller +caller-out +caller-up +calligrapher +calligraphist +cameraman +campaigner +camper +campmate +canary +candidate +candlemaker +candymaker +cannibal +cannoneer +canoeist +canon +canonist +cantor +canvasser +capitalist +capo +captain +captive +captor +capturer +car-mechanic +carabineer +carabinier +carbineer +card +cardholder +cardinal +cardiologist +cardsharp +cardsharper +careerist +caregiver +caretaker +carhop +caricaturist +carillonneur +caroler +caroller +carouser +carpenter +carper +carpetbagger +carrier +carrottop +carter +cartographer +cartoonist +cartwright +carver +case +caseworker +cashier +castaway +caster +castrate +castrato +casualty +casuist +cat +cat's-paw +cataleptic +cataloger +cataloguer +catamite +catch +catcher +catechist +catechumen +caterer +cattleman +cavalier +cavalryman +caveman +caviler +caviller +celebrant +celebrater +celebrator +celebrity +celibate +cellist +cenobite +censor +centenarian 
+center +centerfielder +centrist +centurion +ceramicist +ceramist +chachka +chain-smoker +chair +chairman +chairperson +chairwoman +challenger +chamberlain +chambermaid +chameleon +champ +champion +chancellor +chandler +changeling +changer +chap +chapelgoer +chaperon +chaperone +chaplain +chapman +char +character +charge +chargeman +charioteer +charlatan +charmer +chartist +charwoman +chased +chaser +chatelaine +chatterbox +chatterer +chauvinist +chawbacon +cheap-jack +cheapjack +cheapskate +cheat +cheater +chebab +checker +cheerer +cheerleader +cheesemonger +chef +chela +chemist +cherub +chevalier +chewer +chichi +chick +chicken +chief +chieftain +child +chiliast +chimneysweep +chimneysweeper +chink +chiromancer +chiropodist +chiropractor +chiseler +chiseller +chit +choirboy +choirmaster +choker +chooser +choragus +choreographer +chorine +chorister +chosen +christ +chronicler +chucker-out +chum +chump +churchgoer +churchman +churchwarden +churl +chutzpanik +cicerone +cinematographer +cipher +citizen +civilian +claimant +clairvoyant +clansman +clanswoman +clapper +clarinetist +clarinettist +classic +classicist +classifier +classmate +claustrophobe +cleaner +clergyman +cleric +clericalist +clerk +client +climatologist +climber +clinician +cloakmaker +clockmaker +clocksmith +clod +clone +closer +clotheshorse +clothier +clown +co-beneficiary +co-defendant +co-discoverer +co-ed +co-pilot +co-respondent +co-star +co-worker +coach +coachbuilder +coachman +coadjutor +coalman +coaster +coastguardsman +coauthor +coaxer +cobber +cobbler +cockscomb +cocksucker +coconspirator +cocotte +coddler +codefendant +coder +codetalker +codger +coenobite +coeval +cofounder +cog +cognate +cognoscente +coiffeur +coiffeuse +coiner +collaborationist +collaborator +colleague +collectivist +collector +colleen +collegian +collier +colonel +colonial +colonialist +coloniser +colonist +colonizer +coloratura +colored +colorist +colossus +columnist +combatant +comber +comedian +comedienne +comer 
+comforter +comic +commandant +commander +commando +commentator +commie +commissar +commissionaire +commissioner +committeeman +committeewoman +commodore +commoner +communicant +communicator +communist +commuter +companion +company +compatriot +compeer +compere +competition +competitor +compiler +complainant +complainer +complexifier +composer +compositor +compromiser +comptroller +compulsive +computer +comrade +con +conceiver +concert-goer +concessionaire +concessioner +conchologist +concierge +conciliator +concubine +conditioner +conductor +conductress +confectioner +confederate +conferee +conferrer +confessor +confidant +confidante +conformist +confrere +confuter +congregant +congressman +congresswoman +conjurer +conjuror +connection +connoisseur +conqueror +conquistador +conscript +conservationist +conservative +conservativist +conservator +consignee +consigner +consignor +consort +conspirator +constable +constituent +constitutionalist +constructivist +constructor +consul +consultant +consumer +consumptive +contact +contadino +contemplative +contemporary +contender +contestant +contestee +contester +contortionist +contrabandist +contractor +contralto +contrapuntist +contrarian +contributor +contriver +controller +controversialist +convalescent +convener +conventioneer +conversationalist +conversationist +convert +conveyancer +conveyer +conveyor +convict +cook +cookie +cooky +coolie +cooly +coon +cooper +cooperator +coordinator +cop +copartner +copilot +copper +coppersmith +copycat +copyist +copyreader +copywriter +coquette +coreligionist +corespondent +cornerback +cornetist +cornhusker +coroner +corporal +corporatist +correspondent +corsair +cosignatory +cosigner +cosmetician +cosmetologist +cosmographer +cosmographist +cosmologist +cosmonaut +cosmopolitan +cosmopolite +costermonger +costumer +costumier +cotenant +cottager +cottar +cotter +cottier +councillor +councilman +councilwoman +counsel +counsellor +counselor +counselor-at-law +count +counter 
+counter-revolutionist +counterdemonstrator +counterfeiter +counterman +counterperson +counterrevolutionary +counterrevolutionist +counterspy +countertenor +counterterrorist +counterwoman +countess +countryman +countrywoman +courier +courser +courtesan +courtier +cousin +cousin-german +couturier +cow +coward +cowboy +cowgirl +cowhand +cowherd +cowman +cowpoke +cowpuncher +cox +coxcomb +coxswain +coyote +crab +cracker +crackerjack +crackpot +cracksman +crafter +craftsman +cragsman +crammer +craniologist +crank +crap-shooter +crapshooter +crasher +craven +crawler +crazy +creator +creature +creditor +creep +creeper +cretin +crewman +cricketer +crier +criminal +criminologist +crimp +crimper +criollo +cripple +critic +crofter +crone +crony +crook +crookback +crooner +cropper +cross-dresser +cross-examiner +cross-questioner +crossbencher +crossover +crosspatch +croupier +cruiserweight +crumb +crusader +crybaby +cryptanalyst +cryptographer +cryptologist +crystallographer +cub +cubist +cuckold +cuckoo +cuirassier +culprit +cultist +cultivator +cunctator +cunt +cupbearer +cur +curandera +curandero +curate +curator +curmudgeon +currier +cuss +custodian +customer +cut-up +cutler +cutpurse +cutter +cutthroat +cyber-terrorist +cybernaut +cyberpunk +cyborg +cyclist +cymbalist +cynic +cypher +cyprian +cytogeneticist +cytologist +czar +czarina +czaritza +dabbler +dacoit +dad +dada +daddy +dago +dairymaid +dairyman +dakoit +dalesman +dallier +dame +damoiselle +damosel +damozel +damsel +dancer +dancing-master +dandy +danseur +danseuse +daredevil +darkey +darkie +darky +darling +darner +dastard +date +dauber +daughter +daughter-in-law +dauphin +dawdler +dayboy +daydreamer +daygirl +deacon +deaconess +deadbeat +deadeye +deadhead +deaf-mute +dealer +dean +dear +dearest +dearie +deary +deb +debaser +debater +debauchee +debaucher +debitor +debtor +debutante +decadent +deceased +decedent +deceiver +decipherer +deckhand +declarer +decoder +decorator +decoy +defalcator +defamer +defaulter 
+defeatist +defecator +defector +defendant +defender +defiler +defrauder +degenerate +degrader +deification +deipnosophist +deist +delayer +delegate +delinquent +deliverer +deliveryman +demagog +demagogue +demander +demigod +demimondaine +democrat +demographer +demographist +demoiselle +demon +demoniac +demonstrator +denier +denizen +dentist +denturist +departed +departer +dependant +dependent +deponent +deportee +deposer +depositor +depreciator +depressive +deputy +derelict +dermatologist +dervish +descendant +descendent +descender +deserter +designer +deskman +desperado +desperate +despoiler +despot +destroyer +detainee +detective +determinist +detractor +developer +deviant +deviate +deviationist +devil +devisee +deviser +devisor +devotee +devourer +diabetic +diabolist +diagnostician +dialectician +diarist +dichromat +dick +dickhead +dictator +die-sinker +diehard +diemaker +diesinker +dieter +dietician +dietitian +differentiator +digger +dignitary +dike +dilettante +dilly-dallier +dillydallier +dimwit +diner +dingbat +diocesan +dip +diplomat +diplomate +diplomatist +dipsomaniac +director +disarmer +disbeliever +disburser +disciple +disciplinarian +discoverer +discriminator +discussant +disentangler +dish +dishwasher +disparager +dispatcher +dispenser +disprover +disputant +dissembler +disseminator +dissenter +dissident +dissimulator +distiller +distortionist +distributer +distributor +disturber +diva +diver +diversionist +divider +divine +diviner +divorcee +dj +do-gooder +do-nothing +doc +docent +dock-walloper +docker +dockhand +dockworker +doctor +doctrinaire +dodderer +dodger +dodo +doer +dog +doge +dogfighter +dogmatist +dogsbody +dolichocephalic +doll +dolt +domestic +dominatrix +domine +dominee +dominie +dominus +don +don't-know +donee +donna +donor +doofus +doorkeeper +doorman +doormat +dope +dork +dosser +dotard +double +double-crosser +double-dealer +doubter +doughboy +doula +dove +dowager +down-and-out +dowser +doxy +doyen +doyenne +draftee +drafter 
+draftsman +draftsperson +dragger +dragoman +dragon +dragoon +dramatist +draper +draughtsman +draw +drawee +drawer +drawler +dreamer +dresser +dressmaker +dribbler +drifter +drinker +driveller +driver +drone +drooler +dropkicker +dropout +drover +drudge +druggist +drumbeater +drummer +drunk +drunk-and-disorderly +drunkard +dry +dualist +duce +duchess +ducky +dud +dude +dueler +duelist +dueller +duellist +duenna +duffer +duke +dulcinea +dullard +dumbass +dumbbell +dummy +dunce +dunderhead +dunker +dupe +dustman +dwarf +dweeb +dweller +dyer +dyke +dynamiter +dynamitist +dynast +dyslectic +dyspeptic +earl +earner +earthling +earthman +easterner +eater +eavesdropper +eccentric +ecclesiastic +ecdysiast +eclectic +eclecticist +ecologist +econometrician +econometrist +economiser +economist +economizer +ectomorph +edger +editor +editorialist +educatee +educationalist +educationist +educator +effecter +effector +effendi +egalitarian +egghead +egocentric +egoist +egomaniac +egotist +ejaculator +ejector +elder +eldest +elector +electrician +electrocutioner +electrologist +electroplater +electrotherapist +elegist +elitist +elocutionist +emancipationist +emancipator +embalmer +embassador +embezzler +embodiment +embroiderer +embroideress +embryologist +emcee +emeer +emeritus +emigrant +emigre +emigree +emir +emissary +emperor +empiricist +employable +employee +employer +empress +emptor +emulator +enate +enchanter +enchantress +encroacher +encyclopaedist +encyclopedist +end +endocrinologist +endodontist +endomorph +endorser +enemy +energiser +energizer +enforcer +engineer +engraver +enjoyer +enlistee +enologist +enophile +enquirer +enrollee +ensign +enterpriser +entertainer +enthusiast +entomologist +entrant +entrepreneur +enumerator +environmentalist +envoy +enzymologist +eparch +epicene +epicure +epicurean +epidemiologist +epigon +epigone +epileptic +epistemologist +equal +equalitarian +equerry +equestrian +equivocator +eradicator +eremite +eristic +erotic +escalader +escapee 
+escapist +escapologist +eschatologist +escort +esquire +essayer +essayist +esthete +esthetician +estimator +etcher +ethician +ethicist +ethnarch +ethnic +ethnographer +ethnologist +ethologist +etiologist +etymologist +eulogist +eunuch +evacuee +evaluator +evangelist +everybody +everyman +everyone +evildoer +evolutionist +ex +ex-boyfriend +ex-gambler +ex-husband +ex-mayor +ex-president +ex-serviceman +ex-spouse +ex-wife +examinee +examiner +exarch +excavator +exchanger +exciseman +excogitator +excursionist +excuser +executant +executioner +executive +executor +executrix +exegete +exhibitioner +exhibitionist +exhibitor +exile +existentialist +exodontist +exorciser +exorcist +expat +expatriate +expectorator +expender +experimenter +expert +exploiter +explorer +exponent +exporter +expositor +expounder +expressionist +expurgator +exterminator +extern +extoller +extortioner +extortionist +extra +extravert +extremist +extrovert +eyeful +eyewitness +fabricator +fabulist +face +facilitator +factor +factotum +faddist +fag +faggot +fagot +failure +fairy +fake +fakeer +faker +fakir +falangist +falconer +faller +falsifier +familiar +family +famulus +fan +fanatic +fancier +fantasist +fantast +faqir +faquir +fare +farmer +farmerette +farmhand +farrier +fascist +fascista +fashionmonger +fastener +fatalist +fathead +father +father-figure +father-in-law +fatso +fatty +faultfinder +fauvist +favorite +favourite +fawner +featherweight +federalist +feeder +fella +fellah +feller +fellow +felo-de-se +felon +female +feminist +fence +fence-sitter +fencer +fencesitter +fermentologist +ferryman +fetishist +feudatory +fiance +fiancee +fibber +fiddler +fiduciary +fielder +fieldhand +fieldsman +fieldworker +fiend +fighter +figure +figurehead +figurer +filer +filibuster +filibusterer +filicide +fill-in +fille +filmmaker +finagler +finalist +financier +finder +finisher +fink +fire-eater +fire-swallower +fireball +firebrand +firebug +firefighter +fireman +first-nighter +first-rater +firstborn 
+fisher +fisherman +fishmonger +fishwife +fitter +fixer +fixture +flack +flag-waver +flagellant +flak +flake +flamen +flanker +flapper +flasher +flatfoot +flatmate +flatterer +flautist +fledgeling +fledgling +fleer +flibbertigibbet +flier +flirt +floater +flogger +floorwalker +floozie +floozy +flop +florist +flouter +flunkey +flunky +flutist +fly-by-night +flyer +flyweight +fodder +foe +foeman +fogey +fogy +follower +fomenter +fondler +foodie +fool +foot +footballer +footer +footman +footpad +footslogger +fop +forager +forbear +forebear +forecaster +forefather +foreigner +forelady +foreman +foremother +foreperson +forerunner +forester +forewoman +forger +forgiver +fornicator +fornicatress +fortuneteller +forty-niner +forward +fossil +fossilist +foster-brother +foster-child +foster-daughter +foster-father +foster-mother +foster-nurse +foster-parent +foster-sister +foster-son +fosterling +founder +foundling +foundress +four-flusher +fowler +fox +framer +franc-tireur +franklin +fratricide +fraud +freak +free-lance +free-liver +freebooter +freedman +freedwoman +freeholder +freelance +freelancer +freeloader +freeman +freethinker +freewheeler +freewoman +frequenter +fresher +freshman +friar +friend +frog +frogman +front +front-runner +frontbencher +frontiersman +frontierswoman +frotteur +fruitcake +fruiterer +frump +fry +fucker +fuckhead +fuckup +fuddy-duddy +fugitive +fugleman +fullback +fuller +fumbler +fumigator +funambulist +functionalist +functionary +fundamentalist +fundraiser +furrier +fusilier +fuss-budget +fusspot +futurist +fuzz +gadabout +gadfly +gadgeteer +gaffer +gagman +gagster +gagwriter +gainer +gal +gallant +galoot +galvaniser +galvanizer +gambist +gambler +gamecock +gamekeeper +games-master +games-mistress +gamin +gamine +ganef +ganger +gangsta +gangster +ganof +gaolbird +gaoler +garbageman +gardener +garment-worker +garmentmaker +garnishee +garroter +garrotter +gasbag +gasman +gastroenterologist +gastronome +gatecrasher +gatekeeper +gatherer +gaucho 
+gawk +gawker +gay +gazetteer +geek +geezer +geisha +gem +gendarme +genealogist +general +generalissimo +generalist +generator +geneticist +genitor +genius +gent +gentile +gentleman +gentleman-at-arms +gentlewoman +geographer +geologist +geomancer +geometer +geometrician +geophysicist +geriatrician +gerontologist +ghost +ghostwriter +ghoul +giant +giggler +gigolo +gilder +gillie +ginzo +gipsy +girl +girlfriend +git +gitana +gitano +giver +gladiator +glass-cutter +glassblower +glassmaker +glassworker +glazer +glazier +gleaner +globetrotter +glossarist +glutton +go-between +go-getter +goalie +goalkeeper +goaltender +goat +goatherd +gob +gobbler +god +godchild +goddaughter +godfather +godmother +godparent +godson +goer +gofer +goffer +gold-beater +gold-worker +goldbeater +goldbrick +goldsmith +goldworker +golfer +goliard +goliath +gondolier +gondoliere +goner +gonif +goniff +good-for-naught +good-for-nothing +goody-goody +goof +goof-off +goofball +gook +goon +goose +gopher +gorger +gospeler +gospeller +gossip +gossiper +gossipmonger +gouger +gourmand +gourmandizer +gourmet +governess +governor +goy +grabber +grad +grader +graduate +grammarian +gramps +gran +grandad +grandaunt +grandchild +granddad +granddaddy +granddaughter +grandee +grandfather +grandma +grandmaster +grandmother +grandnephew +grandniece +grandpa +grandparent +grandson +grandstander +granduncle +granger +grannie +granny +grantee +granter +grantor +graphologist +grappler +grass +gravedigger +graverobber +gravida +graybeard +grazier +greaseball +greaser +great +great-aunt +great-nephew +great-niece +great-uncle +greengrocer +greenhorn +greenskeeper +greeter +grenadier +greyback +greybeard +griever +grifter +grind +gringo +grinner +griot +grip +groaner +grocer +groom +groom-to-be +groomsman +grouch +groundbreaker +groundkeeper +groundling +groundskeeper +groundsman +groupie +groveler +groveller +grower +growler +grownup +grumbler +grump +grunt +grunter +guarantor +guard +guardian +guardsman +guerilla 
+guerrilla +guesser +guest +guestworker +guide +guitarist +gull +gulper +gumshoe +gun +gunman +gunner +gunrunner +gunslinger +gunsmith +guru +gutter +guttersnipe +guvnor +guy +guzzler +gymnast +gymnosophist +gynaecologist +gynandromorph +gynecologist +gypsy +haberdasher +habitant +habitue +hack +hacker +hadji +haematologist +haemophile +haemophiliac +hag +haggler +hagiographer +hagiographist +hagiologist +hairdresser +hairsplitter +hairstylist +haji +hajji +hakeem +hakim +halberdier +half-breed +half-brother +half-caste +half-pint +half-sister +half-wit +halfback +ham +hammerhead +hand +handicapper +handler +handmaid +handmaiden +handyman +hanger +hanger-on +hangman +hangover +haranguer +harasser +hardliner +hardwareman +harlequin +harlot +harmoniser +harmonizer +harper +harpist +harpooneer +harpooner +harpsichordist +harpy +harridan +harrier +harum-scarum +harvester +has-been +hatemonger +hater +hatmaker +hatter +hauler +haulier +have +have-not +hawk +hawker +hawkshaw +hayseed +hazan +he-man +head +head-shrinker +headcounter +headhunter +headliner +headman +headmaster +headmistress +headsman +headwaiter +healer +hearer +heartbreaker +heartthrob +heathen +heaver +heavy +heavyweight +heckler +hedger +hedonist +heel +heir +heir-at-law +heiress +hell-kite +hell-rooster +hellcat +heller +hellhound +hellion +helmsman +helot +help +helper +helpmate +helpmeet +hematologist +hemiplegic +hemophile +hemophiliac +henchman +herald +herbalist +herder +herdsman +heretic +heritor +hermaphrodite +hermit +hero +heroine +herpetologist +hesitater +hesitator +heterosexual +hewer +hick +hierarch +high-muck-a-muck +high-up +highbinder +highbrow +higher-up +highflier +highflyer +highjacker +highwayman +hijacker +hiker +hillbilly +hippie +hippy +hipster +hire +hireling +hirer +hisser +histologist +historian +historiographer +histrion +hitchhiker +hitman +hitter +hoarder +hoaxer +hobbledehoy +hobbler +hobbyist +hobo +hodman +hog +holder +holdout +holdover +holidaymaker +hombre 
+home-builder +homebody +homeboy +homebuilder +homegirl +homeless +homemaker +homeopath +homeowner +homesteader +homo +homoeopath +homophile +homophobe +homosexual +homunculus +honcho +honey +honeymooner +honkey +honkie +honky +honoree +hood +hoodlum +hoodoo +hoofer +hooker +hooligan +hope +hopeful +hoper +hopper +hornist +horologer +horologist +horseman +horseshoer +horsewoman +horticulturist +hosier +host +hostage +hosteller +hostess +hostler +hotdog +hotelier +hotelkeeper +hotelman +hothead +hotshot +hotspur +hound +houri +house-builder +housebreaker +housebuilder +housefather +houseguest +householder +househusband +housekeeper +housemaid +houseman +housemaster +housemate +housemother +housewife +housewrecker +hoyden +hubby +huckster +huddler +hugger +hulk +humanist +humanitarian +humdinger +hummer +humorist +humourist +humpback +hunchback +hundred-percenter +hunk +hunter +hunter-gatherer +huntress +huntsman +hurdler +hurler +husband +husbandman +hussar +hussy +hustler +hydrologist +hydromancer +hygienist +hymie +hyperope +hypertensive +hypnotiser +hypnotist +hypnotizer +hypochondriac +hypocrite +hypotensive +hysteric +ianfu +ice-skater +iceman +ichthyologist +iconoclast +ideal +idealist +idealogue +ideologist +ideologue +idiot +idler +idol +idolater +idolatress +idoliser +idolizer +ignoramus +illegitimate +illiterate +illusionist +illustrator +image +imam +imaum +imbecile +imbiber +imitator +immigrant +immortal +immune +immunologist +imp +imperialist +impersonator +import +importee +importer +imposter +impostor +impresario +impressionist +improver +in-law +inamorata +inamorato +incarnation +incendiary +inciter +incompetent +incubus +incumbent +incurable +independent +indexer +indigen +indigene +individual +individualist +indorser +inducer +inductee +industrialist +indweller +inebriate +infant +infanticide +infantryman +inferior +infernal +infidel +infielder +infiltrator +informant +informer +ingenue +ingrate +inhabitant +inheritor +inheritress +inheritrix 
+initiate +initiator +inmate +innkeeper +innocent +innovator +inoculator +inpatient +inquirer +inquisitor +insect +insider +insolvent +insomniac +inspector +inspirer +instigant +instigator +instructor +instructress +instrument +instrumentalist +insured +insurgent +insurrectionist +intellect +intellectual +intercessor +interlocutor +interloper +intermediary +intermediator +intern +internationalist +interne +internee +internist +internuncio +interpreter +interrogator +intersex +intervenor +interviewee +interviewer +intimate +intriguer +introvert +intruder +invader +invalid +invalidator +inventor +investigator +investor +invigilator +invitee +ironist +ironman +ironmonger +ironside +ironworker +irredentist +irregular +irreligionist +irridentist +island-dweller +islander +isolationist +issue +itinerant +jabberer +jack +jackanapes +jackass +jade +jailbird +jailer +jailor +janissary +janitor +jawan +jaywalker +jazzman +jeerer +jerk +jerk-off +jerker +jerry-builder +jester +jewel +jeweler +jeweller +jezebel +jigaboo +jilt +jimdandy +jimhickey +jingo +jingoist +jinx +jobber +jobholder +jock +jockey +jogger +john +joiner +joker +jokester +jonah +jongleur +journalist +journeyer +journeyman +judge +juggler +juicer +jumper +junior +junkie +junky +jurist +juror +juryman +jurywoman +justice +justiciar +justiciary +justifier +juvenile +kabbalist +kachina +kaffir +kafir +kalif +kaliph +kamikaze +keeper +keyboardist +khalif +khalifah +khan +kibbutznik +kibitzer +kicker +kid +kiddy +kidnaper +kidnapper +kike +killer +killjoy +kin +kindergartener +kindergartner +king +kingmaker +kingpin +kink +kinsman +kinsperson +kinswoman +kisser +kleptomaniac +klutz +knacker +knave +kneeler +knight +knight-errant +knitter +knocker +knockout +know-all +know-it-all +knower +knucklehead +kolkhoznik +kook +kvetch +laborer +labourer +lacer +lackey +lad +laddie +ladino +lady +lady-in-waiting +ladylove +laggard +lagger +laird +lama +lamb +lame +lamenter +laminator +lamplighter +lampooner +lancer 
+landgrave +landholder +landlady +landlord +landlubber +landman +landowner +landscaper +landscapist +landsman +langlaufer +languisher +lapidarist +lapidary +lapidator +lapidist +larcener +larcenist +lascar +lasher +lass +lassie +latecomer +lather +latitudinarian +laudator +lauder +laugher +laughingstock +laundress +laundryman +laundrywoman +laureate +lawbreaker +lawgiver +lawmaker +lawman +lawyer +layabout +layman +layperson +lazar +lazybones +lead +leader +leaker +leaper +learner +leaseholder +leatherneck +leaver +lech +lecher +lector +lecturer +ledgeman +leech +left-hander +left-winger +lefthander +leftist +lefty +legate +legatee +legionary +legionnaire +legislator +lender +lensman +leper +lepidopterist +lepidopterologist +lesbian +lessee +lessor +letch +letter +letterer +letterman +leveler +leveller +lexicographer +lexicologist +liar +libber +libeler +liberal +liberalist +liberator +libertarian +libertine +librarian +librettist +licensee +licenser +licentiate +lie-abed +liege +liegeman +lieutenant +lifeguard +lifer +lifesaver +lifter +light +light-o'-love +light-of-love +lighterman +lightweight +lilliputian +limey +limner +limnologist +limper +line-shooter +linebacker +lineman +linendraper +linesman +lingerer +linguist +linkboy +linkman +linksman +lion +lion-hunter +liquidator +lisper +listener +lister +literate +lithographer +lithomancer +litigant +litigator +litter-bearer +litterateur +litterbug +litterer +liturgist +liver +liveryman +lizard +loader +loafer +loaner +loather +lobbyist +lobsterback +lobsterman +locater +locator +lockkeeper +lockman +lockmaster +locksmith +locum +lodger +logger +loggerhead +logician +logistician +logomach +logomachist +loiterer +loner +longbowman +longer +longshoreman +look-alike +looker +looker-on +lookout +loon +looney +loony +looter +lord +loser +lotus-eater +loudmouth +lounger +louse +lout +love +lovely +lover +lowbrow +lowerclassman +lowlife +loyalist +lubber +luger +lulu +lumberjack +lumberman +luminary +lummox +lump 
+lumper +lunatic +luncher +lunger +lunkhead +lurcher +lurker +lush +lutanist +lutenist +luthier +lutist +lyricist +lyrist +ma +ma'am +macaroni +mace +macebearer +macer +machinator +machine +machinist +macho +macho-man +macroeconomist +macushla +madam +madame +madcap +madman +madrigalist +madwoman +maenad +maestro +mafioso +magdalen +magician +magistrate +magnate +magnifico +magpie +magus +maharaja +maharajah +maharanee +maharani +mahatma +mahout +maid +maiden +maidservant +mailer +mailman +maimer +mainstay +maintainer +major +major-domo +major-general +majorette +make-peace +maker +malacologist +malahini +malcontent +male +malefactor +malfeasant +maligner +malik +malingerer +maltman +maltreater +maltster +mama +mamma +mammalogist +mammy +man +man-about-town +man-at-arms +man-child +man-eater +manager +manageress +manakin +mandarin +mandatary +mandator +mandatory +maneuverer +mangler +maniac +manic-depressive +manicurist +manikin +manipulator +mannequin +mannikin +manoeuvrer +manservant +manslayer +mantrap +manufacturer +manumitter +map-reader +mapper +marathoner +marauder +marcher +marchioness +margrave +marine +mariner +mark +marketer +marksman +maroon +marquess +marquis +marquise +married +marshal +marshall +martinet +martyr +marveller +masher +masker +masochist +mason +masquer +masquerader +massager +masseur +masseuse +master +master-at-arms +mastermind +masturbator +matador +match +matcher +matchmaker +mate +mater +materfamilias +material +materialist +mathematician +matman +matriarch +matricide +matriculate +matrikin +matrisib +matron +mauler +maven +maverick +mavin +mayor +mayoress +meanie +meany +measurer +meatman +mechanic +mechanist +medalist +medallist +meddler +mediator +mediatrix +medic +medico +mediocrity +medium +meeter +megalomaniac +melancholiac +melancholic +meliorist +melter +member +memoriser +memorizer +memsahib +mender +mendicant +menial +mensch +mensh +mentioner +mentor +mercenary +mercer +merchandiser +merchant +merchant-venturer +merrymaker 
+meshuggeneh +meshuggener +mesmerist +mesmerizer +mesomorph +messenger +messiah +messmate +mestiza +mestizo +metalhead +metallurgist +metalworker +meteorologist +metic +metropolitan +mezzo +mezzo-soprano +microbiologist +microeconomist +microscopist +middlebrow +middleman +middleweight +midget +midinette +midshipman +midwife +migrant +migrator +mikado +miler +militant +militarist +militiaman +milkmaid +milkman +milksop +mill-girl +mill-hand +millenarian +millenarist +miller +milliner +millionaire +millionairess +millwright +milord +mime +mimer +mimic +mimicker +mind +minder +miner +mineralogist +mineworker +miniaturist +minimalist +minion +minister +ministrant +minor +minstrel +minter +minx +misanthrope +misanthropist +misbeliever +mischief-maker +miscreant +miser +misfit +misleader +misogamist +misogynist +miss +missionary +missioner +missis +missus +missy +mistress +mixed-blood +mixologist +mnemonist +moaner +mobster +mocker +mod +model +modeler +modeller +moderate +moderationist +moderator +modern +modernist +modifier +modiste +mogul +mole +molester +moll +mollycoddle +mollycoddler +mom +momma +mommy +monarch +monarchist +monastic +monetarist +moneyer +moneygrubber +moneylender +moneymaker +moneyman +monger +mongoloid +monitor +monitrice +monk +monkey +monochromat +monogamist +monogynist +monolingual +monologist +monomaniac +monopoliser +monopolist +monopolizer +monotheist +monster +mooch +moocher +moon-curser +moonlighter +moonshiner +mope +mopper +moppet +moralist +moron +morosoph +mortgagee +mortgager +mortgagor +mortician +moss-trooper +mossback +mother +mother-in-law +motherfucker +motile +motorcyclist +motormouth +moujik +mountaineer +mountebank +mounter +mourner +mouse +mouth +mouthpiece +mover +moviegoer +muadhdhin +muazzin +muckraker +mudslinger +muezzin +mufti +mug +muggee +mugger +muggins +mugwump +mujahid +mujik +mujtihad +mulatto +muleteer +muller +multi-billionaire +mum +mumbler +mummer +mummy +muncher +muralist +murderee +murderer +murderess 
+murmurer +muscle +muscle-builder +musclebuilder +muscleman +muser +musher +musician +musicologist +musketeer +mute +mutilator +mutineer +mutterer +muttonhead +muzhik +muzjik +muzzler +mycologist +mycophage +mycophagist +myope +myrmidon +mystic +mythologist +nabob +nag +nagger +naif +nailer +namby-pamby +name +namer +namesake +nan +nance +nanna +nanny +nanus +naprapath +narc +narcissist +narcist +narcoleptic +nark +narrator +natator +national +nationalist +native +nativist +natural +naturalist +naturist +naturopath +navigator +navvy +nawab +naysayer +nazi +ne'er-do-well +nebbech +nebbish +necessitarian +necker +necromancer +needer +needlewoman +needleworker +negativist +neglecter +negotiant +negotiator +negotiatress +negotiatrix +neighbor +neighbour +neoclassicist +neocon +neoconservative +neoliberal +neologist +neonate +neophyte +nephew +nepotist +nerd +nester +nestling +netkeeper +netminder +neurasthenic +neurobiologist +neurolinguist +neurologist +neuroscientist +neurosurgeon +neurotic +neutral +neutralist +newbie +newborn +newcomer +newlywed +newsagent +newsboy +newscaster +newsdealer +newsman +newsmonger +newspaperman +newspaperwoman +newsperson +newsreader +newsvendor +newswoman +newswriter +nibbler +niece +nigga +niggard +nigger +niggler +nightbird +nighthawk +nightrider +nigra +nihilist +nincompoop +ninja +ninny +nipper +niqaabi +nitpicker +nitwit +no-account +no-show +nob +noble +nobleman +noblewoman +nobody +noctambulist +nomad +nominalist +nominator +nominee +non-Catholic +non-Jew +non-resistant +nonachiever +nonagenarian +nonattender +nonbeliever +noncandidate +noncitizen +noncom +noncombatant +noncompliant +nonconformist +nondescript +nondrinker +nondriver +nonentity +nonesuch +nonmember +nonpareil +nonparticipant +nonpartisan +nonpartizan +nonperson +nonreader +nonresident +nonsmoker +nonstarter +nonsuch +nonworker +normaliser +normalizer +nosey-parker +nosher +nosy-parker +notability +notable +notary +noticer +nouveau-riche +novelist +novice 
+novillero +novitiate +nude +nudger +nudist +nudnick +nudnik +nuisance +nullifier +nullipara +numerologist +numismatist +numismatologist +numskull +nun +nuncio +nurse +nurse-midwife +nurseling +nursemaid +nurser +nurseryman +nursling +nut +nutcase +nutritionist +nutter +nymph +nymphet +nympho +nympholept +nymphomaniac +oaf +oarsman +oarswoman +objector +oblate +obliger +oboist +obscurantist +observer +obsessive +obsessive-compulsive +obstetrician +obstructer +obstructionist +obstructor +occultist +occupant +occupier +oceanaut +oceanographer +octogenarian +octoroon +oculist +odalisque +oddball +odds-maker +odist +oenologist +oenophile +offender +offerer +offeror +office-bearer +officeholder +officer +official +officiant +offspring +ogler +ogre +oiler +oilman +old-timer +oldster +oldtimer +oligarch +ombudsman +omnivore +onanist +oncologist +oneiromancer +onlooker +onomancer +opener +operagoer +operative +operator +ophthalmologist +opponent +opportunist +opposer +opposite +opposition +oppressor +optician +optimist +optometrist +oracle +orator +orchestrator +ordainer +orderer +orderly +ordinand +ordinary +organ-grinder +organiser +organist +organizer +orientalist +originator +ornamentalist +ornithologist +orphan +orthodontist +orthoepist +orthopaedist +orthopedist +orthoptist +osculator +osteologer +osteologist +osteopath +osteopathist +ostiarius +ostiary +ostler +ostrich +otolaryngologist +otologist +otorhinolaryngologist +ouster +out-and-outer +outcast +outcaste +outdoorsman +outdoorswoman +outfielder +outfitter +outgoer +outlander +outlaw +outlier +outpatient +outrider +outsider +overachiever +overcomer +overlord +overnighter +overseer +owner +owner-occupier +oyabun +pa +pacha +pachuco +pacificist +pacifier +pacifist +packer +packman +packrat +padder +paddler +padre +padrone +paederast +paediatrician +paedophile +pagan +page +pageboy +pain +painter +pal +paladin +palaeontologist +palatine +paleface +paleographer +paleographist +paleontologist +pallbearer +palmist 
+palmister +palooka +palsgrave +pamperer +pamphleteer +pandar +pander +panderer +panegyrist +panelist +panellist +panhandler +panjandrum +pansexual +pansy +pantheist +pantomimer +pantomimist +pantryman +pantywaist +papa +paparazzo +paper-pusher +paperboy +paperer +paperhanger +papist +papoose +pappa +pappoose +para +parachuter +parachutist +parader +paragon +paragrapher +paralegal +paralytic +paramedic +paramedical +paramour +paranoiac +paranoid +paraplegic +paraprofessional +parapsychologist +parasite +paratrooper +pardner +pardoner +parent +parer +paretic +pariah +parishioner +parliamentarian +parlormaid +parlourmaid +parodist +parolee +parricide +parrot +parson +part-owner +part-timer +partaker +participant +partisan +partitionist +partizan +partner +party +partygoer +parvenu +pasha +passenger +passer +passer-by +passerby +paster +pastor +patentee +pater +paterfamilias +pathfinder +pathologist +patient +patrial +patriarch +patrician +patricide +patrikin +patriot +patrioteer +patrisib +patroller +patrolman +patron +patroness +patronne +patsy +patternmaker +patzer +pauper +pawer +pawn +pawnbroker +payee +payer +paymaster +paynim +peacekeeper +peacemaker +peacenik +peach +peanut +pearler +peasant +peculator +pedagog +pedagogue +pedaler +pedaller +pedant +peddler +pederast +pedestrian +pediatrician +pediatrist +pedlar +pedodontist +pedophile +peeler +peeper +peer +peeress +peewee +pelter +pen-friend +pendragon +penetrator +penitent +penman +penologist +penpusher +pensionary +pensioner +pentathlete +peon +perceiver +percher +percipient +percussionist +perfecter +perfectionist +performer +perfumer +peri +perinatologist +periodontist +peripatetic +perisher +perjurer +perpetrator +persecutor +person +personage +personality +personification +perspirer +persuader +pervert +peshmerga +pessimist +pest +pesterer +pet +petitioner +petter +pettifogger +phalangist +pharisee +pharmacist +pharmacologist +philanderer +philanthropist +philatelist +philhellene +philhellenist 
+philistine +philologist +philologue +philomath +philosopher +philosophiser +philosophizer +phlebotomist +phoner +phonetician +phoney +phonologist +phony +photographer +photojournalist +photometrician +photometrist +phrenologist +physician +physicist +physiologist +physiotherapist +phytochemist +phytologist +pianist +picador +picaninny +piccaninny +pickaninny +picker +picket +picklepuss +picknicker +pickpocket +pickup +picnicker +pig +pigman +pigmy +pilferer +pilgrim +pill +pillager +pillar +pillock +pilot +pimp +pin-up +pinchgut +pinhead +pink +pinko +pioneer +pip-squeak +piper +piranha +pirate +pisser +pistoleer +pitcher +pitchman +pitman +pivot +place-kicker +placeholder +placekicker +placeman +placeseeker +plagiariser +plagiarist +plagiarizer +plainclothesman +plainsman +plaintiff +plaiter +planet +planner +plant +planter +plantsman +plasterer +platelayer +plater +platitudinarian +play-actor +playactor +playboy +player +playfellow +playgoer +playmaker +playmate +playwright +pleader +pleaser +pleb +plebe +plebeian +pledge +pledgee +pledger +plenipotentiary +plier +plodder +plotter +ploughboy +ploughman +ploughwright +plowboy +plower +plowman +plowwright +plug-ugly +plugger +plumber +plunderer +plunger +pluralist +plutocrat +plyer +poacher +podiatrist +poet +poet-singer +poetess +poetiser +poetizer +poilu +pointillist +pointsman +poisoner +poke +pol +polack +polemic +polemicist +polemist +policeman +policewoman +policyholder +politician +politico +pollster +polluter +poltroon +polyandrist +polygamist +polyglot +polygynist +polymath +polytheist +pom +pommy +pomologist +ponce +ponderer +pontifex +pontiff +poof +pooh-bah +pooler +poop +poove +pop +pope +popinjay +populariser +popularizer +populist +pornographer +porter +portraitist +portrayer +portwatcher +poser +poseur +poseuse +positivist +posseman +possessor +possible +postdoc +poster +postgraduate +postilion +postillion +postman +postmaster +postmistress +postponer +postulant +postulator +posturer +potboy 
+potentate +pothead +potholer +pothunter +potman +potter +potterer +pouf +poulterer +poultryman +pouter +powderer +power +powerbroker +powerhouse +practician +practitioner +praetor +pragmatist +prankster +prater +prattler +prayer +pre-Socratic +pre-emptor +preacher +prebendary +precentor +preceptor +precursor +predator +predecessor +predestinarian +predestinationist +predictor +preemie +preemptor +prefect +prelate +premie +premier +prentice +presbyope +presbyter +preschooler +presenter +presentist +preservationist +preserver +president +pressman +prestidigitator +preteen +preteenager +pretender +preterist +pretor +prevaricator +prexy +prey +prick +prickteaser +priest +priest-doctor +priestess +prig +primate +primigravida +primipara +primitive +primogenitor +primus +prince +princeling +princess +principal +printer +printmaker +prior +prioress +prisoner +private +privateer +privateersman +prizefighter +pro +pro-lifer +probable +probationer +process-server +processor +proconsul +procrastinator +proctologist +proctor +procurator +procurer +procuress +prodigal +prodigy +producer +prof +professional +professor +profiteer +profligate +progenitor +progeny +prognosticator +programmer +progressive +prohibitionist +projectionist +prole +proletarian +promisee +promiser +promisor +promoter +prompter +promulgator +proofreader +propagandist +propagator +prophesier +prophet +prophetess +propman +proponent +proposer +propositus +proprietor +proprietress +prosecutor +proselyte +prospect +prospector +prosthetist +prosthodontist +prostitute +protagonist +protectionist +protector +protege +protegee +protester +protozoologist +provider +provincial +provisioner +provocateur +provoker +provost +prowler +proxy +prude +pruner +psalmist +psephologist +pseud +pseudo +pseudohermaphrodite +psychiatrist +psychic +psycho +psychoanalyst +psycholinguist +psychologist +psychoneurotic +psychopath +psychophysicist +psychotherapist +psychotic +pteridologist +publican +publiciser +publicist +publicizer 
+publisher +pudden-head +puddler +pudge +puerpera +pugilist +puke +puller +puncher +pundit +punk +punster +punter +pup +pupil +puppet +puppeteer +puppy +purchaser +purist +puritan +purser +pursued +pursuer +purveyor +pusher +pushover +pussycat +putter +putterer +putz +pygmy +pyrographer +pyromancer +pyromaniac +qadi +quack +quad +quadripara +quadriplegic +quadroon +quadruplet +quaestor +quaffer +quaker +qualifier +quarreler +quarreller +quarrier +quarry +quarryman +quarter +quarterback +quartermaster +queen +queer +querier +quester +questioner +quibbler +quidnunc +quietist +quin +quint +quintipara +quintuplet +quisling +quitter +quizmaster +quizzer +quoter +rabbi +rabble-rouser +racialist +racist +racker +racketeer +raconteur +radical +radiobiologist +radiochemist +radiographer +radiologist +radiotherapist +rafter +raftman +raftsman +ragamuffin +ragpicker +ragsorter +raider +rail-splitter +railbird +railroader +railwayman +rainmaker +raiser +raja +rajah +rake +rakehell +rambler +ramrod +rancher +ranee +ranger +rani +ranker +ranter +raper +rapist +rappeller +rapper +rapporteur +rapscallion +rascal +rat +rat-catcher +ratepayer +ratifier +ratiocinator +rationalist +ratter +raver +ravisher +reactionary +reader +realist +reaper +reasoner +rebel +rebuker +rebutter +receiver +receptionist +recidivist +recipient +recitalist +reciter +reckoner +recluse +reconciler +record-breaker +record-holder +record-keeper +recorder +recoverer +recreant +recruit +recruiter +recruiting-sergeant +rectifier +rector +recusant +red +red-header +redact +redactor +redcap +redcoat +redeemer +redhead +redheader +redneck +reeler +reenactor +ref +referee +referral +refiner +refinisher +reformer +reformist +refugee +refuter +regent +regicide +registrant +registrar +regular +regulator +reincarnation +relation +relative +relative-in-law +relief +reliever +religionist +religious +reminder +remover +remunerator +renegade +renovator +renter +rentier +rep +repairer +repairman +repatriate +repeater 
+replacement +reporter +repository +representative +reproacher +reprobate +reprover +republican +requester +rescuer +researcher +reserve +reservist +resident +resister +respecter +respondent +responder +restauranter +restaurateur +rester +restorer +restrainer +retailer +retainer +retaliator +retard +retiree +retreatant +reveler +reveller +revenant +revenuer +reverend +reversioner +reversionist +reviewer +reviser +revisionist +revivalist +revolutionary +revolutionist +rewriter +rhabdomancer +rhetorician +rheumatic +rheumatologist +rhinolaryngologist +rhymer +rhymester +ribald +rider +ridiculer +rifleman +rigger +right-hander +right-winger +righthander +rightist +ringer +ringleader +ringmaster +rioter +rip +ripper +riser +ritualist +rival +riveter +rivetter +roadman +roamer +roarer +roaster +robber +rock +rocker +rogue +roisterer +roller-skater +rollerblader +roly-poly +romantic +romanticist +romp +romper +roofer +rookie +roomer +roomie +roommate +roomy +root +rooter +rope-maker +ropedancer +ropemaker +roper +ropewalker +rosebud +rotter +roue +roughneck +roughrider +rounder +roundhead +roundsman +rouser +roustabout +router +rover +rowdy +rower +royalist +rubberneck +rubbernecker +rube +ruffian +ruiner +ruler +ruminator +rummy +rumormonger +rumourmonger +rumrunner +runaway +runner +runner-up +runt +ruralist +rusher +rustic +rustler +saboteur +sabra +sachem +sacrificer +sacristan +saddhu +saddler +sadhu +sadist +sadomasochist +safebreaker +safecracker +sagamore +sage +sahib +sailmaker +sailor +saint +salesclerk +salesgirl +saleslady +salesman +salesperson +saleswoman +salter +salutatorian +saluter +salvager +salvor +sampler +samurai +sandbagger +sandboy +sandwichman +sangoma +sannup +sannyasi +sannyasin +sanyasi +sap +saphead +sapper +sartor +satellite +satirist +satrap +satyr +saunterer +savage +savant +saver +savior +saviour +sawbones +sawyer +saxist +saxophonist +scab +scalawag +scallywag +scalper +scammer +scamp +scandalmonger +scanner +scapegoat +scapegrace 
+scaremonger +scatterbrain +scattergood +scavenger +scenarist +scene-stealer +sceneshifter +sceptic +schemer +schizophrenic +schlemiel +schlep +schlepper +schlimazel +schlockmeister +schmo +schmoozer +schmuck +schnook +schnorrer +scholar +scholastic +scholiast +schoolboy +schoolchild +schoolfellow +schoolfriend +schoolgirl +schoolma'am +schoolman +schoolmarm +schoolmaster +schoolmate +schoolmistress +schoolteacher +scientist +sciolist +scion +scoffer +scofflaw +scold +scolder +scorekeeper +scorer +scorner +scoundrel +scourer +scourge +scourger +scout +scouter +scoutmaster +scrag +scrambler +scrapper +scratch +scratcher +scrawler +screamer +screecher +screener +screenwriter +screw +screwball +screwballer +scribbler +scribe +scrimshanker +scriptwriter +scrivener +scrooge +scrounger +scrubber +scrutineer +scrutiniser +scrutinizer +sculler +scullion +sculptor +sculptress +sculpturer +seafarer +sealer +seaman +seamster +seamstress +searcher +seasonal +seasoner +secessionist +second +second-in-command +second-rater +second-stringer +seconder +secretary +sectarian +sectarist +sectary +secular +secularist +secundigravida +securer +seducer +seductress +seed +seeder +seedman +seedsman +seeker +seer +segregate +segregationist +segregator +seigneur +seignior +seismologist +seizer +selectman +selector +selectwoman +self +self-seeker +self-starter +seller +semanticist +semifinalist +seminarian +seminarist +semiotician +semipro +semiprofessional +sempstress +senator +sendee +sender +seneschal +senior +sensation +sensationalist +sensitive +sensualist +sentimentalist +sentinel +sentry +separationist +separatist +septuagenarian +serf +sergeant +sergeant-at-law +sericulturist +serjeant +serjeant-at-arms +serjeant-at-law +sermoniser +sermonizer +serologist +servant +server +serviceman +servitor +setter +settler +settlor +sewer +sexagenarian +sexist +sexpot +sexton +shadow +shadower +shaheed +shaker +sham +shaman +shammer +shamus +shanghaier +shaper +sharecropper +shareholder 
+shareowner +sharer +shark +sharper +sharpie +sharpshooter +sharpy +shaver +she-devil +shearer +shedder +sheeny +sheep +sheepherder +sheepman +shegetz +sheik +sheika +sheikh +sheikha +sheller +shelver +shepherd +shepherdess +sheriff +sherlock +shielder +shifter +shiksa +shikse +shill +shingler +ship-breaker +shipbuilder +shipmate +shipowner +shipper +shipwright +shirker +shirtlifter +shirtmaker +shit +shithead +shitter +shlemiel +shlep +shlepper +shlimazel +shlockmeister +shmo +shmuck +shnook +shnorrer +shocker +shoeblack +shoemaker +shogun +shoofly +shooter +shopaholic +shopkeeper +shoplifter +shopper +shopwalker +shortstop +shot +shouter +shoveler +shoveller +shover +show-off +shower +showgirl +showman +shrew +shrimp +shrink +shuffler +shut-in +shutterbug +shylock +shyster +sib +sibling +sibyl +sidekick +sidesman +sightreader +sightseer +signaler +signaller +signalman +signatory +signer +signior +signor +signora +signore +signorina +silly +silver-worker +silversmith +silverworker +simperer +simple +simpleton +singer +sinner +sipper +sir +sirdar +sire +siren +sirrah +sis +sissy +sister +sister-in-law +sitter +six-footer +sixth-former +skateboarder +skater +skeptic +sketcher +skidder +skier +skimmer +skin-diver +skinflint +skinhead +skinner +skinny-dipper +skipper +skirmisher +skirt +skivvy +skulker +skunk +skycap +skydiver +slacker +slammer +slanderer +slant-eye +slapper +slasher +slattern +slaughterer +slave +slaveholder +slaver +slavey +slayer +sledder +sleeper +sleepwalker +sleepyhead +sleuth +sleuthhound +slicer +slicker +slider +slinger +slip +slipper +slob +slobberer +sloganeer +slogger +slop-seller +slopseller +slouch +sloucher +sloven +slowcoach +slowpoke +slug +slugabed +sluggard +slugger +slumberer +slut +slyboots +smallholder +smarta +smasher +smiler +smirker +smith +smoker +smoothie +smoothy +smotherer +smuggler +snacker +snake +snapper +snarer +snatcher +sneak +sneaker +sneerer +sneezer +sniffer +sniffler +sniper +snitch +snitcher +sniveler +sniveller 
+snob +snoop +snooper +snoot +snorer +snorter +snot +snowboarder +snuffer +snuffler +so-and-so +soaker +sobersides +socialiser +socialist +socialite +socializer +sociobiologist +sociolinguist +sociologist +sociopath +sod +sodalist +sodbuster +sodomist +sodomite +softie +softy +sojourner +solderer +soldier +solicitor +solitary +solitudinarian +soloist +solon +solver +somebody +someone +sommelier +somnambulist +somniloquist +son +son-in-law +songster +songstress +songwriter +sonneteer +sonny +soothsayer +soph +sophist +sophisticate +sophomore +soprano +sorcerer +sorceress +sorehead +sorrower +sort +sorter +sot +soubrette +soundman +source +sourdough +sourpuss +souse +southpaw +sovereign +sower +spaceman +spacewalker +spade +spammer +spanker +sparer +spastic +speaker +spearhead +specialiser +specialist +specializer +specifier +spectator +speculator +speechifier +speechmaker +speechwriter +speedskater +spelaeologist +speleologist +spellbinder +speller +spelunker +spend-all +spender +spendthrift +spewer +sphinx +spic +spick +spik +spiller +spindlelegs +spindleshanks +spinmeister +spinner +spinster +spiritualist +spitfire +spitter +spiv +splicer +splitter +spoiler +spoilsport +spokesman +spokesperson +spokeswoman +sponge +sponger +sponsor +spook +sport +sportscaster +sportsman +sportswoman +sportswriter +spot-welder +spotter +spouse +spouter +sprawler +sprayer +sprigger +sprinter +sprog +spurner +spy +spymaster +squabbler +squanderer +square +squatter +squaw +squawker +squealer +squeeze +squint-eye +squinter +squire +squirmer +squirt +stabber +stableboy +stableman +stacker +staffer +stagehand +stager +staggerer +stainer +stakeholder +stalker +stalking-horse +stalwart +stammerer +stamper +stand-in +standard-bearer +standardiser +standardizer +standby +star +starer +starets +stargazer +starlet +starter +starveling +stater +statesman +stateswoman +stationer +stationmaster +statistician +stay-at-home +steady +stealer +steamfitter +steelmaker +steelman +steelworker 
+steeplejack +steerer +steersman +stemmer +stenographer +stentor +stepbrother +stepchild +stepdaughter +stepfather +stepmother +stepparent +stepper +stepsister +stepson +stevedore +steward +stewardess +stick-in-the-mud +stickler +stiff +stifler +stigmatic +stigmatist +stinker +stinkpot +stinter +stipendiary +stippler +stirrer +stitcher +stock-taker +stockbroker +stockholder +stockist +stockjobber +stockman +stocktaker +stoic +stoker +stomper +stonecutter +stonemason +stoner +stonewaller +stooge +stoolie +stoolpigeon +stooper +storekeeper +storyteller +stowaway +strafer +straggler +straight +stranger +strangler +straphanger +strapper +strategian +strategist +strawman +strayer +streaker +streetwalker +stretcher-bearer +strider +strikebreaker +striker +stringer +striper +stripling +stripper +striptease +stripteaser +striver +stroke +stroller +strong-armer +strongman +struggler +strumpet +stud +student +study +stuffer +stumblebum +stumbler +stunner +stupe +stupid +stutterer +styler +stylist +stylite +subaltern +subcontractor +subdeacon +subdivider +subduer +subeditor +subject +subjectivist +subjugator +sublieutenant +submariner +submitter +subnormal +subordinate +suborner +subscriber +subsidiary +subsidiser +subsidizer +subsister +substitute +subtracter +suburbanite +subversive +subverter +subvocaliser +subvocalizer +succeeder +success +successor +succorer +succourer +sucker +suckling +suer +sufferer +suffragan +suffragette +suffragist +suggester +suicide +suit +suitor +sultan +summercater +summercaters +sun +sunbather +sundowner +super +supercargo +supergrass +superintendent +superior +superman +supermarketeer +supermarketer +supermodel +supermom +supernumerary +superordinate +superstar +supervisor +supplanter +suppliant +supplicant +supplier +supporter +suppresser +suppressor +supremacist +suprematist +supremo +surety +surfboarder +surfer +surgeon +surmounter +surpriser +surrealist +surrenderer +surrogate +surveyor +survivalist +survivor +suspect +sustainer +sutler 
+swagger +swaggerer +swaggie +swagman +swain +swami +swashbuckler +swayer +swearer +sweater +sweep +sweeper +sweetheart +sweetie +swell +swellhead +swimmer +swindler +swineherd +swinger +switch-hitter +switcher +swordsman +swot +sybarite +sycophant +syllogiser +syllogist +syllogizer +sylph +symboliser +symbolist +symbolizer +sympathiser +sympathizer +symphonist +symposiarch +symposiast +syncopator +syndic +syndicalist +syndicator +synonymist +syntactician +synthesiser +synthesist +synthesizer +syphilitic +systematiser +systematist +systematizer +systemiser +systemizer +tablemate +tacker +tackle +tackler +taco +tactician +tagalong +tagger +tail +tailback +tailor +taker +talebearer +talent +taleteller +talker +tallyman +tamer +tanker +tanner +tantaliser +tantalizer +taoiseach +tapper +tapster +tar +target +tart +tartar +taskmaster +taskmistress +taste-maker +taste-tester +taster +tatterdemalion +tattler +tattletale +taxer +taxidermist +taxman +taxonomer +taxonomist +taxpayer +tchotchke +tchotchkeleh +teacher +teammate +teamster +tearaway +tease +teaser +tec +techie +technician +technocrat +technologist +technophile +technophobe +teen +teenager +teetotaler +teetotalist +teetotaller +tekki +telecaster +teleologist +telepathist +telephoner +televangelist +teller +telltale +tellurian +temp +temporary +temporiser +temporizer +tempter +temptress +tenant +tender +tenderfoot +tenno +tenor +tenorist +tentmaker +tergiversator +termagant +termer +terminator +terpsichorean +territorial +terror +terrorist +tertigravida +testate +testator +testatrix +testee +tester +testifier +thane +thatcher +thaumaturge +thaumaturgist +theatergoer +theatregoer +theist +theologian +theologiser +theologist +theologizer +theoretician +theoriser +theorist +theorizer +theosophist +therapist +thespian +thief +thinker +third-rater +thirster +thoroughbred +thought-reader +thrall +threat +throttler +throwaway +thrower +throwster +thrush +thruster +thug +thurifer +thwarter +tiddler +tier +tiger +tightwad 
+tike +tiler +tiller +tilter +timberman +timekeeper +timer +timeserver +timpanist +tinker +tinkerer +tinner +tinsmith +tinter +tipper +tippler +tipster +tiro +titan +tither +title-holder +titterer +toady +toast +toaster +toastmaster +tobacconist +tobogganist +toddler +toff +toiler +toller +tollgatherer +tollkeeper +tollman +tomboy +tomfool +tool +toolmaker +toper +topper +torchbearer +toreador +torero +tormenter +tormentor +torpedo +tort-feasor +tortfeasor +torturer +tosser +tot +totalitarian +totemist +toter +totterer +touch-typist +toucher +tough +toughie +tourer +tourist +tout +touter +tovarich +tovarisch +towhead +townee +towner +townie +townsman +towny +toxicologist +tracer +tracker +tracklayer +trader +tradesman +traditionalist +traducer +trafficker +tragedian +tragedienne +trailblazer +trailer +trainbandsman +trainbearer +trainee +trainer +trainman +trainmaster +traitor +traitress +tramp +tramper +trampler +transactor +transalpine +transcendentalist +transcriber +transexual +transfer +transferee +transferer +transferor +transferrer +transgressor +transient +translator +transmigrante +transmitter +transplanter +transsexual +transvestite +trapper +trapshooter +traveler +traveller +traverser +trawler +treasonist +treasurer +treater +trekker +trembler +trencher +trencherman +trend-setter +trespasser +tribade +tribesman +tribologist +tribune +trick +tricker +trickster +trier +trifler +triggerman +trigonometrician +trimmer +triplet +tripper +tritheist +triumvir +troglodyte +troller +trollop +trombonist +trooper +troubadour +troublemaker +troubler +troubleshooter +trouper +truant +truckler +trudger +truelove +trumpeter +trustbuster +trustee +truster +trustor +trusty +tsar +tsarina +tsaritsa +tsatske +tshatshke +tub-thumper +tubercular +tucker +tugger +tumbler +tuner +turkey +turncoat +turncock +turner +turnkey +turtler +tutee +tutor +twaddler +twat +twerp +twiddler +twin +twiner +twirler +twirp +twit +two-timer +tycoon +tyke +tympanist +type +typesetter +typist 
+typographer +tyrant +tyro +tzar +tzarina +ultraconservative +ultramontane +ump +umpire +unbeliever +uncle +underachiever +underboss +underclassman +underdog +undergrad +undergraduate +underling +underperformer +undersecretary +underseller +understudy +undertaker +underwriter +undesirable +undoer +unfastener +unfortunate +unicyclist +unilateralist +unionist +unknown +unperson +unraveler +unraveller +untier +untouchable +upbraider +upholder +upholsterer +uprooter +upsetter +upstager +upstart +uranologist +urchin +urinator +urologist +user +usher +usherette +usufructuary +usurer +usurper +utiliser +utilitarian +utilizer +utterer +ux. +uxor +uxoricide +vacationer +vacationist +vaccinator +vaccinee +vacillator +vagabond +vagrant +valedictorian +valentine +valet +valetudinarian +valuator +valuer +vamp +vamper +vandal +vanisher +vanquisher +vaquero +varlet +varmint +varnisher +vassal +vaticinator +vaudevillian +vaulter +vaunter +vegan +vegetarian +vendee +vender +vendor +venerator +venter +ventriloquist +venturer +verbaliser +verbalizer +verger +verifier +vermin +versifier +vestal +vestryman +vestrywoman +vet +veteran +veterinarian +veterinary +vexer +vibist +vibraphonist +vicar +vicar-general +vice-regent +vicegerent +vicereine +viceroy +victim +victimiser +victimizer +victor +victualer +victualler +viewer +vigilante +vilifier +villager +villain +villainess +villein +vindicator +vintager +vintner +violator +violinist +violist +violoncellist +virago +virgin +virologist +virtuoso +viscount +viscountess +visionary +visitant +visitor +visualiser +visualizer +vitaliser +vitalist +vitalizer +viticulturist +vivisectionist +vixen +vizier +vocaliser +vocalist +vocalizer +vociferator +voice +voicer +voider +voluntary +volunteer +voluptuary +vomiter +votary +voter +vouchee +voucher +vower +voyager +voyeur +vulcaniser +vulcanizer +vulgarian +vulgariser +vulgarizer +vulture +wacko +waddler +waffler +wag +wagerer +waggoner +waggonwright +wagoner +wagonwright +waif +wailer +wainwright 
+waiter +waitress +waker +walk-in +walk-on +walker +wall-paperer +wallah +wallflower +walloper +wallpaperer +wally +waltzer +wanderer +wangler +wanker +wannabe +wannabee +wanter +wanton +warbler +ward +ward-heeler +warden +warder +wardress +warehouseman +warehouser +warhorse +warlord +warmonger +warner +warrantee +warranter +warrantor +warrener +warrior +washer +washerman +washerwoman +washout +washwoman +wassailer +waster +wastrel +watch +watchdog +watcher +watchmaker +watchman +watercolorist +watercolourist +waterer +waterman +waver +waverer +wax-chandler +wayfarer +weakling +wearer +weasel +weatherman +weaver +webmaster +weeder +weekender +weeper +weigher +weightlifter +weirdie +weirdo +weirdy +weisenheimer +welcher +welcomer +welder +well-wisher +welsher +welterweight +wench +wencher +westerner +wet-nurse +wetback +wetnurse +wetter +whacko +whale +whaler +wheedler +wheeler +wheelwright +whiffer +whiner +whip +whipper +whipper-in +whippersnapper +whirler +whisperer +whistle-blower +whistleblower +whistler +whiteface +whitey +whittler +whiz +whiz-kid +whizz +whizz-kid +wholesaler +whore +whoremaster +whoremonger +whoreson +wicket-keeper +widow +widower +widowman +wife +wiggler +wight +wigmaker +wildcat +wildcatter +wimp +windbag +winder +windtalker +winemaker +wing +wingback +winger +wingman +winker +winner +wino +wiper +wire-puller +wireman +wirer +wiretapper +wiseacre +wisenheimer +wisp +wit +witch +witch-hunter +withdrawer +withholder +withstander +witness +witnesser +wittol +wiz +wizard +wog +wolf +woman +womaniser +womanizer +wonderer +wonk +woodcarver +woodcutter +woodman +woodsman +woodworker +wooer +woolgatherer +woolsorter +wop +word-painter +wordmonger +wordsmith +workaholic +worker +workfellow +workingman +workman +workmate +world-beater +worldling +worm +worrier +worrywart +worshiper +worshipper +worthy +wrangler +wrecker +wrester +wrestler +wretch +wriggler +wright +write-in +writer +wrongdoer +wuss +xylophonist +yachtsman +yachtswoman +yahoo +yakuza 
+yanker +yardbird +yardie +yardman +yardmaster +yawner +yearling +yearner +yeller +yenta +yeoman +yes-man +yid +yielder +yob +yobbo +yobo +yodeller +yogi +yokel +youngster +younker +youth +yuppie +zany +zealot +zombi +zombie +zoologist diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/en/language_specific_rules.py b/transformations/gender_randomizer/coreferee/coreferee/lang/en/language_specific_rules.py new file mode 100644 index 000000000..ec71bf18e --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/en/language_specific_rules.py @@ -0,0 +1,470 @@ +# Copyright 2021 msg systems ag + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from typing import Optional

from spacy.tokens import Token

from ...data_model import Mention
from ...rules import RulesAnalyzer


class LanguageSpecificRulesAnalyzer(RulesAnalyzer):
    """English-specific rules used when resolving pronoun coreference.

    The class supplies language constants and the predicates that decide
    whether a token can be an anaphor and whether an anaphor / antecedent
    pair is plausible.  Helper methods and word lists that are referenced
    through ``self`` but not defined here (``has_morph``, ``noun_pos``,
    ``person_words``, ``avalent_verbs``, ...) are presumably provided by
    ``RulesAnalyzer`` -- confirm against the base class.
    """

    # NOTE(review): not referenced in this class; presumably consumed by the
    # base class.
    random_word = "treacle"

    # Lemmas that mark an 'or' coordination.  A tuple (not a bare string) so
    # that ``lemma in or_lemmas`` is an exact match rather than a substring
    # test ("o" in "or" would otherwise be True).
    or_lemmas = ("or",)

    # NOTE(review): not referenced in this class; looks like a mapping from
    # spaCy entity labels to generic nouns -- confirm against the base class.
    entity_noun_dictionary = {
        "PERSON": ["person", "individual", "man", "woman"],
        "NORP": ["nation", "people", "ethnicity"],
        "FAC": ["building"],
        "ORG": ["company", "firm", "organisation"],
        "GPE": ["country", "state", "city", "town"],
        "LOC": ["place"],
        "LAW": ["law"],
        "LANGUAGE": ["language", "tongue"],
    }

    # Pairs of (opening, closing) quotation marks.
    quote_tuples = [("'", "'"), ('"', '"'), ("“", "”"), ("‘", "’")]

    # Dependency labels of coordinated siblings.  A tuple for exact matching
    # (an empty ``dep_`` would otherwise match the string "conj").
    dependent_sibling_deps = ("conj",)

    # Dependency labels that are traversed, but not collected, when looking
    # for coordinated siblings.
    conjunction_deps = ("appos", "cc", "punct")

    adverbial_clause_deps = ("advcl", "acl")

    # A tuple for exact matching, consistent with the attributes above.
    term_operator_pos = ("DET",)

    clause_root_pos = ("VERB", "AUX")

    def get_dependent_siblings(self, token: Token) -> list:
        """Return the coordinated siblings that depend on *token*, sorted.

        The dependency tree below *token* is walked through children whose
        dependency label is in ``dependent_sibling_deps`` or
        ``conjunction_deps``; every visited token whose own label is in
        ``dependent_sibling_deps`` is collected.  As a side effect,
        ``token._.coref_chains.temp_has_or_coordination`` is set to ``True``
        when a visited token's lemma is in ``or_lemmas``.

        An empty list is returned when *token* itself carries a conjunction
        or sibling dependency label.
        """

        def add_siblings_recursively(
            recursed_token: Token, visited_set: set
        ) -> set:
            visited_set.add(recursed_token)
            siblings_set = set()
            if recursed_token.lemma_ in self.or_lemmas:
                token._.coref_chains.temp_has_or_coordination = True
            if recursed_token.dep_ in self.dependent_sibling_deps:
                siblings_set.add(recursed_token)
            for child in recursed_token.children:
                # ``visited_set`` grows during recursion, so the membership
                # test has to happen lazily, once per iteration.
                if child in visited_set:
                    continue
                if (
                    child.dep_ in self.dependent_sibling_deps
                    or child.dep_ in self.conjunction_deps
                ):
                    siblings_set |= add_siblings_recursively(
                        child, visited_set
                    )
            return siblings_set

        if (
            token.dep_ not in self.conjunction_deps
            and token.dep_ not in self.dependent_sibling_deps
        ):
            siblings_set = add_siblings_recursively(token, set())
        else:
            siblings_set = set()
        return sorted(siblings_set)

    def is_independent_noun(self, token: Token) -> bool:
        """Return ``True`` if *token* can act as a noun-like antecedent.

        The token must be a noun outside a compound / ``npadvmod`` relation,
        a cardinal number not used as ``nummod``, a ``DT`` not used as
        ``det``, or a pronoun tagged ``NN``; and it must not be part of one
        of ``self.blacklisted_phrases``.
        """
        if not (
            (
                token.pos_ in self.noun_pos
                and token.dep_ not in ("compound", "npadvmod")
            )
            or (token.tag_ == "CD" and token.dep_ != "nummod")
            or (token.tag_ == "DT" and token.dep_ != "det")
            or (token.pos_ == "PRON" and token.tag_ == "NN")
        ):
            return False
        return not self.is_token_in_one_of_phrases(
            token, self.blacklisted_phrases
        )

    def is_potential_anaphor(self, token: Token) -> bool:
        """Potentially externally referring tokens in English are third-person
        pronouns. Instances of 'it' have to be investigated further to find
        out if they are pleonastic."""
        # Is *token* a third-person pronoun?
        if token.tag_ not in ("PRP", "PRP$") or not self.has_morph(
            token, "Person", "3"
        ):
            return False
        if token.text.lower() != "it":
            return True

        # We have 'it' and have to find out if it is pleonastic...

        # Pleonastic it is out of the question in a conjunction environment
        if (
            len(token._.coref_chains.temp_dependent_siblings) > 0
            or token._.coref_chains.temp_governing_sibling is not None
        ):
            return True

        # e.g. '*It* is important that he has done it'
        for child in token.children:
            if child.dep_ != "amod":
                continue
            if all(
                grandchild.dep_ in ("acomp", "xcomp")
                for grandchild in child.children
            ):
                return False

        # e.g. '*It* seems to be believed that he has done it'
        if (
            token.dep_ != "ROOT"
            and (
                token.head.lemma_ in ("be", "seem")
                or token.dep_ == "nsubjpass"
            )
            and any(
                child.dep_ in ("advcl", "ccomp", "xcomp")
                for child in token.head.children
            )
        ):
            return False

        # e.g. 'This makes *it* unlikely that he has done it'
        if (
            token.dep_ != "ROOT"
            and token.i > 0
            and token.doc[token.i - 1].lemma_ == "make"
            and token.head.dep_ == "ccomp"
        ):
            return False

        # e.g. '*It* is in everyone's interest that attempting it should
        # succeed'
        if token.dep_ in ("nsubj", "nsubjpass") and token.head.lemma_ == "be":
            for child in token.head.children:
                if any(
                    grandchild.dep_ == "relcl"
                    for grandchild in child.children
                ):
                    return False

        # Avalent verbs, e.g. '*it* is snowing'
        if (
            token.dep_ != self.root_dep
            and token.head.pos_ == "VERB"
            and any(
                descendant.lemma_ in self.avalent_verbs
                for descendant in token.head.subtree
            )
        ):
            return False
        return True

    def is_potential_anaphoric_pair(
        self, referred: Mention, referring: Token, directly: bool
    ) -> int:
        """Rate whether *referring* may refer back to *referred*.

        Returns ``0`` when one of the rules below excludes the pair, ``1``
        when the pair is possible but a rule marked it as uncertain, and
        ``2`` otherwise.  The reflexive checks at the end are only applied
        when *directly* is true.
        """

        doc = referring.doc
        referred_root = doc[referred.root_index]
        uncertain = False

        # e.g. 'the men and the women' ... 'they': 'they' cannot refer only to
        # 'the men' or 'the women'
        if (
            len(referred.token_indexes) == 1
            and self.has_morph(referring, "Number", "Plur")
            and self.is_involved_in_non_or_conjunction(referred_root)
        ):
            return 0

        # Two pronouns without coordination and differing number or gender
        if (
            len(referred.token_indexes) == 1
            and self.is_potential_anaphor(referred_root)
            and (
                referred_root.morph.get("Number")
                != referring.morph.get("Number")
                or referred_root.morph.get("Gender")
                != referring.morph.get("Gender")
            )
        ):
            return 0

        # Singular anaphor, plural referent
        if self.has_morph(referring, "Number", "Sing") and (
            len(referred.token_indexes) > 1
            or self.has_morph(referred_root, "Number", "Plur")
        ):
            return 0

        if not self.is_potential_anaphor(referred_root):
            # antecedent is a noun

            referred_lemma = referred_root.lemma_

            # 'they' referring to singular non-person noun
            if (
                self.has_morph(referring, "Number", "Plur")
                and len(referred.token_indexes) == 1
                and self.has_morph(referred_root, "Number", "Sing")
            ):
                if referred_lemma not in self.person_words:
                    if (
                        referred_root.tag_ != "NNP"
                        and referred_root.ent_type_ != "PERSON"
                    ):
                        return 0
                    else:
                        # named people who choose to refer to themselves
                        # with 'they'
                        uncertain = True
                if (
                    referred_lemma in self.exclusively_male_words
                    or referred_lemma in self.exclusively_female_words
                ):
                    uncertain = True

            # 'he' or 'she' referring to non-person, non-animal noun
            if (
                (
                    self.has_morph(referring, "Gender", "Masc")
                    or self.has_morph(referring, "Gender", "Fem")
                )
                and referred_lemma not in self.exclusively_person_words
                and referred_lemma not in self.animal_words
                and referred_lemma not in self.male_names
                and referred_lemma not in self.female_names
                and referred_root.ent_type_ != "PERSON"
            ):
                if (
                    referred_root.tag_ != "NNP"
                    and referred_lemma not in self.person_words
                ):
                    return 0
                else:
                    uncertain = True

            # 'it' referring to person noun or entity
            if self.has_morph(referring, "Gender", "Neut") and (
                referred_lemma in self.exclusively_person_words
                or referred_root.ent_type_ == "PERSON"
            ):
                return 0

            # 'it' referring to plural proper name
            if (
                self.has_morph(referring, "Gender", "Neut")
                and referred_root.tag_ == "NNPS"
            ):
                uncertain = True

            # 'he' referring to female noun
            if (
                self.has_morph(referring, "Gender", "Masc")
                and referred_lemma in self.exclusively_female_words
                and referred_lemma not in self.animal_words
            ):
                return 0

            # 'she' referring to male noun
            if (
                self.has_morph(referring, "Gender", "Fem")
                and referred_lemma in self.exclusively_male_words
                and referred_lemma not in self.animal_words
            ):
                return 0

            # 'it' referring to name
            if (
                self.has_morph(referring, "Gender", "Neut")
                and referred_root.tag_ == "NNP"
                and (
                    referred_lemma in self.male_names
                    or referred_lemma in self.female_names
                )
            ):
                return 0

            # 'he' referring to female name
            if (
                self.has_morph(referring, "Gender", "Masc")
                and referred_root.tag_ == "NNP"
                and self.has_list_member_in_propn_subtree(
                    doc[referred.root_index], self.exclusively_female_names
                )
            ):
                uncertain = True

            # 'she' referring to male name
            if (
                self.has_morph(referring, "Gender", "Fem")
                and referred_root.tag_ == "NNP"
                and self.has_list_member_in_propn_subtree(
                    doc[referred.root_index], self.exclusively_male_names
                )
            ):
                uncertain = True

        if directly:
            # A non-reflexive pronoun in a reflexive position cannot corefer
            if (
                self.is_potential_reflexive_pair(referred, referring)
                and self.is_reflexive_anaphor(referring) == 0
            ):
                return 0

            # A reflexive pronoun outside a reflexive position is doubtful
            if (
                not uncertain
                and not self.is_potential_reflexive_pair(referred, referring)
                and self.is_reflexive_anaphor(referring) == 1
            ):
                uncertain = True

        return 1 if uncertain else 2

    def is_potentially_indefinite(self, token: Token) -> bool:
        """Return ``True`` if *token* has an indefinite determiner child
        ('a', 'an', 'some' or 'another')."""
        return any(
            child.tag_ == "DT"
            and child.dep_ == "det"
            and child.lemma_ in ("a", "an", "some", "another")
            for child in token.children
        )

    def is_potentially_definite(self, token: Token) -> bool:
        """Return ``True`` if *token* has a definite determiner child
        ('that', 'the', 'these', 'this' or 'those')."""
        return any(
            child.tag_ == "DT"
            and child.dep_ == "det"
            and child.lemma_ in ("that", "the", "these", "this", "those")
            for child in token.children
        )

    def is_reflexive_anaphor(self, token: Token) -> int:
        """Return ``1`` if *token* carries the morph ``Reflex=Yes``, else
        ``0``."""
        return 1 if self.has_morph(token, "Reflex", "Yes") else 0

    @staticmethod
    def get_ancestor_spanning_any_preposition(
        token: Token,
    ) -> Optional[Token]:
        """Return the head of *token*, skipping over a governing preposition.

        For a ``pobj`` token the head of the preposition is returned instead
        of the preposition itself.  ``None`` is returned when *token* (or,
        for ``pobj``, its preposition) is the root of the parse.
        """
        if token.dep_ == "ROOT":
            return None
        head = token.head
        if token.dep_ == "pobj":
            if head.dep_ == "ROOT":
                return None
            head = head.head
        return head

    def is_potential_reflexive_pair(
        self, referred: Mention, referring: Token
    ) -> bool:
        """Return ``True`` if *referring* stands in a position where it
        would have to be reflexive in order to refer to *referred*.

        Coordinated tokens are first replaced by their governing sibling.
        When the antecedent is a syntactic subject the verb ancestors of the
        pronoun are searched for one that has the antecedent as a child;
        otherwise both tokens must share the same (preposition-skipping)
        ancestor, or the pronoun's ancestor must belong to the mention.
        """

        if (
            referred.root_index > referring.i
        ):  # reflexives must follow their referents in English
            return False

        referred_root = referring.doc[referred.root_index]
        syntactic_subject_dep = ("nsubj", "nsubjpass")

        if referred_root._.coref_chains.temp_governing_sibling is not None:
            referred_root = referred_root._.coref_chains.temp_governing_sibling

        if referring._.coref_chains.temp_governing_sibling is not None:
            referring = referring._.coref_chains.temp_governing_sibling

        if referring.tag_ != "PRP":  # e.g. 'his' rather than 'him' 'himself'
            return False

        if referred_root.dep_ in syntactic_subject_dep:
            for referring_ancestor in referring.ancestors:
                # Loop up through the verb ancestors of the pronoun

                # Other dependencies imply clause types where reflexivity is
                # no longer possible
                if referring_ancestor.pos_ in (
                    "VERB",
                    "AUX",
                ) and referring_ancestor.dep_ not in (
                    "ROOT",
                    "xcomp",
                    "pcomp",
                    "ccomp",
                    "conj",
                    "advcl",
                    "acl",
                ):
                    return False

                if referred_root in referring_ancestor.children:
                    return True

                # The ancestor has its own subject, so stop here
                if any(
                    t.dep_ in syntactic_subject_dep and t != referred_root
                    for t in referring_ancestor.children
                ):
                    return False
            return False
        else:
            referring_ancestor = self.get_ancestor_spanning_any_preposition(
                referring
            )
            referred_ancestor = self.get_ancestor_spanning_any_preposition(
                referred_root
            )
            return referring_ancestor is not None and (
                referring_ancestor == referred_ancestor
                or referring_ancestor.i in referred.token_indexes
            )
3.0.0 diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/COPYING b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/COPYING new file mode 100644 index 000000000..e88ea03ed --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/COPYING @@ -0,0 +1,53 @@ +THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. + +BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. + +1. Definitions + +"Adaptation" means a work based upon the Work, or upon the Work and other pre-existing works, such as a translation, adaptation, derivative work, arrangement of music or other alterations of a literary or artistic work, or phonogram or performance and includes cinematographic adaptations or any other form in which the Work may be recast, transformed, or adapted including in any form recognizably derived from the original, except that a work that constitutes a Collection will not be considered an Adaptation for the purpose of this License. For the avoidance of doubt, where the Work is a musical work, performance or phonogram, the synchronization of the Work in timed-relation with a moving image ("synching") will be considered an Adaptation for the purpose of this License. 
+"Collection" means a collection of literary or artistic works, such as encyclopedias and anthologies, or performances, phonograms or broadcasts, or other works or subject matter other than works listed in Section 1(f) below, which, by reason of the selection and arrangement of their contents, constitute intellectual creations, in which the Work is included in its entirety in unmodified form along with one or more other contributions, each constituting separate and independent works in themselves, which together are assembled into a collective whole. A work that constitutes a Collection will not be considered an Adaptation (as defined above) for the purposes of this License. +"Distribute" means to make available to the public the original and copies of the Work or Adaptation, as appropriate, through sale or other transfer of ownership. +"Licensor" means the individual, individuals, entity or entities that offer(s) the Work under the terms of this License. +"Original Author" means, in the case of a literary or artistic work, the individual, individuals, entity or entities who created the Work or if no individual or entity can be identified, the publisher; and in addition (i) in the case of a performance the actors, singers, musicians, dancers, and other persons who act, sing, deliver, declaim, play in, interpret or otherwise perform literary or artistic works or expressions of folklore; (ii) in the case of a phonogram the producer being the person or legal entity who first fixes the sounds of a performance or other sounds; and, (iii) in the case of broadcasts, the organization that transmits the broadcast. 
+"Work" means the literary and/or artistic work offered under the terms of this License including without limitation any production in the literary, scientific and artistic domain, whatever may be the mode or form of its expression including digital form, such as a book, pamphlet and other writing; a lecture, address, sermon or other work of the same nature; a dramatic or dramatico-musical work; a choreographic work or entertainment in dumb show; a musical composition with or without words; a cinematographic work to which are assimilated works expressed by a process analogous to cinematography; a work of drawing, painting, architecture, sculpture, engraving or lithography; a photographic work to which are assimilated works expressed by a process analogous to photography; a work of applied art; an illustration, map, plan, sketch or three-dimensional work relative to geography, topography, architecture or science; a performance; a broadcast; a phonogram; a compilation of data to the extent it is protected as a copyrightable work; or a work performed by a variety or circus performer to the extent it is not otherwise considered a literary or artistic work. +"You" means an individual or entity exercising rights under this License who has not previously violated the terms of this License with respect to the Work, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation. 
+"Publicly Perform" means to perform public recitations of the Work and to communicate to the public those public recitations, by any means or process, including by wire or wireless means or public digital performances; to make available to the public Works in such a way that members of the public may access these Works from a place and at a place individually chosen by them; to perform the Work to the public by any means or process and the communication to the public of the performances of the Work, including by public digital performance; to broadcast and rebroadcast the Work by any means including signs, sounds or images. +"Reproduce" means to make copies of the Work by any means including without limitation by sound or visual recordings and the right of fixation and reproducing fixations of the Work, including storage of a protected performance or phonogram in digital form or other electronic medium. +2. Fair Dealing Rights. Nothing in this License is intended to reduce, limit, or restrict any uses free from copyright or rights arising from limitations or exceptions that are provided for in connection with the copyright protection under copyright law or other applicable laws. + +3. License Grant. Subject to the terms and conditions of this License, Licensor hereby grants You a worldwide, royalty-free, non-exclusive, perpetual (for the duration of the applicable copyright) license to exercise the rights in the Work as stated below: + +to Reproduce the Work, to incorporate the Work into one or more Collections, and to Reproduce the Work as incorporated in the Collections; +to create and Reproduce Adaptations provided that any such Adaptation, including any translation in any medium, takes reasonable steps to clearly label, demarcate or otherwise identify that changes were made to the original Work. 
For example, a translation could be marked "The original work was translated from English to Spanish," or a modification could indicate "The original work has been modified."; +to Distribute and Publicly Perform the Work including as incorporated in Collections; and, +to Distribute and Publicly Perform Adaptations. +For the avoidance of doubt: + +Non-waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme cannot be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; +Waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme can be waived, the Licensor waives the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; and, +Voluntary License Schemes. The Licensor waives the right to collect royalties, whether individually or, in the event that the Licensor is a member of a collecting society that administers voluntary licensing schemes, via that society, from any exercise by You of the rights granted under this License. +The above rights may be exercised in all media and formats whether now known or hereafter devised. The above rights include the right to make such modifications as are technically necessary to exercise the rights in other media and formats. Subject to Section 8(f), all rights not expressly granted by Licensor are hereby reserved. + +4. Restrictions. The license granted in Section 3 above is expressly made subject to and limited by the following restrictions: + +You may Distribute or Publicly Perform the Work only under the terms of this License. You must include a copy of, or the Uniform Resource Identifier (URI) for, this License with every copy of the Work You Distribute or Publicly Perform. 
You may not offer or impose any terms on the Work that restrict the terms of this License or the ability of the recipient of the Work to exercise the rights granted to that recipient under the terms of the License. You may not sublicense the Work. You must keep intact all notices that refer to this License and to the disclaimer of warranties with every copy of the Work You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Work, You may not impose any effective technological measures on the Work that restrict the ability of a recipient of the Work from You to exercise the rights granted to that recipient under the terms of the License. This Section 4(a) applies to the Work as incorporated in a Collection, but this does not require the Collection apart from the Work itself to be made subject to the terms of this License. If You create a Collection, upon notice from any Licensor You must, to the extent practicable, remove from the Collection any credit as required by Section 4(b), as requested. If You create an Adaptation, upon notice from any Licensor You must, to the extent practicable, remove from the Adaptation any credit as required by Section 4(b), as requested. 
+If You Distribute, or Publicly Perform the Work or any Adaptations or Collections, You must, unless a request has been made pursuant to Section 4(a), keep intact all copyright notices for the Work and provide, reasonable to the medium or means You are utilizing: (i) the name of the Original Author (or pseudonym, if applicable) if supplied, and/or if the Original Author and/or Licensor designate another party or parties (e.g., a sponsor institute, publishing entity, journal) for attribution ("Attribution Parties") in Licensor's copyright notice, terms of service or by other reasonable means, the name of such party or parties; (ii) the title of the Work if supplied; (iii) to the extent reasonably practicable, the URI, if any, that Licensor specifies to be associated with the Work, unless such URI does not refer to the copyright notice or licensing information for the Work; and (iv) , consistent with Section 3(b), in the case of an Adaptation, a credit identifying the use of the Work in the Adaptation (e.g., "French translation of the Work by Original Author," or "Screenplay based on original Work by Original Author"). The credit required by this Section 4 (b) may be implemented in any reasonable manner; provided, however, that in the case of a Adaptation or Collection, at a minimum such credit will appear, if a credit for all contributing authors of the Adaptation or Collection appears, then as part of these credits and in a manner at least as prominent as the credits for the other contributing authors. 
For the avoidance of doubt, You may only use the credit required by this Section for the purpose of attribution in the manner set out above and, by exercising Your rights under this License, You may not implicitly or explicitly assert or imply any connection with, sponsorship or endorsement by the Original Author, Licensor and/or Attribution Parties, as appropriate, of You or Your use of the Work, without the separate, express prior written permission of the Original Author, Licensor and/or Attribution Parties. +Except as otherwise agreed in writing by the Licensor or as may be otherwise permitted by applicable law, if You Reproduce, Distribute or Publicly Perform the Work either by itself or as part of any Adaptations or Collections, You must not distort, mutilate, modify or take other derogatory action in relation to the Work which would be prejudicial to the Original Author's honor or reputation. Licensor agrees that in those jurisdictions (e.g. Japan), in which any exercise of the right granted in Section 3(b) of this License (the right to make Adaptations) would be deemed to be a distortion, mutilation, modification or other derogatory action prejudicial to the Original Author's honor and reputation, the Licensor will waive or not assert, as appropriate, this Section, to the fullest extent permitted by the applicable national law, to enable You to reasonably exercise Your right under Section 3(b) of this License (right to make Adaptations) but not otherwise. +5. Representations, Warranties and Disclaimer + +UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING, LICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE. 
SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED WARRANTIES, SO SUCH EXCLUSION MAY NOT APPLY TO YOU. + +6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. Termination + +This License and the rights granted hereunder will terminate automatically upon any breach by You of the terms of this License. Individuals or entities who have received Adaptations or Collections from You under this License, however, will not have their licenses terminated provided such individuals or entities remain in full compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any termination of this License. +Subject to the above terms and conditions, the license granted here is perpetual (for the duration of the applicable copyright in the Work). Notwithstanding the above, Licensor reserves the right to release the Work under different license terms or to stop distributing the Work at any time; provided, however that any such election will not serve to withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above. +8. Miscellaneous + +Each time You Distribute or Publicly Perform the Work or a Collection, the Licensor offers to the recipient a license to the Work on the same terms and conditions as the license granted to You under this License. +Each time You Distribute or Publicly Perform an Adaptation, Licensor offers to the recipient a license to the original Work on the same terms and conditions as the license granted to You under this License. 
+If any provision of this License is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this License, and without further action by the parties to this agreement, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. +No term or provision of this License shall be deemed waived and no breach consented to unless such waiver or consent shall be in writing and signed by the party to be charged with such waiver or consent. +This License constitutes the entire agreement between the parties with respect to the Work licensed here. There are no understandings, agreements or representations with respect to the Work not specified here. Licensor shall not be bound by any additional provisions that may appear in any communication from You. This License may not be modified without the mutual written agreement of the Licensor and You. +The rights granted under, and the subject matter referenced, in this License were drafted utilizing the terminology of the Berne Convention for the Protection of Literary and Artistic Works (as amended on September 28, 1979), the Rome Convention of 1961, the WIPO Copyright Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 and the Universal Copyright Convention (as revised on July 24, 1971). These rights and subject matter take effect in the relevant jurisdiction in which the License terms are sought to be enforced according to the corresponding provisions of the implementation of those treaty provisions in the applicable national law. If the standard suite of rights granted under applicable copyright law includes additional rights not granted under this License, such additional rights are deemed to be included in the License; this License is not intended to restrict the license of any rights under applicable law. 
diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/LICENSE b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/LICENSE new file mode 100644 index 000000000..3fc656955 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/LICENSE @@ -0,0 +1 @@ +1) Training took place using the Polish Coreference Corpus (http://zil.ipipan.waw.pl/PolishCoreferenceCorpus) offered under the Creative Commons Attribution 3.0 Unported License (https://creativecommons.org/licenses/by/3.0/deed.en_US; reproduced in the COPYING file within this directory). diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/__init__.py b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/blacklisted_phrases.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/blacklisted_phrases.dat new file mode 100644 index 000000000..4ef375556 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/blacklisted_phrases.dat @@ -0,0 +1,6 @@ +na przykład +w zasadzie +z resztą +rzecz biorąc +na prawdę +moim zdaniem diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/verbs_with_personal_subject.dat b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/verbs_with_personal_subject.dat new file mode 100644 index 000000000..2f020da00 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/data/verbs_with_personal_subject.dat @@ -0,0 +1,10 @@ +myśleć +sądzić +mówić +powiedzieć +odzywać +przemawiać +wierzyć +wiedzieć +znać +twierdzić diff --git a/transformations/gender_randomizer/coreferee/coreferee/lang/pl/language_specific_rules.py b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/language_specific_rules.py new file mode 100644 index 
000000000..6e9cef137 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/lang/pl/language_specific_rules.py @@ -0,0 +1,731 @@ +# Copyright 2021 msg systems ag + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from string import punctuation + +from spacy.tokens import Token + +from ...data_model import Mention +from ...rules import RulesAnalyzer + + +class LanguageSpecificRulesAnalyzer(RulesAnalyzer): + + random_word = "szczęście" + + or_lemmas = ("albo", "lub") + + dependent_sibling_deps = "conj" + + conjunction_deps = ("cc", "punct") + + adverbial_clause_deps = ("ccomp", "dep", "advcl", "cop") + + entity_noun_dictionary = { + "persName": ["człowiek", "osoba", "mężczyzna", "kobieta"], + "placeName": ["miejsce", "miasto", "państwo", "kraj"], + "orgName": [ + "firma", + "przedsiębiorstwo", + "organizacja", + "zespół", + "przedsięwzięcie", + ], + } + + quote_tuples = [ + ("'", "'"), + ('"', '"'), + ("„", "“"), + ("‚", "‘"), + ("«", "»"), + ("»", "«"), + ] + + term_operator_pos = ("DET", "ADP") + + clause_root_pos = ("VERB", "AUX", "ADJ") + + @staticmethod + def is_reflexive_possessive_pronoun(token: Token) -> bool: + return ( + token.pos_ == "DET" + and token.tag_ == "ADJ" + and token.lemma_[:4] in ("swój", "swoj", "swoi") + ) + + def get_dependent_siblings(self, token: Token) -> list: + + # As well as the standard conjunction found in other languages we also capture + # comitative phrases where coordination is expressed using the preposition 'z' and + # a
noun in the instrumental case. + def add_siblings_recursively( + recursed_token: Token, visited_set: set + ) -> None: + visited_set.add(recursed_token) + siblings_set = set() + if recursed_token.lemma_ in self.or_lemmas: + token._.coref_chains.temp_has_or_coordination = True + if ( + token != recursed_token + and token.pos_ in ("VERB", "AUX") + and self.is_potential_anaphor(token) + and recursed_token.pos_ in ("VERB", "AUX") + and self.is_potential_anaphor(recursed_token) + ): + # we treat two verb anaphors as having or coordination because two + # singular anaphors do not give rise to a plural phrase + token._.coref_chains.temp_has_or_coordination = True + if ( + recursed_token.dep_ in self.dependent_sibling_deps + or self.has_morph(recursed_token, "Case", "Ins") + ) and recursed_token != token: + siblings_set.add(recursed_token) + for child in ( + child + for child in recursed_token.children + if child not in visited_set + and ( + child.dep_ in self.dependent_sibling_deps + or child.dep_ in self.conjunction_deps + ) + ): + child_siblings_set = add_siblings_recursively( + child, visited_set + ) + siblings_set |= child_siblings_set + for child in ( + child + for child in recursed_token.children + if recursed_token.pos_ in self.noun_pos + and child.pos_ in self.noun_pos + and self.has_morph(child, "Case", "Ins") + and len( + [ + 1 + for c in child.children + if c.dep_ == "case" and c.lemma_ == "z" + ] + ) + > 0 + and child not in visited_set + ): + child_siblings_set = add_siblings_recursively( + child, visited_set + ) + siblings_set |= child_siblings_set + for child in ( + child + for child in recursed_token.children + if recursed_token.pos_ in ("VERB", "AUX") + and self.is_potential_anaphor(recursed_token) + and child.pos_ in self.noun_pos + and self.has_morph(child, "Case", "Ins") + and len( + [ + 1 + for c in child.children + if c.dep_ == "case" + and c.lemma_ == "z" + and c.i - 1 == recursed_token.i + ] + ) + > 0 + and child not in visited_set + ): + 
child_siblings_set = add_siblings_recursively( + child, visited_set + ) + siblings_set |= child_siblings_set + if recursed_token.dep_ != self.root_dep: + # the 'z' sibling and this word are contiguous children of the same parent + for child in ( + child + for child in recursed_token.head.children + if recursed_token.pos_ in self.noun_pos + and child.pos_ in self.noun_pos + and self.has_morph(child, "Case", "Ins") + and len( + [ + 1 + for c in child.children + if c.dep_ == "case" + and c.lemma_ == "z" + and c.i - 1 == recursed_token.i + ] + ) + > 0 + and child not in visited_set + ): + child_siblings_set = add_siblings_recursively( + child, visited_set + ) + siblings_set |= child_siblings_set + return siblings_set + + if ( + token.dep_ not in self.conjunction_deps + and token.dep_ not in self.dependent_sibling_deps + ): + siblings_set = add_siblings_recursively(token, set()) + else: + siblings_set = set() + return sorted(siblings_set) + + def is_independent_noun(self, token: Token) -> bool: + if not token.pos_ in self.noun_pos or token.text in punctuation: + return False + return not self.is_token_in_one_of_phrases( + token, self.blacklisted_phrases + ) + + def is_potential_anaphor(self, token: Token) -> bool: + # third-person pronoun + if token.tag_ in ("PPRON3", "SIEBIE"): + return True + + # reflexive third-person possessive pronoun + if self.is_reflexive_possessive_pronoun(token): + return True + + # finite verb without subject (zero anaphora) + if ( + token.pos_ in ("VERB", "AUX") + and token.tag_ in ("FIN", "PRAET", "BEDZIE") + and len( + [ + child + for child in token.children + if child.dep_.startswith("nsubj") + ] + ) + == 0 + and not self.has_morph(token, "Person", "1") + and not self.has_morph(token, "Person", "2") + ): + + if ( + token._.coref_chains.temp_governing_sibling is not None + and len( + [ + child + for child in token._.coref_chains.temp_governing_sibling.children + if child.dep_.startswith("nsubj") + ] + ) + > 0 + ): + return False + + if ( + 
token.pos_ == "AUX" + and token.dep_ != self.root_dep + and len( + [ + child + for child in token.head.children + if child.dep_.startswith("nsubj") + ] + ) + > 0 + ): + return False + + # exclude structures like 'okazało się, że ...' + return not ( + len( + [ + child + for child in token.children + if child.dep_ == "expl:pv" + ] + ) + > 0 + and len( + [ + child + for child in token.children + if child.dep_ == "ccomp" + ] + ) + > 0 + and not self.has_morph(token, "Gender", "Masc") + and not self.has_morph(token, "Gender", "Fem") + ) + return False + + def is_potential_anaphoric_pair( + self, referred: Mention, referring: Token, directly: bool + ) -> bool: + + # masc: 'rodzaj męski' + # fem: 'rodzaj żeński' + # neut: 'rodzaj nijaki' + # nonvirile:'rodzaj niemęskoosobowy' + # virile: 'rodzaj męskoosobowy' + + def get_gender_number_info(token): + masc = fem = neut = nonvirile = virile = False + if self.has_morph(token, "Number", "Sing"): + if self.has_morph(token, "Gender", "Masc"): + masc = True + if token.tag_ == "PPRON3" and not self.has_morph( + token, "Case", "Nom" + ): + neut = True + if self.has_morph(token, "Gender", "Fem"): + fem = True + if self.has_morph(token, "Gender", "Neut"): + neut = True + if token.tag_ == "PPRON3" and not self.has_morph( + token, "Case", "Nom" + ): + masc = True + if token.pos_ == "PROPN": + if token.lemma_ in self.male_names: + masc = True + if token.lemma_ in self.female_names: + fem = True + if self.has_morph(token, "Number", "Plur"): + if ( + self.has_morph(token, "Gender", "Masc") + and self.has_morph(token, "Animacy", "Hum") + and token.dep_ != "nmod" + ): # 'ich' + virile = True + elif ( + ( + self.has_morph(token, "Gender", "Masc") + and self.has_morph(token, "Animacy", "Nhum") + ) + or ( + self.has_morph(token, "Gender", "Masc") + and self.has_morph(token, "Animacy", "Inan") + ) + or self.has_morph(token, "Gender", "Fem") + or self.has_morph(token, "Gender", "Neut") + ): + nonvirile = True + if token.pos_ == "PROPN" and not 
directly: + # common noun and proper noun in same chain may have different genders + masc = fem = neut = nonvirile = virile = True + return masc, fem, neut, nonvirile, virile + + def get_gender_number_info_for_single_token(token): + + masc = fem = neut = nonvirile = virile = False + if not self.is_reflexive_possessive_pronoun(token): + masc, fem, neut, nonvirile, virile = get_gender_number_info( + token + ) + if ( + not (masc or fem or neut or nonvirile or virile) + and token.dep_.startswith("nsubj") + and token.head.pos_ in ("VERB", "AUX") + ): + ( + masc, + fem, + neut, + nonvirile, + virile, + ) = get_gender_number_info(token.head) + if not (masc or fem or neut or nonvirile or virile): + if self.has_morph(token, "Number", "Sing"): + masc = fem = neut = True + if self.has_morph(token, "Number", "Plur"): + nonvirile = virile = True + if not (masc or fem or neut or nonvirile or virile): + masc = fem = neut = nonvirile = virile = True + return masc, fem, neut, nonvirile, virile + + def are_coordinated_tokens_possibly_virile(tokens: list) -> int: + masc = fem = neut = False + for token in tokens: + if self.has_morph(token, "Gender", "Masc") and self.has_morph( + token, "Animacy", "Hum" + ): + return 2 + if self.has_morph(token, "Gender", "Masc") and self.has_morph( + token, "Animacy", "Nhum" + ): + masc = True + if self.has_morph(token, "Gender", "Masc") and self.has_morph( + token, "Animacy", "Inan" + ): + masc = True + if self.has_morph(token, "Gender", "Fem"): + fem = True + if self.has_morph(token, "Gender", "Neut"): + neut = True + if (masc and fem) or (masc and neut) or (fem and neut): + if tokens[0].dep_.startswith("nsubj") and tokens[ + 0 + ].head.pos_ in ("VERB", "AUX"): + ( + _, + _, + _, + head_nonvirile, + head_virile, + ) = get_gender_number_info(tokens[0].head) + if head_nonvirile and not head_virile: + return 0 # only nonvirile + if head_virile and not head_nonvirile: + return 2 # only virile + # The rules about whether to use virile or nonvirile nouns 
or pronouns where + # a coordination phrase contains a mixture of genders are very complex and require + # knowledge of the animacy of feminine and neuter nouns which the Spacy model does + # not supply us with. For this reason, and because people often get the rules + # wrong, we accept both virile and nonvirile anaphors when a coordination phrase + # contained more than one nonvirile gender and there is no governing verb to + # specify either virile or nonvirile gender. + return 1 # either virile or nonvirile + if len([t for t in tokens if self.has_morph(t, "Gender")]) == 0: + return 1 + return 0 # only nonvirile + + doc = referring.doc + referred_root = doc[referred.root_index] + uncertain = False + + ( + referring_masc, + referring_fem, + referring_neut, + referring_nonvirile, + referring_virile, + ) = get_gender_number_info_for_single_token(referring) + + if self.is_involved_in_non_or_conjunction(referred_root): + if referred_root._.coref_chains.temp_governing_sibling is not None: + all_involved_referreds = [ + referred_root._.coref_chains.temp_governing_sibling + ] + else: + all_involved_referreds = [referred_root] + all_involved_referreds.extend( + all_involved_referreds[ + 0 + ]._.coref_chains.temp_dependent_siblings + ) + else: + all_involved_referreds = [referred_root] + + # e.g. 'Janek był w domu. Zadzwonili z żoną ...' 
+ comitative_siblings = [ + c + for c in referring._.coref_chains.temp_dependent_siblings + if referring.pos_ in ("VERB", "AUX") + and self.has_morph(referring, "Number", "Plur") + and self.has_morph(c, "Case", "Ins") + and c.i not in referred.token_indexes + ] + + if ( + not directly + and len(comitative_siblings) > 0 + and (referring_nonvirile or referring_virile) + ): + return 2 + + all_involved_referreds.extend(comitative_siblings) + + referreds_included_here = [doc[i] for i in referred.token_indexes] + referreds_included_here.extend(comitative_siblings) + + if len(all_involved_referreds) > 1: + + possibly_virile = are_coordinated_tokens_possibly_virile( + all_involved_referreds + ) + + if len(referreds_included_here) == len(all_involved_referreds): + if possibly_virile == 2: + if not referring_virile: + return 0 + elif possibly_virile == 1: + if not referring_nonvirile and not referring_virile: + return 0 + elif not referring_nonvirile: + return 0 + return 2 + + if len(referreds_included_here) > 1: + # implies len(all_involved_referreds) > len(referreds_included_here) + referreds_included_here_possibly_virile = ( + are_coordinated_tokens_possibly_virile( + referreds_included_here + ) + ) + if ( + referring_nonvirile + and possibly_virile == 2 + and referreds_included_here_possibly_virile == 0 + ): + return 2 + return 0 + + ( + referred_masc, + referred_fem, + referred_neut, + referred_nonvirile, + referred_virile, + ) = get_gender_number_info_for_single_token(referred_root) + + referred_comitative_siblings = [ + c + for c in referred_root._.coref_chains.temp_dependent_siblings + if referred_root.pos_ in ("VERB", "AUX") + and self.has_morph(referred_root, "Number", "Plur") + and self.has_morph(c, "Case", "Ins") + and c.i != referring.i + ] + if ( + not directly + and len(referred_comitative_siblings) > 0 + and (referred_nonvirile or referred_virile) + ): + return 2 + + if ( + possibly_virile == 2 + and referred_nonvirile + and referring_nonvirile + ): + return 
2 + if referred_nonvirile or referred_virile: + return 0 + if possibly_virile != 0 and referring_virile: + return 0 + if possibly_virile != 2 and referring_nonvirile: + return 0 + + referred_masc = ( + referred_fem + ) = referred_neut = referred_nonvirile = referred_virile = False + + for working_token in (doc[index] for index in referred.token_indexes): + ( + working_masc, + working_fem, + working_neut, + working_nonvirile, + working_virile, + ) = get_gender_number_info_for_single_token(working_token) + referred_masc = referred_masc or working_masc + referred_fem = referred_fem or working_fem + referred_neut = referred_neut or working_neut + referred_nonvirile = referred_nonvirile or working_nonvirile + referred_virile = referred_virile or working_virile + + if ( + not (referred_masc and referring_masc) + and not (referred_fem and referring_fem) + and not (referred_neut and referring_neut) + and not (referred_nonvirile and referring_nonvirile) + and not (referred_virile and referring_virile) + ): + return 0 + + if ( + self.is_reflexive_possessive_pronoun(referring) + and referring.head.i in referred.token_indexes + ): + return 0 + + if referred_root.head.i == referring.i: + return 0 + + if directly: + if ( + self.is_potential_reflexive_pair(referred, referring) + and self.is_reflexive_anaphor(referring) == 0 + ): + return 0 + + if ( + not self.is_potential_reflexive_pair(referred, referring) + and self.is_reflexive_anaphor(referring) == 2 + ): + return 0 + + # possessive pronouns cannot refer back to the head within a genitive phrase. + # This functionality is under 'directly' to improve performance. 
+ working_token = referring + while working_token.dep_ != self.root_dep: + if ( + working_token.head.i in referred.token_indexes + and working_token.dep_ not in self.dependent_sibling_deps + and self.has_morph(working_token, "Case", "Gen") + ): + return 0 + if ( + working_token.dep_ not in self.dependent_sibling_deps + and ( + working_token.dep_ != "nmod" + or not self.has_morph(working_token, "Case", "Gen") + ) + and not self.is_reflexive_possessive_pronoun(working_token) + ): + break + working_token = working_token.head + + # Some verbs like 'mówić' require a personal subject + referring_governing_sibling = referring + if referring._.coref_chains.temp_governing_sibling is not None: + referring_governing_sibling = ( + referring._.coref_chains.temp_governing_sibling + ) + if ( + referring_governing_sibling.dep_.startswith("nsubj") + and referring_governing_sibling.head.lemma_ + in self.verbs_with_personal_subject + ) or referring.lemma_ in self.verbs_with_personal_subject: + for working_token in ( + doc[index] for index in referred.token_indexes + ): + if ( + working_token.pos_ == self.propn_pos + or working_token.ent_type_ == "persName" + ): + return 2 + return 1 + + return 1 if uncertain else 2 + + def is_potentially_indefinite(self, token: Token) -> bool: + + if token.pos_ != "NOUN": + return False + for child in ( + child + for child in token.children + if child.pos_ in self.term_operator_pos + ): + if child.lemma_.lower() in ( + "ten", + "tego", + "temu", + "tym", + "to", + "ta", + "tę", + "tą", + "tej", + "ci", + "te", + "tych", + "tymi", + "tym", + "tych", + ): + return False + if ( + child.pos_ == "DET" + and child.tag_ == "ADJ" + and child.dep_ == "det" + and self.has_morph(child, "Poss", "Yes") + ): + return False + return True + + def is_potentially_definite(self, token: Token) -> bool: + + if token.pos_ != "NOUN": + return False + for child in ( + child + for child in token.children + if child.pos_ in self.term_operator_pos + ): + if 
child.lemma_.lower().startswith("jak"): + return False + return True + + def is_reflexive_anaphor(self, token: Token) -> int: + if token.tag_ == "SIEBIE" or self.is_reflexive_possessive_pronoun( + token + ): + return 2 + if ( + token.tag_ == "PPRON3" + and token.dep_ == "nmod" + and self.has_morph(token, "PrepCase", "Npr") + ): # e.g. 'jego, jej, ich' + return 1 + return 0 + + def is_potential_reflexive_pair( + self, referred: Mention, referring: Token + ) -> bool: + + if ( + referring.pos_ != "PRON" + and not self.is_reflexive_possessive_pronoun(referring) + ): + return False + + referred_root = referring.doc[referred.root_index] + + if referred_root._.coref_chains.temp_governing_sibling is not None: + referred_root = referred_root._.coref_chains.temp_governing_sibling + + if referring._.coref_chains.temp_governing_sibling is not None: + referring = referring._.coref_chains.temp_governing_sibling + + if referred_root.dep_.startswith("nsubj") or ( + referred_root.pos_ in ("VERB", "AUX") + and self.is_potential_anaphor(referred_root) + ): + for referring_ancestor in referring.ancestors: + + # Loop up through the ancestors of the pronoun + + if ( + referred_root == referring_ancestor + or referred_root in referring_ancestor.children + ): + return True + + # The ancestor has its own subject, so stop here + if ( + len( + [ + t + for t in referring_ancestor.children + if t.dep_.startswith("nsubj") + and t != referred_root + ] + ) + > 0 + ): + return False + + if ( + referring_ancestor._.coref_chains.temp_governing_sibling + == referred_root + ): + return False + + return ( + referring.dep_ != self.root_dep + and referred_root.dep_ != self.root_dep + and ( + referring.head == referred_root.head + or referring.head.i in referred.token_indexes + ) + and referring.i > referred_root.i + ) diff --git a/transformations/gender_randomizer/coreferee/coreferee/manager.py b/transformations/gender_randomizer/coreferee/coreferee/manager.py new file mode 100644 index 
000000000..c7a8ecb3a --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/manager.py @@ -0,0 +1,227 @@ +# Copyright 2021 msg systems ag + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import os +import pickle +import traceback +from sys import exc_info + +import pkg_resources +import spacy +from packaging import version +from spacy.language import Language +from spacy.tokens import Doc, Token +from tensorflow import keras +from thinc.api import Config + +from .annotation import Annotator +from .errors import ( + LanguageNotSupportedError, + ModelNotSupportedError, + MultiprocessingParsingNotSupportedError, + VectorsModelHasWrongVersionError, + VectorsModelNotInstalledError, +) + +COMMON_MODELS_PACKAGE_NAMEPART = "coreferee_model_" + +FEATURE_TABLE_FILENAME = "feature_table.bin" + +KERAS_MODEL_FILENAME = "keras_ensemble.h5" + + +class CorefereeManager: + @staticmethod + def get_annotator(nlp: Language) -> Annotator: + model_name = "_".join((nlp.meta["lang"], nlp.meta["name"])) + relative_config_filename = os.sep.join( + ("lang", nlp.meta["lang"], "config.cfg") + ) + if not pkg_resources.resource_exists( + __name__, relative_config_filename + ): + raise LanguageNotSupportedError(nlp.meta["lang"]) + absolute_config_filename = pkg_resources.resource_filename( + __name__, relative_config_filename + ) + config = Config().from_disk(absolute_config_filename) + for config_entry_name, config_entry in config.items(): + if ( + 
nlp.meta["name"] == config_entry["model"] + and version.parse(nlp.meta["version"]) + >= version.parse(config_entry["from_version"]) + and version.parse(nlp.meta["version"]) + <= version.parse(config_entry["to_version"]) + ): + if "vectors_model" in config_entry: + try: + vectors_nlp = spacy.load( + "_".join( + ( + nlp.meta["lang"], + config_entry["vectors_model"], + ) + ) + ) + except OSError: + raise VectorsModelNotInstalledError( + "".join( + ( + "Model ", + model_name, + " is only supported in conjunction with model ", + nlp.meta["lang"], + "_", + config_entry["vectors_model"], + " which must be loaded using 'python -m spacy download ", + nlp.meta["lang"], + "_", + config_entry["vectors_model"], + "'.", + ) + ) + ) + if version.parse( + vectors_nlp.meta["version"] + ) < version.parse( + config_entry["vectors_from_version"] + ) or version.parse( + vectors_nlp.meta["version"] + ) > version.parse( + config_entry["vectors_to_version"] + ): + raise VectorsModelHasWrongVersionError( + "".join( + ( + "Model ", + model_name, + " is only supported in conjunction with model ", + nlp.meta["lang"], + "_", + config_entry["vectors_model"], + " between versions ", + config_entry["vectors_from_version"], + " and ", + config_entry["vectors_to_version"], + " inclusive.", + ) + ) + ) + else: + vectors_nlp = nlp + model_package_name = "".join( + ( + COMMON_MODELS_PACKAGE_NAMEPART, + nlp.meta["lang"], + ".", + config_entry_name, + ) + ) + try: + importlib.import_module(model_package_name) + except ModuleNotFoundError: + print( + "".join( + ( + "Model could not be loaded for config entry '", + config_entry_name, + "' If models exist for language '", + nlp.meta["lang"], + "', load them with the command 'python -m coreferee install ", + nlp.meta["lang"], + "'.", + ) + ) + ) + raise ModelNotSupportedError( + "".join( + ( + nlp.meta["lang"], + "_", + nlp.meta["name"], + " version ", + nlp.meta["version"], + ) + ) + ) + this_feature_table_filename = pkg_resources.resource_filename( + 
model_package_name, FEATURE_TABLE_FILENAME + ) + with open( + this_feature_table_filename, "rb" + ) as feature_table_file: + feature_table = pickle.load(feature_table_file) + absolute_keras_model_filename = ( + pkg_resources.resource_filename( + model_package_name, KERAS_MODEL_FILENAME + ) + ) + keras_ensemble = keras.models.load_model( + absolute_keras_model_filename + ) + return Annotator( + nlp, vectors_nlp, feature_table, keras_ensemble + ) + raise ModelNotSupportedError( + "".join( + ( + nlp.meta["lang"], + "_", + nlp.meta["name"], + " version ", + nlp.meta["version"], + ) + ) + ) + + +@Language.factory("coreferee") +class CorefereeBroker: + def __init__(self, nlp: Language, name: str): + self.nlp = nlp + self.pid = os.getpid() + self.annotator = CorefereeManager().get_annotator(nlp) + + def __call__(self, doc: Doc) -> Doc: + if os.getpid() != self.pid: + raise MultiprocessingParsingNotSupportedError( + "Unfortunately at present parsing cannot be shared between forked processes." + ) + try: + self.annotator.annotate(doc) + except: + print("Unexpected error annotating document, skipping ....") + exception_info_parts = exc_info() + print(exception_info_parts[0]) + print(exception_info_parts[1]) + traceback.print_tb(exception_info_parts[2]) + return doc + + def __getstate__(self): + return self.nlp.meta + + def __setstate__(self, meta): + nlp_name = "_".join((meta["lang"], meta["name"])) + self.nlp = spacy.load(nlp_name) + self.annotator = CorefereeManager().get_annotator(self.nlp) + self.pid = os.getpid() + CorefereeBroker.set_extensions() + + @staticmethod + def set_extensions(): + if not Doc.has_extension("coref_chains"): + Doc.set_extension("coref_chains", default=None) + if not Token.has_extension("coref_chains"): + Token.set_extension("coref_chains", default=None) diff --git a/transformations/gender_randomizer/coreferee/coreferee/rules.py b/transformations/gender_randomizer/coreferee/coreferee/rules.py new file mode 100644 index 000000000..cd6e70095 --- 
class RulesAnalyzerFactory:
    """Builds language-specific rules analyzers and caches one per language in the
    module-level *language_to_rules* dictionary."""

    @staticmethod
    def get_rules_analyzer(nlp):
        """Returns the rules analyzer for the language given by *nlp.meta["lang"]*.

        On first use for a language, the module *lang.<language>.language_specific_rules*
        is imported relative to the *coreferee* package, its
        *LanguageSpecificRulesAnalyzer* is instantiated and the data files are loaded
        onto it. The analyzer is only placed in the cache once it is fully populated,
        so a failure while loading does not leave a half-initialised analyzer behind
        for later callers. Creation is serialised with the module-level *lock*.
        """

        def read_in_data_files(directory: str, rules_analyzer):
            """For each *.dat* file in *lang/<directory>/data*, sets an attribute on
            *rules_analyzer* named after the file (without the extension) holding the
            file's stripped lines. Lines starting with '#' and lines with fewer than
            two characters after stripping are left out."""
            data_directory = sep.join(("lang", directory, "data"))
            for data_filename in pkg_resources.resource_listdir(
                __name__, data_directory
            ):
                if not data_filename.endswith(".dat"):
                    continue
                full_data_filename = pkg_resources.resource_filename(
                    __name__,
                    sep.join(("lang", directory, "data", data_filename)),
                )
                with open(full_data_filename, "r", encoding="utf-8") as file:
                    stripped_lines = [
                        line.strip() for line in file.read().splitlines()
                    ]
                setattr(
                    rules_analyzer,
                    data_filename[:-4],
                    [
                        entry
                        for entry in stripped_lines
                        if len(entry) > 1 and not entry.startswith("#")
                    ],
                )

        language = nlp.meta["lang"]
        with lock:
            if language not in language_to_rules:
                language_specific_rules_module = importlib.import_module(
                    ".".join((".lang", language, "language_specific_rules")),
                    "coreferee",
                )
                rules_analyzer = (
                    language_specific_rules_module.LanguageSpecificRulesAnalyzer()
                )
                read_in_data_files(language, rules_analyzer)
                read_in_data_files("common", rules_analyzer)
                # *male_names* / *female_names* are expected to have been set by the
                # data files read above. Sets make the membership tests O(1) while the
                # resulting lists keep the original order.
                male_names = set(rules_analyzer.male_names)
                female_names = set(rules_analyzer.female_names)
                rules_analyzer.exclusively_male_names = [
                    name
                    for name in rules_analyzer.male_names
                    if name not in female_names
                ]
                rules_analyzer.exclusively_female_names = [
                    name
                    for name in rules_analyzer.female_names
                    if name not in male_names
                ]
                # Publish only after everything above has succeeded.
                language_to_rules[language] = rules_analyzer
            return language_to_rules[language]
@abstractmethod
def is_potential_anaphoric_pair(
    self, referred: Mention, referring: Token, directly: bool
) -> int:
    """Rates whether the language rules allow *referred* and *referring* to belong
    together.

    Returns *2* if the rules would permit *referred* and *referring* to co-exist
    within a chain, *0* if they would not and *1* if coexistence is unlikely.
    If *directly==True*, the question concerns direct coreference between the two
    elements; if *directly==False* the question concerns coexistence in a chain.
    For example, although 'himself' is unlikely to refer directly to 'he' in a non-reflexive
    situation, the same two pronouns can easily coexist within a chain, while 'he' and 'she'
    can never coexist anywhere within the same chain.

    Implementations of this method may need to exclude special cases of incorrect
    cataphoric (forward-referring) pronouns picked up by the general (non language-
    specific) processing in this class.
    """
+ """ + + @abstractmethod + def is_potentially_definite(self, token: Token) -> bool: + """Returns *True* if *token* heads a common noun phrase that is definite, or — in + languages that do not mark definiteness — which could be interpreted as being definite. + + *False* should be returned if *token* is a proper noun. + """ + + @abstractmethod + def is_reflexive_anaphor(self, token: Token) -> int: + """Returns *2* if *token* expresses an anaphor which MUST be used reflexively, e.g. + 'sich' in German; *1* if *token* expresses an anaphor which MAY be used reflexively, + e.g. 'himself'; *0* if *token* expresses an anaphor which MUST NOT be used reflexively, + e.g. 'him'. + """ + + @abstractmethod + def is_potential_reflexive_pair( + self, referred: Mention, referring: Token + ) -> bool: + """Returns *True* if *referring* stands in a syntactic relationship to + *referred* that would require a reflexive anaphor if the two elements belonged to the + same chain e.g. 'he saw himself', but also 'he saw him' (where the non-reflexive + anaphor would _preclude_ the two elements from being in the same chain). + + *True* should only be returned in syntactic positions where reflexive anaphors are + observed for the language in question. For example, Polish has reflexive possessive + pronouns but German does not, so *True* is returned for Polish in situations where + *False* is returned for German. In a language without reflexive anaphors, *False* + should always be returned. + + In many languages reflexive anaphors can precede their referents: in languages where + this is not the case, the method should check that *referred.root_index < referring.i*. + """ + + # MAY BE OVERRIDDEN BY IMPLEMENTING SUBCLASSES: + + maximum_anaphora_sentence_referential_distance = 5 + + maximum_coreferring_nouns_sentence_referential_distance = 2 + + training_epochs = 4 + + root_dep = "ROOT" + + # A tuple of parts of speech that can head predications semantically, i.e. 
def __init__(self):
    """Builds *self.reverse_entity_noun_dictionary*, which maps each lower-cased noun
    listed in *self.entity_noun_dictionary* to the entity label it is listed under.

    Raises *AssertionError* if two nouns collapse onto the same lower-cased key, as
    the reverse mapping could then only keep one of the labels.
    """
    self.reverse_entity_noun_dictionary = {}
    for entity_type, values in self.entity_noun_dictionary.items():
        for value in values:
            # The uniqueness check has to use the same (lower-cased) form that is
            # stored as the key, otherwise capitalised duplicates go unnoticed.
            key = value.lower()
            assert (
                key not in self.reverse_entity_noun_dictionary
            ), "duplicate entity noun: " + key
            self.reverse_entity_noun_dictionary[key] = entity_type
+ for token in doc: + siblings_list = self.get_dependent_siblings(token) + token._.coref_chains.temp_dependent_siblings = siblings_list + for sibling in ( + sibling for sibling in siblings_list if sibling.i != token.i + ): + if token._.coref_chains.temp_governing_sibling is None: + # in Polish some nouns can form part of two chains + sibling._.coref_chains.temp_governing_sibling = token + + #Adds an array representing which quotes the word is within. Note that the failure + # to end a quotation within a document will not cause any problems because the neural + # network is only given the information whether two members of a potential pair have + # the same quote array or a different quote array. + working_quote_array = [0 for entry in self.quote_tuples] + for token in doc: + for index, quote_tuple in enumerate(self.quote_tuples): + if ( + working_quote_array[index] == 0 + and token.text == quote_tuple[0] + ): + working_quote_array[index] = 1 + elif ( + working_quote_array[index] == 1 + and token.text == quote_tuple[1] + ): + working_quote_array[index] = 0 + token._.coref_chains.temp_quote_array = working_quote_array[:] + + # Adds to each potential anaphora a list of potential referred mentions. 
+ for token in doc: + token._.coref_chains.temp_potentially_referring = ( + self.is_independent_noun(token) + ) + if self.is_potential_anaphor(token): + potential_referreds = [] + this_sentence_start_index = token.sent[0].i + this_sentence_number = ( + doc._.coref_chains.temp_sent_starts.index( + this_sentence_start_index + ) + ) + start_sentence_number = 0 + if ( + this_sentence_number + > self.maximum_anaphora_sentence_referential_distance + ): + start_sentence_number = ( + this_sentence_number + - self.maximum_anaphora_sentence_referential_distance + ) + for preceding_token in ( + t + for t in doc[ + doc._.coref_chains.temp_sent_starts[ + start_sentence_number + ]: token.i + ] + if ( + self.is_potential_anaphor(t) + or self.is_independent_noun(t) + ) + ): + simple_referred = Mention(preceding_token, False) + if self.language_independent_is_potential_anaphoric_pair( + simple_referred, token + ) > 0 and not self.is_potential_reflexive_pair( + Mention(token, False), doc[simple_referred.root_index] + ): + potential_referreds.append(simple_referred) + if ( + len( + preceding_token._.coref_chains.temp_dependent_siblings + ) + > 0 + ): + complex_referred = Mention(preceding_token, True) + if ( + self.language_independent_is_potential_anaphoric_pair( + complex_referred, token + ) + > 0 + ): + potential_referreds.append(complex_referred) + if this_sentence_number + 1 == len( + doc._.coref_chains.temp_sent_starts + ): + succeeding_tokens = doc[token.i + 1:] + else: + succeeding_tokens = doc[ + token.i + + 1: doc._.coref_chains.temp_sent_starts[ + this_sentence_number + 1 + ] + ] + for succeeding_token in ( + t + for t in succeeding_tokens + if ( + self.is_potential_anaphor(t) + or self.is_independent_noun(t) + ) + ): + simple_referred = Mention(succeeding_token, False) + if self.language_independent_is_potential_anaphoric_pair( + simple_referred, token + ) > 0 and ( + self.is_potential_cataphoric_pair( + simple_referred, token + ) + or self.is_potential_reflexive_pair( + 
def is_potentially_introducing_noun(self, token: Token) -> bool:
    """Tells whether *token* could introduce something new: either it is potentially
    indefinite, or it is potentially definite and has at least one child that is
    neither a term operator nor part of a coordination, e.g. 'the man who ...'.
    Coordination itself is not considered.
    """
    if self.is_potentially_indefinite(token):
        return True

    definite = self.is_potentially_definite(token)
    if not definite:
        return definite

    for dependent in token.children:
        if dependent.pos_ in self.term_operator_pos:
            continue
        if (
            dependent.dep_ in self.conjunction_deps
            or dependent.dep_ in self.dependent_sibling_deps
        ):
            continue
        # An additional child beyond determiners and coordination was found.
        return True
    return False
def language_independent_is_potential_anaphoric_pair(
    self, referred: Mention, referring: Token
) -> int:
    """Calls *is_potential_anaphoric_pair* with *directly=True*, downgrades the result
    using language-independent tests and records the outcome on *referred*.

    Returns *0* if *referring* is one of the dependent siblings of the root token of
    *referred*; otherwise *0*, *1* or *2* with the meanings documented for
    *is_potential_anaphoric_pair*. A result of *2* is downgraded to *1* when

    - the root of *referred* is not a potential anaphor and a token at or below
      *referring* or one of its governors forms a potential coreferring noun pair
      with a token of *referred*, checked while climbing from *referring* towards
      the sentence root until the subtree contains the root of *referred*; or
    - the two elements have different quote arrays.

    *referred.temp_is_uncertain* is set to *True* for a result of *1* and to *False*
    for a result of *2*; it is left untouched for *0*.
    """
    doc = referring.doc
    referred_root = doc[referred.root_index]
    if referring in referred_root._.coref_chains.temp_dependent_siblings:
        return 0

    # All common tests are 'directly' tests.
    result = self.is_potential_anaphoric_pair(referred, referring, True)

    if result == 2 and not self.is_potential_anaphor(referred_root):
        referred_tokens = [doc[i] for i in referred.token_indexes]
        referring_or_governor = referring
        while True:
            governor_subtree = list(referring_or_governor.subtree)
            if referred_root in governor_subtree:
                break
            if any(
                self.is_potential_coreferring_noun_pair(
                    referred_token, referring_sub_token
                )
                for referring_sub_token in governor_subtree
                for referred_token in referred_tokens
            ):
                # A closer noun competes with *referred*; nothing further up the
                # tree can change the outcome, so stop climbing.
                result = 1
                break
            # Use the configurable root label rather than a literal, so that
            # subclasses overriding *root_dep* still terminate at the sentence root.
            if referring_or_governor.dep_ == self.root_dep:
                break
            referring_or_governor = referring_or_governor.head

    # Being inside versus outside a quotation makes coreference less certain.
    if (
        result == 2
        and referred_root._.coref_chains.temp_quote_array
        != referring._.coref_chains.temp_quote_array
    ):
        result = 1

    if result == 1:
        referred.temp_is_uncertain = True
    elif result == 2:
        referred.temp_is_uncertain = False
    return result
def get_propn_subtree(self, token: Token) -> list:
    """Collects the unbroken run of proper nouns around *token* within its subtree.

    An empty list is returned if *token* is not a proper noun, or if it is neither a
    root nor a dependent sibling and its head is a proper noun. Otherwise the result
    contains those members of the subtree of *token* that lie strictly between the
    nearest non-proper-noun subtree member before *token* and the nearest one after
    it, in subtree order.
    """
    if token.pos_ not in self.propn_pos:
        return []
    is_attached_below_propn = (
        token.dep_ != self.root_dep
        and token.dep_ not in self.dependent_sibling_deps
        and token.head.pos_ in self.propn_pos
    )
    if is_attached_below_propn:
        return []

    members = list(token.subtree)
    boundary_indexes = [
        member.i for member in members if member.pos_ not in self.propn_pos
    ]
    # Closest interrupting token on each side; the defaults leave that side open.
    lower_bound = max((i for i in boundary_indexes if i < token.i), default=-1)
    upper_bound = min(
        (i for i in boundary_indexes if i > token.i), default=sys.maxsize
    )
    return [member for member in members if lower_bound < member.i < upper_bound]
If *value* is supplied, + additionally checks that the list contains *value*.""" + if value is None: + return len(token.morph.get(key)) > 0 + return value in token.morph.get(key) + + @staticmethod + def is_involved_in_non_or_conjunction(token: Token) -> bool: + """Returns *True* if *token* is part of a conjunction phrase that does not contain an or- + lemma.""" + if len(token._.coref_chains.temp_dependent_siblings) > 0: + return not token._.coref_chains.temp_has_or_coordination + if token._.coref_chains.temp_governing_sibling is not None: + return ( + not token._.coref_chains.temp_governing_sibling._.coref_chains.temp_has_or_coordination + ) + return False + + @staticmethod + def is_token_in_one_of_phrases(token: Token, phrases: list) -> bool: + """Checks whether *token* is part of a phrase that is listed in *phrases*.""" + doc = token.doc + token_text = token.text.lower() + for phrase in phrases: + phrase_words = phrase.lower().split() + if token_text not in phrase_words: + continue + possible_index = phrase_words.index(token_text) + start_index = max(0, token.i - possible_index) + end_index = token.i + len(phrase_words) - possible_index + if phrase.lower() == " ".join( + [t.text.lower() for t in doc[start_index:end_index]] + ): + return True + return False + + def is_potential_cataphoric_pair( + self, referred: Mention, referring: Token + ) -> bool: + """Checks whether *referring* can refer cataphorically to *referred*, i.e. + where *referring* precedes *referred* in the text. That *referring* precedes + *referred* is not itself checked by the method. 
+ """ + + doc = referring.doc + referred_root = doc[referred.root_index] + + if referred_root.sent != referring.sent: + return False + if self.is_potential_anaphor(referred_root): + return False + + referred_verb_ancestors = [] + # Find the ancestors of the referent that are verbs, stopping anywhere where there + # is conjunction between verbs + for ancestor in referred_root.ancestors: + if ancestor.pos_ in self.clause_root_pos: + referred_verb_ancestors.append(ancestor) + if ancestor.dep_ in self.dependent_sibling_deps: + break + + # Loop through the ancestors of the referring pronoun that are verbs, that are not + # within the first list and that have an adverbial clause dependency label + referring_inclusive_ancestors = [referring] + referring_inclusive_ancestors.extend(referring.ancestors) + if ( + len( + [ + 1 + for ancestor in referring_inclusive_ancestors + if ancestor.dep_ in self.adverbial_clause_deps + ] + ) + == 0 + ): + return False + for referring_verb_ancestor in ( + t + for t in referring_inclusive_ancestors + if t.pos_ in self.clause_root_pos + and t not in referred_verb_ancestors + ): + # If one of the elements of the second list has one of the elements of the first list + # within its ancestors, we have subordination and cataphora is permissible + if ( + len( + [ + t + for t in referring_verb_ancestor.ancestors + if t in referred_verb_ancestors + ] + ) + > 0 + ): + return True + return False diff --git a/transformations/gender_randomizer/coreferee/coreferee/tendencies.py b/transformations/gender_randomizer/coreferee/coreferee/tendencies.py new file mode 100644 index 000000000..05b8e79bc --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/tendencies.py @@ -0,0 +1,622 @@ +# Copyright 2021 msg systems ag + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
def get_feature_map(self, token_or_mention, doc: Doc) -> list:
    """Builds the list of 0/1 flags stating which entries of *self.feature_table*
    apply to a token or mention, and caches it.

    The result is stored as *token._.coref_chains.temp_feature_map* or
    *mention.temp_feature_map*; if that attribute already exists it is returned
    unchanged. For a mention, the token at *root_index* supplies all features. For
    morphs and for the dependency labels to children, a flag is also set when it
    applies to any of the other tokens of the mention (a logical OR across the
    mention's tokens). Features describing the parent are all 0 for a token whose
    dependency label is the root label.
    """
    is_token = isinstance(token_or_mention, Token)
    cache_holder = (
        token_or_mention._.coref_chains if is_token else token_or_mention
    )
    if hasattr(cache_holder, "temp_feature_map"):
        return cache_holder.temp_feature_map

    if is_token:
        token = token_or_mention
        siblings = []
    else:
        token = doc[token_or_mention.root_index]
        siblings = [doc[i] for i in token_or_mention.token_indexes[1:]]

    def flags(reference_list, actual_values):
        """One flag per entry of *reference_list*: 1 if the entry is contained in
        *actual_values*, otherwise 0."""
        return [
            1 if reference in actual_values else 0
            for reference in reference_list
        ]

    def flags_across_mention(reference_list, extract):
        """Flags for the root token, OR-combined with the flags of each sibling."""
        merged = flags(reference_list, extract(token))
        for sibling in siblings:
            sibling_flags = flags(reference_list, extract(sibling))
            merged = [
                1 if (own == 1 or other == 1) else 0
                for own, other in zip(merged, sibling_flags)
            ]
        return merged

    def deps_left_of(node):
        return [child.dep_ for child in node.children if child.i < node.i]

    def deps_right_of(node):
        return [child.dep_ for child in node.children if child.i > node.i]

    table = self.feature_table
    if token.dep_ != self.rules_analyzer.root_dep:
        parent = token.head
        dep_to_parent_on_right = [token.dep_] if token.i < parent.i else []
        dep_to_parent_on_left = [token.dep_] if token.i > parent.i else []
        parent_tags = [parent.tag_]
        parent_morphs = parent.morph
        parent_left_deps = deps_left_of(parent)
        parent_right_deps = deps_right_of(parent)
    else:
        dep_to_parent_on_right = []
        dep_to_parent_on_left = []
        parent_tags = []
        parent_morphs = []
        parent_left_deps = []
        parent_right_deps = []

    # The order of the segments must not change: it defines the layout of the list.
    feature_map = flags(table.tags, [token.tag_])
    feature_map += flags_across_mention(table.morphs, lambda t: t.morph)
    feature_map += flags(table.ent_types, [token.ent_type_])
    feature_map += flags_across_mention(
        table.lefthand_deps_to_children, deps_left_of
    )
    feature_map += flags_across_mention(
        table.righthand_deps_to_children, deps_right_of
    )
    # "lefthand" here means that the token stands to the left of its parent.
    feature_map += flags(table.lefthand_deps_to_parents, dep_to_parent_on_right)
    feature_map += flags(table.righthand_deps_to_parents, dep_to_parent_on_left)
    feature_map += flags(table.parent_tags, parent_tags)
    feature_map += flags(table.parent_morphs, parent_morphs)
    feature_map += flags(
        table.parent_lefthand_deps_to_children, parent_left_deps
    )
    feature_map += flags(
        table.parent_righthand_deps_to_children, parent_right_deps
    )

    cache_holder.temp_feature_map = feature_map
    return feature_map
+ """ + + if isinstance(token_or_mention, Token): + if hasattr(token_or_mention._.coref_chains, "temp_position_map"): + return token_or_mention._.coref_chains.temp_position_map + token = token_or_mention + else: + if hasattr(token_or_mention, "temp_position_map"): + return token_or_mention.temp_position_map + token = doc[token_or_mention.root_index] + + # This token is the nth word within its sentence + position_map = [ + token.i + - token.doc._.coref_chains.temp_sent_starts[ + token._.coref_chains.temp_sent_index + ] + ] + + # This token is at depth n from the root + position_map.append(len(list(token.ancestors))) + + # This token is n verbs from the root + position_map.append( + len( + [ + ancestor + for ancestor in token.ancestors + if ancestor.pos_ in self.rules_analyzer.verb_pos + ] + ) + ) + + # This token is the nth token at its depth within its sentence + position_map.append( + len( + [ + 1 + for token_in_sentence in token.sent + if token_in_sentence.i < token.i + and len(list(token_in_sentence.ancestors)) + == len(list(token.ancestors)) + ] + ) + ) + + # This token is the nth child of its parents + if token.dep_ != self.rules_analyzer.root_dep: + position_map.append( + sorted([child.i for child in token.head.children]).index( + token.i + ) + ) + else: + position_map.append(-1) + + # Number of dependent siblings, or -1 if the method was passed a mention that is within + # a coordination phrase but only covers one token within that phrase + if token._.coref_chains.temp_governing_sibling is not None or ( + len(token._.coref_chains.temp_dependent_siblings) > 0 + and not ( + isinstance(token_or_mention, Mention) + and len(token_or_mention.token_indexes) > 1 + ) + ): + position_map.append(-1) + else: + position_map.append( + len(token._.coref_chains.temp_dependent_siblings) + ) + + position_map.append( + 1 if token._.coref_chains.temp_governing_sibling is not None else 0 + ) + + if isinstance(token_or_mention, Token): + 
token_or_mention._.coref_chains.temp_position_map = position_map + else: + token_or_mention.temp_position_map = position_map + return position_map + + def get_compatibility_map( + self, referred: Mention, referring: Token + ) -> list: + """Returns a list of numbers representing the interaction between *referred* and + *referring*. It will already have been established that coreference between the two is + possible; the compatibility map assists the neural network in ascertaining how likely + it is. The list is also added as *referred.temp_compatibility_map*. + """ + doc = referring.doc + referred_root = doc[referred.root_index] + + if hasattr(referred, "temp_compatibility_map"): + return referred.temp_compatibility_map + + # Referential distance in words (may be negative in the case of cataphora) + compatibility_map = [referring.i - referred_root.i] + + # Referential distance in sentences + compatibility_map.append( + referring._.coref_chains.temp_sent_index + - referred_root._.coref_chains.temp_sent_index + ) + + # Whether the referred mention, its lefthand sibling or its head is among the ancestors + # of the referring element + compatibility_map.append( + 1 + if referred_root in referring.ancestors + or ( + referred_root.dep_ != self.rules_analyzer.root_dep + and referred_root.head in referring.ancestors + ) + or referred_root._.coref_chains.temp_governing_sibling is not None + and ( + referred_root._.coref_chains.temp_governing_sibling + in referring.ancestors + or ( + referred_root._.coref_chains.temp_governing_sibling.dep_ + != self.rules_analyzer.root_dep + and referred_root._.coref_chains.temp_governing_sibling.head + in referring.ancestors + ) + ) + else 0 + ) + + # The cosine similarity of the two objects' heads' vectors + if ( + referred_root.dep_ != self.rules_analyzer.root_dep + and referring.dep_ != self.rules_analyzer.root_dep + ): + referred_head_lexeme = self.vectors_nlp.vocab[ + referred_root.head.lemma_ + ] + referring_head_lexeme = 
self.vectors_nlp.vocab[ + referring.head.lemma_ + ] + if ( + referred_head_lexeme.has_vector + and referring_head_lexeme.has_vector + ): + compatibility_map.append( + referred_head_lexeme.similarity(referring_head_lexeme) + ) + elif ( + referred_root.has_vector and referring.has_vector + ): # _sm models + compatibility_map.append(referred_root.similarity(referring)) + else: + compatibility_map.append(-1) + else: + compatibility_map.append(-1) + + # The number of common true values in the feature maps of *referred.root* and *referring*. + referred_feature_map = self.get_feature_map(referred, referring.doc) + referring_feature_map = self.get_feature_map( + Mention(referring, False), referring.doc + ) + compatibility_map.append( + [ + 1 if (entry == 1 and referring_feature_map[index] == 1) else 0 + for (index, entry) in enumerate(referred_feature_map) + ].count(1) + ) + + referred.temp_compatibility_map = compatibility_map + return compatibility_map + + def get_vectors(self, token_or_mention, doc: Doc) -> list: + """Returns vector representations for *token_or_mention* and its head. If there is no head, + a zero vector is returned in place of the head vector. 
The vector representations are + added as a tuple as *token._.coref_chains.temp_vectors* or *mention.temp_vectors* + """ + if isinstance(token_or_mention, Token): + if hasattr(token_or_mention._.coref_chains, "temp_vectors"): + return token_or_mention._.coref_chains.temp_vectors + tokens = [token_or_mention] + else: + if hasattr(token_or_mention, "temp_vectors"): + return token_or_mention.temp_vectors + tokens = [doc[i] for i in token_or_mention.token_indexes] + if self.vectors_nlp.vocab[tokens[0].lemma_].has_vector: + # The mean of the siblings seems likely to be more representative than the whole span + this_object_vector = np.mean( + np.array( + [self.vectors_nlp.vocab[t.lemma_].vector for t in tokens] + ), + axis=0, + ) + else: + this_object_vector = np.mean( + np.array([t.vector for t in tokens]), axis=0 + ) + if len(this_object_vector) == 0: + this_object_vector = np.zeros(self.vector_length) + + if tokens[0].dep_ != self.rules_analyzer.root_dep: + head = tokens[0].head + if self.vectors_nlp.vocab[head.lemma_].has_vector: + head_vector = self.vectors_nlp.vocab[head.lemma_].vector + else: + head_vector = head.vector + if len(head_vector) == 0: + head_vector = np.zeros(self.vector_length) + else: + head_vector = np.zeros(self.vector_length) + + if isinstance(token_or_mention, Token): + token_or_mention._.coref_chains.temp_vectors = ( + this_object_vector, + head_vector, + ) + else: + token_or_mention.temp_vectors = (this_object_vector, head_vector) + + return this_object_vector, head_vector + + def prepare_keras_data(self, docs: list, *, return_outputs: bool = False): + """Generates from a list of documents the inputs for a Keras model, a boolean value + specfying whether scoring is necessary, and - when training only - the outputs + for a Keras model. 
+ """ + referred_vector_inputs = [] + referred_head_vector_inputs = [] + referred_feature_map_inputs = [] + referred_position_map_inputs = [] + referring_vector_inputs = [] + referring_head_vector_inputs = [] + referring_feature_map_inputs = [] + referring_position_map_inputs = [] + compatibility_map_inputs = [] + if return_outputs: + outputs = [] + + keras_inputs = {} + keras_outputs = {} + + # if there are no competing interpretations of anaphors in the document, there is nothing + # to score + scoring_necessary = False + + for doc in docs: + for referring in ( + t + for t in doc + if hasattr(t._.coref_chains, "temp_potential_referreds") + ): + ( + referring_vector_input, + referring_head_vector_input, + ) = self.get_vectors(referring, doc) + referring_feature_map_input = self.get_feature_map( + referring, doc + ) + referring_position_map_input = self.get_position_map( + referring, doc + ) + + for index, potential_referred in enumerate( + p + for p in referring._.coref_chains.temp_potential_referreds + if not hasattr(p, "spanned_in_training") + ): + # spanned in training - X->Y and Y->Z; we do want to present X->Z + # as neither correct nor incorrect and so remove it from the + # training data + if index > 0: + scoring_necessary = True + ( + referred_vector_input, + referred_head_vector_input, + ) = self.get_vectors(potential_referred, doc) + referred_feature_map_input = self.get_feature_map( + potential_referred, doc + ) + referred_position_map_input = self.get_position_map( + potential_referred, doc + ) + compatibility_map_input = self.get_compatibility_map( + potential_referred, referring + ) + referred_vector_inputs.append(referred_vector_input) + referred_head_vector_inputs.append( + referred_head_vector_input + ) + referred_feature_map_inputs.append( + referred_feature_map_input + ) + referred_position_map_inputs.append( + referred_position_map_input + ) + referring_vector_inputs.append(referring_vector_input) + referring_head_vector_inputs.append( + 
referring_head_vector_input + ) + referring_feature_map_inputs.append( + referring_feature_map_input + ) + referring_position_map_inputs.append( + referring_position_map_input + ) + compatibility_map_inputs.append(compatibility_map_input) + if return_outputs: + outputs.append( + [ + 1 + if hasattr( + potential_referred, "true_in_training" + ) + else 0 + ] + ) + + np_referred_vector_inputs = np.array(referred_vector_inputs) + np_referred_head_vector_inputs = np.array(referred_head_vector_inputs) + np_referred_feature_map_inputs = np.array(referred_feature_map_inputs) + np_referred_position_map_inputs = np.array( + referred_position_map_inputs + ) + np_referring_vector_inputs = np.array(referring_vector_inputs) + np_referring_head_vector_inputs = np.array( + referring_head_vector_inputs + ) + np_referring_feature_map_inputs = np.array( + referring_feature_map_inputs + ) + np_referring_position_map_inputs = np.array( + referring_position_map_inputs + ) + np_compatibility_map_inputs = np.array(compatibility_map_inputs) + + for index in range(ENSEMBLE_SIZE): + keras_inputs[ + "_".join(("referred_vector_input", str(index))) + ] = np_referred_vector_inputs + keras_inputs[ + "_".join(("referred_head_vector_input", str(index))) + ] = np_referred_head_vector_inputs + keras_inputs[ + "_".join(("referred_feature_map_input", str(index))) + ] = np_referred_feature_map_inputs + keras_inputs[ + "_".join(("referred_position_map_input", str(index))) + ] = np_referred_position_map_inputs + keras_inputs[ + "_".join(("referring_vector_input", str(index))) + ] = np_referring_vector_inputs + keras_inputs[ + "_".join(("referring_head_vector_input", str(index))) + ] = np_referring_head_vector_inputs + keras_inputs[ + "_".join(("referring_feature_map_input", str(index))) + ] = np_referring_feature_map_inputs + keras_inputs[ + "_".join(("referring_position_map_input", str(index))) + ] = np_referring_position_map_inputs + keras_inputs[ + "_".join(("compatibility_map_input", str(index))) + ] = 
def score(self, doc: Doc, keras_ensemble) -> None:
    """Score every possible anaphoric pair in *doc* and rank the candidates.

    The inputs come from ``self.prepare_keras_data([doc])``. When that call
    reports that scoring is unnecessary, nothing is changed. Otherwise the
    predictions of *keras_ensemble* are averaged over their first axis, each
    potential referred receives its value as ``temp_score``, and every
    ``temp_potential_referreds`` list is sorted in place so that entries
    with a falsy ``temp_is_uncertain`` come first and, within each group,
    higher scores come first. The scores are not referenced outside this
    method.

    Raises ``AssertionError`` when more scores are returned than there are
    potential referreds.
    """
    keras_inputs, scoring_necessary = self.prepare_keras_data([doc])
    if not scoring_necessary:
        return

    averaged_scores = np.mean(keras_ensemble.predict(keras_inputs), axis=0)
    remaining_scores = iter(averaged_scores)
    anaphors = [
        token
        for token in doc
        if hasattr(token._.coref_chains, "temp_potential_referreds")
    ]

    # Scores are consumed in the same order in which the pairs were generated.
    for anaphor in anaphors:
        for candidate in anaphor._.coref_chains.temp_potential_referreds:
            candidate.temp_score = next(remaining_scores)[0]

    # Every score must have been consumed by now.
    exhausted = object()
    assert (
        next(remaining_scores, exhausted) is exhausted
    ), "Mismatch between potential referreds and Keras output."

    for anaphor in anaphors:
        anaphor._.coref_chains.temp_potential_referreds.sort(
            key=lambda candidate: (
                candidate.temp_is_uncertain,
                -candidate.temp_score,
            )
        )
def get_nlps(language: str, *, add_coreferee: bool = True) -> list:
    """Return the list of *nlp* objects to use when testing *language*.

    One pipeline is loaded for each distinct spaCy model named in the
    language's ``config.cfg``; the list is sorted by the model's ``name``
    and ``version`` metadata and cached per language for the lifetime of
    the process. Because of that cache, calling this function with
    *add_coreferee=False* also determines what all later calls for the
    same language receive, so that setting is only appropriate during
    development of rules tests and before any smoke tests are required.

    Raises ``LanguageNotSupportedError`` when no config file exists for
    *language*.
    """
    with lock:
        if language in language_to_nlps:
            return language_to_nlps[language]

        config_resource = sep.join(("lang", language, "config.cfg"))
        if not pkg_resources.resource_exists("coreferee", config_resource):
            raise LanguageNotSupportedError(language)
        config = Config().from_disk(
            pkg_resources.resource_filename(__name__, config_resource)
        )

        # Several config entries may name the same model; load each once.
        model_names = {
            "_".join((language, config[entry_name]["model"]))
            for entry_name in config
        }

        pipelines = []
        for model_name in model_names:
            # At present we presume there will never be an entry in the
            # config file that specifies a model name that can no longer be
            # loaded. This seems a reasonable assumption, but if it no
            # longer applies this code will need to be changed in the future.
            pipeline = spacy.load(model_name)
            if add_coreferee:
                pipeline.add_pipe("coreferee")
            pipelines.append(pipeline)

        pipelines.sort(
            key=lambda pipeline: (
                pipeline.meta["name"],
                pipeline.meta["version"],
            )
        )
        language_to_nlps[language] = pipelines
        return pipelines
class ParCorHandler(xml.sax.ContentHandler):
    """SAX handler collecting words and coreference spans from ParCor XML.

    After parsing, ``words`` holds the text of every ``<word>`` element in
    document order (preceded by a "." entry at index 0) and ``corefs`` maps
    each coreference label to a list of ``(start, end)`` word-number tuples
    taken from ``<markable>`` elements.
    """

    def __init__(self):
        super().__init__()
        self._current_tag = ""
        self._working_word = None
        # Index 0 is pre-filled, presumably so that the numbers used in
        # markable spans can index this list directly - TODO confirm.
        self.words = ["."]
        self.corefs = {}

    def startElement(self, tag, attributes):
        """Track ``<word>`` elements and record spans of ``<markable>`` ones."""
        self._current_tag = "word" if tag == "word" else ""
        if tag != "markable":
            return
        coref_class = attributes.getValue("coref_class")
        if coref_class == "empty":
            return

        span_text = attributes.getValue("span")
        # Only the part after the first comma is used.
        _, comma, after_comma = span_text.partition(",")
        if comma:
            span_text = after_comma
        # Each bound carries a five-character prefix before its number; a
        # span without ".." covers a single word.
        bounds = span_text.split("..")
        start = int(bounds[0][5:])
        end = int(bounds[1][5:]) if len(bounds) > 1 else start
        # The label is the coref_class without its four-character prefix.
        self.corefs.setdefault(coref_class[4:], []).append((start, end))

    def endElement(self, tag):
        """Store the word accumulated since the last element ended, if any."""
        if self._working_word is None:
            return
        self.words.append(self._working_word)
        self._working_word = None

    def characters(self, content):
        """Accumulate non-blank character data that follows a ``<word>`` tag."""
        text = content.strip()
        if self._current_tag != "word" or not text:
            return
        if self._working_word is None:
            self._working_word = text
        else:
            # Character data may arrive in several chunks.
            self._working_word += text
+ holmes_span = doc[ + lookup[parcor_span[0]][0] : lookup[parcor_span[1]][-1] + 1 + ] + include_dependent_siblings = ( + len( + holmes_span.root._.coref_chains.temp_dependent_siblings + ) + > 0 + and holmes_span.root._.coref_chains.temp_dependent_siblings[ + -1 + ].i + <= lookup[parcor_span[1]][-1] + ) + working_referent = Mention( + holmes_span.root, include_dependent_siblings + ) + marked = False + if index > 0: + previous_parcor_span = thinned_parcor_spans[index - 1] + previous_holmes_span = doc[ + lookup[previous_parcor_span[0]][0] : lookup[ + previous_parcor_span[1] + ][-1] + + 1 + ] + if hasattr( + previous_holmes_span.root._.coref_chains, + "temp_potential_referreds", + ): + for ( + mention + ) in ( + previous_holmes_span.root._.coref_chains.temp_potential_referreds + ): + if mention == working_referent: + mention.true_in_training = True + marked = True + continue + if not marked and index < len(thinned_parcor_spans) - 1: + next_parcor_span = thinned_parcor_spans[index + 1] + next_holmes_span = doc[ + lookup[next_parcor_span[0]][0] : lookup[ + next_parcor_span[1] + ][-1] + + 1 + ] + if hasattr( + next_holmes_span.root._.coref_chains, + "temp_potential_referreds", + ): + for ( + mention + ) in ( + next_holmes_span.root._.coref_chains.temp_potential_referreds + ): + if mention == working_referent: + mention.true_in_training = True + continue + return doc + + def load( + self, directory_name: str, nlp: Language, rules_analyzer: RulesAnalyzer + ) -> list: + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_namespaces, 0) + docs = [] + for words_filename in ( + w + for w in os.scandir(directory_name) + if w.path.endswith("words.xml") + ): + coref_data_filename = "".join( + (words_filename.name[:-10], "_coref_level.xml") + ) + coref_data_full_filename = os.sep.join( + (directory_name, coref_data_filename) + ) + if not os.path.isfile(coref_data_full_filename): + raise RuntimeError( + " ".join((coref_data_full_filename, "not found.")) + ) + 
class PolishCoreferenceCorpusANNLoader(GenericLoader):
    """Loader for training data stored as ``.txt`` files with ``.ann`` files.

    Each ``.txt`` file in the data directory must be accompanied by an
    ``.ann`` file of the same base name. Two kinds of annotation lines are
    evaluated:

    * lines whose first field starts with ``T`` define a mention: the
      second field must be ``Mention``, the third field is the start
      character offset and the end character offset is the first field,
      counting from the fourth, that does not contain ``;``;
    * lines starting with ``* Coref`` list mentions that corefer.
    """

    @staticmethod
    def _mark_true_in_training(neighbouring_span, referent) -> bool:
        """Flag each potential referred of *neighbouring_span*'s root that
        equals *referent* as ``true_in_training``; return whether any did."""
        marked = False
        for mention in getattr(
            neighbouring_span.root._.coref_chains,
            "temp_potential_referreds",
            (),
        ):
            if mention == referent:
                mention.true_in_training = True
                marked = True
        return marked

    @staticmethod
    def load_file(
        doc: Doc, ann_file_lines: list, rules_analyzer: RulesAnalyzer
    ) -> None:
        """Annotate *doc* in place from the lines of its ``.ann`` file.

        *doc* first goes through ``rules_analyzer.initialize()``. Mentions
        are then grouped into coreference sets; within each set, ordered by
        mention number and restricted to spans whose root is an independent
        noun or a potential anaphor, every span is looked up among the
        potential referreds of the preceding span's root and, if it was not
        found there, of the following span's root. Matching mentions are
        marked with ``true_in_training = True``.
        """
        rules_analyzer.initialize(doc)
        token_char_start_indexes = [token.idx for token in doc]
        mention_numbers_to_spans = {}
        mention_numbers_to_set_numbers = {}

        for index, ann_file_line in enumerate(ann_file_lines):
            words = ann_file_line.split()
            if not words:
                # Blank line (e.g. at the end of the file): nothing to parse.
                continue
            if words[0].startswith("T"):
                assert words[1] == "Mention"
                # Skip over fields containing ";" to find the final end
                # offset of the mention.
                end_index = 3
                end_word = words[end_index]
                while ";" in end_word:
                    end_index += 1
                    end_word = words[end_index]
                mention_numbers_to_spans[words[0]] = doc[
                    bisect.bisect_left(
                        token_char_start_indexes, int(words[2])
                    ) : bisect.bisect_left(
                        token_char_start_indexes, int(end_word)
                    )
                ]
            if words[:2] == ["*", "Coref"]:
                coref_members = words[2:]
                known_set_numbers = {
                    mention_numbers_to_set_numbers[mention_number]
                    for mention_number in coref_members
                    if mention_number in mention_numbers_to_set_numbers
                }
                if known_set_numbers:
                    this_set_number = min(known_set_numbers)
                    # Intermediate sets are redefined as part of the lowest
                    # set. The set numbers to absorb are fixed *before* any
                    # entry is rewritten, so that every member of an
                    # absorbed set is moved and none is left behind.
                    absorbed_set_numbers = known_set_numbers - {
                        this_set_number
                    }
                    if absorbed_set_numbers:
                        for mention_number, set_number in list(
                            mention_numbers_to_set_numbers.items()
                        ):
                            if set_number in absorbed_set_numbers:
                                mention_numbers_to_set_numbers[
                                    mention_number
                                ] = this_set_number
                else:
                    # No member belongs to a set yet: open a new set that is
                    # numbered after this annotation line.
                    this_set_number = index
                for mention_number in coref_members:
                    mention_numbers_to_set_numbers[
                        mention_number
                    ] = this_set_number

        for set_number in sorted(set(mention_numbers_to_set_numbers.values())):
            members = sorted(
                (
                    mention_number
                    for mention_number, member_set_number in (
                        mention_numbers_to_set_numbers.items()
                    )
                    if member_set_number == set_number
                ),
                key=lambda mention_number: int(mention_number[1:]),
            )
            # Only spans relevant to the types of coreference being learnt.
            spans = []
            for mention_number in members:
                span_to_check = mention_numbers_to_spans[mention_number]
                if rules_analyzer.is_independent_noun(
                    span_to_check.root
                ) or rules_analyzer.is_potential_anaphor(span_to_check.root):
                    spans.append(span_to_check)

            for index, span in enumerate(spans):
                siblings = span.root._.coref_chains.temp_dependent_siblings
                include_dependent_siblings = (
                    len(siblings) > 0 and siblings[-1].i < span.end
                )
                working_referent = Mention(
                    span.root, include_dependent_siblings
                )
                marked = False
                if index > 0:
                    marked = (
                        PolishCoreferenceCorpusANNLoader._mark_true_in_training(
                            spans[index - 1], working_referent
                        )
                    )
                if not marked and index < len(spans) - 1:
                    PolishCoreferenceCorpusANNLoader._mark_true_in_training(
                        spans[index + 1], working_referent
                    )

    def load(
        self, directory_name: str, nlp: Language, rules_analyzer: RulesAnalyzer
    ) -> list:
        """Load every ``.txt``/``.ann`` pair in *directory_name*.

        The texts are parsed with ``nlp.pipe`` and each resulting document
        is annotated by ``load_file``. Returns the list of documents.
        """
        txt_file_contents = []
        ann_file_lines_list = []
        # The context manager closes the directory iterator even when a file
        # cannot be opened.
        with os.scandir(directory_name) as entries:
            for entry in entries:
                if not entry.path.endswith(".txt"):
                    continue
                with open(entry, "r", encoding="UTF8") as txt_file:
                    txt_file_contents.append(txt_file.read())
                ann_filename = "".join((entry.path[:-4], ".ann"))
                with open(ann_filename, "r", encoding="UTF8") as ann_file:
                    ann_file_lines_list.append(ann_file.readlines())

        docs_to_return = []
        for index, doc in enumerate(nlp.pipe(txt_file_contents)):
            if index % 10 == 0:
                print("Loaded", index, "documents")
            self.load_file(doc, ann_file_lines_list[index], rules_analyzer)
            docs_to_return.append(doc)
        return docs_to_return
previous_span.root._.coref_chains, + "temp_potential_referreds", + ): + for ( + mention + ) in ( + previous_span.root._.coref_chains.temp_potential_referreds + ): + if mention == working_referent: + mention.true_in_training = True + marked = True + continue + if not marked and index < len(spans) - 1: + next_span = spans[index + 1] + if hasattr( + next_span.root._.coref_chains, + "temp_potential_referreds", + ): + for ( + mention + ) in ( + next_span.root._.coref_chains.temp_potential_referreds + ): + if mention == working_referent: + mention.true_in_training = True + continue + + def load( + self, directory_name: str, nlp: Language, rules_analyzer: RulesAnalyzer + ) -> list: + txt_file_contents = [] + ann_file_lines_list = [] + for index, txt_filename in enumerate( + t for t in os.scandir(directory_name) if t.path.endswith(".txt") + ): + with open(txt_filename, "r", encoding="UTF8") as txt_file: + txt_file_contents.append("".join(txt_file.readlines())) + ann_filename = "".join((txt_filename.path[:-4], ".ann")) + with open(ann_filename, "r", encoding="UTF8") as ann_file: + ann_file_lines_list.append(ann_file.readlines()) + docs = nlp.pipe(txt_file_contents) + docs_to_return = [] + for index, doc in enumerate(docs): + if index % 10 == 0: + print("Loaded", index, "documents") + self.load_file(doc, ann_file_lines_list[index], rules_analyzer) + docs_to_return.append(doc) + return docs_to_return diff --git a/transformations/gender_randomizer/coreferee/coreferee/training/model.py b/transformations/gender_randomizer/coreferee/coreferee/training/model.py new file mode 100644 index 000000000..6e1d71497 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/coreferee/training/model.py @@ -0,0 +1,327 @@ +# Copyright 2021 msg systems ag + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class ModelGenerator:
    """Generates the feature table and the Keras ensemble model for a spaCy
    pipeline and trains that model on annotated documents."""

    def __init__(self, model_label: str, nlp: Language, vectors_nlp: Language):
        self.model_label = model_label
        self.nlp = nlp
        self.vectors_nlp = vectors_nlp
        self.rules_analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)

    def generate_feature_table(self, docs: list) -> FeatureTable:
        """Collect the values observed in *docs* for each feature type.

        Only tokens that are independent nouns or potential anaphors are
        considered. Parent-related values are collected only for tokens
        that do not carry the root dependency. Every list in the returned
        table is sorted.
        """

        def deps_left_of(token):
            return (
                child.dep_ for child in token.children if child.i < token.i
            )

        def deps_right_of(token):
            return (
                child.dep_ for child in token.children if child.i > token.i
            )

        tags = set()
        morphs = set()
        ent_types = set()
        lefthand_deps_to_children = set()
        righthand_deps_to_children = set()
        lefthand_deps_to_parents = set()
        righthand_deps_to_parents = set()
        parent_tags = set()
        parent_morphs = set()
        parent_lefthand_deps_to_children = set()
        parent_righthand_deps_to_children = set()

        analyzer = self.rules_analyzer
        for doc in docs:
            for token in doc:
                if not (
                    analyzer.is_independent_noun(token)
                    or analyzer.is_potential_anaphor(token)
                ):
                    continue
                tags.add(token.tag_)
                morphs.update(token.morph)
                ent_types.add(token.ent_type_)
                lefthand_deps_to_children.update(deps_left_of(token))
                righthand_deps_to_children.update(deps_right_of(token))
                if token.dep_ == analyzer.root_dep:
                    continue
                parent = token.head
                if token.i < parent.i:
                    lefthand_deps_to_parents.add(token.dep_)
                else:
                    righthand_deps_to_parents.add(token.dep_)
                parent_tags.add(parent.tag_)
                parent_morphs.update(parent.morph)
                parent_lefthand_deps_to_children.update(deps_left_of(parent))
                parent_righthand_deps_to_children.update(deps_right_of(parent))

        return FeatureTable(
            tags=sorted(tags),
            morphs=sorted(morphs),
            ent_types=sorted(ent_types),
            lefthand_deps_to_children=sorted(lefthand_deps_to_children),
            righthand_deps_to_children=sorted(righthand_deps_to_children),
            lefthand_deps_to_parents=sorted(lefthand_deps_to_parents),
            righthand_deps_to_parents=sorted(righthand_deps_to_parents),
            parent_tags=sorted(parent_tags),
            parent_morphs=sorted(parent_morphs),
            parent_lefthand_deps_to_children=sorted(
                parent_lefthand_deps_to_children
            ),
            parent_righthand_deps_to_children=sorted(
                parent_righthand_deps_to_children
            ),
        )

    def generate_keras_model(
        self, docs: list, tendencies_analyzer, ensemble_size: int
    ):
        """Build and compile a Keras model made up of *ensemble_size*
        identically structured, independent networks.

        The input widths are derived from the first document in *docs* that
        has more than ten tokens. Raises ``RuntimeError`` when there is no
        such document or when no vector width can be determined from its
        first ten tokens.
        """

        def create_vector_squeezer(name, ensemble_index):
            """Generates part of the network that accepts a full-width vector
            and squeezes it down to 3 neurons to feed into the rest of the
            network. This is intended to force the network to learn succinct,
            relevant information about the vectors and also to reduce the
            overall importance of the vectors compared to the other map
            inputs during training.
            """
            suffix = str(ensemble_index)
            squeezer_input = keras.Input(
                shape=(vector_width,),
                name="_".join((name, "vector_input", suffix)),
            )
            hidden = layers.Dense(
                24,
                activation="relu",
                name="_".join((name, "vector_hidden_0_", suffix)),
            )(squeezer_input)
            squeezed = layers.Dense(
                3,
                activation="relu",
                name="_".join((name, "vector_output", suffix)),
            )(hidden)
            return squeezer_input, squeezed

        # Look for a helpful document in the training corpus that has a token
        # near the beginning with a vector (should normally be the case for
        # the majority of tokens in the majority of documents)
        helpful_docs = [doc for doc in docs if len(doc) > 10]
        if not helpful_docs:
            raise RuntimeError("No usable docs in training corpus.")
        sample_doc = helpful_docs[0]
        leading_tokens = sample_doc[0:10]
        vocab = self.vectors_nlp.vocab
        vector_width = next(
            (
                len(vocab[token.lemma_].vector)
                for token in leading_tokens
                if vocab[token.lemma_].has_vector
            ),
            -1,
        )
        if vector_width == -1:  # _sm models
            vector_width = next(
                (
                    len(token.vector)
                    for token in leading_tokens
                    if token.has_vector
                ),
                -1,
            )
        if vector_width == -1:
            raise RuntimeError("Unable to determine vector width.")

        feature_map_width = len(
            tendencies_analyzer.get_feature_map(sample_doc[0], sample_doc)
        )
        position_map_width = len(
            tendencies_analyzer.get_position_map(sample_doc[0], sample_doc)
        )
        compatibility_map_width = len(
            tendencies_analyzer.get_compatibility_map(
                Mention(sample_doc[0], False), sample_doc[1]
            )
        )
        # each of the four vectors is squeezed to 3 neurons, hence the 12
        overall_input_width = (
            2 * feature_map_width
            + 2 * position_map_width
            + compatibility_map_width
            + 12
        )

        keras_inputs = []
        keras_outputs = []
        for ensemble_index in range(ensemble_size):
            suffix = str(ensemble_index)
            combined = []
            # The referred side and the referring side are built identically.
            for role in ("referred", "referring"):
                vector_input, vector_output = create_vector_squeezer(
                    role, ensemble_index
                )
                head_input, head_output = create_vector_squeezer(
                    "_".join((role, "head")), ensemble_index
                )
                feature_map_input = keras.Input(
                    shape=(feature_map_width,),
                    name="_".join((role, "feature_map_input", suffix)),
                )
                position_map_input = keras.Input(
                    shape=(position_map_width,),
                    name="_".join((role, "position_map_input", suffix)),
                )
                keras_inputs.extend(
                    (
                        vector_input,
                        head_input,
                        feature_map_input,
                        position_map_input,
                    )
                )
                combined.extend(
                    (
                        vector_output,
                        head_output,
                        feature_map_input,
                        position_map_input,
                    )
                )
            compatibility_map_input = keras.Input(
                shape=(compatibility_map_width,),
                name="_".join(("compatibility_map_input", suffix)),
            )
            keras_inputs.append(compatibility_map_input)
            combined.append(compatibility_map_input)

            layer = layers.Concatenate(
                axis=1, name="_".join(("combined_input", suffix))
            )(combined)
            layer = layers.Dense(
                overall_input_width,
                activation="relu",
                name="_".join(("combined_hidden_0", suffix)),
            )(layer)
            layer = layers.Dense(
                20,
                activation="relu",
                name="_".join(("combined_hidden_1", suffix)),
            )(layer)
            keras_outputs.append(
                layers.Dense(
                    1,
                    activation="sigmoid",
                    name="_".join(("output", suffix)),
                )(layer)
            )

        keras_model = keras.Model(
            inputs=keras_inputs,
            outputs=keras_outputs,
            name="_".join(("model", self.model_label)),
        )
        keras_model.compile(
            loss="binary_crossentropy",
            optimizer="adam",
            metrics=["binary_accuracy"],
        )
        return keras_model

    def train_keras_model(self, docs: list, tendencies_analyzer, keras_model):
        """Fit *keras_model* on the data prepared from *docs* and return the
        result of ``keras_model.fit``."""

        # First we go through the document marking any potential referreds
        # that are in the same chain but not next to one another; these are
        # then excluded from training (neither True nor False)
        for doc in docs:
            for token in doc:
                if not hasattr(
                    token._.coref_chains, "temp_potential_referreds"
                ):
                    continue
                candidates = token._.coref_chains.temp_potential_referreds
                for referred in (
                    r for r in candidates if hasattr(r, "true_in_training")
                ):
                    # Follow the chain backwards from the true referred.
                    while True:
                        working_referring = doc[referred.root_index]
                        if not hasattr(
                            working_referring._.coref_chains,
                            "temp_potential_referreds",
                        ):
                            break
                        # Only the first true referred of the link is used.
                        working_referred = next(
                            (
                                r
                                for r in working_referring._.coref_chains.temp_potential_referreds
                                if hasattr(r, "true_in_training")
                            ),
                            None,
                        )
                        if working_referred is None:
                            break
                        for position, spanning_referred in enumerate(
                            r for r in candidates if r == working_referred
                        ):
                            assert position == 0  # should only be one
                            spanning_referred.spanned_in_training = True
                        referred = working_referred

        (
            keras_inputs,
            _,
            keras_outputs,
        ) = tendencies_analyzer.prepare_keras_data(docs, return_outputs=True)
        return keras_model.fit(
            x=keras_inputs,
            y=keras_outputs,
            epochs=self.rules_analyzer.training_epochs,
        )
class TrainingManager:
    """Train, evaluate and package one model per entry of a language config.

    Construction loads ``lang/<lang>/config.cfg`` from the package
    *root_path*, instantiates the requested loader classes, loads the spaCy
    models named by every config entry that has no model directory yet, and
    prepares the log and temp directories. ``train_models`` then trains each
    outstanding entry and zips the logs and the models directory.
    """

    def __init__(
        self,
        root_path: str,
        lang: str,
        loader_classes: str,
        data_dir: str,
        log_dir: str,
    ):
        """Validate the arguments and load everything needed for training.

        Args:
            root_path: package name handed to ``pkg_resources`` to locate the
                language configuration on disk.
            lang: language code; selects ``lang/<lang>/config.cfg`` and the
                ``models/<lang>`` output directory.
            loader_classes: comma-separated names of classes found in the
                ``coreferee.training.loaders`` module; each is instantiated
                without arguments.
            data_dir: existing directory passed to every loader.
            log_dir: directory for training logs; created if missing and must
                not contain ``..``.

        Raises:
            LanguageNotSupportedError: if no config file exists for *lang*.
            SystemExit: (status 1) if *log_dir* contains ``..``, *data_dir*
                does not exist, or a configured model cannot be loaded or is
                outside its configured version range.
        """
        self.file_system_root = pkg_resources.resource_filename(root_path, "")
        relative_config_filename = os.sep.join(("lang", lang, "config.cfg"))
        if not pkg_resources.resource_exists(
            root_path, relative_config_filename
        ):
            raise LanguageNotSupportedError(lang)
        self.config = Config().from_disk(
            os.sep.join((self.file_system_root, relative_config_filename))
        )
        loader_classnames = loader_classes.split(",")
        self.loaders = []
        for loader_classname in loader_classnames:
            class_ = getattr(
                sys.modules["coreferee.training.loaders"], loader_classname
            )
            self.loaders.append(class_())
        self.lang = lang
        self.models_dirname = os.sep.join(
            (self.file_system_root, "..", "models", lang)
        )
        if not os.path.isdir(self.models_dirname):
            self.set_up_models_dir()

        # Only config entries without an existing model directory are trained.
        self.relevant_config_entry_names = []
        self.nlp_dict = {}
        package_dirname = "".join((COMMON_MODELS_PACKAGE_NAMEPART, self.lang))
        for config_entry_name, config_entry in self.config.items():
            this_model_dir = "".join(
                (
                    self.models_dirname,
                    os.sep,
                    package_dirname,
                    os.sep,
                    config_entry_name,
                )
            )
            if os.path.isdir(this_model_dir):
                print(
                    "Skipping config entry",
                    config_entry_name,
                    "as model exists",
                )
                continue
            self.relevant_config_entry_names.append(config_entry_name)
            model_name = "_".join((lang, config_entry["model"]))
            self.load_model(
                model_name,
                config_entry_name,
                config_entry["from_version"],
                config_entry["to_version"],
            )
            if "vectors_model" in config_entry:
                vectors_model_name = "_".join(
                    (lang, config_entry["vectors_model"])
                )
                self.load_model(
                    vectors_model_name,
                    config_entry_name,
                    config_entry["vectors_from_version"],
                    config_entry["vectors_to_version"],
                    is_vector_model=True,
                )

        self.log_dir = log_dir
        if ".." in log_dir:
            print(".. not permitted in log_dir")
            sys.exit(1)
        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)

        if not os.path.isdir(data_dir):
            print("Data directory", data_dir, "not found.")
            sys.exit(1)
        self.data_dir = data_dir

        # Start every run with an empty temp directory below log_dir.
        temp_dir = os.sep.join((self.log_dir, "temp"))
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
            time.sleep(1)
        os.mkdir(temp_dir)

    def load_model(
        self,
        name,
        config_entry_name,
        from_version,
        to_version,
        *,
        is_vector_model=False
    ):
        """Load spaCy model *name* once and check its version range.

        The model is taken from ``self.nlp_dict`` when already loaded and is
        stored there on success.

        Args:
            name: spaCy model name passed to ``spacy.load``.
            config_entry_name: config entry quoted in error messages.
            from_version: lowest accepted model version (inclusive).
            to_version: highest accepted model version (inclusive).
            is_vector_model: selects the "vectors model" message wording.

        Raises:
            SystemExit: (status 1) if the model cannot be loaded or its
                version lies outside ``[from_version, to_version]``.
        """
        model_kind = "vectors model" if is_vector_model else "model"
        if name not in self.nlp_dict:
            print("Loading model", name, "...")
            try:
                nlp = spacy.load(name)
            except OSError:
                print(
                    "Config entry",
                    config_entry_name,
                    "specifies a " + model_kind,
                    name,
                    "that cannot be loaded.",
                )
                sys.exit(1)
        else:
            nlp = self.nlp_dict[name]
        loaded_version = version.parse(nlp.meta["version"])
        if loaded_version < version.parse(
            from_version
        ) or loaded_version > version.parse(to_version):
            print(
                "Config entry",
                config_entry_name,
                "specifies a version range for " + model_kind,
                name,
                "that does not include the loaded version.",
            )
            sys.exit(1)
        self.nlp_dict[name] = nlp

    def set_up_models_dir(self):
        """Create the models directory with its packaging skeleton.

        Writes ``setup.cfg``, ``pyproject.toml`` and an ``__init__.py`` inside
        the package sub-directory named after the language.
        """
        os.mkdir(self.models_dirname)
        package_dirname = "".join((COMMON_MODELS_PACKAGE_NAMEPART, self.lang))
        os.mkdir(os.sep.join((self.models_dirname, package_dirname)))
        setup_cfg_filename = os.sep.join((self.models_dirname, "setup.cfg"))
        with open(setup_cfg_filename, "w", encoding="utf-8") as setup_cfg_file:
            self.writeln(setup_cfg_file, "[metadata]")
            self.writeln(
                setup_cfg_file, "name = ", package_dirname.replace("_", "-")
            )
            self.writeln(setup_cfg_file, "version = 1.0.0")
            self.writeln(setup_cfg_file)
            self.writeln(setup_cfg_file, "[options]")
            self.writeln(setup_cfg_file, "packages = find:")
            self.writeln(setup_cfg_file, "include_package_data = True")
            self.writeln(setup_cfg_file)
            self.writeln(setup_cfg_file, "[options.package_data]")
            self.writeln(setup_cfg_file, "* = *.bin, *.h5")
        pyproject_toml_filename = os.sep.join(
            (self.models_dirname, "pyproject.toml")
        )
        with open(
            pyproject_toml_filename, "w", encoding="utf-8"
        ) as pyproject_toml_file:
            self.writeln(pyproject_toml_file, "[build-system]")
            self.writeln(pyproject_toml_file, "requires = [")
            self.writeln(pyproject_toml_file, '    "setuptools",')
            self.writeln(pyproject_toml_file, '    "wheel",')
            self.writeln(pyproject_toml_file, "]")
            self.writeln(
                pyproject_toml_file, 'build-backend = "setuptools.build_meta"'
            )
        init_py_filename = os.sep.join(
            (self.models_dirname, package_dirname, "__init__.py")
        )
        with open(init_py_filename, "w", encoding="utf-8") as init_py_file:
            self.writeln(init_py_file)

    @staticmethod
    def writeln(file, *args):
        """Write the ``str()`` of all *args*, concatenated, plus a newline."""
        file.write("".join(("".join([str(arg) for arg in args]), "\n")))

    @staticmethod
    def _split_training_and_test(docs, training_share=0.8):
        """Split *docs* by cumulative length into training and test parts.

        A document goes to the training part when the number of words before
        it does not exceed ``training_share`` of all words.

        Returns:
            ``(training_docs, test_docs, total_words)``.
        """
        total_words = 0
        doc_start_offsets = []
        for doc in docs:
            doc_start_offsets.append(total_words)
            total_words += len(doc)
        split_index = bisect.bisect_right(
            doc_start_offsets, total_words * training_share
        )
        return docs[:split_index], docs[split_index:], total_words

    @staticmethod
    def _accuracy_percentage(correct, incorrect):
        """Return ``correct`` as a percentage of all judgements, 2 decimals.

        Returns 0.0 when nothing was judged instead of dividing by zero.
        """
        judged = correct + incorrect
        if judged == 0:
            return 0.0
        return round(100 * correct / judged, 2)

    @staticmethod
    def _make_zip_archive(archive_stem, source_dir):
        """Zip *source_dir* into ``<archive_stem>.zip`` and return its path.

        ``shutil.make_archive`` appends the ``.zip`` extension itself, so
        *archive_stem* must not carry it. An existing archive of the same
        name is removed first.
        """
        archive_filename = archive_stem + ".zip"
        if os.path.isfile(archive_filename):
            os.remove(archive_filename)
        return shutil.make_archive(archive_stem, "zip", source_dir)

    def log_incorrect_annotation(
        self,
        temp_log_file,
        token,
        correct_referred_token,
        incorrect_referred_token,
    ):
        """Write a report about one wrongly annotated referring token.

        The logged text runs from the start of the sentence containing the
        earlier of the two referred tokens to the end of the sentence that
        contains *token*.

        Args:
            temp_log_file: writable text file receiving the report.
            token: the referring token that was annotated incorrectly.
            correct_referred_token: token of the referred marked in training.
            incorrect_referred_token: token of the referred that was chosen.
        """
        doc = token.doc
        self.writeln(temp_log_file, "Incorrect annotation:")
        start_token_index = min(
            correct_referred_token.i, incorrect_referred_token.i
        )
        sentence_start_index = doc._.coref_chains.temp_sent_starts[
            doc[start_token_index]._.coref_chains.temp_sent_index
        ]
        if token._.coref_chains.temp_sent_index + 1 == len(
            doc._.coref_chains.temp_sent_starts
        ):
            # token is in the last sentence, so the span runs to the document end.
            # NOTE(review): the span is written both before and after the
            # heading here, unlike the other branch - possibly unintended;
            # kept as is so existing log output does not change.
            self.writeln(temp_log_file, doc[sentence_start_index:])
            self.writeln(
                temp_log_file,
                "Tokens from ",
                sentence_start_index,
                " to the end:",
            )
            self.writeln(temp_log_file, doc[sentence_start_index:])
        else:
            sentence_end_index = doc._.coref_chains.temp_sent_starts[
                token._.coref_chains.temp_sent_index + 1
            ]
            self.writeln(
                temp_log_file,
                "Tokens ",
                sentence_start_index,
                " to ",
                sentence_end_index,
                ":",
            )
            self.writeln(
                temp_log_file, doc[sentence_start_index:sentence_end_index]
            )
        self.writeln(
            temp_log_file, "Referring pronoun: ", token, " at index ", token.i
        )
        for (
            potential_referred
        ) in token._.coref_chains.temp_potential_referreds:
            if hasattr(potential_referred, "true_in_training"):
                self.writeln(
                    temp_log_file,
                    "Training referred mentions: ",
                    potential_referred.pretty_representation,
                )
        self.writeln(
            temp_log_file,
            "Annotated referred mentions: ",
            [chain.pretty_representation for chain in token._.coref_chains],
        )
        self.writeln(temp_log_file)

    def generate_keras_ensemble(
        self,
        model_generator,
        temp_log_file,
        training_docs,
        tendencies_analyzer,
    ):
        """Build and train the Keras ensemble, logging summary and accuracy.

        Args:
            model_generator: provides ``generate_keras_model`` and
                ``train_keras_model``.
            temp_log_file: writable text file receiving the log lines.
            training_docs: documents used to build and train the model.
            tendencies_analyzer: passed through to *model_generator*.

        Returns:
            The trained Keras model.
        """
        keras_model = model_generator.generate_keras_model(
            training_docs, tendencies_analyzer, ENSEMBLE_SIZE
        )
        self.writeln(temp_log_file)
        self.writeln(temp_log_file, "Generated Keras model:")
        keras_model.summary(
            print_fn=lambda line: self.writeln(temp_log_file, line)
        )
        self.writeln(temp_log_file, "Training model ...")
        keras_history = model_generator.train_keras_model(
            training_docs, tendencies_analyzer, keras_model
        )
        for index in range(ENSEMBLE_SIZE):
            # History keys have the form output_<index>_binary_accuracy; the
            # last list element is the value after the final epoch.
            keras_accuracy = keras_history.history[
                "_".join(("output", str(index), "binary_accuracy"))
            ][-1]
            self.writeln(
                temp_log_file, "Sub-network ", index, " within ensemble:"
            )
            self.writeln(
                temp_log_file,
                "Binary accuracy after training is ",
                keras_accuracy,
            )
        return keras_model

    def load_documents(self, nlp, rules_analyzer):
        """Return the documents of all configured loaders as one list."""
        docs = []
        for loader in self.loaders:
            docs.extend(loader.load(self.data_dir, nlp, rules_analyzer))
        return docs

    def train_model(self, config_entry_name, config_entry, temp_log_file):
        """Train, evaluate, retrain on all data and save one config entry.

        A first model is trained on the leading part of the documents (see
        ``_split_training_and_test``) and evaluated on the rest; the final
        model is then trained on all documents and written, together with
        the pickled feature table, to a new directory named after the entry.

        Args:
            config_entry_name: name of the entry; also the output directory.
            config_entry: mapping with ``model`` and optional
                ``vectors_model`` keys.
            temp_log_file: writable text file receiving the training log.

        Raises:
            OSError: from ``os.mkdir`` if the output directory already exists.
        """
        self.writeln(temp_log_file, "Config entry name: ", config_entry_name)
        nlp_name = "_".join((self.lang, config_entry["model"]))
        nlp = self.nlp_dict[nlp_name]
        self.writeln(
            temp_log_file,
            "Spacy model: ",
            nlp_name,
            " version ",
            nlp.meta["version"],
        )
        if "vectors_model" in config_entry:
            vectors_nlp_name = "_".join(
                (self.lang, config_entry["vectors_model"])
            )
            vectors_nlp = self.nlp_dict[vectors_nlp_name]
            self.writeln(
                temp_log_file,
                "Spacy vectors model: ",
                vectors_nlp_name,
                " version ",
                vectors_nlp.meta["version"],
            )
        else:
            vectors_nlp = nlp
            self.writeln(
                temp_log_file, "Main model is being used as vectors model"
            )

        rules_analyzer = RulesAnalyzerFactory().get_rules_analyzer(nlp)
        docs = self.load_documents(nlp, rules_analyzer)
        # Separate into training and test for first run
        training_docs, test_docs, total_words = self._split_training_and_test(
            docs
        )
        self.writeln(temp_log_file, "Total words: ", total_words)
        self.writeln(
            temp_log_file,
            "Training docs: ",
            len(training_docs),
            "; test docs: ",
            len(test_docs),
        )
        model_generator = ModelGenerator(config_entry_name, nlp, vectors_nlp)
        feature_table = model_generator.generate_feature_table(training_docs)
        self.writeln(temp_log_file, "Feature table: ", feature_table.__dict__)
        tendencies_analyzer = TendenciesAnalyzer(
            rules_analyzer, vectors_nlp, feature_table
        )
        keras_ensemble = self.generate_keras_ensemble(
            model_generator, temp_log_file, training_docs, tendencies_analyzer
        )
        annotator = Annotator(nlp, vectors_nlp, feature_table, keras_ensemble)
        self.writeln(temp_log_file)
        correct_counter = incorrect_counter = 0
        for test_doc in test_docs:
            annotator.annotate(test_doc, used_in_training=True)
            self.writeln(temp_log_file, "test_doc ", test_doc[:100], "... :")
            self.writeln(temp_log_file)
            self.writeln(temp_log_file, "Coref chains:")
            self.writeln(temp_log_file)
            for chain in test_doc._.coref_chains:
                self.writeln(temp_log_file, chain.pretty_representation)
            self.writeln(temp_log_file)
            self.writeln(temp_log_file, "Incorrect annotations:")
            self.writeln(temp_log_file)
            for token in test_doc:
                if not hasattr(
                    token._.coref_chains, "temp_potential_referreds"
                ):
                    continue
                for (
                    potential_referred
                ) in token._.coref_chains.temp_potential_referreds:
                    if not hasattr(potential_referred, "true_in_training"):
                        continue
                    # Compare the referred marked in training with each chain
                    # the annotator produced for this token.
                    for chain in token._.coref_chains:
                        if potential_referred in chain:
                            correct_counter += 1
                        else:
                            incorrect_counter += 1
                            self.log_incorrect_annotation(
                                temp_log_file,
                                token,
                                token.doc[potential_referred.root_index],
                                token.doc[chain.mentions[0].root_index],
                            )
        if test_docs:
            # Guarded against the case where test documents exist but no
            # token was judged, which previously raised ZeroDivisionError.
            accuracy = self._accuracy_percentage(
                correct_counter, incorrect_counter
            )
            self.writeln(temp_log_file)
            self.writeln(
                temp_log_file,
                "Correct: ",
                correct_counter,
                "; Incorrect: ",
                incorrect_counter,
                " (",
                accuracy,
                "%)",
            )
            print("Accuracy: ", "".join((str(accuracy), "%")))
        self.writeln(temp_log_file)
        self.writeln(temp_log_file, "Retraining with all documents")
        self.writeln(temp_log_file)
        # Documents are loaded afresh for the final run on the full data set.
        docs = self.load_documents(nlp, rules_analyzer)
        feature_table = model_generator.generate_feature_table(docs)
        self.writeln(temp_log_file, "Feature table: ", feature_table.__dict__)
        tendencies_analyzer = TendenciesAnalyzer(
            rules_analyzer, vectors_nlp, feature_table
        )
        keras_ensemble = self.generate_keras_ensemble(
            model_generator, temp_log_file, docs, tendencies_analyzer
        )
        this_model_dir = os.sep.join(
            (
                self.models_dirname,
                "".join((COMMON_MODELS_PACKAGE_NAMEPART, self.lang)),
                config_entry_name,
            )
        )
        os.mkdir(this_model_dir)
        init_py_filename = os.sep.join((this_model_dir, "__init__.py"))
        with open(init_py_filename, "w", encoding="utf-8") as init_py_file:
            self.writeln(init_py_file)
        feature_table_filename = os.sep.join(
            (this_model_dir, FEATURE_TABLE_FILENAME)
        )
        with open(feature_table_filename, "wb") as feature_table_file:
            pickle.dump(feature_table, feature_table_file)
        keras_filename = "".join(
            (this_model_dir, os.sep, KERAS_MODEL_FILENAME)
        )
        keras_ensemble.save(keras_filename)

    def train_models(self):
        """Train every outstanding config entry, then archive logs and models.

        Each entry logs to ``<log_dir>/temp/<entry>.log``. Afterwards the temp
        directory is zipped to ``<log_dir>/training_log_<lang>_<timestamp>.zip``
        and removed, and the models directory is zipped next to itself under
        the package name for the language.
        """
        temp_dir = os.sep.join((self.log_dir, "temp"))
        for config_entry_name in self.relevant_config_entry_names:
            config_entry = self.config[config_entry_name]
            print("Processing", config_entry_name, "...")
            temp_log_filename = "".join(
                (temp_dir, os.sep, config_entry_name, ".log")
            )
            with open(
                temp_log_filename, "w", encoding="utf-8"
            ) as temp_log_file:
                self.train_model(
                    config_entry_name, config_entry, temp_log_file
                )
        timestamp = datetime.now().isoformat(timespec="microseconds")
        sanitized_timestamp = "".join(ch for ch in timestamp if ch.isalnum())
        # No ".zip" suffix here: make_archive adds it, and including it
        # produced files ending in ".zip.zip".
        log_archive_stem = "".join(
            (
                self.log_dir,
                os.sep,
                "training_log_",
                self.lang,
                "_",
                sanitized_timestamp,
            )
        )
        self._make_zip_archive(log_archive_stem, temp_dir)
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
        models_archive_stem = "".join(
            (
                self.models_dirname,
                os.sep,
                "..",
                os.sep,
                COMMON_MODELS_PACKAGE_NAMEPART,
                self.lang,
            )
        )
        self._make_zip_archive(models_archive_stem, self.models_dirname)
mode 100644 index 000000000..aaa3e02a2 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/coreferee_model_de.zip differ diff --git a/transformations/gender_randomizer/coreferee/models/coreferee_model_en.zip b/transformations/gender_randomizer/coreferee/models/coreferee_model_en.zip new file mode 100644 index 000000000..4b8ffa5b0 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/coreferee_model_en.zip differ diff --git a/transformations/gender_randomizer/coreferee/models/coreferee_model_pl.zip b/transformations/gender_randomizer/coreferee/models/coreferee_model_pl.zip new file mode 100644 index 000000000..82dbedec3 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/coreferee_model_pl.zip differ diff --git a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/__init__.py b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/__init__.py @@ -0,0 +1 @@ + diff --git a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/lg_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/lg_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/lg_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/lg_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/lg_3_0_0/feature_table.bin new file mode 100644 index 000000000..267fb9e3c Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/lg_3_0_0/feature_table.bin differ diff --git 
a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/lg_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/lg_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..fd84a7804 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/lg_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/md_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/md_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/md_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/md_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/md_3_0_0/feature_table.bin new file mode 100644 index 000000000..25a6326e8 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/md_3_0_0/feature_table.bin differ diff --git a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/md_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/md_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..5a8d0d9ef Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/md_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/sm_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/sm_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/sm_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git 
a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/sm_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/sm_3_0_0/feature_table.bin new file mode 100644 index 000000000..2eca79cc8 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/sm_3_0_0/feature_table.bin differ diff --git a/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/sm_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/sm_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..e514f9d8c Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/de/coreferee_model_de/sm_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/de/pyproject.toml b/transformations/gender_randomizer/coreferee/models/de/pyproject.toml new file mode 100644 index 000000000..7c0cbdc36 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/de/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools", + "wheel", +] +build-backend = "setuptools.build_meta" diff --git a/transformations/gender_randomizer/coreferee/models/de/setup.cfg b/transformations/gender_randomizer/coreferee/models/de/setup.cfg new file mode 100644 index 000000000..a1bb7b8ed --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/de/setup.cfg @@ -0,0 +1,10 @@ +[metadata] +name = coreferee-model-de +version = 1.0.0 + +[options] +packages = find: +include_package_data = True + +[options.package_data] +* = *.bin, *.h5 diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/__init__.py b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/__init__.py @@ -0,0 +1 @@ + 
diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/lg_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/lg_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/lg_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/lg_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/lg_3_0_0/feature_table.bin new file mode 100644 index 000000000..91a2feeff Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/lg_3_0_0/feature_table.bin differ diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/lg_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/lg_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..b9df3d666 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/lg_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/md_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/md_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/md_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/md_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/md_3_0_0/feature_table.bin new file mode 100644 index 000000000..8f2bdc1c5 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/md_3_0_0/feature_table.bin differ diff --git 
a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/md_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/md_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..2ee7380cb Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/md_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/sm_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/sm_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/sm_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/sm_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/sm_3_0_0/feature_table.bin new file mode 100644 index 000000000..dea6a98e5 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/sm_3_0_0/feature_table.bin differ diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/sm_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/sm_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..9d00c6493 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/sm_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/trf_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/trf_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/trf_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git 
a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/trf_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/trf_3_0_0/feature_table.bin new file mode 100644 index 000000000..81c2bd453 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/trf_3_0_0/feature_table.bin differ diff --git a/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/trf_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/trf_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..16995e4f7 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/en/coreferee_model_en/trf_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/en/pyproject.toml b/transformations/gender_randomizer/coreferee/models/en/pyproject.toml new file mode 100644 index 000000000..7c0cbdc36 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/en/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools", + "wheel", +] +build-backend = "setuptools.build_meta" diff --git a/transformations/gender_randomizer/coreferee/models/en/setup.cfg b/transformations/gender_randomizer/coreferee/models/en/setup.cfg new file mode 100644 index 000000000..95479efb9 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/en/setup.cfg @@ -0,0 +1,10 @@ +[metadata] +name = coreferee-model-en +version = 1.0.0 + +[options] +packages = find: +include_package_data = True + +[options.package_data] +* = *.bin, *.h5 diff --git a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/__init__.py b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/__init__.py @@ -0,0 +1 
@@ + diff --git a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/lg_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/lg_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/lg_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/lg_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/lg_3_0_0/feature_table.bin new file mode 100644 index 000000000..6a832065a Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/lg_3_0_0/feature_table.bin differ diff --git a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/lg_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/lg_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..f48c38076 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/lg_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/md_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/md_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/md_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/md_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/md_3_0_0/feature_table.bin new file mode 100644 index 000000000..e0b82ab7a Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/md_3_0_0/feature_table.bin differ diff --git 
a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/md_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/md_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..378a40ffd Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/md_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/sm_3_0_0/__init__.py b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/sm_3_0_0/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/sm_3_0_0/__init__.py @@ -0,0 +1 @@ + diff --git a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/sm_3_0_0/feature_table.bin b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/sm_3_0_0/feature_table.bin new file mode 100644 index 000000000..bdb76a315 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/sm_3_0_0/feature_table.bin differ diff --git a/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/sm_3_0_0/keras_ensemble.h5 b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/sm_3_0_0/keras_ensemble.h5 new file mode 100644 index 000000000..306159970 Binary files /dev/null and b/transformations/gender_randomizer/coreferee/models/pl/coreferee_model_pl/sm_3_0_0/keras_ensemble.h5 differ diff --git a/transformations/gender_randomizer/coreferee/models/pl/pyproject.toml b/transformations/gender_randomizer/coreferee/models/pl/pyproject.toml new file mode 100644 index 000000000..7c0cbdc36 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/pl/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools", + "wheel", +] +build-backend = "setuptools.build_meta" diff --git 
a/transformations/gender_randomizer/coreferee/models/pl/setup.cfg b/transformations/gender_randomizer/coreferee/models/pl/setup.cfg new file mode 100644 index 000000000..05d82b67c --- /dev/null +++ b/transformations/gender_randomizer/coreferee/models/pl/setup.cfg @@ -0,0 +1,10 @@ +[metadata] +name = coreferee-model-pl +version = 1.0.0 + +[options] +packages = find: +include_package_data = True + +[options.package_data] +* = *.bin, *.h5 diff --git a/transformations/gender_randomizer/coreferee/pyproject.toml b/transformations/gender_randomizer/coreferee/pyproject.toml new file mode 100644 index 000000000..8d91941bb --- /dev/null +++ b/transformations/gender_randomizer/coreferee/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools", + "wheel" +] +build-backend = "setuptools.build_meta" diff --git a/transformations/gender_randomizer/coreferee/setup.cfg b/transformations/gender_randomizer/coreferee/setup.cfg new file mode 100644 index 000000000..9f55ba972 --- /dev/null +++ b/transformations/gender_randomizer/coreferee/setup.cfg @@ -0,0 +1,39 @@ +[metadata] +name = coreferee +version = 1.0.1 +description = Coreference resolution for English, German and Polish, optimised for limited training data and easily extensible for further languages +long_description = file: SHORTREADME.md +long_description_content_type = text/markdown +url = https://github.com/msg-systems/coreferee +author = Richard Paul Hudson, msg systems ag +author_email = richard.hudson@msg.group +license = Apache License 2.0 +keywords= nlp, spacy, spacy-extension, python, machine-learning, keras, coreference, anaphora, coreference-resolution, anaphora-resolution +classifiers = + Development Status :: 5 - Production/Stable + Intended Audience :: Developers + Intended Audience :: Other Audience + Intended Audience :: Education + Intended Audience :: Information Technology + Intended Audience :: Science/Research + License :: OSI Approved :: Apache Software License + Natural Language :: 
English + Natural Language :: German + Natural Language :: Polish + Programming Language :: Python :: 3.7 + Topic :: Scientific/Engineering :: Artificial Intelligence + Topic :: Text Processing :: Linguistic + +[options] +include_package_data = True +packages = find: +python_requires = >=3.7,<3.9 +install_requires = + spacy>=3.0.5 + keras>=2.4.3 + tensorflow>=2.4.1 + numpy~=1.19.2 + #h5py~=2.10.0 + +[options.package_data] +* = *.dat, *.cfg diff --git a/transformations/gender_randomizer/names/README b/transformations/gender_randomizer/names/README new file mode 100644 index 000000000..718a71cce --- /dev/null +++ b/transformations/gender_randomizer/names/README @@ -0,0 +1,16 @@ +Names Corpus, Version 1.3 (1994-03-29) +Copyright (C) 1991 Mark Kantrowitz +Additions by Bill Ross + +This corpus contains 5001 female names and 2943 male names, sorted +alphabetically, one per line. + +You may use the lists of names for any purpose, so long as credit is +given in any published work. You may also redistribute the list if you +provide the recipients with a copy of this README file. The lists are +not in the public domain (I retain the copyright on the lists) but are +freely redistributable. If you have any additions to the lists of +names, I would appreciate receiving them. 
+ +Mark Kantrowitz +http://www-2.cs.cmu.edu/afs/cs/project/ai-repository/ai/areas/nlp/corpora/names/ \ No newline at end of file diff --git a/transformations/gender_randomizer/names/female.txt b/transformations/gender_randomizer/names/female.txt new file mode 100644 index 000000000..a387e03ba --- /dev/null +++ b/transformations/gender_randomizer/names/female.txt @@ -0,0 +1,5001 @@ +Abagael +Abagail +Abbe +Abbey +Abbi +Abbie +Abby +Abigael +Abigail +Abigale +Abra +Acacia +Ada +Adah +Adaline +Adara +Addie +Addis +Adel +Adela +Adelaide +Adele +Adelice +Adelina +Adelind +Adeline +Adella +Adelle +Adena +Adey +Adi +Adiana +Adina +Adora +Adore +Adoree +Adorne +Adrea +Adria +Adriaens +Adrian +Adriana +Adriane +Adrianna +Adrianne +Adrien +Adriena +Adrienne +Aeriel +Aeriela +Aeriell +Ag +Agace +Agata +Agatha +Agathe +Aggi +Aggie +Aggy +Agna +Agnella +Agnes +Agnese +Agnesse +Agneta +Agnola +Agretha +Aida +Aidan +Aigneis +Aila +Aile +Ailee +Aileen +Ailene +Ailey +Aili +Ailina +Ailyn +Aime +Aimee +Aimil +Aina +Aindrea +Ainslee +Ainsley +Ainslie +Ajay +Alaine +Alameda +Alana +Alanah +Alane +Alanna +Alayne +Alberta +Albertina +Albertine +Albina +Alecia +Aleda +Aleece +Aleecia +Aleen +Alejandra +Alejandrina +Alena +Alene +Alessandra +Aleta +Alethea +Alex +Alexa +Alexandra +Alexandrina +Alexi +Alexia +Alexina +Alexine +Alexis +Alfie +Alfreda +Ali +Alia +Alica +Alice +Alicea +Alicia +Alida +Alidia +Alina +Aline +Alis +Alisa +Alisha +Alison +Alissa +Alisun +Alix +Aliza +Alla +Alleen +Allegra +Allene +Alli +Allianora +Allie +Allina +Allis +Allison +Allissa +Allsun +Ally +Allyce +Allyn +Allys +Allyson +Alma +Almeda +Almeria +Almeta +Almira +Almire +Aloise +Aloisia +Aloysia +Alpa +Alta +Althea +Alvera +Alvina +Alvinia +Alvira +Alyce +Alyda +Alys +Alysa +Alyse +Alysia +Alyson +Alyss +Alyssa +Amabel +Amabelle +Amalea +Amalee +Amaleta +Amalia +Amalie +Amalita +Amalle +Amanda +Amandi +Amandie +Amandy +Amara +Amargo +Amata +Amber +Amberly +Ambrosia +Ambur +Ame +Amelia +Amelie +Amelina 
+Ameline +Amelita +Ami +Amie +Amity +Ammamaria +Amy +Ana +Anabel +Anabella +Anabelle +Anais +Analiese +Analise +Anallese +Anallise +Anastasia +Anastasie +Anastassia +Anatola +Andee +Andi +Andie +Andra +Andrea +Andreana +Andree +Andrei +Andria +Andriana +Andriette +Andromache +Andromeda +Andy +Anestassia +Anet +Anett +Anetta +Anette +Ange +Angel +Angela +Angele +Angelia +Angelica +Angelika +Angelina +Angeline +Angelique +Angelita +Angelle +Angie +Angil +Angy +Ania +Anica +Anissa +Anita +Anitra +Anja +Anjanette +Anjela +Ann +Ann-Mari +Ann-Marie +Anna +Anna-Diana +Anna-Diane +Anna-Maria +Annabal +Annabel +Annabela +Annabell +Annabella +Annabelle +Annadiana +Annadiane +Annalee +Annalena +Annaliese +Annalisa +Annalise +Annalyse +Annamari +Annamaria +Annamarie +Anne +Anne-Corinne +Anne-Mar +Anne-Marie +Annecorinne +Anneliese +Annelise +Annemarie +Annetta +Annette +Anni +Annice +Annie +Annissa +Annmaria +Annmarie +Annnora +Annora +Anny +Anselma +Ansley +Anstice +Anthe +Anthea +Anthia +Antoinette +Antonella +Antonetta +Antonia +Antonie +Antonietta +Antonina +Anya +Aphrodite +Appolonia +April +Aprilette +Ara +Arabel +Arabela +Arabele +Arabella +Arabelle +Arda +Ardath +Ardeen +Ardelia +Ardelis +Ardella +Ardelle +Arden +Ardene +Ardenia +Ardine +Ardis +Ardith +Ardra +Ardyce +Ardys +Ardyth +Aretha +Ariadne +Ariana +Arianne +Aridatha +Ariel +Ariela +Ariella +Arielle +Arlana +Arlee +Arleen +Arlen +Arlena +Arlene +Arleta +Arlette +Arleyne +Arlie +Arliene +Arlina +Arlinda +Arline +Arly +Arlyn +Arlyne +Aryn +Ashely +Ashlee +Ashleigh +Ashlen +Ashley +Ashli +Ashlie +Ashly +Asia +Astra +Astrid +Astrix +Atalanta +Athena +Athene +Atlanta +Atlante +Auberta +Aubine +Aubree +Aubrette +Aubrey +Aubrie +Aubry +Audi +Audie +Audra +Audre +Audrey +Audrie +Audry +Audrye +Audy +Augusta +Auguste +Augustina +Augustine +Aura +Aurea +Aurel +Aurelea +Aurelia +Aurelie +Auria +Aurie +Aurilia +Aurlie +Auroora +Aurora +Aurore +Austin +Austina +Austine +Ava +Aveline +Averil +Averyl +Avie +Avis +Aviva +Avivah 
+Avril +Avrit +Ayn +Bab +Babara +Babette +Babita +Babs +Bambi +Bambie +Bamby +Barb +Barbabra +Barbara +Barbara-Anne +Barbaraanne +Barbe +Barbee +Barbette +Barbey +Barbi +Barbie +Barbra +Barby +Bari +Barrie +Barry +Basia +Bathsheba +Batsheva +Bea +Beatrice +Beatrisa +Beatrix +Beatriz +Beau +Bebe +Becca +Becka +Becki +Beckie +Becky +Bee +Beilul +Beitris +Bekki +Bel +Belia +Belicia +Belinda +Belita +Bell +Bella +Bellamy +Bellanca +Belle +Bellina +Belva +Belvia +Bendite +Benedetta +Benedicta +Benedikta +Benetta +Benita +Benni +Bennie +Benny +Benoite +Berenice +Beret +Berget +Berna +Bernadene +Bernadette +Bernadina +Bernadine +Bernardina +Bernardine +Bernelle +Bernete +Bernetta +Bernette +Berni +Bernice +Bernie +Bernita +Berny +Berri +Berrie +Berry +Bert +Berta +Berte +Bertha +Berthe +Berti +Bertie +Bertina +Bertine +Berty +Beryl +Beryle +Bess +Bessie +Bessy +Beth +Bethanne +Bethany +Bethena +Bethina +Betsey +Betsy +Betta +Bette +Bette-Ann +Betteann +Betteanne +Betti +Bettie +Bettina +Bettine +Betty +Bettye +Beulah +Bev +Beverie +Beverlee +Beverlie +Beverly +Bevvy +Bianca +Bianka +Biddy +Bidget +Bill +Billi +Billie +Billy +Binni +Binnie +Binny +Bird +Birdie +Birgit +Birgitta +Blair +Blaire +Blake +Blakelee +Blakeley +Blanca +Blanch +Blancha +Blanche +Blinni +Blinnie +Blinny +Bliss +Blisse +Blithe +Blondell +Blondelle +Blondie +Blondy +Blythe +Bo +Bobbette +Bobbi +Bobbie +Bobby +Bobette +Bobina +Bobine +Bobinette +Bonita +Bonnee +Bonni +Bonnie +Bonny +Brana +Brandais +Brande +Brandea +Brandi +Brandice +Brandie +Brandise +Brandy +Brea +Breanne +Brear +Bree +Breena +Bren +Brena +Brenda +Brenn +Brenna +Brett +Bria +Briana +Brianna +Brianne +Bride +Bridget +Bridgett +Bridgette +Bridie +Brier +Brietta +Brigid +Brigida +Brigit +Brigitta +Brigitte +Brina +Briney +Briny +Brit +Brita +Britaney +Britani +Briteny +Britney +Britni +Britt +Britta +Brittan +Brittany +Britte +Brittney +Brook +Brooke +Brooks +Brunella +Brunhilda +Brunhilde +Bryana +Bryn +Bryna +Brynn +Brynna +Brynne 
+Buffy +Bunni +Bunnie +Bunny +Burta +Cabrina +Cacilia +Cacilie +Caitlin +Caitrin +Cal +Calida +Calla +Calley +Calli +Callida +Callie +Cally +Calypso +Cam +Camala +Camel +Camella +Camellia +Cameo +Cami +Camila +Camile +Camilla +Camille +Cammi +Cammie +Cammy +Canada +Candace +Candi +Candice +Candida +Candide +Candie +Candis +Candra +Candy +Cappella +Caprice +Cara +Caralie +Caren +Carena +Caresa +Caressa +Caresse +Carey +Cari +Caria +Carie +Caril +Carilyn +Carin +Carina +Carine +Cariotta +Carissa +Carita +Caritta +Carla +Carlee +Carleen +Carlen +Carlena +Carlene +Carley +Carli +Carlie +Carlin +Carlina +Carline +Carlisle +Carlita +Carlota +Carlotta +Carly +Carlye +Carlyn +Carlynn +Carlynne +Carma +Carmel +Carmela +Carmelia +Carmelina +Carmelita +Carmella +Carmelle +Carmen +Carmina +Carmine +Carmita +Carmon +Caro +Carol +Carol-Jean +Carola +Carolan +Carolann +Carole +Carolee +Caroleen +Carolie +Carolin +Carolina +Caroline +Caroljean +Carolyn +Carolyne +Carolynn +Caron +Carree +Carri +Carrie +Carrissa +Carrol +Carroll +Carry +Cary +Caryl +Caryn +Casandra +Casey +Casi +Casia +Casie +Cass +Cassandra +Cassandre +Cassandry +Cassaundra +Cassey +Cassi +Cassie +Cassondra +Cassy +Cat +Catarina +Cate +Caterina +Catha +Catharina +Catharine +Cathe +Cathee +Catherin +Catherina +Catherine +Cathi +Cathie +Cathleen +Cathlene +Cathrin +Cathrine +Cathryn +Cathy +Cathyleen +Cati +Catie +Catina +Catlaina +Catlee +Catlin +Catrina +Catriona +Caty +Cayla +Cecelia +Cecil +Cecile +Ceciley +Cecilia +Cecilla +Cecily +Ceil +Cele +Celene +Celesta +Celeste +Celestia +Celestina +Celestine +Celestyn +Celestyna +Celia +Celie +Celina +Celinda +Celine +Celinka +Celisse +Celle +Cesya +Chad +Chanda +Chandal +Chandra +Channa +Chantal +Chantalle +Charil +Charin +Charis +Charissa +Charisse +Charita +Charity +Charla +Charlean +Charleen +Charlena +Charlene +Charline +Charlot +Charlott +Charlotta +Charlotte +Charmain +Charmaine +Charmane +Charmian +Charmine +Charmion +Charo +Charyl +Chastity +Chelsae +Chelsea 
+Chelsey +Chelsie +Chelsy +Cher +Chere +Cherey +Cheri +Cherianne +Cherice +Cherida +Cherie +Cherilyn +Cherilynn +Cherin +Cherise +Cherish +Cherlyn +Cherri +Cherrita +Cherry +Chery +Cherye +Cheryl +Cheslie +Chiarra +Chickie +Chicky +Chiquita +Chloe +Chloette +Chloris +Chris +Chriss +Chrissa +Chrissie +Chrissy +Christa +Christabel +Christabella +Christabelle +Christal +Christalle +Christan +Christean +Christel +Christen +Christi +Christian +Christiana +Christiane +Christie +Christin +Christina +Christine +Christy +Christyna +Chrysa +Chrysler +Chrystal +Chryste +Chrystel +Ciara +Cicely +Cicily +Ciel +Cilka +Cinda +Cindee +Cindelyn +Cinderella +Cindi +Cindie +Cindra +Cindy +Cinnamon +Cissie +Cissy +Clair +Claire +Clara +Clarabelle +Clare +Claresta +Clareta +Claretta +Clarette +Clarey +Clari +Claribel +Clarice +Clarie +Clarinda +Clarine +Clarisa +Clarissa +Clarisse +Clarita +Clary +Claude +Claudelle +Claudetta +Claudette +Claudia +Claudie +Claudina +Claudine +Clea +Clem +Clemence +Clementia +Clementina +Clementine +Clemmie +Clemmy +Cleo +Cleopatra +Clerissa +Cleva +Clio +Clo +Cloe +Cloris +Clotilda +Clovis +Codee +Codi +Codie +Cody +Coleen +Colene +Coletta +Colette +Colleen +Collete +Collette +Collie +Colline +Colly +Con +Concettina +Conchita +Concordia +Conney +Conni +Connie +Conny +Consolata +Constance +Constancia +Constancy +Constanta +Constantia +Constantina +Constantine +Consuela +Consuelo +Cookie +Cora +Corabel +Corabella +Corabelle +Coral +Coralie +Coraline +Coralyn +Cordelia +Cordelie +Cordey +Cordie +Cordula +Cordy +Coreen +Corella +Corena +Corenda +Corene +Coretta +Corette +Corey +Cori +Corie +Corilla +Corina +Corine +Corinna +Corinne +Coriss +Corissa +Corliss +Corly +Cornela +Cornelia +Cornelle +Cornie +Corny +Correna +Correy +Corri +Corrianne +Corrie +Corrina +Corrine +Corrinne +Corry +Cortney +Cory +Cosetta +Cosette +Courtenay +Courtney +Cresa +Cris +Crissie +Crissy +Crista +Cristabel +Cristal +Cristen +Cristi +Cristie +Cristin +Cristina +Cristine 
+Cristionna +Cristy +Crysta +Crystal +Crystie +Cyb +Cybal +Cybel +Cybelle +Cybil +Cybill +Cyndi +Cyndy +Cynthea +Cynthia +Cynthie +Cynthy +Dacey +Dacia +Dacie +Dacy +Dael +Daffi +Daffie +Daffy +Dafna +Dagmar +Dahlia +Daile +Daisey +Daisi +Daisie +Daisy +Dale +Dalenna +Dalia +Dalila +Dallas +Daloris +Damara +Damaris +Damita +Dana +Danell +Danella +Danelle +Danette +Dani +Dania +Danica +Danice +Daniel +Daniela +Daniele +Daniella +Danielle +Danika +Danila +Danit +Danita +Danna +Danni +Dannie +Danny +Dannye +Danya +Danyelle +Danyette +Daphene +Daphna +Daphne +Dara +Darb +Darbie +Darby +Darcee +Darcey +Darci +Darcie +Darcy +Darda +Dareen +Darell +Darelle +Dari +Daria +Darice +Darla +Darleen +Darlene +Darline +Darryl +Darsey +Darsie +Darya +Daryl +Daryn +Dasha +Dasi +Dasie +Dasya +Datha +Daune +Daveen +Daveta +Davida +Davina +Davine +Davita +Dawn +Dawna +Dayle +Dayna +Dea +Deana +Deane +Deanna +Deanne +Deb +Debbi +Debbie +Debbra +Debby +Debee +Debera +Debi +Debor +Debora +Deborah +Debra +Dede +Dedie +Dedra +Dee +Dee Dee +Deeann +Deeanne +Deedee +Deena +Deerdre +Dehlia +Deidre +Deina +Deirdre +Del +Dela +Delaney +Delcina +Delcine +Delia +Delila +Delilah +Delinda +Dell +Della +Delly +Delora +Delores +Deloria +Deloris +Delphina +Delphine +Delphinia +Demeter +Demetra +Demetria +Demetris +Dena +Deni +Denice +Denise +Denna +Denni +Dennie +Denny +Deny +Denys +Denyse +Deonne +Desaree +Desdemona +Desirae +Desiree +Desiri +Deva +Devan +Devi +Devin +Devina +Devinne +Devon +Devondra +Devonna +Devonne +Devora +Dew +Di +Diahann +Diamond +Dian +Diana +Diandra +Diane +Diane-Marie +Dianemarie +Diann +Dianna +Dianne +Diannne +Didi +Dido +Diena +Dierdre +Dina +Dinah +Dinnie +Dinny +Dion +Dione +Dionis +Dionne +Dita +Dix +Dixie +Dode +Dodi +Dodie +Dody +Doe +Doll +Dolley +Dolli +Dollie +Dolly +Dolora +Dolores +Dolorita +Doloritas +Dominica +Dominique +Dona +Donella +Donelle +Donetta +Donia +Donica +Donielle +Donna +Donnajean +Donnamarie +Donni +Donnie +Donny +Dora +Doralia +Doralin +Doralyn 
+Doralynn +Doralynne +Dorcas +Dore +Doreen +Dorelia +Dorella +Dorelle +Dorena +Dorene +Doretta +Dorette +Dorey +Dori +Doria +Dorian +Dorice +Dorie +Dorine +Doris +Dorisa +Dorise +Dorit +Dorita +Doro +Dorolice +Dorolisa +Dorotea +Doroteya +Dorothea +Dorothee +Dorothy +Dorree +Dorri +Dorrie +Dorris +Dorry +Dorthea +Dorthy +Dory +Dosi +Dot +Doti +Dotti +Dottie +Dotty +Dove +Drea +Drew +Dulce +Dulcea +Dulci +Dulcia +Dulciana +Dulcie +Dulcine +Dulcinea +Dulcy +Dulsea +Dusty +Dyan +Dyana +Dyane +Dyann +Dyanna +Dyanne +Dyna +Dynah +E'Lane +Eada +Eadie +Eadith +Ealasaid +Eartha +Easter +Eba +Ebba +Ebonee +Ebony +Eda +Eddi +Eddie +Eddy +Ede +Edee +Edeline +Eden +Edi +Edie +Edin +Edita +Edith +Editha +Edithe +Ediva +Edna +Edwina +Edy +Edyth +Edythe +Effie +Eileen +Eilis +Eimile +Eirena +Ekaterina +Elaina +Elaine +Elana +Elane +Elayne +Elberta +Elbertina +Elbertine +Eleanor +Eleanora +Eleanore +Electra +Elena +Elene +Eleni +Elenore +Eleonora +Eleonore +Elfie +Elfreda +Elfrida +Elfrieda +Elga +Elianora +Elianore +Elicia +Elie +Elinor +Elinore +Elisa +Elisabet +Elisabeth +Elisabetta +Elise +Elisha +Elissa +Elita +Eliza +Elizabet +Elizabeth +Elka +Elke +Ella +Elladine +Elle +Ellen +Ellene +Ellette +Elli +Ellie +Ellissa +Elly +Ellyn +Ellynn +Elmira +Elna +Elnora +Elnore +Eloisa +Eloise +Elonore +Elora +Elsa +Elsbeth +Else +Elsey +Elsi +Elsie +Elsinore +Elspeth +Elsy +Elva +Elvera +Elvina +Elvira +Elwina +Elwira +Elyn +Elyse +Elysee +Elysha +Elysia +Elyssa +Em +Ema +Emalee +Emalia +Emanuela +Emelda +Emelia +Emelina +Emeline +Emelita +Emelyne +Emera +Emilee +Emili +Emilia +Emilie +Emiline +Emily +Emlyn +Emlynn +Emlynne +Emma +Emmalee +Emmaline +Emmalyn +Emmalynn +Emmalynne +Emmeline +Emmey +Emmi +Emmie +Emmy +Emmye +Emogene +Emyle +Emylee +Endora +Engracia +Enid +Enrica +Enrichetta +Enrika +Enriqueta +Enya +Eolanda +Eolande +Eran +Erda +Erena +Erica +Ericha +Ericka +Erika +Erin +Erina +Erinn +Erinna +Erma +Ermengarde +Ermentrude +Ermina +Erminia +Erminie +Erna +Ernaline +Ernesta 
+Ernestine +Ertha +Eryn +Esma +Esmaria +Esme +Esmeralda +Esmerelda +Essa +Essie +Essy +Esta +Estel +Estele +Estell +Estella +Estelle +Ester +Esther +Estrella +Estrellita +Ethel +Ethelda +Ethelin +Ethelind +Etheline +Ethelyn +Ethyl +Etta +Etti +Ettie +Etty +Eudora +Eugenia +Eugenie +Eugine +Eula +Eulalie +Eunice +Euphemia +Eustacia +Eva +Evaleen +Evangelia +Evangelin +Evangelina +Evangeline +Evania +Evanne +Eve +Eveleen +Evelina +Eveline +Evelyn +Evette +Evey +Evie +Evita +Evonne +Evvie +Evvy +Evy +Eyde +Eydie +Fabrianne +Fabrice +Fae +Faina +Faith +Fallon +Fan +Fanchette +Fanchon +Fancie +Fancy +Fanechka +Fania +Fanni +Fannie +Fanny +Fanya +Fara +Farah +Farand +Farica +Farra +Farrah +Farrand +Fatima +Faun +Faunie +Faustina +Faustine +Fawn +Fawna +Fawne +Fawnia +Fay +Faydra +Faye +Fayette +Fayina +Fayre +Fayth +Faythe +Federica +Fedora +Felecia +Felicdad +Felice +Felicia +Felicity +Felicle +Felipa +Felisha +Felita +Feliza +Fenelia +Feodora +Ferdinanda +Ferdinande +Fern +Fernanda +Fernande +Fernandina +Ferne +Fey +Fiann +Fianna +Fidela +Fidelia +Fidelity +Fifi +Fifine +Filia +Filide +Filippa +Fina +Fiona +Fionna +Fionnula +Fiorenze +Fleur +Fleurette +Flo +Flor +Flora +Florance +Flore +Florella +Florence +Florencia +Florentia +Florenza +Florette +Flori +Floria +Florice +Florida +Florie +Florina +Florinda +Floris +Florri +Florrie +Florry +Flory +Flossi +Flossie +Flossy +Flower +Fortuna +Fortune +Fran +France +Francene +Frances +Francesca +Francesmary +Francine +Francis +Francisca +Franciska +Francoise +Francyne +Frank +Frankie +Franky +Franni +Frannie +Franny +Frayda +Fred +Freda +Freddi +Freddie +Freddy +Fredelia +Frederica +Fredericka +Fredi +Fredia +Fredra +Fredrika +Freida +Frieda +Friederike +Fulvia +Gabbey +Gabbi +Gabbie +Gabey +Gabi +Gabie +Gabriel +Gabriela +Gabriell +Gabriella +Gabrielle +Gabriellia +Gabrila +Gaby +Gae +Gael +Gail +Gale +Gale +Galina +Garland +Garnet +Garnette +Gates +Gavra +Gavrielle +Gay +Gayla +Gayle +Gayleen +Gaylene +Gaynor +Geeta +Gelya 
+Gen +Gena +Gene +Geneva +Genevieve +Genevra +Genia +Genna +Genni +Gennie +Gennifer +Genny +Genovera +Genvieve +George +Georgeanna +Georgeanne +Georgena +Georgeta +Georgetta +Georgette +Georgia +Georgiamay +Georgiana +Georgianna +Georgianne +Georgie +Georgina +Georgine +Gera +Geralda +Geraldina +Geraldine +Gerda +Gerhardine +Geri +Gerianna +Gerianne +Gerladina +Germain +Germaine +Germana +Gerri +Gerrie +Gerrilee +Gerry +Gert +Gerta +Gerti +Gertie +Gertrud +Gertruda +Gertrude +Gertrudis +Gerty +Giacinta +Giana +Gianina +Gianna +Gigi +Gilberta +Gilberte +Gilbertina +Gilbertine +Gilda +Gill +Gillan +Gilli +Gillian +Gillie +Gilligan +Gilly +Gina +Ginelle +Ginevra +Ginger +Ginni +Ginnie +Ginnifer +Ginny +Giorgia +Giovanna +Gipsy +Giralda +Gisela +Gisele +Gisella +Giselle +Gita +Gizela +Glad +Gladi +Gladis +Gladys +Gleda +Glen +Glenda +Glenine +Glenn +Glenna +Glennie +Glennis +Glori +Gloria +Gloriana +Gloriane +Glorianna +Glory +Glyn +Glynda +Glynis +Glynnis +Godiva +Golda +Goldarina +Goldi +Goldia +Goldie +Goldina +Goldy +Grace +Gracia +Gracie +Grata +Gratia +Gratiana +Gray +Grayce +Grazia +Gredel +Greer +Greta +Gretal +Gretchen +Grete +Gretel +Grethel +Gretna +Gretta +Grier +Griselda +Grissel +Guendolen +Guenevere +Guenna +Guglielma +Gui +Guillema +Guillemette +Guinevere +Guinna +Gunilla +Gunvor +Gus +Gusella +Gussi +Gussie +Gussy +Gusta +Gusti +Gustie +Gusty +Gwen +Gwendolen +Gwendolin +Gwendolyn +Gweneth +Gwenette +Gwenn +Gwenneth +Gwenni +Gwennie +Gwenny +Gwenora +Gwenore +Gwyn +Gwyneth +Gwynne +Gypsy +Hadria +Hailee +Haily +Haleigh +Halette +Haley +Hali +Halie +Halimeda +Halley +Halli +Hallie +Hally +Hana +Hanna +Hannah +Hanni +Hannibal +Hannie +Hannis +Hanny +Happy +Harlene +Harley +Harli +Harlie +Harmonia +Harmonie +Harmony +Harri +Harrie +Harriet +Harriett +Harrietta +Harriette +Harriot +Harriott +Hatti +Hattie +Hatty +Havivah +Hayley +Hazel +Heath +Heather +Heda +Hedda +Heddi +Heddie +Hedi +Hedvig +Hedwig +Hedy +Heida +Heide +Heidi +Heidie +Helaina +Helaine 
+Helen +Helen-Elizabeth +Helena +Helene +Helga +Helge +Helise +Hellene +Helli +Heloise +Helsa +Helyn +Hendrika +Henka +Henrie +Henrieta +Henrietta +Henriette +Henryetta +Hephzibah +Hermia +Hermina +Hermine +Herminia +Hermione +Herta +Hertha +Hester +Hesther +Hestia +Hetti +Hettie +Hetty +Hilarie +Hilary +Hilda +Hildagard +Hildagarde +Hilde +Hildegaard +Hildegarde +Hildy +Hillary +Hilliary +Hinda +Holley +Holli +Hollie +Holly +Holly-Anne +Hollyanne +Honey +Honor +Honoria +Hope +Horatia +Hortense +Hortensia +Hulda +Hyacinth +Hyacintha +Hyacinthe +Hyacinthia +Hyacinthie +Hynda +Ianthe +Ibbie +Ibby +Ida +Idalia +Idalina +Idaline +Idell +Idelle +Idette +Ike +Ikey +Ilana +Ileana +Ileane +Ilene +Ilise +Ilka +Illa +Ilona +Ilsa +Ilse +Ilysa +Ilyse +Ilyssa +Imelda +Imogen +Imogene +Imojean +Ina +Inci +Indira +Ines +Inesita +Inessa +Inez +Inga +Ingaberg +Ingaborg +Inge +Ingeberg +Ingeborg +Inger +Ingrid +Ingunna +Inna +Ioana +Iolande +Iolanthe +Iona +Iormina +Ira +Irena +Irene +Irina +Iris +Irita +Irma +Isa +Isabeau +Isabel +Isabelita +Isabella +Isabelle +Isador +Isadora +Isadore +Isahella +Iseabal +Isidora +Isis +Isobel +Issi +Issie +Issy +Ivett +Ivette +Ivie +Ivonne +Ivory +Ivy +Izabel +Izzi +Jacenta +Jacinda +Jacinta +Jacintha +Jacinthe +Jackelyn +Jacki +Jackie +Jacklin +Jacklyn +Jackquelin +Jackqueline +Jacky +Jaclin +Jaclyn +Jacquelin +Jacqueline +Jacquelyn +Jacquelynn +Jacquenetta +Jacquenette +Jacquetta +Jacquette +Jacqui +Jacquie +Jacynth +Jada +Jade +Jaime +Jaimie +Jaine +Jaleh +Jami +Jamie +Jamima +Jammie +Jan +Jana +Janaya +Janaye +Jandy +Jane +Janean +Janeczka +Janeen +Janel +Janela +Janella +Janelle +Janene +Janenna +Janessa +Janet +Janeta +Janetta +Janette +Janeva +Janey +Jania +Janice +Janie +Janifer +Janina +Janine +Janis +Janith +Janka +Janna +Jannel +Jannelle +Janot +Jany +Jaquelin +Jaquelyn +Jaquenetta +Jaquenette +Jaquith +Jasmin +Jasmina +Jasmine +Jayme +Jaymee +Jayne +Jaynell +Jazmin +Jean +Jeana +Jeane +Jeanelle +Jeanette +Jeanie +Jeanine +Jeanna 
+Jeanne +Jeannette +Jeannie +Jeannine +Jehanna +Jelene +Jemie +Jemima +Jemimah +Jemmie +Jemmy +Jen +Jena +Jenda +Jenelle +Jenette +Jeni +Jenica +Jeniece +Jenifer +Jeniffer +Jenilee +Jenine +Jenn +Jenna +Jennee +Jennette +Jenni +Jennica +Jennie +Jennifer +Jennilee +Jennine +Jenny +Jeraldine +Jeralee +Jere +Jeri +Jermaine +Jerrie +Jerrilee +Jerrilyn +Jerrine +Jerry +Jerrylee +Jess +Jessa +Jessalin +Jessalyn +Jessamine +Jessamyn +Jesse +Jesselyn +Jessi +Jessica +Jessie +Jessika +Jessy +Jewel +Jewell +Jewelle +Jill +Jillana +Jillane +Jillayne +Jilleen +Jillene +Jilli +Jillian +Jillie +Jilly +Jinny +Jo +Jo Ann +Jo-Ann +JoAnn +Jo-Anne +JoAnne +Joan +Joana +Joane +Joanie +Joann +Joanna +Joanne +Joannes +Jobey +Jobi +Jobie +Jobina +Joby +Jobye +Jobyna +Jocelin +Joceline +Jocelyn +Jocelyne +Jodee +Jodi +Jodie +Jody +Joela +Joelie +Joell +Joella +Joelle +Joellen +Joelly +Joellyn +Joelynn +Joete +Joey +Johanna +Johannah +Johnette +Johnna +Joice +Jojo +Jolee +Joleen +Jolene +Joletta +Joli +Jolie +Joline +Joly +Jolyn +Jolynn +Jonell +Joni +Jonie +Jonis +Jordain +Jordan +Jordana +Jordanna +Jorey +Jori +Jorie +Jorrie +Jorry +Joscelin +Josee +Josefa +Josefina +Joselyn +Josepha +Josephina +Josephine +Josey +Josi +Josie +Joslyn +Josselyn +Josy +Jourdan +Joy +Joya +Joyan +Joyann +Joyce +Joycelin +Joye +Joyous +Juana +Juanita +Jude +Judi +Judie +Judith +Juditha +Judy +Judye +Julee +Juli +Julia +Juliana +Juliane +Juliann +Julianna +Julianne +Julie +Julienne +Juliet +Julieta +Julietta +Juliette +Julina +Juline +Julissa +Julita +June +Junette +Junia +Junie +Junina +Justin +Justina +Justine +Jyoti +Kaari +Kacey +Kacie +Kacy +Kai +Kaia +Kaila +Kaile +Kailey +Kaitlin +Kaitlyn +Kaitlynn +Kaja +Kakalina +Kala +Kaleena +Kali +Kalie +Kalila +Kalina +Kalinda +Kalindi +Kalli +Kally +Kameko +Kamila +Kamilah +Kamillah +Kandace +Kandy +Kania +Kanya +Kara +Kara-Lynn +Karalee +Karalynn +Kare +Karee +Karel +Karen +Karena +Kari +Karia +Karie +Karil +Karilynn +Karin +Karina +Karine +Kariotta +Karisa 
+Karissa +Karita +Karla +Karlee +Karleen +Karlen +Karlene +Karlie +Karlotta +Karlotte +Karly +Karlyn +Karmen +Karna +Karol +Karola +Karole +Karolina +Karoline +Karoly +Karon +Karrah +Karrie +Karry +Kary +Karyl +Karylin +Karyn +Kasey +Kass +Kassandra +Kassey +Kassi +Kassia +Kassie +Kaster +Kat +Kata +Katalin +Kate +Katee +Katerina +Katerine +Katey +Kath +Katha +Katharina +Katharine +Katharyn +Kathe +Katheleen +Katherina +Katherine +Katheryn +Kathi +Kathie +Kathleen +Kathlene +Kathlin +Kathrine +Kathryn +Kathryne +Kathy +Kathye +Kati +Katie +Katina +Katine +Katinka +Katleen +Katlin +Katrina +Katrine +Katrinka +Katti +Kattie +Katuscha +Katusha +Katy +Katya +Kay +Kaycee +Kaye +Kayla +Kayle +Kaylee +Kayley +Kaylil +Kaylyn +Kee +Keeley +Keelia +Keely +Kelcey +Kelci +Kelcie +Kelcy +Kelila +Kellen +Kelley +Kelli +Kellia +Kellie +Kellina +Kellsie +Kelly +Kellyann +Kelsey +Kelsi +Kelsy +Kendra +Kendre +Kenna +Keren +Keri +Keriann +Kerianne +Kerri +Kerrie +Kerrill +Kerrin +Kerry +Kerstin +Kesley +Keslie +Kessia +Kessiah +Ketti +Kettie +Ketty +Kevina +Kevyn +Ki +Kia +Kiah +Kial +Kiele +Kiersten +Kikelia +Kiley +Kim +Kimberlee +Kimberley +Kimberli +Kimberly +Kimberlyn +Kimbra +Kimmi +Kimmie +Kimmy +Kinna +Kip +Kipp +Kippie +Kippy +Kira +Kirbee +Kirbie +Kirby +Kiri +Kirsten +Kirsteni +Kirsti +Kirstie +Kirstin +Kirstyn +Kissee +Kissiah +Kissie +Kit +Kitti +Kittie +Kitty +Kizzee +Kizzie +Klara +Klarika +Klarrisa +Konstance +Konstanze +Koo +Kora +Koral +Koralle +Kordula +Kore +Korella +Koren +Koressa +Kori +Korie +Korney +Korrie +Korry +Kourtney +Kris +Krissie +Krissy +Krista +Kristal +Kristan +Kriste +Kristel +Kristen +Kristi +Kristien +Kristin +Kristina +Kristine +Kristy +Kristyn +Krysta +Krystal +Krystalle +Krystle +Krystyna +Kyla +Kyle +Kylen +Kylie +Kylila +Kylynn +Kym +Kynthia +Kyrstin +La +Lacee +Lacey +Lacie +Lacy +Ladonna +Laetitia +Laila +Laina +Lainey +Lamb +Lana +Lane +Lanette +Laney +Lani +Lanie +Lanita +Lanna +Lanni +Lanny +Lara +Laraine +Lari +Larina +Larine +Larisa 
+Larissa +Lark +Laryssa +Latashia +Latia +Latisha +Latrena +Latrina +Laura +Lauraine +Laural +Lauralee +Laure +Lauree +Laureen +Laurel +Laurella +Lauren +Laurena +Laurene +Lauretta +Laurette +Lauri +Laurianne +Laurice +Laurie +Lauryn +Lavena +Laverna +Laverne +Lavina +Lavinia +Lavinie +Layla +Layne +Layney +Lea +Leah +Leandra +Leann +Leanna +Leanne +Leanor +Leanora +Lebbie +Leda +Lee +LeeAnn +Leeann +Leeanne +Leela +Leelah +Leena +Leesa +Leese +Legra +Leia +Leiah +Leigh +Leigha +Leila +Leilah +Leisha +Lela +Lelah +Leland +Lelia +Lena +Lenee +Lenette +Lenka +Lenna +Lenora +Lenore +Leodora +Leoine +Leola +Leoline +Leona +Leonanie +Leone +Leonelle +Leonie +Leonora +Leonore +Leontine +Leontyne +Leora +Leorah +Leshia +Lesley +Lesli +Leslie +Lesly +Lesya +Leta +Lethia +Leticia +Letisha +Letitia +Letta +Letti +Lettie +Letty +Leyla +Lezlie +Lia +Lian +Liana +Liane +Lianna +Lianne +Lib +Libbey +Libbi +Libbie +Libby +Licha +Lida +Lidia +Lil +Lila +Lilah +Lilas +Lilia +Lilian +Liliane +Lilias +Lilith +Lilla +Lilli +Lillian +Lillis +Lilllie +Lilly +Lily +Lilyan +Lin +Lina +Lind +Linda +Lindi +Lindie +Lindsay +Lindsey +Lindsy +Lindy +Linea +Linell +Linet +Linette +Linn +Linnea +Linnell +Linnet +Linnie +Linzy +Liora +Liorah +Lira +Lisa +Lisabeth +Lisandra +Lisbeth +Lise +Lisetta +Lisette +Lisha +Lishe +Lissa +Lissi +Lissie +Lissy +Lita +Liuka +Livia +Liz +Liza +Lizabeth +Lizbeth +Lizette +Lizzie +Lizzy +Loella +Lois +Loise +Lola +Lolande +Loleta +Lolita +Lolly +Lona +Lonee +Loni +Lonna +Lonni +Lonnie +Lora +Lorain +Loraine +Loralee +Loralie +Loralyn +Loree +Loreen +Lorelei +Lorelle +Loren +Lorena +Lorene +Lorenza +Loretta +Lorettalorna +Lorette +Lori +Loria +Lorianna +Lorianne +Lorie +Lorilee +Lorilyn +Lorinda +Lorine +Lorita +Lorna +Lorne +Lorraine +Lorrayne +Lorri +Lorrie +Lorrin +Lorry +Lory +Lotta +Lotte +Lotti +Lottie +Lotty +Lou +Louella +Louisa +Louise +Louisette +Love +Luana +Luanna +Luce +Luci +Lucia +Luciana +Lucie +Lucienne +Lucila +Lucilia +Lucille +Lucina +Lucinda 
+Lucine +Lucita +Lucky +Lucretia +Lucy +Luella +Luelle +Luisa +Luise +Lula +Lulita +Lulu +Luna +Lura +Lurette +Lurleen +Lurlene +Lurline +Lusa +Lust +Lyda +Lydia +Lydie +Lyn +Lynda +Lynde +Lyndel +Lyndell +Lyndsay +Lyndsey +Lyndsie +Lyndy +Lynea +Lynelle +Lynett +Lynette +Lynn +Lynna +Lynne +Lynnea +Lynnell +Lynnelle +Lynnet +Lynnett +Lynnette +Lynsey +Lysandra +Lyssa +Mab +Mabel +Mabelle +Mable +Mada +Madalena +Madalyn +Maddalena +Maddi +Maddie +Maddy +Madel +Madelaine +Madeleine +Madelena +Madelene +Madelin +Madelina +Madeline +Madella +Madelle +Madelon +Madelyn +Madge +Madlen +Madlin +Madona +Madonna +Mady +Mae +Maegan +Mag +Magda +Magdaia +Magdalen +Magdalena +Magdalene +Maggee +Maggi +Maggie +Maggy +Magna +Mahala +Mahalia +Maia +Maible +Maiga +Mair +Maire +Mairead +Maisey +Maisie +Mala +Malanie +Malcah +Malena +Malia +Malina +Malinda +Malinde +Malissa +Malissia +Malka +Malkah +Mallissa +Mallorie +Mallory +Malorie +Malory +Malva +Malvina +Malynda +Mame +Mamie +Manda +Mandi +Mandie +Mandy +Manon +Manya +Mara +Marabel +Marcela +Marcelia +Marcella +Marcelle +Marcellina +Marcelline +Marchelle +Marci +Marcia +Marcie +Marcile +Marcille +Marcy +Mareah +Maren +Marena +Maressa +Marga +Margalit +Margalo +Margaret +Margareta +Margarete +Margaretha +Margarethe +Margaretta +Margarette +Margarita +Margaux +Marge +Margeaux +Margery +Marget +Margette +Margi +Margie +Margit +Marglerite +Margo +Margot +Margret +Marguerite +Margurite +Margy +Mari +Maria +Mariam +Marian +Mariana +Mariann +Marianna +Marianne +Maribel +Maribelle +Maribeth +Marice +Maridel +Marie +Marie-Ann +Marie-Jeanne +Marieann +Mariejeanne +Mariel +Mariele +Marielle +Mariellen +Marietta +Mariette +Marigold +Marijo +Marika +Marilee +Marilin +Marillin +Marilyn +Marin +Marina +Marinna +Marion +Mariquilla +Maris +Marisa +Mariska +Marissa +Marit +Marita +Maritsa +Mariya +Marj +Marja +Marje +Marji +Marjie +Marjorie +Marjory +Marjy +Marketa +Marla +Marlane +Marleah +Marlee +Marleen +Marlena +Marlene +Marley +Marlie 
+Marline +Marlo +Marlyn +Marna +Marne +Marney +Marni +Marnia +Marnie +Marquita +Marrilee +Marris +Marrissa +Marry +Marsha +Marsiella +Marta +Martelle +Martguerita +Martha +Marthe +Marthena +Marti +Martica +Martie +Martina +Martita +Marty +Martynne +Mary +Marya +Maryangelyn +Maryann +Maryanna +Maryanne +Marybelle +Marybeth +Maryellen +Maryjane +Maryjo +Maryl +Marylee +Marylin +Marylinda +Marylou +Marylynne +Maryrose +Marys +Marysa +Masha +Matelda +Mathilda +Mathilde +Matilda +Matilde +Matti +Mattie +Matty +Maud +Maude +Maudie +Maura +Maure +Maureen +Maureene +Maurene +Maurine +Maurise +Maurita +Mavis +Mavra +Max +Maxi +Maxie +Maxine +Maxy +May +Maya +Maybelle +Mayda +Maye +Mead +Meade +Meagan +Meaghan +Meara +Mechelle +Meg +Megan +Megen +Meggan +Meggi +Meggie +Meggy +Meghan +Meghann +Mehetabel +Mei +Meira +Mel +Mela +Melamie +Melania +Melanie +Melantha +Melany +Melba +Melesa +Melessa +Melicent +Melina +Melinda +Melinde +Melisa +Melisande +Melisandra +Melisenda +Melisent +Melissa +Melisse +Melita +Melitta +Mella +Melli +Mellicent +Mellie +Mellisa +Mellisent +Mellissa +Melloney +Melly +Melodee +Melodie +Melody +Melonie +Melony +Melosa +Melva +Mercedes +Merci +Mercie +Mercy +Meredith +Meredithe +Meridel +Meridith +Meriel +Merilee +Merilyn +Meris +Merissa +Merl +Merla +Merle +Merlina +Merline +Merna +Merola +Merralee +Merridie +Merrie +Merrielle +Merrile +Merrilee +Merrili +Merrill +Merrily +Merry +Mersey +Meryl +Meta +Mia +Micaela +Michaela +Michaelina +Michaeline +Michaella +Michal +Michel +Michele +Michelina +Micheline +Michell +Michelle +Micki +Mickie +Micky +Midge +Mignon +Mignonne +Miguela +Miguelita +Mikako +Mildred +Mildrid +Milena +Milicent +Milissent +Milka +Milli +Millicent +Millie +Millisent +Milly +Milzie +Mimi +Min +Mina +Minda +Mindy +Minerva +Minetta +Minette +Minna +Minni +Minnie +Minny +Minta +Miquela +Mira +Mirabel +Mirabella +Mirabelle +Miran +Miranda +Mireielle +Mireille +Mirella +Mirelle +Miriam +Mirilla +Mirna +Misha +Missie +Missy +Misti +Misty 
+Mitra +Mitzi +Mmarianne +Modesta +Modestia +Modestine +Modesty +Moina +Moira +Moll +Mollee +Molli +Mollie +Molly +Mommy +Mona +Monah +Monica +Monika +Monique +Mora +Moreen +Morena +Morgan +Morgana +Morganica +Morganne +Morgen +Moria +Morissa +Morlee +Morna +Moselle +Moya +Moyna +Moyra +Mozelle +Muffin +Mufi +Mufinella +Muire +Mureil +Murial +Muriel +Murielle +Myna +Myra +Myrah +Myranda +Myriam +Myrilla +Myrle +Myrlene +Myrna +Myrta +Myrtia +Myrtice +Myrtie +Myrtle +Nada +Nadean +Nadeen +Nadia +Nadine +Nadiya +Nady +Nadya +Nalani +Nan +Nana +Nananne +Nance +Nancee +Nancey +Nanci +Nancie +Nancy +Nanete +Nanette +Nani +Nanice +Nanine +Nannette +Nanni +Nannie +Nanny +Nanon +Naoma +Naomi +Nara +Nari +Nariko +Nat +Nata +Natala +Natalee +Natalia +Natalie +Natalina +Nataline +Natalya +Natasha +Natassia +Nathalia +Nathalie +Natka +Natty +Neala +Neda +Nedda +Nedi +Neely +Neila +Neile +Neilla +Neille +Nela +Nelia +Nelie +Nell +Nelle +Nelli +Nellie +Nelly +Nena +Nerissa +Nerita +Nert +Nerta +Nerte +Nerti +Nertie +Nerty +Nessa +Nessi +Nessie +Nessy +Nesta +Netta +Netti +Nettie +Nettle +Netty +Nevsa +Neysa +Nichol +Nichole +Nicholle +Nicki +Nickie +Nicky +Nicol +Nicola +Nicole +Nicolea +Nicolette +Nicoli +Nicolina +Nicoline +Nicolle +Nidia +Nike +Niki +Nikki +Nikkie +Nikoletta +Nikolia +Nil +Nina +Ninetta +Ninette +Ninnetta +Ninnette +Ninon +Nisa +Nissa +Nisse +Nissie +Nissy +Nita +Nitin +Nixie +Noami +Noel +Noelani +Noell +Noella +Noelle +Noellyn +Noelyn +Noemi +Nola +Nolana +Nolie +Nollie +Nomi +Nona +Nonah +Noni +Nonie +Nonna +Nonnah +Nora +Norah +Norean +Noreen +Norene +Norina +Norine +Norma +Norri +Norrie +Norry +Nova +Novelia +Nydia +Nyssa +Octavia +Odele +Odelia +Odelinda +Odella +Odelle +Odessa +Odetta +Odette +Odilia +Odille +Ofelia +Ofella +Ofilia +Ola +Olenka +Olga +Olia +Olimpia +Olive +Olivette +Olivia +Olivie +Oliy +Ollie +Olly +Olva +Olwen +Olympe +Olympia +Olympie +Ondrea +Oneida +Onida +Onlea +Oona +Opal +Opalina +Opaline +Ophelia +Ophelie +Oprah +Ora +Oralee 
+Oralia +Oralie +Oralla +Oralle +Orel +Orelee +Orelia +Orelie +Orella +Orelle +Oreste +Oriana +Orly +Orsa +Orsola +Ortensia +Otha +Othelia +Othella +Othilia +Othilie +Ottilie +Pacifica +Page +Paige +Paloma +Pam +Pamela +Pamelina +Pamella +Pammi +Pammie +Pammy +Pandora +Pansie +Pansy +Paola +Paolina +Parwane +Pat +Patience +Patrica +Patrice +Patricia +Patrizia +Patsy +Patti +Pattie +Patty +Paula +Paula-Grace +Paule +Pauletta +Paulette +Pauli +Paulie +Paulina +Pauline +Paulita +Pauly +Pavia +Pavla +Pearl +Pearla +Pearle +Pearline +Peg +Pegeen +Peggi +Peggie +Peggy +Pen +Penelopa +Penelope +Penni +Pennie +Penny +Pepi +Pepita +Peri +Peria +Perl +Perla +Perle +Perri +Perrine +Perry +Persis +Pet +Peta +Petra +Petrina +Petronella +Petronia +Petronilla +Petronille +Petunia +Phaedra +Phaidra +Phebe +Phedra +Phelia +Phil +Philipa +Philippa +Philippe +Philippine +Philis +Phillida +Phillie +Phillis +Philly +Philomena +Phoebe +Phylis +Phyllida +Phyllis +Phyllys +Phylys +Pia +Pier +Pierette +Pierrette +Pietra +Piper +Pippa +Pippy +Polly +Pollyanna +Pooh +Poppy +Portia +Pris +Prisca +Priscella +Priscilla +Prissie +Pru +Prudence +Prudi +Prudy +Prue +Prunella +Queada +Queenie +Quentin +Querida +Quinn +Quinta +Quintana +Quintilla +Quintina +Rachael +Rachel +Rachele +Rachelle +Rae +Raf +Rafa +Rafaela +Rafaelia +Rafaelita +Ragnhild +Rahal +Rahel +Raina +Raine +Rakel +Ralina +Ramona +Ramonda +Rana +Randa +Randee +Randene +Randi +Randie +Randy +Ranee +Rani +Rania +Ranice +Ranique +Ranna +Raphaela +Raquel +Raquela +Rasia +Rasla +Raven +Ray +Raychel +Raye +Rayna +Raynell +Rayshell +Rea +Reba +Rebbecca +Rebe +Rebeca +Rebecca +Rebecka +Rebeka +Rebekah +Rebekkah +Ree +Reeba +Reena +Reeta +Reeva +Regan +Reggi +Reggie +Regina +Regine +Reiko +Reina +Reine +Remy +Rena +Renae +Renata +Renate +Rene +Renee +Renel +Renell +Renelle +Renie +Rennie +Reta +Retha +Revkah +Rey +Reyna +Rhea +Rheba +Rheta +Rhetta +Rhiamon +Rhianna +Rhianon +Rhoda +Rhodia +Rhodie +Rhody +Rhona +Rhonda +Riane +Riannon +Rianon 
+Rica +Ricca +Rici +Ricki +Rickie +Ricky +Riki +Rikki +Rina +Risa +Rissa +Rita +Riva +Rivalee +Rivi +Rivkah +Rivy +Roana +Roanna +Roanne +Robbi +Robbie +Robbin +Robby +Robbyn +Robena +Robenia +Roberta +Robin +Robina +Robinet +Robinett +Robinetta +Robinette +Robinia +Roby +Robyn +Roch +Rochell +Rochella +Rochelle +Rochette +Roda +Rodi +Rodie +Rodina +Romola +Romona +Romonda +Romy +Rona +Ronalda +Ronda +Ronica +Ronna +Ronni +Ronnica +Ronnie +Ronny +Roobbie +Rora +Rori +Rorie +Rory +Ros +Rosa +Rosabel +Rosabella +Rosabelle +Rosaleen +Rosalia +Rosalie +Rosalind +Rosalinda +Rosalinde +Rosaline +Rosalyn +Rosalynd +Rosamond +Rosamund +Rosana +Rosanna +Rosanne +Rosario +Rose +Roseann +Roseanna +Roseanne +Roselia +Roselin +Roseline +Rosella +Roselle +Roselyn +Rosemaria +Rosemarie +Rosemary +Rosemonde +Rosene +Rosetta +Rosette +Roshelle +Rosie +Rosina +Rosita +Roslyn +Rosmunda +Rosy +Row +Rowe +Rowena +Roxana +Roxane +Roxanna +Roxanne +Roxi +Roxie +Roxine +Roxy +Roz +Rozalie +Rozalin +Rozamond +Rozanna +Rozanne +Roze +Rozele +Rozella +Rozelle +Rozina +Rubetta +Rubi +Rubia +Rubie +Rubina +Ruby +Ruella +Ruperta +Ruth +Ruthann +Ruthanne +Ruthe +Ruthi +Ruthie +Ruthy +Ryann +Rycca +Saba +Sabina +Sabine +Sabra +Sabrina +Sacha +Sada +Sadella +Sadie +Sal +Sallee +Salli +Sallie +Sally +Sallyann +Sallyanne +Salome +Sam +Samantha +Samara +Samaria +Sammy +Samuela +Samuella +Sande +Sandi +Sandie +Sandra +Sandy +Sandye +Sapphira +Sapphire +Sara +Sara-Ann +Saraann +Sarah +Sarajane +Saree +Sarena +Sarene +Sarette +Sari +Sarina +Sarine +Sarita +Sascha +Sasha +Sashenka +Saudra +Saundra +Savina +Sayre +Scarlet +Scarlett +Scotty +Sean +Seana +Secunda +Seka +Sela +Selena +Selene +Selestina +Selia +Selie +Selina +Selinda +Seline +Sella +Selle +Selma +Sena +Sephira +Serena +Serene +Shaina +Shaine +Shalna +Shalne +Shamit +Shana +Shanda +Shandee +Shandie +Shandra +Shandy +Shane +Shani +Shanie +Shanna +Shannah +Shannen +Shannon +Shanon +Shanta +Shantee +Shara +Sharai +Shari +Sharia +Sharie +Sharity 
+Sharl +Sharla +Sharleen +Sharlene +Sharline +Sharna +Sharon +Sharona +Sharra +Sharron +Sharyl +Shaun +Shauna +Shawn +Shawna +Shawnee +Shay +Shayla +Shaylah +Shaylyn +Shaylynn +Shayna +Shayne +Shea +Sheba +Sheela +Sheelagh +Sheelah +Sheena +Sheeree +Sheila +Sheila-Kathryn +Sheilah +Sheilakathryn +Shel +Shela +Shelagh +Shelba +Shelbi +Shelby +Shelia +Shell +Shelley +Shelli +Shellie +Shelly +Shena +Sher +Sheree +Sheri +Sherie +Sheril +Sherill +Sherilyn +Sherline +Sherri +Sherrie +Sherry +Sherye +Sheryl +Shilpa +Shina +Shir +Shira +Shirah +Shirl +Shirlee +Shirleen +Shirlene +Shirley +Shirline +Shoshana +Shoshanna +Shoshie +Siana +Sianna +Sib +Sibbie +Sibby +Sibeal +Sibel +Sibella +Sibelle +Sibilla +Sibley +Sibyl +Sibylla +Sibylle +Sidoney +Sidonia +Sidonnie +Sigrid +Sile +Sileas +Silva +Silvana +Silvia +Silvie +Simona +Simone +Simonette +Simonne +Sindee +Sinead +Siobhan +Sioux +Siouxie +Sisely +Sisile +Sissie +Sissy +Sofia +Sofie +Solange +Sondra +Sonia +Sonja +Sonni +Sonnie +Sonnnie +Sonny +Sonya +Sophey +Sophi +Sophia +Sophie +Sophronia +Sorcha +Sosanna +Stace +Stacee +Stacey +Staci +Stacia +Stacie +Stacy +Stafani +Star +Starla +Starlene +Starlin +Starr +Stefa +Stefania +Stefanie +Steffane +Steffi +Steffie +Stella +Stepha +Stephana +Stephani +Stephanie +Stephannie +Stephenie +Stephi +Stephie +Stephine +Stesha +Stevana +Stevena +Stoddard +Storey +Storm +Stormi +Stormie +Stormy +Sue +Sue-elle +Suellen +Sukey +Suki +Sula +Sunny +Sunshine +Susan +Susana +Susanetta +Susann +Susanna +Susannah +Susanne +Susette +Susi +Susie +Sussi +Susy +Suzan +Suzann +Suzanna +Suzanne +Suzetta +Suzette +Suzi +Suzie +Suzy +Suzzy +Sybil +Sybila +Sybilla +Sybille +Sybyl +Sydel +Sydelle +Sydney +Sylvia +Sylvie +Tabatha +Tabbatha +Tabbi +Tabbie +Tabbitha +Tabby +Tabina +Tabitha +Taffy +Talia +Tallia +Tallie +Tally +Talya +Talyah +Tamar +Tamara +Tamarah +Tamarra +Tamera +Tami +Tamiko +Tamma +Tammara +Tammi +Tammie +Tammy +Tamra +Tana +Tandi +Tandie +Tandy +Tani +Tania +Tansy +Tanya +Tara +Tarah 
+Tarra +Tarrah +Taryn +Tasha +Tasia +Tate +Tatiana +Tatiania +Tatum +Tawnya +Tawsha +Teane +Ted +Tedda +Teddi +Teddie +Teddy +Tedi +Tedra +Teena +Tella +Teodora +Tera +Teresa +TeresaAnne +Terese +Teresina +Teresita +Teressa +Teri +Teriann +Terina +Terra +Terri +Terri-Jo +Terrianne +Terrie +Terry +Terrye +Tersina +Teryl +Terza +Tess +Tessa +Tessi +Tessie +Tessy +Thalia +Thea +Theada +Theadora +Theda +Thekla +Thelma +Theo +Theodora +Theodosia +Theresa +Theresa-Marie +Therese +Theresina +Theresita +Theressa +Therine +Thia +Thomasa +Thomasin +Thomasina +Thomasine +Tia +Tiana +Tiena +Tierney +Tiertza +Tiff +Tiffani +Tiffanie +Tiffany +Tiffi +Tiffie +Tiffy +Tilda +Tildi +Tildie +Tildy +Tillie +Tilly +Tim +Timi +Timmi +Timmie +Timmy +Timothea +Tina +Tine +Tiphani +Tiphanie +Tiphany +Tish +Tisha +Tobe +Tobey +Tobi +Tobie +Toby +Tobye +Toinette +Toma +Tomasina +Tomasine +Tomi +Tomiko +Tommi +Tommie +Tommy +Toni +Tonia +Tonie +Tony +Tonya +Tootsie +Torey +Tori +Torie +Torrie +Tory +Tova +Tove +Trace +Tracee +Tracey +Traci +Tracie +Tracy +Trenna +Tresa +Trescha +Tressa +Tricia +Trina +Trish +Trisha +Trista +Trix +Trixi +Trixie +Trixy +Truda +Trude +Trudey +Trudi +Trudie +Trudy +Trula +Tuesday +Twila +Twyla +Tybi +Tybie +Tyne +Ula +Ulla +Ulrica +Ulrika +Ulrike +Umeko +Una +Ursa +Ursala +Ursola +Ursula +Ursulina +Ursuline +Uta +Val +Valaree +Valaria +Vale +Valeda +Valencia +Valene +Valenka +Valentia +Valentina +Valentine +Valera +Valeria +Valerie +Valery +Valerye +Valida +Valina +Valli +Vallie +Vally +Valma +Valry +Van +Vanda +Vanessa +Vania +Vanna +Vanni +Vannie +Vanny +Vanya +Veda +Velma +Velvet +Vena +Venita +Ventura +Venus +Vera +Veradis +Vere +Verena +Verene +Veriee +Verile +Verina +Verine +Verla +Verna +Vernice +Veronica +Veronika +Veronike +Veronique +Vi +Vicki +Vickie +Vicky +Victoria +Vida +Viki +Vikki +Vikkie +Vikky +Vilhelmina +Vilma +Vin +Vina +Vinita +Vinni +Vinnie +Vinny +Viola +Violante +Viole +Violet +Violetta +Violette +Virgie +Virgina +Virginia +Virginie +Vita 
+Vitia +Vitoria +Vittoria +Viv +Viva +Vivi +Vivia +Vivian +Viviana +Vivianna +Vivianne +Vivie +Vivien +Viviene +Vivienne +Viviyan +Vivyan +Vivyanne +Vonni +Vonnie +Vonny +Wallie +Wallis +Wally +Waly +Wanda +Wandie +Wandis +Waneta +Wenda +Wendeline +Wendi +Wendie +Wendy +Wenona +Wenonah +Whitney +Wileen +Wilhelmina +Wilhelmine +Wilie +Willa +Willabella +Willamina +Willetta +Willette +Willi +Willie +Willow +Willy +Willyt +Wilma +Wilmette +Wilona +Wilone +Wilow +Windy +Wini +Winifred +Winna +Winnah +Winne +Winni +Winnie +Winnifred +Winny +Winona +Winonah +Wren +Wrennie +Wylma +Wynn +Wynne +Wynnie +Wynny +Xaviera +Xena +Xenia +Xylia +Xylina +Yalonda +Yehudit +Yelena +Yetta +Yettie +Yetty +Yevette +Yoko +Yolanda +Yolande +Yolane +Yolanthe +Yonina +Yoshi +Yoshiko +Yovonnda +Yvette +Yvonne +Zabrina +Zahara +Zandra +Zaneta +Zara +Zarah +Zaria +Zarla +Zea +Zelda +Zelma +Zena +Zenia +Zia +Zilvia +Zita +Zitella +Zoe +Zola +Zonda +Zondra +Zonnya +Zora +Zorah +Zorana +Zorina +Zorine +Zsa Zsa +Zsazsa +Zulema +Zuzana diff --git a/transformations/gender_randomizer/names/male.txt b/transformations/gender_randomizer/names/male.txt new file mode 100644 index 000000000..bacce977f --- /dev/null +++ b/transformations/gender_randomizer/names/male.txt @@ -0,0 +1,2943 @@ +Aamir +Aaron +Abbey +Abbie +Abbot +Abbott +Abby +Abdel +Abdul +Abdulkarim +Abdullah +Abe +Abel +Abelard +Abner +Abraham +Abram +Ace +Adair +Adam +Adams +Addie +Adger +Aditya +Adlai +Adnan +Adolf +Adolfo +Adolph +Adolphe +Adolpho +Adolphus +Adrian +Adrick +Adrien +Agamemnon +Aguinaldo +Aguste +Agustin +Aharon +Ahmad +Ahmed +Ahmet +Ajai +Ajay +Al +Alaa +Alain +Alan +Alasdair +Alastair +Albatros +Albert +Alberto +Albrecht +Alden +Aldis +Aldo +Aldric +Aldrich +Aldus +Aldwin +Alec +Aleck +Alejandro +Aleks +Aleksandrs +Alessandro +Alex +Alexander +Alexei +Alexis +Alf +Alfie +Alfonse +Alfonso +Alfonzo +Alford +Alfred +Alfredo +Algernon +Ali +Alic +Alister +Alix +Allah +Allan +Allen +Alley +Allie +Allin +Allyn +Alonso +Alonzo 
+Aloysius +Alphonse +Alphonso +Alston +Alton +Alvin +Alwin +Amadeus +Ambros +Ambrose +Ambrosi +Ambrosio +Ambrosius +Amery +Amory +Amos +Anatol +Anatole +Anatollo +Anatoly +Anders +Andie +Andonis +Andre +Andrea +Andreas +Andrej +Andres +Andrew +Andrey +Andri +Andros +Andrus +Andrzej +Andy +Angel +Angelico +Angelo +Angie +Angus +Ansel +Ansell +Anselm +Anson +Anthony +Antin +Antoine +Anton +Antone +Antoni +Antonin +Antonino +Antonio +Antonius +Antony +Anurag +Apollo +Apostolos +Aram +Archibald +Archibold +Archie +Archon +Archy +Arel +Ari +Arie +Ariel +Aristotle +Arlo +Armand +Armando +Armond +Armstrong +Arne +Arnie +Arnold +Arnoldo +Aron +Arron +Art +Arther +Arthur +Artie +Artur +Arturo +Arvie +Arvin +Arvind +Arvy +Ash +Ashby +Ashish +Ashley +Ashton +Aub +Aube +Aubert +Aubrey +Augie +August +Augustin +Augustine +Augusto +Augustus +Austen +Austin +Ave +Averell +Averil +Averill +Avery +Avi +Avraham +Avram +Avrom +Axel +Aylmer +Aziz +Bailey +Bailie +Baillie +Baily +Baird +Baldwin +Bancroft +Barbabas +Barclay +Bard +Barde +Barn +Barnabas +Barnabe +Barnaby +Barnard +Barnebas +Barnett +Barney +Barnie +Barny +Baron +Barr +Barret +Barrett +Barri +Barrie +Barris +Barron +Barry +Bart +Bartel +Barth +Barthel +Bartholemy +Bartholomeo +Bartholomeus +Bartholomew +Bartie +Bartlet +Bartlett +Bartolemo +Bartolomei +Bartolomeo +Barton +Barty +Bary +Basil +Batholomew +Baxter +Bay +Bayard +Beale +Bealle +Bear +Bearnard +Beau +Beaufort +Beauregard +Beck +Bela +Ben +Benedict +Bengt +Benito +Benjamen +Benjamin +Benji +Benjie +Benjy +Benn +Bennet +Bennett +Bennie +Benny +Benson +Bentley +Benton +Beowulf +Berchtold +Berk +Berke +Berkeley +Berkie +Berkley +Bernard +Bernardo +Bernd +Bernhard +Bernie +Bert +Bertie +Bertram +Bertrand +Bharat +Biff +Bill +Billie +Billy +Bing +Binky +Bishop +Bjorn +Bjorne +Blaine +Blair +Blake +Blare +Blayne +Bo +Bob +Bobbie +Bobby +Bogart +Bogdan +Boniface +Boris +Boyce +Boyd +Brad +Braden +Bradford +Bradley +Bradly +Brady +Brandon +Brandy +Brant +Brendan +Brent 
+Bret +Brett +Brewer +Brewster +Brian +Brice +Briggs +Brinkley +Britt +Brock +Broddie +Broddy +Broderic +Broderick +Brodie +Brody +Bronson +Brook +Brooke +Brooks +Bruce +Bruno +Bryan +Bryant +Bryce +Bryn +Bryon +Bubba +Buck +Bucky +Bud +Buddy +Burgess +Burke +Burl +Burnaby +Burt +Burton +Buster +Butch +Butler +Byram +Byron +Caesar +Cain +Cal +Caldwell +Caleb +Calhoun +Calvin +Cam +Cameron +Cammy +Carey +Carl +Carleigh +Carlie +Carlin +Carlo +Carlos +Carlton +Carlyle +Carmine +Carroll +Carson +Carsten +Carter +Cary +Caryl +Case +Casey +Caspar +Casper +Cass +Cat +Cecil +Cesar +Chad +Chadd +Chaddie +Chaddy +Chadwick +Chaim +Chalmers +Chan +Chance +Chancey +Chanderjit +Chandler +Chane +Chariot +Charles +Charleton +Charley +Charlie +Charlton +Chas +Chase +Chaunce +Chauncey +Che +Chelton +Chen +Chester +Cheston +Chet +Chev +Chevalier +Chevy +Chip +Chris +Chrissy +Christ +Christian +Christiano +Christie +Christof +Christofer +Christoph +Christophe +Christopher +Christorpher +Christos +Christy +Chrisy +Chuck +Churchill +Clair +Claire +Clancy +Clarance +Clare +Clarence +Clark +Clarke +Claude +Claudio +Claudius +Claus +Clay +Clayborn +Clayborne +Claybourne +Clayton +Cleland +Clem +Clemens +Clement +Clemente +Clemmie +Cletus +Cleveland +Cliff +Clifford +Clifton +Clint +Clinten +Clinton +Clive +Clyde +Cob +Cobb +Cobbie +Cobby +Cody +Colbert +Cole +Coleman +Colin +Collin +Collins +Conan +Connie +Connolly +Connor +Conrad +Conroy +Constantin +Constantine +Constantinos +Conway +Cooper +Corbin +Corby +Corey +Corky +Cornelius +Cornellis +Corrie +Cortese +Corwin +Cory +Cosmo +Costa +Courtney +Craig +Crawford +Creighton +Cris +Cristopher +Curt +Curtice +Curtis +Cy +Cyril +Cyrill +Cyrille +Cyrillus +Cyrus +Dabney +Daffy +Dale +Dallas +Dalton +Damian +Damien +Damon +Dan +Dana +Dane +Dani +Danie +Daniel +Dannie +Danny +Dante +Darby +Darcy +Daren +Darian +Darien +Darin +Dario +Darius +Darrel +Darrell +Darren +Darrick +Darrin +Darryl +Darth +Darwin +Daryl +Daryle +Dave +Davey +David 
+Davidde +Davide +Davidson +Davie +Davin +Davis +Davon +Davoud +Davy +Dawson +Dean +Deane +Del +Delbert +Dell +Delmar +Demetre +Demetri +Demetris +Demetrius +Demosthenis +Denis +Dennie +Dennis +Denny +Derby +Derek +Derick +Derk +Derrek +Derrick +Derrin +Derrol +Derron +Deryl +Desmond +Desmund +Devin +Devon +Dewey +Dewitt +Dexter +Dick +Dickey +Dickie +Diego +Dieter +Dietrich +Dillon +Dimitri +Dimitrios +Dimitris +Dimitrou +Dimitry +Dino +Dion +Dionis +Dionysus +Dirk +Dmitri +Dom +Domenic +Domenico +Dominic +Dominick +Dominique +Don +Donal +Donald +Donn +Donnie +Donny +Donovan +Dorian +Dory +Doug +Douggie +Dougie +Douglas +Douglass +Douglis +Dov +Doyle +Drake +Drew +Dru +Dryke +Duane +Dudley +Duffie +Duffy +Dugan +Duke +Dunc +Duncan +Dunstan +Durand +Durant +Durante +Durward +Dustin +Dwain +Dwaine +Dwane +Dwayne +Dwight +Dylan +Dyson +Earl +Earle +Easton +Eben +Ebeneser +Ebenezer +Eberhard +Ed +Eddie +Eddy +Edgar +Edgardo +Edie +Edmond +Edmund +Edouard +Edsel +Eduard +Eduardo +Edward +Edwin +Efram +Egbert +Ehud +Elbert +Elden +Eldon +Eli +Elias +Elihu +Elijah +Eliot +Eliott +Elisha +Elliot +Elliott +Ellis +Ellsworth +Ellwood +Elmer +Elmore +Elnar +Elric +Elroy +Elton +Elvin +Elvis +Elwin +Elwood +Elwyn +Ely +Emanuel +Emerson +Emery +Emil +Emile +Emilio +Emmanuel +Emmery +Emmet +Emmett +Emmit +Emmott +Emmy +Emory +Ender +Engelbart +Engelbert +Englebart +Englebert +Enoch +Enrico +Enrique +Ephraim +Ephram +Ephrayim +Ephrem +Er +Erasmus +Erastus +Erek +Erhard +Erhart +Eric +Erich +Erick +Erik +Erin +Erl +Ernest +Ernesto +Ernie +Ernst +Erny +Errol +Ervin +Erwin +Esau +Esme +Esteban +Ethan +Ethelbert +Ethelred +Etienne +Euclid +Eugen +Eugene +Eustace +Ev +Evan +Evelyn +Everard +Everett +Ewan +Ewart +Ez +Ezechiel +Ezekiel +Ezra +Fabian +Fabio +Fairfax +Farley +Fazeel +Federico +Felice +Felicio +Felipe +Felix +Ferd +Ferdie +Ferdinand +Ferdy +Fergus +Ferguson +Ferinand +Fernando +Fidel +Filbert +Filip +Filipe +Filmore +Finley +Finn +Fitz +Fitzgerald +Flem +Fleming +Flemming 
+Fletch +Fletcher +Flin +Flinn +Flint +Flipper +Florian +Floyd +Flynn +Fons +Fonsie +Fonz +Fonzie +Forbes +Ford +Forest +Forester +Forrest +Forrester +Forster +Foster +Fowler +Fox +Fran +Francesco +Francis +Francisco +Francois +Frank +Frankie +Franklin +Franklyn +Franky +Frans +Franz +Fraser +Frazier +Fred +Freddie +Freddy +Frederic +Frederich +Frederick +Frederico +Frederik +Fredric +Fredrick +Freeman +Freemon +Fremont +French +Friedric +Friedrich +Friedrick +Fritz +Fulton +Fyodor +Gabe +Gabriel +Gabriele +Gabriell +Gabriello +Gail +Gale +Galen +Gallagher +Gamaliel +Garcia +Garcon +Gardener +Gardiner +Gardner +Garey +Garfield +Garfinkel +Garold +Garp +Garret +Garrett +Garrot +Garrott +Garry +Garth +Garv +Garvey +Garvin +Garvy +Garwin +Garwood +Gary +Gaspar +Gasper +Gaston +Gav +Gaven +Gavin +Gavriel +Gay +Gayle +Gearard +Gene +Geo +Geof +Geoff +Geoffrey +Geoffry +Georg +George +Georges +Georgia +Georgie +Georgy +Gerald +Geraldo +Gerard +Gere +Gerhard +Gerhardt +Geri +Germaine +Gerold +Gerome +Gerrard +Gerri +Gerrit +Gerry +Gershom +Gershon +Giacomo +Gian +Giancarlo +Giavani +Gibb +Gideon +Giff +Giffard +Giffer +Giffie +Gifford +Giffy +Gil +Gilbert +Gilberto +Gilburt +Giles +Gill +Gilles +Ginger +Gino +Giordano +Giorgi +Giorgio +Giovanne +Giovanni +Giraldo +Giraud +Giuseppe +Glen +Glenn +Glynn +Godard +Godart +Goddard +Goddart +Godfree +Godfrey +Godfry +Godwin +Gomer +Gonzales +Gonzalo +Goober +Goose +Gordan +Gordie +Gordon +Grace +Grady +Graehme +Graeme +Graham +Graig +Grant +Granville +Greg +Gregg +Greggory +Gregor +Gregorio +Gregory +Gretchen +Griff +Griffin +Griffith +Griswold +Grove +Grover +Guido +Guillaume +Guillermo +Gunner +Gunter +Gunther +Gus +Gustaf +Gustav +Gustave +Gustavo +Gustavus +Guthrey +Guthrie +Guthry +Guy +Hadleigh +Hadley +Hadrian +Hagan +Hagen +Hailey +Hakeem +Hakim +Hal +Hale +Haleigh +Haley +Hall +Hallam +Halvard +Ham +Hamel +Hamid +Hamil +Hamilton +Hamish +Hamlen +Hamlet +Hamlin +Hammad +Hamnet +Han +Hanan +Hanford +Hank +Hannibal +Hans 
+Hans-Peter +Hansel +Hanson +Harald +Harcourt +Hari +Harlan +Harland +Harley +Harlin +Harman +Harmon +Harold +Harris +Harrison +Harrold +Harry +Hart +Hartley +Hartwell +Harv +Harvard +Harvey +Harvie +Harwell +Hasheem +Hashim +Haskel +Haskell +Hassan +Hastings +Hasty +Haven +Hayden +Haydon +Hayes +Hayward +Haywood +Hazel +Heath +Heathcliff +Hebert +Hector +Heinrich +Heinz +Helmuth +Henderson +Hendrick +Hendrik +Henri +Henrie +Henrik +Henrique +Henry +Herb +Herbert +Herbie +Herby +Hercule +Hercules +Herculie +Herman +Hermann +Hermon +Hermy +Hernando +Herold +Herrick +Herrmann +Hersch +Herschel +Hersh +Hershel +Herve +Hervey +Hew +Hewe +Hewet +Hewett +Hewie +Hewitt +Heywood +Hezekiah +Higgins +Hilary +Hilbert +Hill +Hillard +Hillary +Hillel +Hillery +Hilliard +Hilton +Hiralal +Hiram +Hiro +Hirsch +Hobart +Hodge +Hogan +Hollis +Holly +Homer +Horace +Horacio +Horatio +Horatius +Horst +Howard +Howie +Hoyt +Hubert +Hudson +Huey +Hugh +Hugo +Humbert +Humphrey +Hunt +Hunter +Huntington +Huntlee +Huntley +Hurley +Husain +Husein +Hussein +Hy +Hyatt +Hyman +Hymie +Iago +Iain +Ian +Ibrahim +Ichabod +Iggie +Iggy +Ignace +Ignacio +Ignacius +Ignatius +Ignaz +Ignazio +Igor +Ike +Ikey +Immanuel +Ingamar +Ingelbert +Ingemar +Inglebert +Ingmar +Ingram +Inigo +Ira +Irvin +Irvine +Irving +Irwin +Isa +Isaac +Isaak +Isador +Isadore +Isaiah +Ishmael +Isidore +Ismail +Israel +Istvan +Ivan +Ivor +Izaak +Izak +Izzy +Jabez +Jack +Jackie +Jackson +Jacob +Jacques +Jae +Jaime +Jake +Jakob +James +Jameson +Jamey +Jamie +Jan +Janos +Janus +Jared +Jarrett +Jarvis +Jason +Jasper +Javier +Jay +Jean +Jean-Christophe +Jean-Francois +Jean-Lou +Jean-Luc +Jean-Marc +Jean-Paul +Jean-Pierre +Jeb +Jed +Jedediah +Jef +Jeff +Jefferey +Jefferson +Jeffery +Jeffie +Jeffrey +Jeffry +Jefry +Jehu +Jennings +Jens +Jephthah +Jerald +Jeramie +Jere +Jereme +Jeremiah +Jeremias +Jeremie +Jeremy +Jermain +Jermaine +Jermayne +Jerold +Jerome +Jeromy +Jerri +Jerrie +Jerrold +Jerrome +Jerry +Jervis +Jerzy +Jess +Jesse +Jessee 
+Jessey +Jessie +Jesus +Jeth +Jethro +Jim +Jimbo +Jimmie +Jimmy +Jo +Joab +Joachim +Joao +Joaquin +Job +Jock +Jodi +Jodie +Jody +Joe +Joel +Joey +Johan +Johann +Johannes +John +John-David +John-Patrick +Johnathan +Johnathon +Johnnie +Johnny +Johny +Jon +Jonah +Jonas +Jonathan +Jonathon +Jonny +Jordan +Jordon +Jordy +Jorge +Jory +Jose +Josef +Joseph +Josephus +Josh +Joshua +Joshuah +Josiah +Jotham +Juan +Juanita +Jud +Judah +Judas +Judd +Jude +Judith +Judson +Judy +Juergen +Jule +Jules +Julian +Julie +Julio +Julius +Justin +Justis +Kaiser +Kaleb +Kalil +Kalle +Kalman +Kalvin +Kam +Kane +Kareem +Karel +Karim +Karl +Karsten +Kaspar +Keefe +Keenan +Keene +Keil +Keith +Kellen +Kelley +Kelly +Kelsey +Kelvin +Kelwin +Ken +Kendal +Kendall +Kendrick +Kenn +Kennedy +Kenneth +Kenny +Kent +Kenton +Kenyon +Kermie +Kermit +Kerry +Kevan +Kevin +Kim +Kimball +Kimmo +Kin +Kincaid +King +Kingsley +Kingsly +Kingston +Kip +Kirby +Kirk +Kit +Klaus +Klee +Knox +Konrad +Konstantin +Kory +Kostas +Kraig +Kris +Krishna +Kristian +Kristopher +Kristos +Kurt +Kurtis +Kyle +Laird +Lamar +Lambert +Lamont +Lance +Lancelot +Lane +Langston +Lanny +Larry +Lars +Laurance +Lauren +Laurence +Laurens +Laurent +Laurie +Lawerence +Lawrence +Lawson +Lawton +Lay +Layton +Lazar +Lazare +Lazaro +Lazarus +Lazlo +Lee +Lefty +Leif +Leigh +Leighton +Leland +Lem +Lemar +Lemmie +Lemmy +Lemuel +Len +Lenard +Lennie +Lenny +Leo +Leon +Leonard +Leonardo +Leonerd +Leonhard +Leonid +Leonidas +Leopold +Leroy +Les +Lesley +Leslie +Lester +Lev +Levi +Levin +Levon +Levy +Lew +Lewis +Lex +Liam +Lin +Lincoln +Lind +Lindsay +Lindsey +Lindy +Linoel +Linus +Lion +Lionel +Lionello +Llewellyn +Lloyd +Locke +Lockwood +Logan +Lon +Lonnie +Lonny +Loren +Lorenzo +Lorne +Lorrie +Lothar +Lou +Louie +Louis +Lovell +Lowell +Lucas +Luce +Lucian +Luciano +Lucien +Lucio +Lucius +Ludvig +Ludwig +Luigi +Luis +Lukas +Luke +Luther +Lyle +Lyn +Lyndon +Lynn +Mac +Mace +Mack +Mackenzie +Maddie +Maddy +Madison +Magnum +Magnus +Mahesh +Mahmoud +Mahmud 
+Maison +Major +Malcolm +Manfred +Manish +Manny +Manuel +Marc +Marcel +Marcello +Marcellus +Marcelo +Marchall +Marcio +Marco +Marcos +Marcus +Marietta +Marilu +Mario +Marion +Marius +Mark +Marko +Markos +Markus +Marlin +Marlo +Marlon +Marlow +Marlowe +Marmaduke +Marsh +Marshal +Marshall +Mart +Martainn +Marten +Martie +Martin +Martino +Marty +Martyn +Marv +Marve +Marven +Marvin +Marwin +Mason +Mateo +Mathew +Mathias +Matias +Matt +Matteo +Matthaeus +Mattheus +Matthew +Matthias +Matthieu +Matthiew +Matthus +Mattias +Mattie +Matty +Maurice +Mauricio +Maurie +Maurise +Maurits +Mauritz +Maury +Max +Maxfield +Maxie +Maxim +Maximilian +Maximilien +Maxwell +Mayer +Maynard +Maynord +Mayor +Mead +Meade +Meier +Meir +Mel +Melvin +Melvyn +Menard +Mendel +Mendie +Meredeth +Meredith +Merell +Merill +Merle +Merlin +Merrel +Merrick +Merril +Merrill +Merry +Merv +Mervin +Merwin +Meryl +Meyer +Mic +Micah +Michael +Michail +Michal +Michale +Micheal +Micheil +Michel +Michele +Mick +Mickey +Mickie +Micky +Miguel +Mika +Mikael +Mike +Mikel +Mikey +Mikhail +Miles +Millicent +Milo +Milt +Milton +Mischa +Mitch +Mitchael +Mitchel +Mitchell +Moe +Mohamad +Mohamed +Mohammad +Mohammed +Mohan +Moise +Moises +Moishe +Monroe +Montague +Monte +Montgomery +Monty +Moore +Mordecai +Morgan +Morlee +Morley +Morly +Morrie +Morris +Morry +Morse +Mort +Morten +Mortie +Mortimer +Morton +Morty +Mose +Moses +Moshe +Moss +Muffin +Mugsy +Muhammad +Munmro +Munroe +Murdoch +Murdock +Murphy +Murray +Mustafa +Myke +Myles +Mylo +Myron +Nahum +Napoleon +Nat +Natale +Nate +Nathan +Nathanael +Nathanial +Nathaniel +Nathanil +Neal +Neale +Neall +Nealon +Nealson +Nealy +Ned +Neddie +Neddy +Neel +Neil +Nels +Nelsen +Nelson +Nero +Neron +Nester +Nestor +Nev +Nevil +Nevile +Neville +Nevin +Nevins +Newton +Niall +Niccolo +Nicholas +Nichole +Nichols +Nick +Nickey +Nickie +Nickolas +Nicky +Nico +Nicolas +Niels +Nigel +Niki +Nikita +Nikki +Nikolai +Nikos +Niles +Nils +Nilson +Niven +Noach +Noah +Noam +Noble +Noe +Noel +Nolan 
+Noland +Norbert +Norm +Norman +Normand +Normie +Norris +Northrop +Northrup +Norton +Norwood +Nunzio +Obadiah +Obadias +Oberon +Obie +Octavius +Odell +Odie +Odin +Odysseus +Olaf +Olag +Ole +Oleg +Olin +Oliver +Olivier +Olle +Ollie +Omar +Oral +Oran +Orazio +Orbadiah +Oren +Orin +Orion +Orlando +Orren +Orrin +Orson +Orton +Orville +Osbert +Osborn +Osborne +Osbourn +Osbourne +Oscar +Osgood +Osmond +Osmund +Ossie +Oswald +Oswell +Otes +Othello +Otho +Otis +Otto +Owen +Ozzie +Ozzy +Pablo +Pace +Paco +Paddie +Paddy +Padraig +Page +Paige +Pail +Palmer +Paolo +Park +Parke +Parker +Parnell +Parrnell +Parry +Parsifal +Partha +Pascal +Pascale +Pasquale +Pat +Pate +Patel +Paten +Patin +Paton +Patric +Patrice +Patricio +Patrick +Patrik +Patsy +Pattie +Patty +Paul +Paulo +Pavel +Pearce +Pedro +Peirce +Pembroke +Pen +Penn +Pennie +Penny +Penrod +Pepe +Pepillo +Pepito +Perceval +Percival +Percy +Perry +Pete +Peter +Petey +Petr +Peyter +Peyton +Phil +Philbert +Philip +Phillip +Phillipe +Phillipp +Phineas +Phip +Pierce +Pierre +Pierson +Piet +Pieter +Pietro +Piggy +Pincas +Pinchas +Pincus +Piotr +Pip +Plato +Pooh +Porter +Poul +Powell +Praneetf +Prasad +Prasun +Prent +Prentice +Prentiss +Prescott +Preston +Price +Prince +Pryce +Puff +Purcell +Putnam +Pyotr +Quent +Quentin +Quiggly +Quigly +Quigman +Quill +Quillan +Quincey +Quincy +Quinlan +Quinn +Quint +Quintin +Quinton +Quintus +Rab +Rabbi +Rabi +Rad +Radcliffe +Rafael +Rafe +Ragnar +Rahul +Raimund +Rainer +Raj +Rajeev +Raleigh +Ralf +Ralph +Ram +Ramesh +Ramon +Ramsay +Ramsey +Rand +Randal +Randall +Randell +Randi +Randie +Randolf +Randolph +Randy +Ransell +Ransom +Raoul +Raphael +Raul +Ravi +Ravil +Rawley +Ray +Raymond +Raymund +Raymundo +Raynard +Rayner +Raynor +Reagan +Red +Redford +Redmond +Reece +Reed +Rees +Reese +Reg +Regan +Regen +Reggie +Reggis +Reggy +Reginald +Reginauld +Reid +Reilly +Reinhard +Reinhold +Rem +Remington +Remus +Renado +Renaldo +Renard +Renato +Renaud +Renault +Rene +Reube +Reuben +Reuven +Rex +Rey 
+Reynard +Reynold +Reynolds +Reza +Rhett +Ric +Ricard +Ricardo +Riccardo +Rice +Rich +Richard +Richardo +Richie +Richmond +Richy +Rick +Rickard +Rickey +Ricki +Rickie +Ricky +Rik +Rikki +Riley +Rinaldo +Ripley +Ritch +Ritchie +Roarke +Rob +Robb +Robbert +Robbie +Robert +Roberto +Robin +Robinson +Rochester +Rock +Rockwell +Rocky +Rod +Rodd +Roddie +Roddy +Roderic +Roderich +Roderick +Roderigo +Rodge +Rodger +Rodney +Rodolfo +Rodolph +Rodolphe +Rodrick +Rodrigo +Rodrique +Rog +Roger +Rogers +Roice +Roland +Rolando +Rolf +Rolfe +Rolland +Rollin +Rollins +Rollo +Rolph +Romain +Roman +Romeo +Ron +Ronald +Ronen +Roni +Ronnie +Ronny +Roosevelt +Rory +Roscoe +Ross +Roth +Rourke +Rowland +Roy +Royal +Royce +Rube +Ruben +Rubin +Ruby +Rudd +Ruddie +Ruddy +Rudie +Rudiger +Rudolf +Rudolfo +Rudolph +Rudy +Rudyard +Rufe +Rufus +Rupert +Ruperto +Russ +Russel +Russell +Rustie +Rustin +Rusty +Rutger +Rutherford +Rutledge +Rutter +Ryan +Sal +Salem +Salim +Salman +Salmon +Salomo +Salomon +Salomone +Salvador +Salvatore +Salvidor +Sam +Sammie +Sammy +Sampson +Samson +Samuel +Samuele +Sancho +Sander +Sanders +Sanderson +Sandor +Sandro +Sandy +Sanford +Sanson +Sansone +Sarge +Sargent +Sascha +Sasha +Saul +Sauncho +Saunder +Saunders +Saunderson +Saundra +Saw +Sawyer +Sawyere +Sax +Saxe +Saxon +Say +Sayer +Sayers +Sayre +Sayres +Scarface +Schroeder +Schuyler +Scot +Scott +Scotti +Scottie +Scotty +Seamus +Sean +Sebastian +Sebastiano +Sebastien +See +Selby +Selig +Serge +Sergeant +Sergei +Sergent +Sergio +Seth +Seymour +Shadow +Shaine +Shalom +Shamus +Shanan +Shane +Shannan +Shannon +Shaughn +Shaun +Shaw +Shawn +Shay +Shayne +Shea +Sheff +Sheffie +Sheffield +Sheffy +Shelby +Shelden +Sheldon +Shell +Shelley +Shelton +Shem +Shep +Shepard +Shepherd +Sheppard +Shepperd +Sheridan +Sherlock +Sherlocke +Sherman +Sherwin +Sherwood +Sherwynd +Shimon +Shlomo +Sholom +Shorty +Shumeet +Shurlock +Shurlocke +Shurwood +Si +Sibyl +Sid +Siddhartha +Sidnee +Sidney +Siegfried +Siffre +Sig +Sigfrid +Sigfried 
+Sigmund +Silas +Silvain +Silvan +Silvano +Silvanus +Silvester +Silvio +Sim +Simeon +Simmonds +Simon +Simone +Sinclair +Sinclare +Sivert +Siward +Skell +Skelly +Skip +Skipp +Skipper +Skippie +Skippy +Skipton +Sky +Skye +Skylar +Skyler +Slade +Slim +Sloan +Sloane +Sly +Smith +Smitty +Socrates +Sol +Sollie +Solly +Solomon +Somerset +Son +Sonnie +Sonny +Sparky +Spence +Spencer +Spense +Spenser +Spike +Spiro +Spiros +Spud +Srinivas +Stacy +Staffard +Stafford +Staford +Stan +Standford +Stanfield +Stanford +Stanislaw +Stanleigh +Stanley +Stanly +Stanton +Stanwood +Stavros +Stearn +Stearne +Stefan +Stefano +Steffen +Stephan +Stephanus +Stephen +Sterling +Stern +Sterne +Steve +Steven +Stevie +Stevy +Stew +Steward +Stewart +Stig +Stillman +Stillmann +Sting +Stinky +Stirling +Stu +Stuart +Sturgis +Sullivan +Sully +Sumner +Sunny +Sutherland +Sutton +Sven +Swen +Syd +Sydney +Sylvan +Sylvester +Tab +Tabb +Tabbie +Tabby +Taber +Tabor +Tad +Tadd +Taddeo +Taddeus +Tadeas +Tailor +Tait +Taite +Talbert +Talbot +Tallie +Tally +Tam +Tamas +Tammie +Tammy +Tan +Tann +Tanner +Tanney +Tannie +Tanny +Tarrance +Tarrant +Tarzan +Tate +Taylor +Teador +Ted +Tedd +Teddie +Teddy +Tedie +Tedman +Tedmund +Tedrick +Temp +Temple +Templeton +Teodoor +Teodor +Teodorico +Teodoro +Terence +Terencio +Terrance +Terrel +Terrell +Terrence +Terri +Terrill +Terry +Thacher +Thad +Thaddeus +Thaddius +Thaddus +Thadeus +Thain +Thaine +Thane +Tharen +Thatch +Thatcher +Thaxter +Thayne +Thebault +Thedric +Thedrick +Theo +Theobald +Theodor +Theodore +Theodoric +Theophyllus +Thibaud +Thibaut +Thom +Thomas +Thor +Thorn +Thorndike +Thornie +Thornton +Thorny +Thorpe +Thorstein +Thorsten +Thorvald +Thurstan +Thurston +Tibold +Tiebold +Tiebout +Tiler +Tim +Timmie +Timmy +Timothee +Timotheus +Timothy +Tirrell +Tito +Titos +Titus +Tobe +Tobiah +Tobias +Tobie +Tobin +Tobit +Toby +Tod +Todd +Toddie +Toddy +Tom +Tomas +Tome +Tomkin +Tomlin +Tommie +Tommy +Tonnie +Tony +Tore +Torey +Torin +Torr +Torrance +Torre +Torrence +Torrey 
+Torrin +Torry +Town +Towney +Townie +Townsend +Towny +Trace +Tracey +Tracie +Tracy +Traver +Travers +Travis +Tray +Tre +Tremain +Tremaine +Tremayne +Trent +Trenton +Trev +Trevar +Trever +Trevor +Trey +Trip +Tristan +Troy +Truman +Tuck +Tucker +Tuckie +Tucky +Tudor +Tull +Tulley +Tully +Turner +Ty +Tybalt +Tye +Tyler +Tymon +Tymothy +Tynan +Tyrone +Tyrus +Tyson +Udale +Udall +Udell +Ugo +Ulberto +Uli +Ulick +Ulises +Ulric +Ulrich +Ulrick +Ulysses +Umberto +Upton +Urbain +Urban +Urbano +Urbanus +Uri +Uriah +Uriel +Urson +Vachel +Vaclav +Vail +Val +Valdemar +Vale +Valentin +Valentine +Van +Vance +Vasili +Vasilis +Vasily +Vassili +Vassily +Vaughan +Vaughn +Venkat +Verge +Vergil +Vern +Verne +Vernen +Verney +Vernon +Vernor +Vibhu +Vic +Vick +Victor +Vijay +Vilhelm +Vin +Vince +Vincent +Vincents +Vinnie +Vinny +Vinod +Virge +Virgie +Virgil +Virgilio +Vite +Vito +Vlad +Vladamir +Vladimir +Voltaire +Von +Wade +Wadsworth +Wain +Waine +Wainwright +Wait +Waite +Waiter +Wake +Wakefield +Wald +Waldemar +Walden +Waldo +Waldon +Waleed +Walker +Wallace +Wallache +Wallas +Wallie +Wallis +Wally +Walsh +Walt +Walter +Walther +Walton +Wang +Ward +Warde +Warden +Ware +Waring +Warner +Warren +Wash +Washington +Wat +Waverley +Waverly +Way +Waylan +Wayland +Waylen +Waylin +Waylon +Wayne +Web +Webb +Weber +Webster +Weidar +Weider +Welbie +Welby +Welch +Wells +Welsh +Wendall +Wendel +Wendell +Werner +Wes +Wesley +Weslie +West +Westbrook +Westbrooke +Westleigh +Westley +Weston +Weylin +Wheeler +Whit +Whitaker +Whitby +Whitman +Whitney +Whittaker +Wiatt +Wilber +Wilbert +Wilbur +Wilburn +Wilburt +Wilden +Wildon +Wilek +Wiley +Wilfred +Wilfrid +Wilhelm +Will +Willard +Willdon +Willem +Willey +Willi +William +Willie +Willis +Willmott +Willy +Wilmar +Wilmer +Wilson +Wilt +Wilton +Win +Windham +Winfield +Winford +Winfred +Winifield +Winn +Winnie +Winny +Winslow +Winston +Winthrop +Winton +Wit +Witold +Wittie +Witty +Wojciech +Wolf +Wolfgang +Wolfie +Wolfram +Wolfy +Woochang +Wood +Woodie 
+Woodman +Woodrow +Woody +Worden +Worth +Worthington +Worthy +Wright +Wyatan +Wyatt +Wye +Wylie +Wyn +Wyndham +Wynn +Wynton +Xavier +Xenos +Xerxes +Xever +Ximenes +Ximenez +Xymenes +Yaakov +Yacov +Yale +Yanaton +Yance +Yancey +Yancy +Yank +Yankee +Yard +Yardley +Yehudi +Yigal +Yule +Yuri +Yves +Zach +Zacharia +Zachariah +Zacharias +Zacharie +Zachary +Zacherie +Zachery +Zack +Zackariah +Zak +Zalman +Zane +Zared +Zary +Zeb +Zebadiah +Zebedee +Zebulen +Zebulon +Zechariah +Zed +Zedekiah +Zeke +Zelig +Zerk +Zeus +Zippy +Zollie +Zolly +Zorro diff --git a/transformations/gender_randomizer/requirements.txt b/transformations/gender_randomizer/requirements.txt new file mode 100644 index 000000000..24c454778 --- /dev/null +++ b/transformations/gender_randomizer/requirements.txt @@ -0,0 +1,10 @@ +# for gender_randomizer +pandas==1.1.5 +scipy==1.7.1 +lemminflect==0.2.2 +spacy==3.0.5 +https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz +tensorflow==2.6.0 +tensorflow-gpu==2.6.0 +transformations/gender_randomizer/coreferee +transformations/gender_randomizer/coreferee/models/coreferee_model_en.zip \ No newline at end of file diff --git a/transformations/gender_randomizer/test.json b/transformations/gender_randomizer/test.json new file mode 100644 index 000000000..7f1f2ca32 --- /dev/null +++ b/transformations/gender_randomizer/test.json @@ -0,0 +1,51 @@ +{ + "type": "GenderRandomizer", + "test_cases": [ + { + "class": "GenderRandomizer", + "inputs": { + "sentence": '“Edward turned to Miss Marple. “It’s like this, you see. As Uncle Mathew grew older, he got more and more suspicious. He didn’t trust anybody.” “Very wise of him,” said Miss Marple. “The depravity of human nature is unbelievable.” ' + + }, + "outputs": [{ + "sentence": '“Edward turned to Glynnis. “It’s like this, you see. As Ambros grew older, she got more and more suspicious. She didn’t trust anybody.” “Very wise of her,” said Glynnis. 
“The depravity of human nature is unbelievable.” ' + }] + }, + { + "class": "GenderRandomizer", + "inputs": { + "sentence": "Henry wanted to study abroad that summer but he decided to travel with his friends instead." + }, + "outputs": [{ + "sentence": "Ambros wanted to study abroad that summer but he decided to travel with his friends instead." + }] + }, + { + "class": "GenderRandomizer", + "inputs": { + "sentence": "I thought that Kelly would go to medschool, but she told me she was applying for law." + }, + "outputs": [{ + "sentence": "I thought that Ambros would go to medschool, but he told me he was applying for law." + }] + }, + { + "class": "GenderRandomizer", + "inputs": { + "sentence": "Grace went to New York for Christmas last year, but she wanted to stay with family for New Years." + }, + "outputs": [{ + "sentence": "Grace went to New York for Christmas last year, but she wanted to stay with family for New Years." + }] + }, + { + "class": "GenderRandomizer", + "inputs": { + "sentence": "I think Eliza never tells herself the truth." + }, + "outputs": [{ + "sentence": "I think Ambros never tells himself the truth." + }] + } + ] +} diff --git a/transformations/gender_randomizer/transformation.py b/transformations/gender_randomizer/transformation.py new file mode 100644 index 000000000..b16cc7d35 --- /dev/null +++ b/transformations/gender_randomizer/transformation.py @@ -0,0 +1,177 @@ +import random +import re + +import coreferee +import spacy + +from interfaces.SentenceOperation import SentenceOperation +from tasks.TaskTypes import TaskType +from initialize import spacy_nlp + +""" +Randomizes names in text for a 50/50 gender breakdown. Handles pronouns. 
def randomize_gender(male, female):
    """
    Picks a gender with equal probability, then a random name of that gender.

    male: list of male names
    female: list of female names
    Returns a (gender, name) tuple where gender is "male" or "female".
    """
    # Two draws from the global RNG, in this order: gender first, then name.
    gender = random.choice(["male", "female"])
    candidates = male if gender == "male" else female
    return (gender, random.choice(candidates))


def case_match(inputted, suggested):
    """
    Matches the case of the replacement to that of the original word.

    inputted: original word
    suggested: word that replaces the original word
    Returns suggested lower-cased if inputted is all lower case, upper-cased
    if inputted is all upper case, and title-cased otherwise.
    """
    if inputted.islower():
        return suggested.lower()
    if inputted.isupper():
        return suggested.upper()
    return suggested.title()


def male_to_female(pronoun):
    """
    Converts a masculine pronoun token to the matching feminine pronoun.

    pronoun: token exposing .text and .tag_ (a spaCy token in this module)
    Returns the lower-case feminine pronoun, or None when the token is not
    one of he / him / his / himself (e.g. it is already feminine).
    """
    pronoun_text = pronoun.text.lower()
    if pronoun_text == "his":
        # "his" has two feminine counterparts; a token tagged PRP becomes
        # "hers", any other tag becomes "her".
        return "hers" if pronoun.tag_ == "PRP" else "her"
    return {"he": "she", "him": "her", "himself": "herself"}.get(pronoun_text)


def female_to_male(pronoun):
    """
    Converts a feminine pronoun token to the matching masculine pronoun.

    pronoun: token exposing .text and .tag_ (a spaCy token in this module)
    Returns the lower-case masculine pronoun, or None when the token is not
    one of she / her / hers / herself (e.g. it is already masculine).
    """
    pronoun_text = pronoun.text.lower()
    if pronoun_text == "her":
        # "her" has two masculine counterparts; a token tagged PRP$ becomes
        # "his", any other tag becomes "him".
        return "his" if pronoun.tag_ == "PRP$" else "him"
    return {"she": "he", "hers": "his", "herself": "himself"}.get(pronoun_text)


def make_name_map(parsed_text, male_names, female_names):
    """
    Identifies person names in text and maps each to a random name
    (50/50 chance of male vs. female).

    parsed_text: parsed document whose .ents carry .label_ and .text
    male_names: list of male names
    female_names: list of female names
    Returns a dict of original entity text -> (gender, new name).
    """
    name_map = {}
    for ent in parsed_text.ents:
        if ent.label_ != "PERSON":
            continue
        # A repeated entity is drawn again and the last draw wins. This is
        # kept deliberately: skipping the redraw would change how many values
        # are consumed from the seeded RNG and therefore every later name.
        name_map[ent.text] = randomize_gender(male_names, female_names)
    return name_map


def swap(text, name_map):
    """
    Replaces every mapped name in text with its new name.

    text: text to rewrite
    name_map: dict of original name -> (gender, new name)
    Returns the rewritten text; text is returned unchanged when there is
    nothing to replace.
    """
    # Longest names first so "Miss Marple" is preferred over a bare "Marple".
    originals = sorted((name for name in name_map if name), key=len, reverse=True)
    if not originals:
        return text

    # Names are escaped because entity text may contain regex metacharacters
    # ("J. K. Rowling", "(Bob"). The lookarounds act like \b for ordinary
    # names but still work when a name starts or ends with punctuation.
    pattern = re.compile(
        r"(?<!\w)(?:%s)(?!\w)" % "|".join(re.escape(name) for name in originals)
    )
    # One pass over the text: a freshly inserted name can never be replaced
    # again, even when it happens to equal another original name. Using a
    # function as replacement also keeps the new name literal.
    return pattern.sub(lambda match: name_map[match.group(0)][1], text)


def pronoun_fix(text, parsed_text, name_map):
    """
    Fixes pronouns so that they agree with the gender of the new names.

    text: original text (the same string parsed_text was built from)
    parsed_text: parsed document with coreferee chains available under
        parsed_text._.coref_chains
    name_map: dict of original name -> (gender, new name)
    Returns text with the affected pronouns replaced; the names themselves
    are left untouched.
    """
    pronouns = {"she", "her", "hers", "he", "his", "him", "himself", "herself"}
    # The converter is chosen by the gender of the NEW name.
    converters = {"male": female_to_male, "female": male_to_female}

    pieces = []
    cursor = 0  # offset in text up to which output has been emitted

    for tok in parsed_text:
        if tok.text.lower() not in pronouns:
            continue

        ref = parsed_text._.coref_chains.resolve(tok)
        # Only act on pronouns that resolve to exactly one referent.
        if ref is None or len(ref) != 1:
            continue

        # The referent may be a single token of a multi-word name, hence the
        # substring test; ambiguous or missing matches are skipped.
        name_options = [key for key in name_map if ref[0].text in key]
        if len(name_options) != 1:
            continue

        gender = name_map[name_options[0]][0]
        replacer = converters[gender](tok)
        if not replacer:
            # Pronoun already has the target gender.
            continue

        pieces.append(text[cursor:tok.idx])
        pieces.append(case_match(tok.text, replacer))
        cursor = tok.idx + len(tok.text)

    pieces.append(text[cursor:])
    return "".join(pieces)


def run_swap(sentence, seed=42, nlp=None, male_names=None, female_names=None):
    """
    Runs the pronoun fix and the name swap to generate new text.

    sentence: text to modify
    seed: value used to seed the global random module before names are drawn
    nlp: callable pipeline that parses sentence (required despite the default)
    male_names: list of male names
    female_names: list of female names
    Returns the modified text.
    Raises TypeError when nlp is not supplied.
    """
    if nlp is None:
        raise TypeError("run_swap() requires a loaded pipeline via the nlp argument")

    random.seed(seed)
    parsed_text = nlp(sentence)
    name_map = make_name_map(parsed_text, male_names, female_names)
    # Pronouns first: token offsets refer to the original sentence, which the
    # name swap would invalidate.
    text = pronoun_fix(sentence, parsed_text, name_map)
    return swap(text, name_map)


class GenderRandomizer(SentenceOperation):
    """
    Replaces person names with random names (50/50 male vs. female) and
    adjusts the pronouns that refer to them.
    """

    tasks = [TaskType.TEXT_TO_TEXT_GENERATION]
    languages = ["en"]
    keywords = ["lexical", "model-based", "rule-based", "named entity recognition", "coreference resolution"]

    def __init__(self, seed=42, max_outputs=1):
        """
        seed: seed forwarded to the base class and used for every generate call
        max_outputs: stored on the instance; not otherwise used in this class
        """
        super().__init__(seed)
        self.max_outputs = max_outputs
        # These lists are from https://www.kaggle.com/nltkdata/names
        self.female_names = self._read_names(
            "transformations/gender_randomizer/names/female.txt"
        )
        self.male_names = self._read_names(
            "transformations/gender_randomizer/names/male.txt"
        )

        self.nlp = spacy_nlp if spacy_nlp else spacy.load("en_core_web_sm", disable=["lemmatizer"])
        # spacy_nlp may be shared; adding a component name twice raises, so
        # only add coreferee when it is not in the pipeline yet.
        if "coreferee" not in self.nlp.pipe_names:
            self.nlp.add_pipe("coreferee")

    @staticmethod
    def _read_names(path):
        """
        Reads one name per line from path, dropping surrounding whitespace
        (including a trailing carriage return) and blank lines.
        """
        with open(path, encoding="utf-8") as handle:
            stripped = (line.strip() for line in handle)
            return [name for name in stripped if name]

    def generate(self, sentence: str):
        """
        Returns the altered text.

        sentence: text to modify
        """
        # NOTE(review): this returns a plain string; confirm whether the
        # SentenceOperation interface expects a list of sentences instead.
        gender_randomized = run_swap(
            sentence,
            seed=self.seed,
            nlp=self.nlp,
            male_names=self.male_names,
            female_names=self.female_names,
        )
        return gender_randomized