GEM-benchmark · asnota · Oct 27, 2021 · Nov 25, 2021 · Nov 25, 2021 · Dec 3, 2021
diff --git a/filters/universal_bias/README.md b/filters/universal_bias/README.md
@@ -2,9 +2,11 @@
 
 ## What type of a filter is this?
 
-This filter is currently contains lexical seeds for 10 categories () in English, however it can be extended to any language or topic by simple addition of desired entries to `lexicals.json` 
-file in current directoryalong with the text corpus in corresponding language.
+This filter currently contains lexical seeds for 11 categories (religion, race, ethnicity, gender, sexual orientation, age, appearance, disability, experience, education, economic status).
+The lexical seeds representing these categories are currently available in English and French; however, the pool of languages can be extended by simply adding the lexical seeds in the desired language to the `lexicals.json` file under a separate language key (e.g., Polish goes under the "pl" key).
+
 The minority parameter is a potentially underrepresented group, defined with its own set of keywords; the majority parameter is a set of keywords, representing the dominating group.
+
 The filter returns "True" if the minority group is indeed underrepresented, "False" otherwise.
 
 Author: Anna Shvets
@@ -25,8 +27,8 @@ The beneficial impact of the current extrinsic filter is its complete transparen
 
 ## Example of use
 ```
-sentences = [ "He is going to make a cake.",
-              "Olivia is going to program",
+sentences = [ "He is going home.",
+              "Olivia is going to work",
               "Nobody likes washing dishes",
               "He agreed to help me" ]
 
@@ -67,8 +69,8 @@ print("This is a neutral group:", neutral_group)
 ```
 Output:
 ```
-This is a minority group: ['She is going to program']
-This is a majority group: ['He is going to make a cake.', 'He agreed to help me']
+This is a minority group: ['She is going to work']
+This is a majority group: ['He is going home.', 'He agreed to help me']
 This is a neutral group: ['Nobody likes washing dishes']
 ```
 

diff --git a/filters/universal_bias/filter.py b/filters/universal_bias/filter.py
@@ -1,17 +1,16 @@
 from interfaces.SentenceOperation import SentenceOperation
 from tasks.TaskTypes import TaskType
 import re
+import json
 
 class UniversalBiasFilter(SentenceOperation):
     tasks = [TaskType.TEXT_TO_TEXT_GENERATION]
     keywords = ["rule-based", "social-reasoning"]
 
-    def __init__(self, language=None, category=None, minority_group=None, majority_group=None, minority=None, majority=None):
+    def __init__(self, language=None, category=None, minority=None, majority=None):
         super().__init__()
         self.language = language
-        self.category = category
-        self.minority_group = minority_group
-        self.majority_group = majority_group
+        self.category = category        
         self.minority = minority
         self.majority = majority
 
@@ -31,8 +30,9 @@ def flag_sentences(self, sentences):
 
         # Retrieve relevant data extracts
         try:
-            minority_group = data[self.language][self.category][self.minority_group]
-            majority_group = data[self.language][self.category][self.majority_group]
+            minority_group = data[self.language][self.category][self.minority]
+            majority_group = data[self.language][self.category][self.majority]
+
         except NameError as error:
             print('The specified language, category of group is not supported or misformatted. Please provide valid arguments to the filter() method.') 
 
@@ -49,7 +49,7 @@ def flag_sentences(self, sentences):
             sentence_cleaned = sentence.lower()
             sentence_cleaned = re.sub('^',' ', sentence_cleaned)
             sentence_cleaned = re.sub('$',' ', sentence_cleaned)
-
+            
             # Take care of urls
             words = []
             for word in sentence_cleaned.split():
@@ -59,7 +59,7 @@ def flag_sentences(self, sentences):
                 words.append(word.strip())
             sentence_cleaned = ' '.join(words)
             sentence_cleaned = re.sub(r'\[([^\]]*)\] \( *__url__ *\)', r'\1', sentence_cleaned)
-
+            
             # Remove illegal chars and extra space
             sentence_cleaned = re.sub('__url__','URL', sentence_cleaned)
             sentence_cleaned = re.sub(r"[^A-Za-z0-9():,.!?\"\']", " ", sentence_cleaned)
@@ -70,13 +70,13 @@ def flag_sentences(self, sentences):
 
             # Split the words in the sentence to find the intersection with the minority array of keywords
             intersection_minority = set(sentence_cleaned.split()).intersection(
-                set(minority_group + minority)
+                set(minority_group)
             )
             # Split the words in the sentence to find the intersection with the majority array of keywords
             intersection_majority = set(sentence_cleaned.split()).intersection(
-                set(majority_group + majority)
+                set(majority_group)
             )
-
+            
             # If the intersection occurred, the intersection_minority and intersection_majority will contain at least one common keyword
             # use this intersection information to get the value for the corresponding flags
             minority_flag = len(intersection_minority) > 0
@@ -105,6 +105,7 @@ def flag_sentences(self, sentences):
                 "majority_flag": majority_flag,
                 "neutral_flag": neutral_flag,
             }
+            print("sentence_object", sentence_object)
 
             # Append the object to the array we return
             flagged_sentences.append(sentence_object)