-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
45 lines (38 loc) · 1.59 KB
/
main.py
File metadata and controls
45 lines (38 loc) · 1.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# main.py
import pandas as pd
from data_processing import load_and_preprocess_data, group_transactions
from category_mapping import map_category
from utils import clean_category
def main():
    """Run the category-matching pipeline and export the results to Excel.

    Loads the train/test data, groups the training transactions, asks
    ``map_category`` to assign a category to each test transaction, prints
    the share of returned transactions that received a category, and writes
    one row per transaction to ``ctrlf3-match.xlsx``.
    """
    # Load and preprocess data
    train_data, test_data = load_and_preprocess_data()

    # Process and map categories
    grouped_transactions = group_transactions(train_data)

    # NOTE(review): 'from env' looks like a placeholder rather than a real
    # key -- confirm how map_category resolves the actual credential.
    gpt_api_key = 'from env'

    # Materialise once: the result is walked twice below (count + export)
    # and its length is needed for the percentage.
    matched_transactions = list(
        map_category(test_data, grouped_transactions, gpt_api_key)
    )

    # Calculate match percentage. The denominator is the number of
    # transactions actually returned instead of a hard-coded 220, so the
    # figure stays correct for any test-set size; an empty result reports 0%
    # rather than dividing by zero.
    total_count = len(matched_transactions)
    matched_count = sum(
        1 for t in matched_transactions if t.predicted_category is not None
    )
    match_percentage = (matched_count / total_count) * 100 if total_count else 0.0
    print(f"Match percentage on the new test set: {match_percentage:.2f}%")

    # Prepare and export results
    results = pd.DataFrame([{
        'date': t.date,
        'name': t.name,
        'memo': t.memo,
        'amount': t.amount,
        'transaction_type': t.transaction_type,
        'cleaned_memo': t.cleaned_memo,
        'cleaned_name': t.cleaned_name,
        'predicted_category': clean_category(t.predicted_category),
        'matching_steps': t.matching_steps,
        # Optional attributes: not every transaction is guaranteed to carry
        # them, so fall back to None instead of raising AttributeError.
        'MatchId': getattr(t, 'MatchId', None),
        'source': t.source,
        'normalized_source': t.normalized_source,
        'purpose': getattr(t, 'purpose', None),
        'generated_name': getattr(t, 'generated_name', None),
    } for t in matched_transactions])

    output_file_path = 'ctrlf3-match.xlsx'
    results.to_excel(output_file_path, index=False)
    print(f'File saved to {output_file_path}')
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()