# Density_plot_standard_similarity.py
# NOTE(review): the text that preceded the imports in this copy was web-page
# residue (GitHub navigation labels and a 1-104 line-number gutter), not Python;
# it has been replaced by this header so the module can be parsed.
import os
import pandas as pd
import json
import matplotlib.pyplot as plt
from rdflib import Graph
import scipy
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
# ==========analysis of similarity among standards belonging to the same framework=======
def get_standards(address_embedding, test_set):
    """Load the entity embeddings stored in a JSON file.

    The N-Triples file *test_set* is parsed and a ``select distinct ?s``
    query is run over it, but the query result is not consulted: the code
    that used it to keep only the matching embeddings is currently disabled.
    The function therefore returns the full content of the embedding file.

    Args:
        address_embedding: Path of the JSON file holding the embeddings
            (presumably a mapping from entity IRI to vector -- confirm
            against the file that produces it).
        test_set: Path of an N-Triples (``.nt``) file.

    Returns:
        The object decoded from *address_embedding*, unfiltered.

    Side effects:
        Prints the number of triples in the parsed graph and the number of
        entries in the loaded JSON object.
    """
    graph = Graph()
    graph.parse(test_set, format="nt")
    print(len(graph))

    # Every distinct subject of the test set.
    subjects_query = "\n".join([
        "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "PREFIX sto: <https://w3id.org/i40/sto#>",
        "select distinct ?s where {",
        "?s ?p ?o .",
        "}",
    ])
    # NOTE(review): the result is unused.  A disabled step used to keep only
    # the embeddings whose key equals one of these subjects, dump them to
    # 'output_standard_same_framework.json' and read that file back.
    _subjects = graph.query(subjects_query)

    with open(address_embedding, 'rb') as embedding_file:
        embeddings = json.load(embedding_file)

    print(len(embeddings))
    return embeddings
def density_plot(array, config, fold_name):
    """Draw and save the density of pairwise embedding similarities.

    For every ordered pair of distinct entries in *array* the absolute value
    of the cosine similarity (``abs(1 - cosine_distance)``) is computed.  A
    kernel density estimate of those values is drawn on the current
    matplotlib axes and the figure is written to
    ``Density_plot/Density_plot_<config>.pdf``.

    The figure is not closed here, so successive calls keep drawing on the
    same axes until the caller closes it.

    Args:
        array: Mapping whose values are the embedding vectors to compare.
        config: Suffix used in the name of the output PDF.
        fold_name: Label attached to the plotted curve.

    Returns:
        None.
    """
    # Imported here because a bare ``import scipy`` does not guarantee that
    # the ``scipy.spatial`` subpackage has been loaded.
    from scipy.spatial import distance

    vectors = list(array.values())
    # Keys of a mapping are unique, so "different key" is the same as
    # "different position".  Both (a, b) and (b, a) are kept, exactly as the
    # original nested loop did.
    similarity = [
        abs(1 - distance.cosine(left, right))
        for i, left in enumerate(vectors)
        for j, right in enumerate(vectors)
        if i != j
    ]
    standard_similarity = pd.DataFrame({'similarity': similarity})

    # NOTE(review): recent seaborn releases deprecate ``shade``/``bw`` in
    # favour of ``fill``/``bw_method``; they are kept so the rendered curves
    # do not change with the seaborn version this project was written for.
    sns.kdeplot(standard_similarity["similarity"], shade=True, bw=0.01, label=fold_name)
    plt.ylabel('Distribution %')
    plt.ylim(0, 35)
    plt.xlabel('Similarity')

    # savefig does not create missing directories; make sure the target exists.
    os.makedirs('Density_plot', exist_ok=True)
    plt.savefig('Density_plot/Density_plot_' + config + '.pdf', format='pdf', bbox_inches='tight')
# Embedding files per model, one entry per fold.  The directory of the first
# fold carries no numeric suffix ('training_set_relatedTo'); the other four
# are suffixed 1-4.
_training_dirs = ['training_set_relatedTo' + suffix for suffix in ('', '1', '2', '3', '4')]
list_embedding = {
    model: [
        'embeddings/' + directory + '/' + model + '/entities_to_embeddings.json'
        for directory in _training_dirs
    ]
    for model in ('TransD', 'TransE', 'TransH', 'TransR')
}

# Test sets, indexed by fold (0-4).
list_test_set = ['test_set/test_set_relatedTo' + str(index) + '.nt' for index in range(5)]

k = 5
fold_name = 'Fold'

for key, address_embedding in list_embedding.items():
    for fold in range(k):
        # Output name is '<model><fold index>'; the curve label is 1-based.
        config = '{}{}'.format(key, fold)
        array = get_standards(address_embedding[fold], list_test_set[fold])
        density_plot(array, config, fold_name + str(fold + 1))
    # NOTE(review): the nesting of this call could not be recovered from the
    # flattened source.  It is placed after the fold loop so that the folds
    # of one model share a figure (which is what the per-fold curve label
    # suggests) -- confirm against the original script.
    plt.close()