pyhacrf/adjacent.pyx: 32 changes (16 additions, 16 deletions)
@@ -13,12 +13,12 @@ cdef extern from "log1p.h" nogil:
 cdef np.float64_t LOG_2 = 0.6931471805599453
 cdef np.float64_t LOG_3 = 1.0986122886681098
 
-cpdef dict forward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
+cpdef np.ndarray[np.float64_t, ndim=3] forward(np.float64_t[:, :, :] x_dot_parameters, int S):
     """ Helper to calculate the forward weights. """
-    cdef dict alpha = {}
+    cdef np.ndarray[np.float64_t, ndim=3] alpha = np.full_like(x_dot_parameters, -np.inf)
 
     cdef int I, J
-    I, J = x_dot_parameters.shape[0], x_dot_parameters.shape[1]
+    I, J = alpha.shape[0], alpha.shape[1]
 
     # Fill in the edges of the state matrices
     #
@@ -40,13 +40,13 @@ cpdef dict forward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
                       x_dot_parameters[i, 0, insertion + s])
             alpha[i, 0, s] = x_dot_parameters[i, 0, s] + insert
 
-            alpha[i - 1, 0, s, i, 0, s, insertion + s] = insert
+            alpha[i, 0, insertion + s] = insert
         for j in range(1, J):
             delete = (alpha[0, j - 1, s] +
                       x_dot_parameters[0, j, deletion + s])
             alpha[0, j, s] = x_dot_parameters[0, j, s] + delete
 
-            alpha[0, j - 1, s, 0, j, s, deletion + s] = delete
+            alpha[0, j, deletion + s] = delete
 
     # Now fill in the middle of the matrix
     for i in range(1, I):
@@ -60,18 +60,18 @@ cpdef dict forward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
                 alpha[i, j, s] = (x_dot_parameters[i, j, s] +
                                   logsumexp(insert, delete, match))
 
-                alpha[i - 1, j, s, i, j, s, insertion + s] = insert
-                alpha[i, j - 1, s, i, j, s, deletion + s] = delete
-                alpha[i - 1, j - 1, s, i, j, s, matching + s] = match
+                alpha[i, j, insertion + s] = insert
+                alpha[i, j, deletion + s] = delete
+                alpha[i, j, matching + s] = match
 
     return alpha
 
-cpdef dict backward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
+cpdef np.ndarray[np.float64_t, ndim=3] backward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
     """ Helper to calculate the backward weights. """
-    cdef dict beta = {}
+    cdef np.ndarray[np.float64_t, ndim=3] beta = np.full_like(x_dot_parameters, -np.inf)
 
     cdef int I, J
-    I, J = x_dot_parameters.shape[0], x_dot_parameters.shape[1]
+    I, J = beta.shape[0], beta.shape[1]
 
     # Fill in the edges of the state matrices
     #
@@ -96,14 +96,14 @@ cpdef dict backward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
             beta[i, last_col, s] = (x_dot_parameters[i + 1, last_col, insertion + s]
                                     + insert)
 
-            beta[i, last_col, s, i + 1, last_col, s, insertion + s] = insert
+            beta[i + 1, last_col, insertion + s] = insert
         for j in range(last_col - 1, -1, -1):
             delete = (beta[last_row, j + 1, s] +
                       x_dot_parameters[last_row, j + 1, s])
             beta[last_row, j, s] = (x_dot_parameters[last_row, j + 1, deletion + s]
                                     + delete)
 
-            beta[last_row, j, s, last_row, j + 1, s, deletion + s] = delete
+            beta[last_row, j + 1, deletion + s] = delete
 
     # Now fill in the middle of the matrix
     for i in range(last_row - 1, -1, -1):
@@ -115,9 +115,9 @@ cpdef dict backward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
                 match = (beta[i + 1, j + 1, s] +
                          x_dot_parameters[i + 1, j + 1, s])
 
-                beta[i, j, s, i + 1, j, s, insertion + s] = insert
-                beta[i, j, s, i, j + 1, s, deletion + s] = delete
-                beta[i, j, s, i + 1, j + 1, s, matching + s] = match
+                beta[i + 1, j, insertion + s] = insert
+                beta[i, j + 1, deletion + s] = delete
+                beta[i + 1, j + 1, matching + s] = match
 
                 insert += x_dot_parameters[i + 1, j, insertion + s]
                 delete += x_dot_parameters[i, j + 1, deletion + s]
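The change above replaces the dict-of-lattice-tuples representation of the forward and backward weights with dense (I, J, K) arrays initialised to -inf, the log-space zero. A minimal pure-NumPy sketch of the new dense forward recurrence, not taken from the PR: edge rows and columns are omitted for brevity, and the channel offsets insertion, deletion, matching are assumed to index per-transition slots in the third axis, as in the Cython code.

import numpy as np

def dense_forward_sketch(x_dot_parameters, S, insertion, deletion, matching):
    # Dense log-space forward pass; -np.inf marks unreachable cells.
    alpha = np.full_like(x_dot_parameters, -np.inf)
    I, J, _ = x_dot_parameters.shape
    for s in range(S):
        alpha[0, 0, s] = x_dot_parameters[0, 0, s]
    for i in range(1, I):
        for j in range(1, J):
            for s in range(S):
                insert = alpha[i - 1, j, s] + x_dot_parameters[i, j, insertion + s]
                delete = alpha[i, j - 1, s] + x_dot_parameters[i, j, deletion + s]
                match = alpha[i - 1, j - 1, s] + x_dot_parameters[i, j, matching + s]
                # Combine the three incoming paths in log space.
                alpha[i, j, s] = x_dot_parameters[i, j, s] + np.logaddexp(
                    np.logaddexp(insert, delete), match)
                # Per-transition channels, mirroring the Cython version.
                alpha[i, j, insertion + s] = insert
                alpha[i, j, deletion + s] = delete
                alpha[i, j, matching + s] = match
    return alpha

Because -np.inf behaves as probability zero under np.logaddexp, cells the old dict simply never contained now drop out of the sums automatically, which is what lets the tests below compare whole arrays instead of looping over keys.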
pyhacrf/pyhacrf.py: 37 changes (34 additions, 3 deletions)
@@ -241,8 +241,6 @@ def __init__(self, state_machine, x, y=None):
         self.x = x
         self.y = y
 
-        self.forward_backward = self.dense_forward_backward
-
     def predict(self, parameters):
         """ Run forward algorithm to find the predicted distribution over classes. """
         x_dot_parameters = np.matmul(self.x, parameters)
@@ -319,6 +317,8 @@ class _GeneralModel(_Model):
     def __init__(self, state_machine, x, y=None):
         super(_GeneralModel, self).__init__(state_machine, x, y)
         self._lattice = self.state_machine.build_lattice(self.x)
+        self.forward_backward = self.dense_forward_backward
+
 
     def _forward(self, x_dot_parameters):
         """ Helper to calculate the forward weights. """
@@ -342,12 +342,43 @@ def _forward(self, x_dot_parameters) :
                                self.state_machine.n_states)
 
     def _backward(self, x_dot_parameters) :
-        print(x_dot_parameters)
        return adjacent.backward(x_dot_parameters,
                                 self.state_machine.n_states)
 
     def _forward_predict(self, x_dot_parameters):
        return adjacent.forward_predict(x_dot_parameters,
                                        self.state_machine.n_states)
 
+    def forward_backward(self, parameters):
+        """ Run the forward backward algorithm with the given parameters. """
+
+        x_dot_parameters = np.einsum('ijk,lk->ijl', self.x, parameters)
+
+        alpha = self._forward(x_dot_parameters)
+        beta = self._backward(x_dot_parameters)
+
+        ll, deriv = self._gradient(alpha,
+                                   beta,
+                                   self.x,
+                                   self.state_machine.classes.index(self.y),
+                                   self.state_machine.n_states)
+        return ll, deriv
+
+    def _gradient(self, alpha, beta, x, y, S):
+        """Helper to calculate the marginals and from that the gradient given
+        the forward and backward weights.
+        """
+
+        alphabeta = alpha + beta
+
+        Z = np.logaddexp.reduce(alpha[-1, -1, :S])
+        Z_y = alpha[-1, -1, y]
+
+        ab = -np.exp(alphabeta - Z)
+        ab[:, :, y::S] += np.exp(alphabeta[:, :, y::S] - Z_y)
+
+        derivative = np.einsum('ijk,ijl->kl', ab, x)
+
+        return Z_y - Z, derivative
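Two details of the added methods are worth spelling out. The einsum 'ijk,lk->ijl' in forward_backward is a batched matrix product over the lattice, and the einsum 'ijk,ijl->kl' in _gradient contracts the per-cell marginals against the features to give a gradient with the same shape as the parameter matrix. A small shape check with made-up dimensions (a 3x4 lattice, 5 features, 6 parameter rows; none of these numbers come from the PR):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(3, 4, 5))            # (I, J, K) features
parameters = rng.normal(size=(6, 5))      # (L, K) parameters

# forward_backward's einsum is x @ parameters.T, batched over the lattice.
a = np.einsum('ijk,lk->ijl', x, parameters)
assert a.shape == (3, 4, 6)
assert np.allclose(a, x @ parameters.T)

# _gradient's einsum sums marginals times features over every cell (i, j),
# producing one number per (channel, feature) pair.
ab = rng.normal(size=(3, 4, 6))           # stand-in for the marginals
derivative = np.einsum('ijk,ijl->kl', ab, x)
assert derivative.shape == parameters.shape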

pyhacrf/tests/test_model.py: 111 changes (60 additions, 51 deletions)
@@ -248,44 +248,52 @@ def test_forward_connected(self):
                       [[0, 1],
                        [1, 0]]], dtype=np.float64)
         y = 'a'
-        expected_alpha = {
-            (0, 0, 0): np.exp(-7),
-            (0, 0, 0, 0, 1, 0, 4): np.exp(-7) * np.exp(1),
-            (0, 0, 0, 1, 0, 0, 6): np.exp(-7) * np.exp(5),
-            (0, 0, 0, 1, 1, 0, 2): np.exp(-7) * np.exp(-4),
-            (0, 0, 1): np.exp(-5),
-            (0, 0, 1, 0, 1, 1, 5): np.exp(-5) * np.exp(7),
-            (0, 0, 1, 1, 0, 1, 7): np.exp(-5) * np.exp(7),
-            (0, 0, 1, 1, 1, 1, 3): np.exp(-5) * np.exp(-2),
-            (0, 1, 0): np.exp(-7) * np.exp(1) * np.exp(-23),
-            (0, 1, 0, 1, 1, 0, 6): np.exp(-7) * np.exp(1) * np.exp(-23) * np.exp(4),
-            (0, 1, 1): np.exp(-5) * np.exp(7) * np.exp(-17),
-            (0, 1, 1, 1, 1, 1, 7): np.exp(-5) * np.exp(7) * np.exp(-17) * np.exp(6),
-            (1, 0, 0): np.exp(-7) * np.exp(5) * np.exp(-7),
-            (1, 0, 0, 1, 1, 0, 4): np.exp(-7) * np.exp(5) * np.exp(-7) * np.exp(0),
-            (1, 0, 1): np.exp(-5) * np.exp(7) * np.exp(-5),
-            (1, 0, 1, 1, 1, 1, 5): np.exp(-5) * np.exp(7) * np.exp(-5) * np.exp(2),
-            (1, 1, 0): (np.exp(-11) + np.exp(-25) + np.exp(-9)) * np.exp(-8),
-            (1, 1, 1): (np.exp(-1) + np.exp(-9) + np.exp(-7)) * np.exp(-6)
-        }
-        expected_alpha = {k: np.emath.log(v) for k, v in expected_alpha.items()}
+        # expected_alpha = {
+        #     (0, 0, 0): np.exp(-7),
+        #     (0, 0, 0, 1, 1, 0, 2): np.exp(-7) * np.exp(-4),
+        #     (0, 0, 0, 0, 1, 0, 4): np.exp(-7) * np.exp(1),
+        #     (0, 0, 0, 1, 0, 0, 6): np.exp(-7) * np.exp(5),
+        #     (0, 0, 1): np.exp(-5),
+        #     (0, 0, 1, 1, 1, 1, 3): np.exp(-5) * np.exp(-2),
+        #     (0, 0, 1, 0, 1, 1, 5): np.exp(-5) * np.exp(7),
+        #     (0, 0, 1, 1, 0, 1, 7): np.exp(-5) * np.exp(7),
+        #     (0, 1, 0): np.exp(-7) * np.exp(1) * np.exp(-23),
+        #     (0, 1, 0, 1, 1, 0, 6): np.exp(-7) * np.exp(1) * np.exp(-23) * np.exp(4),
+        #     (0, 1, 1): np.exp(-5) * np.exp(7) * np.exp(-17),
+        #     (0, 1, 1, 1, 1, 1, 7): np.exp(-5) * np.exp(7) * np.exp(-17) * np.exp(6),
+        #     (1, 0, 0): np.exp(-7) * np.exp(5) * np.exp(-7),
+        #     (1, 0, 0, 1, 1, 0, 4): np.exp(-7) * np.exp(5) * np.exp(-7) * np.exp(0),
+        #     (1, 0, 1): np.exp(-5) * np.exp(7) * np.exp(-5),
+        #     (1, 0, 1, 1, 1, 1, 5): np.exp(-5) * np.exp(7) * np.exp(-5) * np.exp(2),
+        #     (1, 1, 0): (np.exp(-11) + np.exp(-25) + np.exp(-9)) * np.exp(-8),
+        #     (1, 1, 1): (np.exp(-1) + np.exp(-9) + np.exp(-7)) * np.exp(-6)
+        # }
+        expected_alpha = np.array([[[     -7,           -9],
+                                    [    -29, -16.87307189]],
+                                   [[     -5,           -3],
+                                    [    -15,  -6.99718973]],
+                                   [[-np.inf,      -np.inf],
+                                    [-np.inf,          -11]],
+                                   [[-np.inf,      -np.inf],
+                                    [-np.inf,           -7]],
+                                   [[-np.inf,      -np.inf],
+                                    [     -6,           -9]],
+                                   [[-np.inf,      -np.inf],
+                                    [      2,           -1]],
+                                   [[-np.inf,           -2],
+                                    [-np.inf,          -25]],
+                                   [[-np.inf,            2],
+                                    [-np.inf,           -9]]]).T
 
         state_machine = DefaultStateMachine(classes)
         test_model = _AdjacentModel(state_machine, x, y)
         x_dot_parameters = np.dot(x, parameters.T)  # Pre-compute the dot product
         actual_alpha = test_model._forward(x_dot_parameters)
+        actual_alpha = np.asarray(actual_alpha)
 
-        self.assertEqual(len(actual_alpha), len(expected_alpha))
-        for key in sorted(expected_alpha.keys()):
-            try:
-                expected_alpha[key], actual_alpha[key]
-            except:
-                print(key)
-                print('expected', sorted(expected_alpha))
-                print('actual', sorted(actual_alpha))
-                raise
-
-            self.assertAlmostEqual(actual_alpha[key], expected_alpha[key])
+        assert_array_almost_equal(actual_alpha, expected_alpha)
 
     def test_backward_connected(self):
         parameters = np.array(range(-4, 4), dtype=np.float64).reshape((4, 2))
@@ -299,30 +307,33 @@ def test_backward_connected(self):
                       [[0, 1],
                        [1, 0]]], dtype=np.float64)
         y = 'a'
-        expected_beta = {
-            (0, 0, 0): -3.872776558098594,
-            (0, 0, 0, 0, 1, 0, 2): -13,
-            (0, 0, 0, 1, 0, 0, 3): -7,
-            (0, 0, 0, 1, 1, 0, 1): -4,
-            (0, 1, 0): -2.0,
-            (0, 1, 0, 1, 1, 0, 3): -4.0,
-            (1, 0, 0): -4.0,
-            (1, 0, 0, 1, 1, 0, 2): -4.0,
-            (1, 1, 0): 0.0}
+        # expected_beta = {
+        #     (0, 0, 0): -3.872776558098594,
+        #     (0, 0, 0, 0, 1, 0, 2): -13,
+        #     (0, 0, 0, 1, 0, 0, 3): -7,
+        #     (0, 0, 0, 1, 1, 0, 1): -4,
+        #     (0, 1, 0): -2.0,
+        #     (0, 1, 0, 1, 1, 0, 3): -4.0,
+        #     (1, 0, 0): -4.0,
+        #     (1, 0, 0, 1, 1, 0, 2): -4.0,
+        #     (1, 1, 0): 0.0}
+        expected_beta = np.array([[[-3.872776558098594,      -4],
+                                   [                -2,       0]],
+                                  [[           -np.inf, -np.inf],
+                                   [           -np.inf,      -4]],
+                                  [[           -np.inf, -np.inf],
+                                   [               -13,      -4]],
+                                  [[           -np.inf,      -7],
+                                   [           -np.inf,      -4]]]).T
+
 
         state_machine = DefaultStateMachine(['a'])
         test_model = _AdjacentModel(state_machine, x, y)
 
         x_dot_parameters = np.dot(x, parameters.T)  # Pre-compute the dot product
         actual_beta = test_model._backward(x_dot_parameters)
-        print(sorted(actual_beta.items()))
-        print(sorted(expected_beta.items()))
 
-        self.assertEqual(len(actual_beta), len(expected_beta))
-        for key in sorted(expected_beta.keys(), reverse=True):
-            print(key, expected_beta[key], actual_beta[key])
-            self.assertAlmostEqual(actual_beta[key], expected_beta[key])
+        assert_array_almost_equal(actual_beta, expected_beta)
 
     def test_forward_backward_same_partition_value(self):
         classes = ['a', 'b']
Expand Down Expand Up @@ -385,14 +396,12 @@ def test_derivate_chain(self):
dg[s, d] = delta
y0, _ = test_model.forward_backward(parameters)
y1, _ = test_model.forward_backward(parameters + dg)
print(s, d, y0, y1)
expected_dll[s, d] = (y1 - y0) / delta

actual_ll, actual_dll = test_model.forward_backward(parameters)

print(expected_ll, actual_ll)
print(expected_dll)
print(actual_dll)

self.assertAlmostEqual(actual_ll, expected_ll)
assert_array_almost_equal(actual_dll, expected_dll, decimal=TEST_PRECISION)

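As a spot check on the rewritten test expectations (not part of the PR itself): each finite entry of the new dense arrays is the log of the corresponding value in the old dicts. For example, the old (1, 1, 1) alpha value, (np.exp(-1) + np.exp(-9) + np.exp(-7)) * np.exp(-6), reproduces the -6.99718973 that now appears in expected_alpha:

import numpy as np

# Old dict value for key (1, 1, 1).
old_value = (np.exp(-1) + np.exp(-9) + np.exp(-7)) * np.exp(-6)

# The dense test states the same quantity directly in log space.
log_value = np.logaddexp.reduce([-1.0, -9.0, -7.0]) - 6

assert np.isclose(np.log(old_value), log_value)  # both are about -6.99718973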