pyhacrf/adjacent.pyx: 32 changes (16 additions, 16 deletions)
@@ -13,12 +13,12 @@ cdef extern from "log1p.h" nogil:
 cdef np.float64_t LOG_2 = 0.6931471805599453
 cdef np.float64_t LOG_3 = 1.0986122886681098
 
-cpdef dict forward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
+cpdef np.ndarray[np.float64_t, ndim=3] forward(np.float64_t[:, :, :] x_dot_parameters, int S):
     """ Helper to calculate the forward weights. """
-    cdef dict alpha = {}
+    cdef np.ndarray[np.float64_t, ndim=3] alpha = np.full_like(x_dot_parameters, -np.inf)
 
     cdef int I, J
-    I, J = x_dot_parameters.shape[0], x_dot_parameters.shape[1]
+    I, J = alpha.shape[0], alpha.shape[1]
 
     # Fill in the edges of the state matrices
     #
@@ -40,13 +40,13 @@ cpdef dict forward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
                       x_dot_parameters[i, 0, insertion + s])
             alpha[i, 0, s] = x_dot_parameters[i, 0, s] + insert
 
-            alpha[i - 1, 0, s, i, 0, s, insertion + s] = insert
+            alpha[i, 0, insertion + s] = insert
         for j in range(1, J):
             delete = (alpha[0, j - 1, s] +
                       x_dot_parameters[0, j, deletion + s])
             alpha[0, j, s] = x_dot_parameters[0, j, s] + delete
 
-            alpha[0, j - 1, s, 0, j, s, deletion + s] = delete
+            alpha[0, j, deletion + s] = delete
 
     # Now fill in the middle of the matrix
     for i in range(1, I):
@@ -60,18 +60,18 @@ cpdef dict forward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
                 alpha[i, j, s] = (x_dot_parameters[i, j, s] +
                                   logsumexp(insert, delete, match))
 
-                alpha[i - 1, j, s, i, j, s, insertion + s] = insert
-                alpha[i, j - 1, s, i, j, s, deletion + s] = delete
-                alpha[i - 1, j - 1, s, i, j, s, matching + s] = match
+                alpha[i, j, insertion + s] = insert
+                alpha[i, j, deletion + s] = delete
+                alpha[i, j, matching + s] = match
 
     return alpha
 
-cpdef dict backward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
+cpdef np.ndarray[np.float64_t, ndim=3] backward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
     """ Helper to calculate the backward weights. """
-    cdef dict beta = {}
+    cdef np.ndarray[np.float64_t, ndim=3] beta = np.full_like(x_dot_parameters, -np.inf)
 
     cdef int I, J
-    I, J = x_dot_parameters.shape[0], x_dot_parameters.shape[1]
+    I, J = beta.shape[0], beta.shape[1]
 
     # Fill in the edges of the state matrices
     #
@@ -96,14 +96,14 @@ cpdef dict backward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
             beta[i, last_col, s] = (x_dot_parameters[i + 1, last_col, insertion + s]
                                     + insert)
 
-            beta[i, last_col, s, i + 1, last_col, s, insertion + s] = insert
+            beta[i + 1, last_col, insertion + s] = insert
         for j in range(last_col - 1, -1, -1):
             delete = (beta[last_row, j + 1, s] +
                       x_dot_parameters[last_row, j + 1, s])
             beta[last_row, j, s] = (x_dot_parameters[last_row, j + 1, deletion + s]
                                     + delete)
 
-            beta[last_row, j, s, last_row, j + 1, s, deletion + s] = delete
+            beta[last_row, j + 1, deletion + s] = delete
 
     # Now fill in the middle of the matrix
     for i in range(last_row - 1, -1, -1):
@@ -115,9 +115,9 @@ cpdef dict backward(np.ndarray[np.float64_t, ndim=3] x_dot_parameters, int S):
                 match = (beta[i + 1, j + 1, s] +
                          x_dot_parameters[i + 1, j + 1, s])
 
-                beta[i, j, s, i + 1, j, s, insertion + s] = insert
-                beta[i, j, s, i, j + 1, s, deletion + s] = delete
-                beta[i, j, s, i + 1, j + 1, s, matching + s] = match
+                beta[i + 1, j, insertion + s] = insert
+                beta[i, j + 1, deletion + s] = delete
+                beta[i + 1, j + 1, matching + s] = match
 
                 insert += x_dot_parameters[i + 1, j, insertion + s]
                 delete += x_dot_parameters[i, j + 1, deletion + s]
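The change above replaces the dict-of-lattice-tuples representation of the forward and backward weights with dense (I, J, K) arrays initialised to -inf, the log-space zero. A minimal pure-NumPy sketch of the new dense forward recurrence, not taken from the PR: edge rows and columns are omitted for brevity, and the channel offsets insertion, deletion, matching are assumed to index per-transition slots in the third axis, as in the Cython code.

import numpy as np

def dense_forward_sketch(x_dot_parameters, S, insertion, deletion, matching):
    # Dense log-space forward pass; -np.inf marks unreachable cells.
    alpha = np.full_like(x_dot_parameters, -np.inf)
    I, J, _ = x_dot_parameters.shape
    for s in range(S):
        alpha[0, 0, s] = x_dot_parameters[0, 0, s]
    for i in range(1, I):
        for j in range(1, J):
            for s in range(S):
                insert = alpha[i - 1, j, s] + x_dot_parameters[i, j, insertion + s]
                delete = alpha[i, j - 1, s] + x_dot_parameters[i, j, deletion + s]
                match = alpha[i - 1, j - 1, s] + x_dot_parameters[i, j, matching + s]
                # Combine the three incoming paths in log space.
                alpha[i, j, s] = x_dot_parameters[i, j, s] + np.logaddexp(
                    np.logaddexp(insert, delete), match)
                # Per-transition channels, mirroring the Cython version.
                alpha[i, j, insertion + s] = insert
                alpha[i, j, deletion + s] = delete
                alpha[i, j, matching + s] = match
    return alpha

Because -np.inf behaves as probability zero under np.logaddexp, cells the old dict simply never contained now drop out of the sums automatically, which is what lets the tests below compare whole arrays instead of looping over keys.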
pyhacrf/pyhacrf.py: 37 changes (34 additions, 3 deletions)
@@ -241,8 +241,6 @@ def __init__(self, state_machine, x, y=None):
         self.x = x
         self.y = y
 
-        self.forward_backward = self.dense_forward_backward
-
     def predict(self, parameters):
         """ Run forward algorithm to find the predicted distribution over classes. """
         x_dot_parameters = np.matmul(self.x, parameters)
@@ -319,6 +317,8 @@ class _GeneralModel(_Model):
     def __init__(self, state_machine, x, y=None):
         super(_GeneralModel, self).__init__(state_machine, x, y)
         self._lattice = self.state_machine.build_lattice(self.x)
+        self.forward_backward = self.dense_forward_backward
+
 
     def _forward(self, x_dot_parameters):
         """ Helper to calculate the forward weights. """
@@ -342,12 +342,43 @@ def _forward(self, x_dot_parameters) :
                                self.state_machine.n_states)
 
     def _backward(self, x_dot_parameters) :
-        print(x_dot_parameters)
        return adjacent.backward(x_dot_parameters,
                                 self.state_machine.n_states)
 
     def _forward_predict(self, x_dot_parameters):
        return adjacent.forward_predict(x_dot_parameters,
                                        self.state_machine.n_states)
 
+    def forward_backward(self, parameters):
+        """ Run the forward backward algorithm with the given parameters. """
+
+        x_dot_parameters = np.einsum('ijk,lk->ijl', self.x, parameters)
+
+        alpha = self._forward(x_dot_parameters)
+        beta = self._backward(x_dot_parameters)
+
+        ll, deriv = self._gradient(alpha,
+                                   beta,
+                                   self.x,
+                                   self.state_machine.classes.index(self.y),
+                                   self.state_machine.n_states)
+        return ll, deriv
+
+    def _gradient(self, alpha, beta, x, y, S):
+        """Helper to calculate the marginals and from that the gradient given
+        the forward and backward weights.
+        """
+
+        alphabeta = alpha + beta
+
+        Z = np.logaddexp.reduce(alpha[-1, -1, :S])
+        Z_y = alpha[-1, -1, y]
+
+        ab = -np.exp(alphabeta - Z)
+        ab[:, :, y::S] += np.exp(alphabeta[:, :, y::S] - Z_y)
+
+        derivative = np.einsum('ijk,ijl->kl', ab, x)
+
+        return Z_y - Z, derivative
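Two details of the added methods are worth spelling out. The einsum 'ijk,lk->ijl' in forward_backward is a batched matrix product over the lattice, and the einsum 'ijk,ijl->kl' in _gradient contracts the per-cell marginals against the features to give a gradient with the same shape as the parameter matrix. A small shape check with made-up dimensions (a 3x4 lattice, 5 features, 6 parameter rows; none of these numbers come from the PR):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(3, 4, 5))            # (I, J, K) features
parameters = rng.normal(size=(6, 5))      # (L, K) parameters

# forward_backward's einsum is x @ parameters.T, batched over the lattice.
a = np.einsum('ijk,lk->ijl', x, parameters)
assert a.shape == (3, 4, 6)
assert np.allclose(a, x @ parameters.T)

# _gradient's einsum sums marginals times features over every cell (i, j),
# producing one number per (channel, feature) pair.
ab = rng.normal(size=(3, 4, 6))           # stand-in for the marginals
derivative = np.einsum('ijk,ijl->kl', ab, x)
assert derivative.shape == parameters.shape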

pyhacrf/tests/test_model.py: 111 changes (60 additions, 51 deletions)
@@ -248,44 +248,52 @@ def test_forward_connected(self):
                       [[0, 1],
                        [1, 0]]], dtype=np.float64)
         y = 'a'
-        expected_alpha = {
-            (0, 0, 0): np.exp(-7),
-            (0, 0, 0, 0, 1, 0, 4): np.exp(-7) * np.exp(1),
-            (0, 0, 0, 1, 0, 0, 6): np.exp(-7) * np.exp(5),
-            (0, 0, 0, 1, 1, 0, 2): np.exp(-7) * np.exp(-4),
-            (0, 0, 1): np.exp(-5),
-            (0, 0, 1, 0, 1, 1, 5): np.exp(-5) * np.exp(7),
-            (0, 0, 1, 1, 0, 1, 7): np.exp(-5) * np.exp(7),
-            (0, 0, 1, 1, 1, 1, 3): np.exp(-5) * np.exp(-2),
-            (0, 1, 0): np.exp(-7) * np.exp(1) * np.exp(-23),
-            (0, 1, 0, 1, 1, 0, 6): np.exp(-7) * np.exp(1) * np.exp(-23) * np.exp(4),
-            (0, 1, 1): np.exp(-5) * np.exp(7) * np.exp(-17),
-            (0, 1, 1, 1, 1, 1, 7): np.exp(-5) * np.exp(7) * np.exp(-17) * np.exp(6),
-            (1, 0, 0): np.exp(-7) * np.exp(5) * np.exp(-7),
-            (1, 0, 0, 1, 1, 0, 4): np.exp(-7) * np.exp(5) * np.exp(-7) * np.exp(0),
-            (1, 0, 1): np.exp(-5) * np.exp(7) * np.exp(-5),
-            (1, 0, 1, 1, 1, 1, 5): np.exp(-5) * np.exp(7) * np.exp(-5) * np.exp(2),
-            (1, 1, 0): (np.exp(-11) + np.exp(-25) + np.exp(-9)) * np.exp(-8),
-            (1, 1, 1): (np.exp(-1) + np.exp(-9) + np.exp(-7)) * np.exp(-6)
-        }
-        expected_alpha = {k: np.emath.log(v) for k, v in expected_alpha.items()}
+        # expected_alpha = {
+        #     (0, 0, 0): np.exp(-7),
+        #     (0, 0, 0, 1, 1, 0, 2): np.exp(-7) * np.exp(-4),
+        #     (0, 0, 0, 0, 1, 0, 4): np.exp(-7) * np.exp(1),
+        #     (0, 0, 0, 1, 0, 0, 6): np.exp(-7) * np.exp(5),
+        #     (0, 0, 1): np.exp(-5),
+        #     (0, 0, 1, 1, 1, 1, 3): np.exp(-5) * np.exp(-2),
+        #     (0, 0, 1, 0, 1, 1, 5): np.exp(-5) * np.exp(7),
+        #     (0, 0, 1, 1, 0, 1, 7): np.exp(-5) * np.exp(7),
+        #     (0, 1, 0): np.exp(-7) * np.exp(1) * np.exp(-23),
+        #     (0, 1, 0, 1, 1, 0, 6): np.exp(-7) * np.exp(1) * np.exp(-23) * np.exp(4),
+        #     (0, 1, 1): np.exp(-5) * np.exp(7) * np.exp(-17),
+        #     (0, 1, 1, 1, 1, 1, 7): np.exp(-5) * np.exp(7) * np.exp(-17) * np.exp(6),
+        #     (1, 0, 0): np.exp(-7) * np.exp(5) * np.exp(-7),
+        #     (1, 0, 0, 1, 1, 0, 4): np.exp(-7) * np.exp(5) * np.exp(-7) * np.exp(0),
+        #     (1, 0, 1): np.exp(-5) * np.exp(7) * np.exp(-5),
+        #     (1, 0, 1, 1, 1, 1, 5): np.exp(-5) * np.exp(7) * np.exp(-5) * np.exp(2),
+        #     (1, 1, 0): (np.exp(-11) + np.exp(-25) + np.exp(-9)) * np.exp(-8),
+        #     (1, 1, 1): (np.exp(-1) + np.exp(-9) + np.exp(-7)) * np.exp(-6)
+        # }
+        expected_alpha = np.array([[[     -7,           -9],
+                                    [    -29, -16.87307189]],
+                                   [[     -5,           -3],
+                                    [    -15,  -6.99718973]],
+                                   [[-np.inf,      -np.inf],
+                                    [-np.inf,          -11]],
+                                   [[-np.inf,      -np.inf],
+                                    [-np.inf,           -7]],
+                                   [[-np.inf,      -np.inf],
+                                    [     -6,           -9]],
+                                   [[-np.inf,      -np.inf],
+                                    [      2,           -1]],
+                                   [[-np.inf,           -2],
+                                    [-np.inf,          -25]],
+                                   [[-np.inf,            2],
+                                    [-np.inf,           -9]]]).T
 
         state_machine = DefaultStateMachine(classes)
         test_model = _AdjacentModel(state_machine, x, y)
         x_dot_parameters = np.dot(x, parameters.T)  # Pre-compute the dot product
         actual_alpha = test_model._forward(x_dot_parameters)
+        actual_alpha = np.asarray(actual_alpha)
 
-        self.assertEqual(len(actual_alpha), len(expected_alpha))
-        for key in sorted(expected_alpha.keys()):
-            try:
-                expected_alpha[key], actual_alpha[key]
-            except:
-                print(key)
-                print('expected', sorted(expected_alpha))
-                print('actual', sorted(actual_alpha))
-                raise
-
-            self.assertAlmostEqual(actual_alpha[key], expected_alpha[key])
+        assert_array_almost_equal(actual_alpha, expected_alpha)
 
     def test_backward_connected(self):
         parameters = np.array(range(-4, 4), dtype=np.float64).reshape((4, 2))
@@ -299,30 +307,33 @@ def test_backward_connected(self):
                       [[0, 1],
                        [1, 0]]], dtype=np.float64)
         y = 'a'
-        expected_beta = {
-            (0, 0, 0): -3.872776558098594,
-            (0, 0, 0, 0, 1, 0, 2): -13,
-            (0, 0, 0, 1, 0, 0, 3): -7,
-            (0, 0, 0, 1, 1, 0, 1): -4,
-            (0, 1, 0): -2.0,
-            (0, 1, 0, 1, 1, 0, 3): -4.0,
-            (1, 0, 0): -4.0,
-            (1, 0, 0, 1, 1, 0, 2): -4.0,
-            (1, 1, 0): 0.0}
+        # expected_beta = {
+        #     (0, 0, 0): -3.872776558098594,
+        #     (0, 0, 0, 0, 1, 0, 2): -13,
+        #     (0, 0, 0, 1, 0, 0, 3): -7,
+        #     (0, 0, 0, 1, 1, 0, 1): -4,
+        #     (0, 1, 0): -2.0,
+        #     (0, 1, 0, 1, 1, 0, 3): -4.0,
+        #     (1, 0, 0): -4.0,
+        #     (1, 0, 0, 1, 1, 0, 2): -4.0,
+        #     (1, 1, 0): 0.0}
+        expected_beta = np.array([[[-3.872776558098594,      -4],
+                                   [                -2,       0]],
+                                  [[           -np.inf, -np.inf],
+                                   [           -np.inf,      -4]],
+                                  [[           -np.inf, -np.inf],
+                                   [               -13,      -4]],
+                                  [[           -np.inf,      -7],
+                                   [           -np.inf,      -4]]]).T
+
 
         state_machine = DefaultStateMachine(['a'])
         test_model = _AdjacentModel(state_machine, x, y)
 
         x_dot_parameters = np.dot(x, parameters.T)  # Pre-compute the dot product
         actual_beta = test_model._backward(x_dot_parameters)
-        print(sorted(actual_beta.items()))
-        print(sorted(expected_beta.items()))
 
-        self.assertEqual(len(actual_beta), len(expected_beta))
-        for key in sorted(expected_beta.keys(), reverse=True):
-            print(key, expected_beta[key], actual_beta[key])
-            self.assertAlmostEqual(actual_beta[key], expected_beta[key])
+        assert_array_almost_equal(actual_beta, expected_beta)
 
     def test_forward_backward_same_partition_value(self):
         classes = ['a', 'b']
Expand Down Expand Up @@ -385,14 +396,12 @@ def test_derivate_chain(self):
dg[s, d] = delta
y0, _ = test_model.forward_backward(parameters)
y1, _ = test_model.forward_backward(parameters + dg)
print(s, d, y0, y1)
expected_dll[s, d] = (y1 - y0) / delta

actual_ll, actual_dll = test_model.forward_backward(parameters)

print(expected_ll, actual_ll)
print(expected_dll)
print(actual_dll)

self.assertAlmostEqual(actual_ll, expected_ll)
assert_array_almost_equal(actual_dll, expected_dll, decimal=TEST_PRECISION)

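As a spot check on the rewritten test expectations (not part of the PR itself): each finite entry of the new dense arrays is the log of the corresponding value in the old dicts. For example, the old (1, 1, 1) alpha value, (np.exp(-1) + np.exp(-9) + np.exp(-7)) * np.exp(-6), reproduces the -6.99718973 that now appears in expected_alpha:

import numpy as np

# Old dict value for key (1, 1, 1).
old_value = (np.exp(-1) + np.exp(-9) + np.exp(-7)) * np.exp(-6)

# The dense test states the same quantity directly in log space.
log_value = np.logaddexp.reduce([-1.0, -9.0, -7.0]) - 6

assert np.isclose(np.log(old_value), log_value)  # both are about -6.99718973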