From 5c7a47c371756a0099b681701fe1dfbb68adaa17 Mon Sep 17 00:00:00 2001
From: Ryan Shaw <ryanshaw@unc.edu>
Date: Thu, 12 Mar 2015 14:59:21 -0400
Subject: [PATCH 1/3] =?UTF-8?q?Fix=20calculation=20of=20agreement=20for=20?=
 =?UTF-8?q?=CF=80*,=20maybe.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 segeval/agreement/pi.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/segeval/agreement/pi.py b/segeval/agreement/pi.py
index 791b31b..a72062a 100644
--- a/segeval/agreement/pi.py
+++ b/segeval/agreement/pi.py
@@ -6,6 +6,7 @@
 from __future__ import absolute_import, division
 from decimal import Decimal
 from segeval.agreement import __fnc_metric__, __actual_agreement_linear__
+from itertools import chain
 
 
 def __fleiss_pi_linear__(dataset, **kwargs):
@@ -29,13 +30,10 @@ def __fleiss_pi_linear__(dataset, **kwargs):
     A_a = Decimal(sum(all_numerators)) / sum(all_denominators)
     # Calculate Ae
     p_e_segs = list()
-    for boundaries_info in coders_boundaries.values():
-        for item in boundaries_info:
-            boundaries, total_boundaries = item
-            p_e_seg = Decimal(boundaries) / total_boundaries
-            p_e_segs.append(p_e_seg)
+    boundary_ratios = chain.from_iterable(coders_boundaries.values())
+    b_placed, b_possible = map(sum, zip(*boundary_ratios))
     # Calculate P_e_seg
-    P_e_seg = Decimal(sum(p_e_segs)) / len(p_e_segs)
+    P_e_seg = Decimal(b_placed) / b_possible
     A_e = (P_e_seg ** 2)
     # Calculate pi
     pi = (A_a - A_e) / (Decimal('1') - A_e)

From c46e12d82c3854b77f6db6477c23932a85eb58c3 Mon Sep 17 00:00:00 2001
From: Ryan Shaw <ryanshaw@unc.edu>
Date: Wed, 25 Mar 2015 15:33:14 -0400
Subject: [PATCH 2/3] We are counting (near) matches, not edits, so we want
 1 - weight.

---
 segeval/similarity/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/segeval/similarity/__init__.py b/segeval/similarity/__init__.py
index 220811b..d1ff32a 100644
--- a/segeval/similarity/__init__.py
+++ b/segeval/similarity/__init__.py
@@ -103,7 +103,7 @@ def __boundary_confusion_matrix__(*args, **kwargs):
     # Add weighted near misses
     for transposition in statistics['transpositions']:
         match = transposition[2]
-        matrix[match][match] += fnc_weight_t([transposition], n_t)
+        matrix[match][match] += (1 - fnc_weight_t([transposition], n_t))
     # Add confusion errors
     for substitution in statistics['substitutions']:
         hyp, ref = substitution

From 7cd095cfd950c53eca24a582456cdffe8401c0dc Mon Sep 17 00:00:00 2001
From: Ryan Shaw <ryanshaw@unc.edu>
Date: Wed, 25 Mar 2015 17:17:52 -0400
Subject: [PATCH 3/3] Hypothesis is on the left (a), reference is on the right
 (b).

---
 segeval/similarity/__init__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/segeval/similarity/__init__.py b/segeval/similarity/__init__.py
index d1ff32a..185accc 100644
--- a/segeval/similarity/__init__.py
+++ b/segeval/similarity/__init__.py
@@ -113,11 +113,11 @@ def __boundary_confusion_matrix__(*args, **kwargs):
         hyp, ref = None, None
         boundary_type, side = addition
         if side == 'a':
-            hyp = None
-            ref = boundary_type
-        else:  # side == 'b'
             hyp = boundary_type
             ref = None
+        else:  # side == 'b'
+            hyp = None
+            ref = boundary_type
         assert side == 'a' or side == 'b'
         matrix[hyp][ref] += 1
     return matrix