BTrDB
diff --git a/‎btrdb/transformers.py‎
Lines changed: 79 additions & 23 deletions b/‎btrdb/transformers.py‎
Lines changed: 79 additions & 23 deletions
@@ -29,18 +29,18 @@ def _get_time_from_row(row):
     raise Exception("Row contains no data")
 
 
-def _stream_names(stream_set):
+def _stream_names(streamset):
     return tuple(
         s.collection + "/" +  s.name \
-        for s in stream_set._streams
+        for s in streamset._streams
     )
 
 
 ##########################################################################
 ## Transform Functions
 ##########################################################################
 
-def to_series(stream_set, datetime64_index=True):
+def to_series(streamset, datetime64_index=True, agg="mean"):
     """
     Returns a list of Pandas Series objects indexed by time
 
@@ -50,20 +50,28 @@ def to_series(stream_set, datetime64_index=True):
         Directs function to convert Series index to np.datetime64[ns] or
         leave as np.int64.
 
+    agg : str, default: "mean"
+        Specify the StatPoint field (e.g. aggregating function) to create the Series
+        from. Must be one of "min", "mean", "max", "count", or "stddev". This
+        argument is ignored if RawPoint values are passed into the function.
+
     """
     try:
         import pandas as pd
     except ImportError:
         raise ImportError("Please install Pandas to use this transformation function.")
 
     result = []
-    stream_names = _stream_names(stream_set)
+    stream_names = _stream_names(streamset)
 
-    for idx, output in enumerate(stream_set.values()):
+    for idx, output in enumerate(streamset.values()):
         times, values = [], []
-        for item in output:
-            times.append(item.time)
-            values.append(item.value)
+        for point in output:
+            times.append(point.time)
+            if point.__class__.__name__ == "RawPoint":
+                values.append(point.value)
+            else:
+                values.append(getattr(point, agg))
 
         if datetime64_index:
             times = pd.Index(times, dtype='datetime64[ns]')
@@ -73,7 +81,8 @@ def to_series(stream_set, datetime64_index=True):
         ))
     return result
 
-def to_dataframe(stream_set, columns=None):
+
+def to_dataframe(streamset, columns=None, agg="mean"):
     """
     Returns a Pandas DataFrame object indexed by time and using the values of a
     stream for each column.
@@ -82,47 +91,82 @@ def to_dataframe(stream_set, columns=None):
     ----------
     columns: sequence
         column names to use for DataFrame
+
+    agg : str, default: "mean"
+        Specify the StatPoint field (e.g. aggregating function) to create the Series
+        from. Must be one of "min", "mean", "max", "count", or "stddev". This
+        argument is ignored if RawPoint values are passed into the function.
+
     """
     try:
         import pandas as pd
     except ImportError:
         raise ImportError("Please install Pandas to use this transformation function.")
 
-    stream_names = _stream_names(stream_set)
+    stream_names = _stream_names(streamset)
     columns = columns if columns else ["time"] + list(stream_names)
-    return pd.DataFrame(to_dict(stream_set), columns=columns).set_index("time")
+    return pd.DataFrame(to_dict(streamset,agg=agg), columns=columns).set_index("time")
 
 
-def to_array(stream_set):
+def to_array(streamset, agg="mean"):
     """
-    Returns a list of Numpy arrays (one per stream) containing point classes.
+    Returns a multidimensional numpy array (similar to a list of lists) containing point
+    classes.
+
+    Parameters
+    ----------
+    agg : str, default: "mean"
+        Specify the StatPoint field (e.g. aggregating function) to return for the
+        arrays. Must be one of "min", "mean", "max", "count", or "stddev". This
+        argument is ignored if RawPoint values are passed into the function.
+
     """
     try:
         import numpy as np
     except ImportError:
         raise ImportError("Please install Numpy to use this transformation function.")
 
-    return [np.array(output) for output in stream_set.values()]
+    results = []
+    for points in streamset.values():
+        segment = []
+        for point in points:
+            if point.__class__.__name__ == "RawPoint":
+                segment.append(point.value)
+            else:
+                segment.append(getattr(point, agg))
+        results.append(segment)
+    return np.array(results)
 
 
-def to_dict(stream_set):
+def to_dict(streamset, agg="mean"):
     """
     Returns a list of OrderedDict for each time code with the appropriate
     stream data attached.
+
+    Parameters
+    ----------
+    agg : str, default: "mean"
+        Specify the StatPoint field (e.g. aggregating function) to constrain dict
+        keys. Must be one of "min", "mean", "max", "count", or "stddev". This
+        argument is ignored if RawPoint values are passed into the function.
+
     """
     data = []
-    stream_names = _stream_names(stream_set)
-    for row in stream_set.rows():
+    stream_names = _stream_names(streamset)
+    for row in streamset.rows():
         item = OrderedDict({
             "time": _get_time_from_row(row),
         })
         for idx, col in enumerate(stream_names):
-            item[col] = row[idx].value if row[idx] else None
+            if row[idx].__class__.__name__ == "RawPoint":
+                item[col] = row[idx].value if row[idx] else None
+            else:
+                item[col] = getattr(row[idx], agg) if row[idx] else None
         data.append(item)
     return data
 
 
-def to_csv(stream_set, fobj, dialect=None, fieldnames=None):
+def to_csv(streamset, fobj, dialect=None, fieldnames=None, agg="mean"):
     """
     Saves stream data as a CSV file.
 
@@ -139,6 +183,10 @@ def to_csv(stream_set, fobj, dialect=None, fieldnames=None):
         A sequence of strings to use as fieldnames in the CSV header.  See
         Python's csv module for more information.
 
+    agg : str, default: "mean"
+        Specify the StatPoint field (e.g. aggregating function) to return when
+        limiting results. Must be one of "min", "mean", "max", "count", or "stddev".
+        This argument is ignored if RawPoint values are passed into the function.
     """
 
     @contextlib.contextmanager
@@ -155,27 +203,35 @@ def open_path_or_file(path_or_file):
                 file_to_close.close()
 
     with open_path_or_file(fobj) as csvfile:
-        stream_names = _stream_names(stream_set)
+        stream_names = _stream_names(streamset)
         fieldnames = fieldnames if fieldnames else ["time"] + list(stream_names)
 
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames, dialect=dialect)
         writer.writeheader()
 
-        for item in to_dict(stream_set):
+        for item in to_dict(streamset, agg=agg):
             writer.writerow(item)
 
 
-def to_table(stream_set):
+def to_table(streamset, agg="mean"):
     """
     Returns string representation of the data in tabular form using the tabulate
     library.
+
+    Parameters
+    ----------
+    agg : str, default: "mean"
+        Specify the StatPoint field (e.g. aggregating function) to create the Series
+        from. Must be one of "min", "mean", "max", "count", or "stddev". This
+        argument is ignored if RawPoint values are passed into the function.
+
     """
     try:
         from tabulate import tabulate
     except ImportError:
         raise ImportError("Please install tabulate to use this transformation function.")
 
-    return tabulate(stream_set.to_dict(), headers="keys")
+    return tabulate(streamset.to_dict(agg=agg), headers="keys")
 
 
 ##########################################################################