From b194db009b9a16fda5f489ca358733dcb396eea6 Mon Sep 17 00:00:00 2001
From: Shu Shen <shu.shen@neur.io>
Date: Fri, 1 Jun 2018 12:04:32 -0700
Subject: [PATCH] Fix performance degradation with line protocol

Assembling the output line by line, as done in commit bf232a7aef to
remove NaN fields, has a significant performance impact.

This change fixes the issue by recording the positions of the NaN fields
before stringifying the dataframe, replacing those fields with empty
strings, and reverting to pd.DataFrame.sum() to yield the lines.

Fixes: #591
---
 influxdb/_dataframe_client.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py
index 4273ef1b..646f298c 100644
--- a/influxdb/_dataframe_client.py
+++ b/influxdb/_dataframe_client.py
@@ -365,16 +365,18 @@ def _convert_dataframe_to_lines(self,
 
         # Make an array of formatted field keys and values
         field_df = dataframe[field_columns]
+        # Keep the positions where Null values are found
+        mask_null = field_df.isnull().values
 
         field_df = self._stringify_dataframe(field_df,
                                              numeric_precision,
                                              datatype='field')
 
-        def format_line(line):
-            line = line[~line.isnull()]  # drop None entries
-            return ",".join((line.index + '=' + line.values))
-
-        fields = field_df.apply(format_line, axis=1)
+        field_df = (field_df.columns.values + '=').tolist() + field_df
+        field_df[field_df.columns[1:]] = ',' + field_df[
+            field_df.columns[1:]]
+        field_df = field_df.where(~mask_null, '')  # drop Null entries
+        fields = field_df.sum(axis=1)
         del field_df
 
         # Generate line protocol string
@@ -388,9 +390,6 @@ def _stringify_dataframe(dframe, numeric_precision, datatype='field'):
         # Prevent modification of input dataframe
         dframe = dframe.copy()
 
-        # Keep the positions where Null values are found
-        mask_null = dframe.isnull().values
-
         # Find int and string columns for field-type data
         int_columns = dframe.select_dtypes(include=['integer']).columns
         string_columns = dframe.select_dtypes(include=['object']).columns
@@ -435,7 +434,6 @@ def _stringify_dataframe(dframe, numeric_precision, datatype='field'):
 
         dframe.columns = dframe.columns.astype(str)
 
-        dframe = dframe.where(~mask_null, None)
         return dframe
 
     def _datetime_to_epoch(self, datetime, time_precision='s'):