From b194db009b9a16fda5f489ca358733dcb396eea6 Mon Sep 17 00:00:00 2001 From: Shu Shen Date: Fri, 1 Jun 2018 12:04:32 -0700 Subject: [PATCH] Fix performance degradation with line protocol Assembling the lines row by row, as introduced in commit bf232a7aef to remove NaN values, has a significant performance impact. This change fixes the issue by recording the positions of the NaN fields before stringifying the dataframe, replacing those fields with empty strings, and reverting to the pd.DataFrame.sum() function to yield the lines. Fixes: #591 --- influxdb/_dataframe_client.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py index 4273ef1b..646f298c 100644 --- a/influxdb/_dataframe_client.py +++ b/influxdb/_dataframe_client.py @@ -365,16 +365,18 @@ def _convert_dataframe_to_lines(self, # Make an array of formatted field keys and values field_df = dataframe[field_columns] + # Keep the positions where Null values are found + mask_null = field_df.isnull().values field_df = self._stringify_dataframe(field_df, numeric_precision, datatype='field') - def format_line(line): - line = line[~line.isnull()] # drop None entries - return ",".join((line.index + '=' + line.values)) - - fields = field_df.apply(format_line, axis=1) + field_df = (field_df.columns.values + '=').tolist() + field_df + field_df[field_df.columns[1:]] = ',' + field_df[ + field_df.columns[1:]] + field_df = field_df.where(~mask_null, '') # drop Null entries + fields = field_df.sum(axis=1) del field_df # Generate line protocol string @@ -388,9 +390,6 @@ def _stringify_dataframe(dframe, numeric_precision, datatype='field'): # Prevent modification of input dataframe dframe = dframe.copy() - # Keep the positions where Null values are found - mask_null = dframe.isnull().values - # Find int and string columns for field-type data int_columns = dframe.select_dtypes(include=['integer']).columns string_columns = dframe.select_dtypes(include=['object']).columns @@ -435,7 +434,6 
@@ def _stringify_dataframe(dframe, numeric_precision, datatype='field'): dframe.columns = dframe.columns.astype(str) - dframe = dframe.where(~mask_null, None) return dframe def _datetime_to_epoch(self, datetime, time_precision='s'):