Commit eeaef74

Added append class

1 parent aa7f793, commit eeaef74

2 files changed: +74 -0 lines changed

‎pandas_to_postgres/__init__.py

1 addition & 0 deletions

@@ -1,5 +1,6 @@
 from .copy_df import DataFrameCopy
 from .copy_hdf import HDFTableCopy, SmallHDFTableCopy, BigHDFTableCopy
+from .append_df import DataFrameCopyAppend
 from .hdf_to_postgres import hdf_to_postgres, create_hdf_table_objects, copy_worker
 from .utilities import (
     hdf_metadata,

‎pandas_to_postgres/append_df.py

73 additions & 0 deletions

@@ -0,0 +1,73 @@
+from .utilities import create_file_object, df_generator, cast_pandas
+from ._base_copy import BaseCopy
+
+
+class DataFrameCopyAppend(BaseCopy):
+    """
+    Class for handling a standard case of iterating over a pandas DataFrame in chunks
+    and COPYing to PostgreSQL via StringIO CSV.
+
+    Differs from the main DataFrameCopy class in that it does not drop foreign keys,
+    primary keys, or indexes, and it appends the DataFrame to the existing data in the table, at a significant performance cost.
+    """
+
+    def __init__(
+        self, df, defer_sql_objs=False, conn=None, table_obj=None, csv_chunksize=10 ** 6
+    ):
+        """
+        Parameters
+        ----------
+        df: pandas DataFrame
+            Data to copy to database table
+        defer_sql_objs: bool
+            multiprocessing has issues with passing SQLAlchemy objects, so if
+            True, defer attributing these to the object until after pickled by Pool
+        conn: SQLAlchemy Connection
+            Managed outside of the object
+        table_obj: SQLAlchemy model object
+            Destination SQL Table
+        csv_chunksize: int
+            Max rows to keep in memory when generating CSV for COPY
+        """
+        super().__init__(defer_sql_objs, conn, table_obj, csv_chunksize)
+
+        self.df = df
+        self.rows = self.df.shape[0]
+
+    def truncate(self):
+        pass
+
+    def create_pk(self):
+        pass
+
+    def create_fks(self):
+        pass
+
+    def drop_fks(self):
+        pass
+
+    def drop_pk(self):
+        pass
+
+    def copy(self, functions=[cast_pandas]):
+        self.drop_fks()
+        self.drop_pk()
+        self.df = self.data_formatting(self.df, functions=functions)
+        with self.conn.begin():
+            self.truncate()
+
+            self.logger.info("Creating generator for chunking dataframe")
+            for chunk in df_generator(self.df, self.csv_chunksize):
+
+                self.logger.info("Creating CSV in memory")
+                fo = create_file_object(chunk)
+
+                self.logger.info("Copying chunk to database")
+                self.copy_from_file(fo)
+                del fo
+
+            self.logger.info("All chunks copied ({} rows)".format(self.rows))
+
+        self.create_pk()
+        self.create_fks()
+        self.analyze()
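For context, here is a minimal usage sketch of the new class, imported through the package-level export added in __init__.py above. It is not part of the commit: the connection URL, the "users" table, and the sample DataFrame are placeholders, and it assumes the destination table already exists with matching columns and is passed as a reflected SQLAlchemy Table, as the parameter docstrings suggest.

# Hypothetical usage sketch; the URL, table name, and data below are placeholders.
import pandas as pd
from sqlalchemy import MetaData, Table, create_engine

from pandas_to_postgres import DataFrameCopyAppend

engine = create_engine("postgresql://user:password@localhost:5432/mydb")

# Reflect the existing destination table (SQLAlchemy 1.4+ reflection shown here);
# DataFrameCopyAppend leaves its primary key, foreign keys, and current rows in place.
metadata = MetaData()
users = Table("users", metadata, autoload_with=engine)

df = pd.DataFrame({"id": [1, 2, 3], "name": ["ann", "bob", "cai"]})

with engine.connect() as conn:
    # Chunks the DataFrame into in-memory CSVs and COPYs each chunk into the table,
    # appending to the rows already there rather than truncating first.
    copier = DataFrameCopyAppend(df, conn=conn, table_obj=users, csv_chunksize=10 ** 6)
    copier.copy()

Because the truncate/drop/create overrides are no-ops, constraints and indexes stay active during the COPY, which is what allows appending to live data but also accounts for the performance cost noted in the class docstring.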

0 commit comments