diff --git a/.gitignore b/.gitignore index 87031a3..c5edfd4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ .DS_Store _tmp env -venv +venv/* __pycache__ .env +.idea/ +data/ +/scripts/zipper_2019.12.27h1159.zip diff --git a/requirements.txt b/requirements.txt index beee86c..19ad743 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ +wand +numpy beautifulsoup4==4.4.1 PyYAML==3.11 requests==2.12.4 diff --git a/scripts/02_find_all_links.py b/scripts/02_find_all_links.py index 37dff11..ce66776 100755 --- a/scripts/02_find_all_links.py +++ b/scripts/02_find_all_links.py @@ -1,8 +1,12 @@ -import requests +""" +Get all links from a webpage +""" + import re +import requests # get url -url = input('Enter a URL (include `http://`): ') +url = input('Enter a url, include: http://') # connect to the url website = requests.get(url) @@ -11,7 +15,7 @@ html = website.text # use re.findall to grab all the links -links = re.findall('"((http|ftp)s?://.*?)"', html) +links = re.findall(r'"((http|ftp)s?://.*?)"', html) # output links for link in links: diff --git a/scripts/03_simple_twitter_manager.py b/scripts/03_simple_twitter_manager.py index e39a20b..ba30386 100755 --- a/scripts/03_simple_twitter_manager.py +++ b/scripts/03_simple_twitter_manager.py @@ -19,7 +19,7 @@ zombie_follows = [following_id for following_id in following_ids if following_id not in follower_ids] - confirm = raw_input( + confirm = input( "Are you sure you want to unfollow {0} tweeps [y|n]? ".format( (len(zombie_follows)))) if confirm.lower() == 'y': diff --git a/scripts/04_rename_with_slice.py b/scripts/04_rename_with_slice.py index dd849ef..c39e191 100755 --- a/scripts/04_rename_with_slice.py +++ b/scripts/04_rename_with_slice.py @@ -1,11 +1,17 @@ -import os import glob +import os -os.chdir("/Users/mikeherman/repos/bugs/se-platform/se/core/permissions") -for file in glob.glob("*.json"): +DIR = "D:\\MyDownload\\" +EXTENSION = '*.ass' + +os.chdir(DIR) +for file in glob.glob(EXTENSION): file_name = os.path.splitext(file)[0] extension = os.path.splitext(file)[1] - new_file_name = file_name[:-6] + extension + file_name = file_name.split('.')[:3] + + new_file_name = '.'.join(file_name[:3]) + extension + print(new_file_name) try: os.rename(file, new_file_name) except OSError as e: diff --git a/scripts/05_load_json_without_dupes.py b/scripts/05_load_json_without_dupes.py index 2cbe318..38e543e 100755 --- a/scripts/05_load_json_without_dupes.py +++ b/scripts/05_load_json_without_dupes.py @@ -1,9 +1,19 @@ +import json + + def dict_raise_on_duplicates(ordered_pairs): """reject duplicate keys""" my_dict = dict() - for key, values in ordered_pairs: + for key, values in ordered_pairs.items(): if key in my_dict: - raise ValueError("Duplicate key: {}".format(key,)) + raise ValueError("Duplicate key: {}".format(key, )) else: my_dict[key] = values return my_dict + + +with open('./data/demo_json.json') as jf: + data = json.load(jf) + print(data) + dict_data = dict_raise_on_duplicates(data) + print(dict_data) diff --git a/scripts/06_execution_time.py b/scripts/06_execution_time.py index 9614bbd..c9271cd 100755 --- a/scripts/06_execution_time.py +++ b/scripts/06_execution_time.py @@ -11,9 +11,9 @@ """ +import random import time -import random class ExecutionTime: diff --git a/scripts/07_benchmark_permissions_loading_django.py b/scripts/07_benchmark_permissions_loading_django.py index e1e6900..9e38bea 100755 --- a/scripts/07_benchmark_permissions_loading_django.py +++ b/scripts/07_benchmark_permissions_loading_django.py @@ -1,12 +1,13 @@ import os -import time + import numpy +import time + # temp file for benchmarking def timeit(method): - def timed(*args, **kw): ts = time.time() diff --git a/scripts/08_basic_email_web_crawler.py b/scripts/08_basic_email_web_crawler.py index b56c747..d20d6f8 100755 --- a/scripts/08_basic_email_web_crawler.py +++ b/scripts/08_basic_email_web_crawler.py @@ -1,5 +1,5 @@ -import requests import re +import requests # get url url = input('Enter a URL (include `http://`): ') @@ -17,5 +17,6 @@ # print the number of links in the list print("\nFound {} links".format(len(links))) +print(links[:10]) for email in emails: print(email) diff --git a/scripts/09_basic_link_web_crawler.py b/scripts/09_basic_link_web_crawler.py index 87e2fab..ca5cd1b 100755 --- a/scripts/09_basic_link_web_crawler.py +++ b/scripts/09_basic_link_web_crawler.py @@ -1,20 +1,17 @@ -import requests import re -try: - from urllib.parse import urljoin -except ImportError: - from urlparse import urljoin +import requests + +from urllib.parse import urljoin # regex link_re = re.compile(r'href="(.*?)"') def crawl(url): - req = requests.get(url) # Check if successful - if(req.status_code != 200): + if (req.status_code != 200): return [] # Find links @@ -24,11 +21,11 @@ def crawl(url): # Search links for emails for link in links: - # Get an absolute URL for a link link = urljoin(url, link) print(link) + if __name__ == '__main__': crawl('http://www.realpython.com') diff --git a/scripts/10_find_files_recursively.py b/scripts/10_find_files_recursively.py index 0c8e1eb..b28146f 100755 --- a/scripts/10_find_files_recursively.py +++ b/scripts/10_find_files_recursively.py @@ -3,7 +3,7 @@ # constants PATH = './' -PATTERN = '*.md' +PATTERN = '*.py' def get_file_names(filepath, pattern): diff --git a/scripts/11_optimize_images_with_wand.py b/scripts/11_optimize_images_with_wand.py index a95b8b0..332e0eb 100755 --- a/scripts/11_optimize_images_with_wand.py +++ b/scripts/11_optimize_images_with_wand.py @@ -1,14 +1,13 @@ import fnmatch import os -# pip install Wand -from wand.image import Image # pip install http://pypi.python.org/packages/source/h/hurry.filesize/hurry.filesize-0.9.tar.gz from hurry.filesize import size - +# pip install Wand +from wand.image import Image # constants -PATH = '/../../../..' +PATH = u'D:\MyPhoto\下载图片' PATTERN = '*.jpg' @@ -38,16 +37,19 @@ def get_total_size(list_of_image_names): def resize_images(list_of_image_names): print("Optimizing ... ") for index, image_name in enumerate(list_of_image_names): - with open(image_name) as f: + with open(image_name, 'rb') as f: image_binary = f.read() with Image(blob=image_binary) as img: - if img.height >= 600: + if img.height >= 100: img.transform(resize='x600') - img.save(filename=image_name) + new_image_name = './data/' + image_name.split('.')[0].split('\\')[-1] + '_resized.jpg' + print(new_image_name) + + img.save(filename=new_image_name) print("Optimization complete.") if __name__ == '__main__': all_images = get_image_file_names(PATH, PATTERN) resize_images(all_images) - get_image_file_names(PATH, PATTERN) + get_image_file_names('./data/', PATTERN) diff --git a/scripts/12_csv_split.py b/scripts/12_csv_split.py index 43ed1ee..a4ec229 100755 --- a/scripts/12_csv_split.py +++ b/scripts/12_csv_split.py @@ -1,12 +1,13 @@ -import sys -import os import csv +import os + import argparse +import sys """ Splits a CSV file into multiple files based on command line arguments. - + Arguments: `-h`: help file of usage of the script @@ -25,6 +26,7 @@ ``` # split csv by every 100 rows >> python csv_split.py -i input.csv -o output -r 100 + >> python 12_csv_split.py -i ./data/12_sample_csv.csv -o ./data/sample_csv -r 10 ``` """ @@ -105,7 +107,7 @@ def parse_file(arguments): current_output = os.path.join( # Create new output file output_path, - "{}-{}.csv".format(output_file, current_chunk) + "{}_{}.csv".format(output_file, current_chunk) ) # Add header diff --git a/scripts/12_sample_csv.csv b/scripts/12_sample_csv.csv deleted file mode 100755 index ce6a3f5..0000000 --- a/scripts/12_sample_csv.csv +++ /dev/null @@ -1,101 +0,0 @@ -First Name,Last Name,Email Address,Phone Number,Company,Date Hired -Abigail,Branch,volutpat.ornare.facilisis@Phasellusvitaemauris.co.uk,(412) 540-6276,Sem Eget PC,07/02/2013 -Roanna,Lambert,tristique.pharetra@arcuvelquam.ca,(747) 536-6748,Eget Laoreet Foundation,11/23/2013 -Amanda,England,semper.rutrum@blandit.com,(669) 164-6411,Magna Nec Quam Limited,08/11/2012 -Hilel,Chapman,ultrices@tempor.ca,(683) 531-0279,Sed Molestie PC,06/25/2012 -Basia,Bowers,Quisque.ornare@tinciduntnibh.com,(135) 986-6437,Tincidunt Nunc Ac Associates,05/11/2013 -Dylan,Dunlap,est.Mauris@etnetuset.org,(877) 604-4603,Eu Ultrices Institute,07/02/2012 -Regan,Cardenas,vitae.semper@ultriciesornareelit.org,(693) 378-7235,Neque Morbi Corporation,10/30/2012 -Sade,Green,tortor@sagittis.co.uk,(816) 255-5508,Eleifend Ltd,09/03/2012 -Marshall,Richardson,sed.facilisis@eu.com,(460) 132-4621,Purus Maecenas Libero LLC,12/21/2012 -Regina,Brown,semper.auctor@sem.co.uk,(185) 963-9365,Vulputate Consulting,06/16/2013 -Irma,Rivers,vitae@luctusvulputate.net,(701) 393-3679,Nec Leo Morbi Incorporated,05/07/2013 -Rudyard,Cline,fringilla@risusatfringilla.org,(971) 228-3147,Risus Quis Consulting,04/25/2013 -Justina,Richmond,sapien.Nunc.pulvinar@vitaeerat.co.uk,(755) 103-3125,Ullamcorper Associates,02/12/2013 -Reece,Blackburn,felis@Aliquamauctor.com,(239) 528-2742,Suspendisse Associates,04/03/2014 -Lillith,Holden,ut.dolor.dapibus@porttitor.net,(305) 797-1579,Dapibus Id Blandit LLP,09/11/2013 -Taylor,Vinson,ac@vellectusCum.net,(355) 993-1099,Egestas Institute,05/16/2012 -Colton,Barker,volutpat@necluctus.ca,(705) 978-5992,Ornare Consulting,04/24/2013 -Vladimir,Walls,mollis.lectus@imperdietullamcorperDuis.edu,(311) 406-4856,Faucibus Ut Nulla LLP,08/12/2012 -Freya,Rowland,sagittis@elementumduiquis.co.uk,(284) 850-7506,Turpis PC,05/31/2013 -Cullen,Phelps,Nam.ligula@orciluctus.ca,(425) 280-1763,Rhoncus Id Mollis Consulting,09/10/2013 -Boris,Lopez,posuere@adipiscingligula.edu,(769) 701-0055,Nunc Sed Orci Industries,07/26/2013 -Alvin,Meyer,Etiam@felis.ca,(783) 312-0821,Dignissim Pharetra Ltd,03/02/2013 -Nicole,Boyle,tortor.Integer@imperdiet.edu,(675) 678-1160,Dictum Eleifend Nunc LLC,05/05/2012 -Flynn,Petersen,dui@lectusrutrum.com,(787) 543-7411,Penatibus Et Associates,03/11/2013 -Troy,Herman,a.felis.ullamcorper@sem.ca,(932) 900-7922,Dolor Donec Associates,11/16/2012 -Constance,Shields,nec.leo.Morbi@eunulla.com,(221) 761-2368,Vel Quam Company,02/14/2014 -Ocean,Green,vulputate.dui@bibendumDonecfelis.net,(481) 832-0298,Nunc Associates,03/03/2013 -Steven,Lopez,Suspendisse.ac@sedpedeCum.net,(294) 415-0435,Ipsum Company,07/25/2013 -Adara,Lee,magna.Duis@erat.org,(760) 291-7826,Eu Ultrices PC,10/05/2013 -Noble,Hancock,Donec.tincidunt.Donec@dictumcursusNunc.edu,(333) 272-8234,Vitae Risus Duis LLC,09/13/2012 -Kendall,Wilcox,quis.pede@Pellentesqueut.ca,(173) 982-4381,Ultrices Industries,01/26/2013 -Sebastian,Barton,orci.Ut@ametfaucibus.ca,(951) 817-9217,In Mi Pede Corporation,05/11/2014 -Gavin,Clark,metus.facilisis.lorem@Sedetlibero.ca,(671) 714-8378,Vestibulum Neque Limited,06/06/2012 -Charles,Woods,Maecenas.mi.felis@lacusvarius.org,(559) 935-9739,Amet Ante Company,09/02/2013 -Elvis,Roberts,tempor.diam@risus.co.uk,(184) 182-5324,Facilisis Vitae Inc.,01/07/2014 -Caldwell,Carey,Suspendisse@Proin.edu,(125) 243-9354,Egestas Lacinia Sed Inc.,10/24/2012 -Jesse,Leblanc,sit@tellussemmollis.com,(726) 216-8000,Lectus Ltd,11/22/2013 -Hu,Adkins,purus.in.molestie@acmattisvelit.co.uk,(370) 317-7556,Aliquam Vulputate Company,10/19/2013 -Hamilton,Tyler,taciti.sociosqu.ad@Sedmalesuadaaugue.com,(234) 744-3868,Nunc Sed LLC,10/19/2012 -Cade,Osborn,at.iaculis.quis@doloregestas.org,(501) 753-9793,Consectetuer Industries,08/14/2013 -Ashely,Kent,Cum.sociis.natoque@odioPhasellusat.edu,(789) 869-6558,Imperdiet Ornare Corporation,02/04/2013 -Veda,Cameron,tristique.pharetra@necenimNunc.co.uk,(522) 127-0654,Egestas Incorporated,12/29/2012 -Burke,Ferrell,orci.sem@semPellentesque.co.uk,(975) 891-3694,Purus Accumsan Institute,07/26/2013 -Fuller,Lamb,orci.Donec@vulputatedui.edu,(523) 614-5785,Pede Cum Sociis Limited,12/02/2013 -Natalie,Taylor,In@lorem.ca,(117) 594-2685,A Facilisis Non LLP,12/06/2013 -Astra,Morton,nec@scelerisquenequeNullam.com,(390) 867-2558,Non Ante Bibendum Foundation,05/07/2012 -David,Espinoza,gravida@a.co.uk,(287) 945-5239,Lobortis Nisi Nibh Industries,05/11/2014 -Sybil,Todd,risus@sitametrisus.edu,(611) 848-4765,Massa Mauris Vestibulum Incorporated,01/19/2013 -Lee,Barron,cursus.non@Praesentinterdumligula.ca,(765) 654-9167,In Ornare Inc.,01/01/2013 -Zachery,Reed,nulla.Integer.urna@amet.edu,(667) 465-1222,Ac Corp.,10/07/2012 -Marshall,Brady,lobortis.nisi.nibh@molestiearcu.edu,(391) 336-5310,Ac Sem Ut Incorporated,07/12/2012 -Selma,Floyd,eros.turpis.non@lectusconvallis.net,(398) 920-1076,Non Foundation,07/21/2012 -Ivy,Garrison,posuere@euodio.net,(428) 321-5542,Semper Erat Foundation,12/19/2013 -Wyatt,Gibbs,Sed@nequeNullamut.ca,(973) 141-9840,Pellentesque Corp.,11/21/2013 -Vaughan,Moss,adipiscing@Phasellusfermentum.net,(597) 730-0228,Tempor Institute,10/27/2013 -Elijah,Mcgowan,Aliquam@Quisqueornaretortor.ca,(127) 171-1859,Tempor Bibendum Donec LLC,08/26/2012 -Miranda,Ingram,fermentum@velitSedmalesuada.net,(864) 873-7359,Feugiat Non Lobortis Institute,08/20/2012 -Anastasia,Lawrence,Mauris.eu@pedeultrices.net,(106) 260-8688,Sit Amet Consulting,05/31/2012 -Samson,Patton,non.arcu@enimnislelementum.ca,(302) 330-4251,Hendrerit Associates,12/27/2013 -Erasmus,Sexton,lectus.justo@aliquam.org,(972) 793-9187,Feugiat Industries,10/15/2013 -Emery,Gardner,erat@lorem.org,(848) 534-1656,Nunc Sit Amet Industries,08/24/2012 -Nomlanga,Hensley,Fusce@leoVivamus.org,(644) 169-6243,Consectetuer Company,08/29/2012 -Jason,Craft,nunc.nulla@sapien.ca,(691) 770-9143,Blandit LLC,03/23/2013 -Kathleen,Haley,sed.dolor.Fusce@imperdietornare.edu,(891) 454-8400,Lorem Company,07/02/2012 -Aline,Flynn,a@Nunclaoreet.edu,(563) 400-6803,Et Netus LLP,01/28/2013 -Ursa,Dickson,Integer.sem@ullamcorpervelit.com,(371) 615-7750,Nullam Company,12/22/2012 -Wesley,Lopez,enim.non.nisi@vulputateduinec.edu,(287) 777-3724,Lobortis Ultrices Vivamus Corp.,06/17/2013 -Victoria,Mcleod,lectus.justo.eu@ut.ca,(583) 108-1294,Justo Faucibus Lectus Corporation,10/17/2012 -Shana,Roach,scelerisque.sed.sapien@afelisullamcorper.edu,(921) 385-2342,Quis Turpis Vitae Incorporated,05/26/2014 -Maxine,Ruiz,Donec.porttitor@hymenaeosMaurisut.edu,(520) 801-0808,Luctus Foundation,12/05/2013 -Harriet,Bishop,Quisque@Crasdictum.com,(758) 716-9401,Dictum Phasellus In Inc.,09/08/2013 -Serina,Williams,tincidunt.vehicula.risus@sedliberoProin.ca,(270) 288-0136,At Egestas A Corporation,03/17/2014 -Rhea,Copeland,laoreet.ipsum@Aliquam.co.uk,(775) 493-9118,Ipsum Incorporated,05/22/2013 -Evan,Holcomb,neque.sed@ullamcorperDuis.ca,(695) 656-8621,Sem Institute,02/16/2013 -Basil,Mccall,arcu.Vestibulum.ante@luctuslobortis.co.uk,(144) 989-4125,Feugiat Tellus Lorem Institute,02/25/2013 -Florence,Riley,sit.amet@Proinvel.org,(663) 529-4829,Enim Sit PC,01/14/2014 -Heather,Peck,mauris@scelerisqueneque.edu,(850) 444-0917,Curabitur Limited,01/16/2014 -Dara,Robinson,egestas@utnisi.net,(106) 576-1355,Urna Incorporated,12/15/2012 -Kylan,Maxwell,conubia.nostra@accumsan.com,(973) 206-2558,Aliquam Eros Turpis Company,08/21/2012 -Petra,Blake,faucibus.orci.luctus@dapibusrutrum.ca,(901) 207-9872,Ac Metus Institute,06/17/2013 -Fiona,Goff,tincidunt@enim.net,(265) 255-7749,Odio Phasellus Corp.,12/03/2012 -Kameko,Diaz,ac@turpisNulla.edu,(731) 354-4848,Montes Nascetur Corporation,08/16/2013 -Craig,Valentine,tristique@urnaVivamus.net,(437) 229-8198,Etiam Gravida Molestie Consulting,05/06/2014 -Samson,Cunningham,semper.pretium@auctor.edu,(335) 666-7758,Nec Ante Associates,07/02/2013 -Yoko,Rogers,nunc@Vivamus.net,(893) 405-6889,Fermentum Vel Mauris Corp.,03/29/2014 -Walter,Burnett,nisi.Mauris.nulla@felis.co.uk,(336) 411-9222,Suscipit Est Institute,06/26/2012 -Gisela,Nash,euismod@lectusrutrum.ca,(917) 249-0166,Non Magna LLP,11/23/2012 -Wanda,Pierce,Nulla@dolorsit.com,(480) 872-3389,Cum Sociis Natoque Limited,11/02/2013 -Jane,Dixon,eu.odio@Infaucibus.com,(112) 139-8563,Id Ante Dictum LLC,03/14/2014 -Octavius,Shannon,iaculis.aliquet@ante.ca,(541) 652-3295,Libero Est Institute,05/28/2014 -Rigel,Hunt,metus.Aenean.sed@inhendrerit.org,(792) 358-7505,Enim PC,09/05/2013 -Rachel,Gray,erat.in.consectetuer@Fuscealiquetmagna.org,(165) 973-1366,Suscipit Nonummy Fusce LLC,05/08/2013 -Madeline,Bradley,dignissim.Maecenas@egetmassaSuspendisse.co.uk,(436) 223-3135,Posuere PC,01/24/2014 -Emma,Conner,dictum@magnaDuisdignissim.com,(304) 429-2622,Nulla Incorporated,11/05/2013 -Halee,Mclean,amet.faucibus@Phasellus.net,(669) 364-0148,Ligula Consulting,03/05/2014 -Conan,Williams,massa@felisNulla.net,(999) 649-4433,Velit Eu Limited,05/15/2014 -Martena,Fowler,mi.lacinia@maurisa.ca,(405) 661-1762,Blandit Nam Institute,02/27/2013 -Robin,Buckley,cursus.Nunc.mauris@nislQuisque.net,(376) 771-9862,Sed Corp.,10/30/2012 -Isadora,Adams,arcu.Vestibulum@urna.co.uk,(138) 774-6058,Blandit Viverra Donec Institute,08/07/2012 -Bernard,Price,ultrices@Praesent.ca,(368) 882-6146,Egestas Blandit LLP,11/03/2013 \ No newline at end of file diff --git a/scripts/13_random_name_generator.py b/scripts/13_random_name_generator.py index 0acb9bf..ae2904a 100755 --- a/scripts/13_random_name_generator.py +++ b/scripts/13_random_name_generator.py @@ -9,10 +9,10 @@ def random_name_generator(first, second, x): - list of last names - number of random names """ - names = [] + names_list = [] for i in range(x): - names.append("{0} {1}".format(choice(first), choice(second))) - return set(names) + names_list.append("{0} {1}".format(choice(first), choice(second))) + return set(names_list) first_names = ["Drew", "Mike", "Landon", "Jeremy", "Tyler", "Tom", "Avery"] diff --git a/scripts/16_jinja_quick_load.py b/scripts/16_jinja_quick_load.py index 7af9b3d..8db9101 100755 --- a/scripts/16_jinja_quick_load.py +++ b/scripts/16_jinja_quick_load.py @@ -4,16 +4,15 @@ Example: ->>> from jinja_quick_load import render_from_template ->>> data = { -... "date": "June 12, 2014", -... "items": ["oranges", "bananas", "steak", "milk"] -... } ->>> render_from_template(".", "shopping_list.html", **data) +# >>> from jinja_quick_load import render_from_template +# >>> data = { +# ... "date": "June 12, 2014", +# ... "items": ["oranges", "bananas", "steak", "milk"] +# ... } +# >>> render_from_template(".", "shopping_list.html", **data) """ - from jinja2 import FileSystemLoader, Environment @@ -22,3 +21,11 @@ def render_from_template(directory, template_name, **kwargs): env = Environment(loader=loader) template = env.get_template(template_name) return template.render(**kwargs) + + +data = { + "date": "June 12, 2014", + "items": ["oranges", "bananas", "steak", "milk"] +} + +render_from_template(".", "shopping_list.html", **data) diff --git a/scripts/18_zipper.py b/scripts/18_zipper.py index 43c956d..4798515 100755 --- a/scripts/18_zipper.py +++ b/scripts/18_zipper.py @@ -1,12 +1,12 @@ import os -from datetime import datetime from zipfile import ZipFile +from datetime import datetime # set file name and time of creation today = datetime.now() file_name = 'zipper_' + today.strftime('%Y.%m.%dh%H%M') + '.zip' -dir_name = 'tmp/' # update path +dir_name = './data/' # update path def zipdir(path, zip): @@ -14,6 +14,7 @@ def zipdir(path, zip): for file in files: zip.write(os.path.join(root, file)) + if __name__ == '__main__': zipfile = ZipFile(file_name, 'w') zipdir(dir_name, zipfile) diff --git a/scripts/19_tsv-to-csv.py b/scripts/19_tsv-to-csv.py index f10deed..1e7bf9b 100755 --- a/scripts/19_tsv-to-csv.py +++ b/scripts/19_tsv-to-csv.py @@ -1,16 +1,36 @@ -import os -import sys import csv -def convert(input, out): - if os.path.exists(out): - raise ValueError("Output file already exists") +# import os + + +def convert_tsv_to_csv(input, out): + # if os.path.exists(out): + # raise ValueError("Output file already exists") reader = csv.reader(open(input, 'rU'), dialect=csv.excel_tab) - writer = csv.writer(open(out, "wb+"), dialect="excel") + writer = csv.writer(open(out, "w+"), dialect="excel") for row in reader: writer.writerow(row) + +def convert_csv_to_tsv(input, out): + # if os.path.exists(out): + # raise ValueError("Output file already exists") + + reader = csv.reader(open(input, 'rU'), dialect='excel') + writer = csv.writer(open(out, "w+"), dialect=csv.excel_tab) + + for row in reader: + writer.writerow(row) + + if __name__ == "__main__": - convert(sys.argv[1], sys.argv[2]) + csv_file = './data/sample_csv-1.csv' + tsv_file = './data/csv_dmo.tsv' + + convert_csv_to_tsv(csv_file, tsv_file) + print('csv to tsv, Done') + + convert_tsv_to_csv(tsv_file, csv_file) + print('tsv to csv, Done') diff --git a/scripts/20_restore_file_from_git.py b/scripts/20_restore_file_from_git.py index b1f581b..123f3e3 100755 --- a/scripts/20_restore_file_from_git.py +++ b/scripts/20_restore_file_from_git.py @@ -1,13 +1,11 @@ -from subprocess import check_output, call - - -file_name = str(input('Enter the file name: ')) -commit = check_output(["git", "rev-list", "-n", "1", "HEAD", "--", file_name]) -print(str(commit).rstrip()) -call(["git", "checkout", str(commit).rstrip()+"~1", file_name]) - - """ After entering a filename, this script searches your Git history for that file. If the file exists, then it will restore it. """ + +from subprocess import check_output, call + +file_name = str(input('Enter the file name: ')) +commit = check_output(["git", "rev-list", "-n", "1", "HEAD", "--", file_name]) +print(str(commit).strip('\n')) +call(["git", "checkout", str(commit).rstrip() + '~1', file_name]) diff --git a/scripts/22_git_tag.py b/scripts/22_git_tag.py index 4849c07..e7606ac 100755 --- a/scripts/22_git_tag.py +++ b/scripts/22_git_tag.py @@ -1,7 +1,6 @@ import subprocess import sys - if len(sys.argv) == 3: tag = sys.argv[1] commit = sys.argv[2] diff --git a/scripts/24_sql2csv.py b/scripts/24_sql2csv.py index 4e8f484..ad0b9be 100755 --- a/scripts/24_sql2csv.py +++ b/scripts/24_sql2csv.py @@ -1,6 +1,7 @@ -import sys import csv + import sqlite3 +import sys if len(sys.argv) < 3: print("Use: {0} DATABASE_NAME TABLE_NAME".format(sys.argv[0])) diff --git a/scripts/25_ip2geolocation.py b/scripts/25_ip2geolocation.py index f593676..df361a2 100755 --- a/scripts/25_ip2geolocation.py +++ b/scripts/25_ip2geolocation.py @@ -1,4 +1,5 @@ import csv + import requests @@ -9,7 +10,7 @@ def get_addresses(filename): row info from the csv file. """ all_addresses = [] - with open(filename, 'rt') as f: + with open(filename, 'r') as f: reader = csv.reader(f) for row in reader: all_addresses.append(row) @@ -31,6 +32,7 @@ def get_geolocation(all_the_ip_address): for line in all_the_ip_address: print("Grabbing geo info for row # {0}".format(counter)) r = requests.get('https://freegeoip.net/json/{0}'.format(line[0])) + print(r.json()) line.extend([str(r.json()['country_name']), str(r.json()['city'])]) updated_addresses.append(line) counter += 1 @@ -45,9 +47,9 @@ def create_csv(updated_address_list): """ import sys if sys.version_info >= (3, 0, 0): - f = open('output.csv', 'w', newline='') + f = open('./data/output.csv', 'w', newline='') else: - f = open('output.csv', 'wb') + f = open('./data/output.csv', 'wb') with f: writer = csv.writer(f) writer.writerows(updated_address_list) diff --git a/scripts/26_stock_scraper.py b/scripts/26_stock_scraper.py index 3e69cc2..5359af1 100755 --- a/scripts/26_stock_scraper.py +++ b/scripts/26_stock_scraper.py @@ -1,21 +1,26 @@ +from collections import defaultdict + import requests from lxml import html -from collections import defaultdict def get_stocks(url): # Make Request - page = requests.get(url) + + proxies = {"http": "http://127.0.0.1:1080", "https": "http://127.0.0.1:1080", } + + page = requests.get(url, proxies=proxies) # Parse/Scrape tree = html.fromstring(page.text) xpath = '//*[@id="mw-content-text"]/table[1]' - rows = tree.xpath(xpath)[0].findall("tr") - rows = [(row.getchildren()[0], row.getchildren()[3]) for row in rows[1:]] - rows = [(row[0].getchildren()[0].text, row[1].text) for row in rows] - industries = defaultdict(list) - for row in rows: - industries[row[1]].append(row[0]) - return industries + if tree.xpath(xpath): + rows = tree.xpath(xpath)[0].findall("tr") + rows = [(row.getchildren()[0], row.getchildren()[3]) for row in rows[1:]] + rows = [(row[0].getchildren()[0].text, row[1].text) for row in rows] + industries = defaultdict(list) + for row in rows: + industries[row[1]].append(row[0]) + return industries def output_data(data_dict): @@ -27,6 +32,6 @@ def output_data(data_dict): if __name__ == '__main__': - url = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies' + url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies' scraped_data = get_stocks(url) output_data(scraped_data) diff --git a/scripts/27_send_sms.py b/scripts/27_send_sms.py index 0ad1d58..bbc1329 100755 --- a/scripts/27_send_sms.py +++ b/scripts/27_send_sms.py @@ -1,8 +1,7 @@ import requests -message = raw_input('Enter a Message: ') -number = raw_input('Enter the phone number: ') - +message = input('Enter a Message: ') +number = input('Enter the phone number: ') payload = {'number': number, 'message': message} r = requests.post("http://textbelt.com/text", data=payload) diff --git a/scripts/28_income_tax_calculator.py b/scripts/28_income_tax_calculator.py index 3fc0758..d99875e 100755 --- a/scripts/28_income_tax_calculator.py +++ b/scripts/28_income_tax_calculator.py @@ -11,7 +11,7 @@ 'pay_periods': 1, 'state': 'CO', 'year': - '2014' + '2014' } r = requests.post( diff --git a/scripts/29_json_to_yaml.py b/scripts/29_json_to_yaml.py index b22f64d..f9001a1 100755 --- a/scripts/29_json_to_yaml.py +++ b/scripts/29_json_to_yaml.py @@ -1,5 +1,5 @@ -import sys import json + import yaml """ @@ -9,7 +9,7 @@ """ # load json data -json_data = json.loads(open(sys.argv[1]).read()) +json_data = json.loads(open('data/demo_json.json').read()) # convert unicode to string converted_json_data = json.dumps(json_data) # output yaml diff --git a/scripts/30_fullcontact.py b/scripts/30_fullcontact.py index 3ee2822..eff8036 100644 --- a/scripts/30_fullcontact.py +++ b/scripts/30_fullcontact.py @@ -1,5 +1,6 @@ import os import sys + import requests """ @@ -13,7 +14,6 @@ $ python 30_fullcontact.py twitter TWITTER_HANDLE """ - # constants API_KEY = os.environ.get('FULLCONTACT_API_KEY') diff --git a/scripts/31_youtube_sentiment.py b/scripts/31_youtube_sentiment.py index f0f2129..f38a2d2 100644 --- a/scripts/31_youtube_sentiment.py +++ b/scripts/31_youtube_sentiment.py @@ -1,4 +1,5 @@ import sys + import requests from bs4 import BeautifulSoup as bs4 @@ -18,9 +19,13 @@ def get_arguments(): def get_comments(url): - html = requests.get('https://plus.googleapis.com/u/0/_/widget/render/comments?first_party_property=YOUTUBE&href=' + url) + proxies = {"http": "http://127.0.0.1:1080", "https": "http://127.0.0.1:1080", } + html = requests.get( + 'https://plus.googleapis.com/u/0/_/widget/render/comments?first_party_property=YOUTUBE&href=' + url, + proxies=proxies) soup = bs4(html.text, 'html.parser') - return [comment.string for comment in soup.findAll('div', class_='Ct')] + print(soup) + return [comment.string for comment in soup.findAll('yt-formatted-string', id='content-text')] def calculate_sentiment(comments): @@ -47,7 +52,8 @@ def calculate_sentiment(comments): def main(): - url = get_arguments() + # url = get_arguments() + url = 'https://www.youtube.com/watch?v=iBt2aTjCNmI' if url: comments = get_comments(url) if len(comments) <= 0: diff --git a/scripts/32_stock_scraper.py b/scripts/32_stock_scraper.py index 3bc2e7d..964ddfd 100644 --- a/scripts/32_stock_scraper.py +++ b/scripts/32_stock_scraper.py @@ -1,4 +1,5 @@ import urllib.request + from bs4 import BeautifulSoup diff --git a/scripts/33_country_code.py b/scripts/33_country_code.py index 134236c..7611e46 100644 --- a/scripts/33_country_code.py +++ b/scripts/33_country_code.py @@ -1,6 +1,6 @@ import csv -import sys import json +import sys """ Example usage: diff --git a/scripts/34_git_all_repos.py b/scripts/34_git_all_repos.py index b3e2d5b..096fb09 100644 --- a/scripts/34_git_all_repos.py +++ b/scripts/34_git_all_repos.py @@ -1,5 +1,6 @@ -import sys import os +import sys + import requests