diff --git a/Deployment/Linear_regression/request_pred.py b/Deployment/Linear_regression/request_pred.py index 9b66cf4..61d13f1 100644 --- a/Deployment/Linear_regression/request_pred.py +++ b/Deployment/Linear_regression/request_pred.py @@ -27,11 +27,11 @@ if (str(resp.status_code)=='200'): print("Response received correctly.") print() - + x=resp.json() j = json.loads(x) d = dict(j) for k,v in (d.items()): - print("{}: {}".format(k,round(v,2))) + print(f"{k}: {round(v, 2)}") print() diff --git a/Deployment/Linear_regression/server_lm.py b/Deployment/Linear_regression/server_lm.py index 55efb7e..3ede3c8 100644 --- a/Deployment/Linear_regression/server_lm.py +++ b/Deployment/Linear_regression/server_lm.py @@ -21,25 +21,24 @@ def apicall(): except Exception as e: raise e - clf = 'lm_model_v1.pk' - if test.empty: return(bad_request()) - else: - #Load the saved model - print("Loading the model...") - loaded_model = None - with open('./models/'+clf,'rb') as f: - loaded_model = pickle.load(f) - - print("The model has been loaded...doing predictions now...") - print() - predictions = loaded_model.predict(test) - - prediction_series = pd.Series(predictions) - response = jsonify(prediction_series.to_json()) - response.status_code = 200 - return (response) + #Load the saved model + print("Loading the model...") + loaded_model = None + clf = 'lm_model_v1.pk' + + with open(f'./models/{clf}', 'rb') as f: + loaded_model = pickle.load(f) + + print("The model has been loaded...doing predictions now...") + print() + predictions = loaded_model.predict(test) + + prediction_series = pd.Series(predictions) + response = jsonify(prediction_series.to_json()) + response.status_code = 200 + return (response) @app.errorhandler(400) def bad_request(error=None): diff --git a/Deployment/Linear_regression/training_housing.py b/Deployment/Linear_regression/training_housing.py index 2e64aa6..5e3728c 100644 --- a/Deployment/Linear_regression/training_housing.py +++ 
b/Deployment/Linear_regression/training_housing.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -import os +import os import json import io import requests @@ -17,36 +17,36 @@ # Checks if the dataset is in the local '/data' folder # If not present, pulls from Github repo, otherwise reads from the local folder -if not os.path.isdir(cwd+"/data") or data_filename not in os.listdir(cwd+"/data"): +if not os.path.isdir(f"{cwd}/data") or data_filename not in os.listdir( + f"{cwd}/data" +): url="https://raw.githubusercontent.com/tirthajyoti/Machine-Learning-with-Python/master/Datasets/USA_Housing.csv" - print("Downloading data from {} ".format(url)) + print(f"Downloading data from {url} ") s=requests.get(url).content df = pd.read_csv(io.StringIO(s.decode('utf-8'))) print("Dataset is downloaded.") # Save the data in local '/data' folder - if not os.path.isdir(cwd+"/data"): - os.makedirs(cwd+"/data") + if not os.path.isdir(f"{cwd}/data"): + os.makedirs(f"{cwd}/data") df.to_csv("data/USA_housing.csv") - print() else: df = pd.read_csv("data/USA_housing.csv") print("Dataset loaded from local directory") - print() - +print() # Make a list of data frame column names l_column = list(df.columns) # Making a list out of column names len_feature = len(l_column) # Length of column vector list # Put all the numerical features in X and Price in y, # Ignore Address which is string for linear regression -X = df[l_column[0:len_feature-2]] +X = df[l_column[:len_feature-2]] y = df[l_column[len_feature-2]] #print("Feature set size:",X.shape) #print("Variable set size:",y.shape) #print() -print("Features variables: ",l_column[0:len_feature-2]) +print("Features variables: ", l_column[:len_feature-2]) print() # Create X and y train and test splits in one command using a split ratio and a random seed @@ -83,16 +83,14 @@ if __name__ == '__main__': filename = 'lm_model_v1.pk' print("Now saving the model to a serialized format (pickle)...") - if not os.path.isdir(cwd+"/models"): + if not 
os.path.isdir(f"{cwd}/models"): os.makedirs(cwd+"/models") with open('models/'+filename, 'wb') as file: pickle.dump(lm, file) # Save some of the test data in a CSV print("Saving test data to a file...") print() - if os.path.isdir(cwd+"/data"): - X_test.to_csv("data/housing_test.csv") - else: + if not os.path.isdir(cwd + "/data"): os.makedirs(cwd+"/data") - X_test.to_csv("data/housing_test.csv") + X_test.to_csv("data/housing_test.csv") \ No newline at end of file diff --git a/Deployment/rnn_app/utils.py b/Deployment/rnn_app/utils.py index dedb533..b8d4381 100644 --- a/Deployment/rnn_app/utils.py +++ b/Deployment/rnn_app/utils.py @@ -32,8 +32,7 @@ def generate_random_start(model, graph, seed_length=50,new_words=50,diversity=1, actual = generated[:] + seq[end_idx:end_idx + new_words] # Keep adding new words - for i in range(new_words): - + for _ in range(new_words): # Make a prediction from the seed preds = model.predict(np.array(seed).reshape(1, -1))[0].astype(np.float64) @@ -54,26 +53,17 @@ def generate_random_start(model, graph, seed_length=50,new_words=50,diversity=1, seed += [next_idx] generated.append(next_idx) - # Showing generated and actual abstract - n = [] - - for i in generated: - n.append(idx_word.get(i, '===')) - + n = [idx_word.get(i, '===') for i in generated] gen_list.append(n) - a = [] - - for i in actual: - a.append(idx_word.get(i, '===')) - + a = [idx_word.get(i, '===') for i in actual] a = a[seed_length:] gen_list = [gen[seed_length:seed_length + len(a)] for gen in gen_list] if return_output: return original_sequence, gen_list, a - + # HTML formatting seed_html = '' seed_html = addContent(seed_html, header( @@ -87,16 +77,14 @@ def generate_random_start(model, graph, seed_length=50,new_words=50,diversity=1, a_html = '' a_html = addContent(a_html, header('Actual', color='darkgreen')) a_html = addContent(a_html, box(remove_spaces(' '.join(a)))) - - st = "
" + seed_html + "
" + gen_html + "
" + a_html + "
" - #return f"
{seed_html}
{gen_html}
{a_html}
" - return st + + return f"
{seed_html}
{gen_html}
{a_html}
" def generate_from_seed(model, graph, seed,new_words=50, diversity=0.75): """Generate output from a sequence""" # Mapping of words to integers - word_idx = json.load(open('data/word-index.json')) + word_idx = json.load(open('data/word-index.json')) idx_word = {idx: word for word, idx in word_idx.items()} # Original formated text @@ -133,33 +121,33 @@ def generate_from_seed(model, graph, seed,new_words=50, diversity=0.75): html = addContent(html, header( 'Input Seed ', color='black', gen_text='Network Output')) html = addContent(html, box(start, gen)) - st = "
"+html+"
" - return st + return f"
{html}
" def header(text, color='black', gen_text=None): """Create an HTML header""" - if gen_text: - raw_html = '

' + str( - text) + '' + str(gen_text) + '

' - else: - raw_html = '

' + str( - text) + '

' - return raw_html + return ( + '

' + + str(text) + + '' + + str(gen_text) + + '

' + if gen_text + else '

' + + str(text) + + '

' + ) def box(text, gen_text=None): """Create an HTML box of text""" - if gen_text: - raw_html = '
' + str( - text) + '' + str(gen_text) + '
' - - else: - raw_html = '
' + str( - text) + '
' - return raw_html + return ( + f'
{str(text)}{str(gen_text)}
' + if gen_text + else f'
{str(text)}
' + ) def addContent(old_html, raw_html): diff --git a/Memory-profiling/Scalene/linearmodel.py b/Memory-profiling/Scalene/linearmodel.py index b4b3e23..35f81c8 100644 --- a/Memory-profiling/Scalene/linearmodel.py +++ b/Memory-profiling/Scalene/linearmodel.py @@ -11,7 +11,11 @@ def make_data(): X,y = make_regression(n_samples=NUM_SAMPLES,n_features=NUM_FEATURES, n_informative=NUM_FEATURES,noise=0.5) - data = pd.DataFrame(X,columns=['X'+str(i) for i in range(1,NUM_FEATURES+1)],dtype=np.float16) + data = pd.DataFrame( + X, + columns=[f'X{str(i)}' for i in range(1, NUM_FEATURES + 1)], + dtype=np.float16, + ) data['y']=np.array(y,dtype=np.float16) return data diff --git a/Memory-profiling/Scalene/mlp.py b/Memory-profiling/Scalene/mlp.py index 9b2d534..49e3b62 100644 --- a/Memory-profiling/Scalene/mlp.py +++ b/Memory-profiling/Scalene/mlp.py @@ -11,7 +11,11 @@ def make_data(): X,y = make_regression(n_samples=NUM_SAMPLES,n_features=NUM_FEATURES, n_informative=NUM_FEATURES,noise=0.5) - data = pd.DataFrame(X,columns=['X'+str(i) for i in range(1,NUM_FEATURES+1)],dtype=np.float16) + data = pd.DataFrame( + X, + columns=[f'X{str(i)}' for i in range(1, NUM_FEATURES + 1)], + dtype=np.float16, + ) data['y']=np.array(y,dtype=np.float16) return data diff --git a/OOP_in_ML/Class_MyLinearRegression.py b/OOP_in_ML/Class_MyLinearRegression.py index db2129c..a942013 100644 --- a/OOP_in_ML/Class_MyLinearRegression.py +++ b/OOP_in_ML/Class_MyLinearRegression.py @@ -101,7 +101,6 @@ def summary_metrics(self): if not self.is_fitted: print("Model not fitted yet!") return None - metrics = {} items = ( ("sse", self.sse()), ("sst", self.sst()), @@ -111,9 +110,7 @@ def summary_metrics(self): ("AIC:", self.aic()), ("BIC:", self.bic()), ) - for item in items: - metrics[item[0]] = item[1] - return metrics + return {item[0]: item[1] for item in items} class Inference: @@ -208,10 +205,7 @@ def fitted_vs_features(self): print("Model not fitted yet!") return None num_plots = self.features_.shape[1] - if 
num_plots % 3 == 0: - nrows = int(num_plots / 3) - else: - nrows = int(num_plots / 3) + 1 + nrows = int(num_plots / 3) if num_plots % 3 == 0 else int(num_plots / 3) + 1 ncols = 3 fig, ax = plt.subplots(nrows, ncols, figsize=(15, nrows * 3.5)) axes = ax.ravel() @@ -226,7 +220,7 @@ def fitted_vs_features(self): alpha=0.8, ) axes[i].grid(True) - axes[i].set_xlabel("Feature X[{}]".format(i)) + axes[i].set_xlabel(f"Feature X[{i}]") axes[i].set_ylabel("Residuals") axes[i].hlines( y=0, @@ -412,7 +406,7 @@ def vif(self): lm = sm.OLS(self.target_, sm.add_constant(self.features_)).fit() for i in range(self.features_.shape[1]): v = vif(np.matrix(self.features_), i) - print("Variance inflation factor for feature {}: {}".format(i, round(v, 2))) + print(f"Variance inflation factor for feature {i}: {round(v, 2)}") class MyLinearRegression( diff --git a/Pytest/linear_model.py b/Pytest/linear_model.py index eb7cf76..9bc4e0c 100644 --- a/Pytest/linear_model.py +++ b/Pytest/linear_model.py @@ -31,14 +31,14 @@ def train_linear_model(X,y, # Fit model.fit(X_train, y_train) # Save - fname = filename+'.sav' + fname = f'{filename}.sav' dump(model, fname) # Compute scores r2_train = model.score(X_train,y_train) r2_test = model.score(X_test,y_test) # Return scores in a dictionary return {'Train-score':r2_train, 'Test-score': r2_test} - + except AssertionError as msg: print(msg) return msg \ No newline at end of file diff --git a/Random Function Generator/Symbolic_regression_classification_generator.py b/Random Function Generator/Symbolic_regression_classification_generator.py index ed303f6..ab1986b 100644 --- a/Random Function Generator/Symbolic_regression_classification_generator.py +++ b/Random Function Generator/Symbolic_regression_classification_generator.py @@ -10,9 +10,7 @@ def symbolize(s): from sympy import sympify s1=s.replace('.','*') s2=s1.replace('^','**') - s3=sympify(s2) - - return(s3) + return sympify(s2) def eval_multinomial(s,vals=None,symbolic_eval=False): @@ -24,9 +22,7 @@
def eval_multinomial(s,vals=None,symbolic_eval=False): from sympy import Symbol sym_s=symbolize(s) sym_set=sym_s.atoms(Symbol) - sym_lst=[] - for s in sym_set: - sym_lst.append(str(s)) + sym_lst = [str(s) for s in sym_set] sym_lst.sort() if symbolic_eval==False and len(sym_set)!=len(vals): print("Length of the input values did not match number of variables and symbolic evaluation is not selected") @@ -35,16 +31,13 @@ def eval_multinomial(s,vals=None,symbolic_eval=False): if type(vals)==list: sub=list(zip(sym_lst,vals)) elif type(vals)==dict: - l=list(vals.keys()) - l.sort() - lst=[] - for i in l: - lst.append(vals[i]) + l = sorted(vals.keys()) + lst = [vals[i] for i in l] sub=list(zip(sym_lst,lst)) elif type(vals)==tuple: sub=list(zip(sym_lst,list(vals))) result=sym_s.subs(sub) - + return result @@ -53,7 +46,7 @@ def eval_multinomial(s,vals=None,symbolic_eval=False): def flip(y,p): import numpy as np lst=[] - for i in range(len(y)): + for _ in range(len(y)): f=np.random.choice([1,0],p=[p,1-p]) lst.append(f) lst=np.array(lst) @@ -79,36 +72,34 @@ def gen_classification_symbolic(m=None,n_samples=100,n_features=2,flip_y=0.0): import numpy as np from sympy import Symbol,sympify - - if m==None: + + if m is None: m='' for i in range(1,n_features+1): - c='x'+str(i) + c = f'x{str(i)}' c+=np.random.choice(['+','-'],p=[0.5,0.5]) m+=c m=m[:-1] sym_m=sympify(m) n_features=len(sym_m.atoms(Symbol)) - evals=[] - lst_features=[] - for i in range(n_features): - lst_features.append(np.random.normal(scale=5,size=n_samples)) + lst_features = [ + np.random.normal(scale=5, size=n_samples) for _ in range(n_features) + ] lst_features=np.array(lst_features) lst_features=lst_features.T - for i in range(n_samples): - evals.append(eval_multinomial(m,vals=list(lst_features[i]))) - + evals = [ + eval_multinomial(m, vals=list(lst_features[i])) + for i in range(n_samples) + ] evals=np.array(evals) evals_binary=evals>0 evals_binary=evals_binary.flatten() 
evals_binary=np.array(evals_binary,dtype=int) evals_binary=flip(evals_binary,p=flip_y) evals_binary=evals_binary.reshape(n_samples,1) - + lst_features=lst_features.reshape(n_samples,n_features) - x=np.hstack((lst_features,evals_binary)) - - return (x) + return np.hstack((lst_features,evals_binary)) # ### Regression sample generation based on a symbolic expression @@ -131,32 +122,31 @@ def gen_regression_symbolic(m=None,n_samples=100,n_features=2,noise=0.0,noise_di import numpy as np from sympy import Symbol,sympify - - if m==None: + + if m is None: m='' for i in range(1,n_features+1): - c='x'+str(i) + c = f'x{str(i)}' c+=np.random.choice(['+','-'],p=[0.5,0.5]) m+=c m=m[:-1] - + sym_m=sympify(m) n_features=len(sym_m.atoms(Symbol)) - evals=[] - lst_features=[] - - for i in range(n_features): - lst_features.append(np.random.normal(scale=5,size=n_samples)) + lst_features = [ + np.random.normal(scale=5, size=n_samples) for _ in range(n_features) + ] lst_features=np.array(lst_features) lst_features=lst_features.T lst_features=lst_features.reshape(n_samples,n_features) - - for i in range(n_samples): - evals.append(eval_multinomial(m,vals=list(lst_features[i]))) - + + evals = [ + eval_multinomial(m, vals=list(lst_features[i])) + for i in range(n_samples) + ] evals=np.array(evals) evals=evals.reshape(n_samples,1) - + if noise_dist=='normal': noise_sample=noise*np.random.normal(loc=0,scale=1.0,size=n_samples) elif noise_dist=='uniform': @@ -167,10 +157,8 @@ def gen_regression_symbolic(m=None,n_samples=100,n_features=2,noise=0.0,noise_di noise_sample=noise*np.random.gamma(shape=1.0,scale=1.0,size=n_samples) elif noise_dist=='laplace': noise_sample=noise*np.random.laplace(loc=0.0,scale=1.0,size=n_samples) - + noise_sample=noise_sample.reshape(n_samples,1) evals=evals+noise_sample - - x=np.hstack((lst_features,evals)) - - return (x) + + return np.hstack((lst_features,evals)) diff --git a/Streamlit-ML-apps/scripts/Streamlit-demo-one.py 
b/Streamlit-ML-apps/scripts/Streamlit-demo-one.py index 44f61b7..f3bdb61 100644 --- a/Streamlit-ML-apps/scripts/Streamlit-demo-one.py +++ b/Streamlit-ML-apps/scripts/Streamlit-demo-one.py @@ -277,9 +277,10 @@ def square (x): and the plot below regenerates as well. """ # Random data-filled coulmns -df = pd.DataFrame(np.random.normal(loc=5, -scale=5, size=50).reshape(10, 5), -columns = ['A'+ str(i) for i in range(1, 6)]) +df = pd.DataFrame( + np.random.normal(loc=5, scale=5, size=50).reshape(10, 5), + columns=[f'A{str(i)}' for i in range(1, 6)], +) # Two derived columns df['A6'] = 10*np.sin(df['A1']) @@ -312,9 +313,9 @@ def square (x): csv_filename = str(st.text_input("Enter a filename for saving the DataFrame as a CSV file", max_chars=30)) -if ('.csv' not in csv_filename and len(csv_filename)>0): +if '.csv' not in csv_filename and csv_filename != "": csv_filename += ".csv" -if len(csv_filename)>0: +if csv_filename != "": df_filtered.to_csv(csv_filename) st.markdown("#### File was saved.") else: @@ -400,6 +401,7 @@ def f(x): ``` x = st.slider('x', -8, 8) """ + x = st.slider('x', -8, 8) """ @@ -411,7 +413,7 @@ def f(x): We are printing the function value below. Move the slidebar and see how the evaluation changes. 
""" -st.write(f"$f(x)$ evaluated at {x} is: "+str(round(f(x), 3))) +st.write(f"$f(x)$ evaluated at {x} is: {str(round(f(x), 3))}") """ --- diff --git a/Synthetic_data_generation/Symbolic_regression_classification_generator.py b/Synthetic_data_generation/Symbolic_regression_classification_generator.py index b669efa..35601e5 100644 --- a/Synthetic_data_generation/Symbolic_regression_classification_generator.py +++ b/Synthetic_data_generation/Symbolic_regression_classification_generator.py @@ -7,9 +7,7 @@ def symbolize(s): """ s1=s.replace('.','*') s2=s1.replace('^','**') - s3=sympify(s2) - - return(s3) + return sympify(s2) def eval_multinomial(s,vals=None,symbolic_eval=False): """ @@ -19,9 +17,7 @@ def eval_multinomial(s,vals=None,symbolic_eval=False): """ sym_s=symbolize(s) sym_set=sym_s.atoms(Symbol) - sym_lst=[] - for s in sym_set: - sym_lst.append(str(s)) + sym_lst = [str(s) for s in sym_set] sym_lst.sort() if symbolic_eval==False and len(sym_set)!=len(vals): print("Length of the input values did not match number of variables and symbolic evaluation is not selected") @@ -30,16 +26,13 @@ def eval_multinomial(s,vals=None,symbolic_eval=False): if type(vals)==list: sub=list(zip(sym_lst,vals)) elif type(vals)==dict: - l=list(vals.keys()) - l.sort() - lst=[] - for i in l: - lst.append(vals[i]) + l = sorted(vals.keys()) + lst = [vals[i] for i in l] sub=list(zip(sym_lst,lst)) elif type(vals)==tuple: sub=list(zip(sym_lst,list(vals))) result=sym_s.subs(sub) - + return result def flip(y,p): @@ -47,7 +40,7 @@ def flip(y,p): Flips random bit (used to make a classification problem haredr) """ lst=[] - for i in range(len(y)): + for _ in range(len(y)): f=np.random.choice([1,0],p=[p,1-p]) lst.append(f) lst=np.array(lst) @@ -69,35 +62,33 @@ def gen_classification_symbolic(m=None,n_samples=100,n_features=2,flip_y=0.0): Returns a numpy ndarray with dimension (n_samples,n_features+1). Last column is the response vector. 
""" - if m==None: + if m is None: m='' for i in range(1,n_features+1): - c='x'+str(i) + c = f'x{str(i)}' c+=np.random.choice(['+','-'],p=[0.5,0.5]) m+=c m=m[:-1] sym_m=sympify(m) n_features=len(sym_m.atoms(Symbol)) - evals=[] - lst_features=[] - for i in range(n_features): - lst_features.append(np.random.normal(scale=5,size=n_samples)) + lst_features = [ + np.random.normal(scale=5, size=n_samples) for _ in range(n_features) + ] lst_features=np.array(lst_features) lst_features=lst_features.T - for i in range(n_samples): - evals.append(eval_multinomial(m,vals=list(lst_features[i]))) - + evals = [ + eval_multinomial(m, vals=list(lst_features[i])) + for i in range(n_samples) + ] evals=np.array(evals) evals_binary=evals>0 evals_binary=evals_binary.flatten() evals_binary=np.array(evals_binary,dtype=int) evals_binary=flip(evals_binary,p=flip_y) evals_binary=evals_binary.reshape(n_samples,1) - + lst_features=lst_features.reshape(n_samples,n_features) - x=np.hstack((lst_features,evals_binary)) - - return (x) + return np.hstack((lst_features,evals_binary)) def gen_regression_symbolic(m=None,n_samples=100,n_features=2,noise=0.0,noise_dist='normal'): @@ -116,31 +107,30 @@ def gen_regression_symbolic(m=None,n_samples=100,n_features=2,noise=0.0,noise_di Returns a numpy ndarray with dimension (n_samples,n_features+1). Last column is the response vector. 
""" - if m==None: + if m is None: m='' for i in range(1,n_features+1): - c='x'+str(i) + c = f'x{str(i)}' c+=np.random.choice(['+','-'],p=[0.5,0.5]) m+=c m=m[:-1] - + sym_m=sympify(m) n_features=len(sym_m.atoms(Symbol)) - evals=[] - lst_features=[] - - for i in range(n_features): - lst_features.append(np.random.normal(scale=5,size=n_samples)) + lst_features = [ + np.random.normal(scale=5, size=n_samples) for _ in range(n_features) + ] lst_features=np.array(lst_features) lst_features=lst_features.T lst_features=lst_features.reshape(n_samples,n_features) - - for i in range(n_samples): - evals.append(eval_multinomial(m,vals=list(lst_features[i]))) - + + evals = [ + eval_multinomial(m, vals=list(lst_features[i])) + for i in range(n_samples) + ] evals=np.array(evals) evals=evals.reshape(n_samples,1) - + if noise_dist=='normal': noise_sample=noise*np.random.normal(loc=0,scale=1.0,size=n_samples) elif noise_dist=='uniform': @@ -151,10 +141,8 @@ def gen_regression_symbolic(m=None,n_samples=100,n_features=2,noise=0.0,noise_di noise_sample=noise*np.random.gamma(shape=1.0,scale=1.0,size=n_samples) elif noise_dist=='laplace': noise_sample=noise*np.random.laplace(loc=0.0,scale=1.0,size=n_samples) - + noise_sample=noise_sample.reshape(n_samples,1) evals=evals+noise_sample - - x=np.hstack((lst_features,evals)) - - return (x) \ No newline at end of file + + return np.hstack((lst_features,evals)) \ No newline at end of file