Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

sklearn.svm.SVR use more RAM memory on newer versions #24138

Copy link
Copy link
Open
@Piecer-plc

Description

@Piecer-plc
Issue body actions

Describe the bug

Hi, I previously reported a bug where RandomForestClassifier consumes more memory on newer versions, and I've noticed that the SVM API also consumes more memory on newer versions. The detailed test information is as follows.

Memory Version
274MB 1.0.1
276MB 0.20.3
180MB 0.19.2

Steps/Code to Reproduce

Download dataset

import math
import pandas as pd
import numpy as np
import random
import torch
from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor

def random_init(**kwargs):
    """Seed every RNG in play (stdlib ``random``, NumPy, PyTorch CPU and CUDA)
    from ``kwargs['seed']`` and force deterministic cuDNN kernels, so repeated
    runs produce identical shuffles and model initializations.
    """
    seed = kwargs['seed']
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    torch.backends.cudnn.deterministic = True
def load_data(df, cv=False, target=False, **kwargs):
    """Encode a dataframe into a float tensor: one-hot categorical columns
    followed by raw numeric columns.

    Fix: the original read the module-level global ``args`` for
    'cat_feats'/'num_feats'/'categories' even though every caller already
    passes those keys through ``**kwargs``; the function now reads them from
    ``kwargs`` only, removing the hidden global coupling (behavior is
    unchanged for the existing call sites). The per-category ``np.array(df[c])``
    rebuild is also hoisted out of the inner loop.

    Parameters
    ----------
    df : pandas.DataFrame holding the columns named in ``kwargs``.
    cv : when True, shuffle and split into train/validation subsets.
    target : when True, also extract ``df['target']`` as a tensor.
    kwargs : must supply 'cat_feats', 'num_feats', 'categories', and
        (when ``cv=True``) 'cv_percentage', the validation fraction.

    Returns
    -------
    ``(dataset, targets)`` when ``cv`` is False (``targets`` is None when
    ``target`` is False), otherwise
    ``(trainset, validset, traintargets, validtargets)``.
    """
    cat_feats = kwargs['cat_feats']
    num_feats = kwargs['num_feats']
    categories = kwargs['categories']

    num_samples = len(df)
    # Row width = total one-hot slots across categorical features + numeric count.
    sample_size = sum(len(categories[c]) for c in categories) + len(num_feats)
    dataset = torch.zeros((num_samples, sample_size), dtype=torch.float)

    col_idx = 0
    for c in cat_feats:
        col = np.array(df[c])  # hoisted: built once per column, not per category
        for value in categories[c]:
            dataset[col == value, col_idx] = 1.0
            col_idx += 1
    for n in num_feats:
        dataset[:, col_idx] = torch.from_numpy(np.array(df[n]))
        col_idx += 1

    targets = torch.from_numpy(np.array(df['target'])) if target else None

    if not cv:
        return dataset, targets

    # Shuffled split: first (1 - cv_percentage) of rows train, the rest validate.
    order = list(range(num_samples))
    random.shuffle(order)
    split = int(num_samples * (1 - kwargs['cv_percentage']))
    train_idx, valid_idx = order[:split], order[split:]
    return dataset[train_idx], dataset[valid_idx], targets[train_idx], targets[valid_idx]
def get_stats(trainset):
    """Return per-column (mean, std) of *trainset*, overriding one-hot columns.

    Columns containing only 0s and 1s (the one-hot encodings produced by
    ``load_data``) get mean=0.5 and std=1.0 so that standardization recenters
    them without rescaling.

    Improvement over the original: the per-column Python ``for`` loop is
    replaced by a single vectorized boolean mask, which is both clearer and
    avoids O(columns) Python-level iteration; results are identical.
    """
    mean = torch.mean(trainset, dim=0)
    std = torch.std(trainset, dim=0)
    # True for columns whose every entry is 0 or 1 (binary / one-hot columns).
    binary = ((trainset == 0) | (trainset == 1)).all(dim=0)
    mean[binary] = 0.5
    std[binary] = 1.0
    return mean, std
#Global arguments
# Shared hyper-parameter dict; feature metadata ('cat_feats', 'num_feats',
# 'categories') is filled in after the CSVs are loaded below, then the whole
# dict is splatted into load_data / random_init via **args.
args = {
    'cv_percentage': 0.1,
    'seed': 0,
    }
# Load data
# Seed every RNG before any shuffling so the train/validation split is reproducible.
random_init(**args)
# NOTE(review): expects 'train.csv' / 'test.csv' in the working directory —
# the dataset linked from the issue, not bundled here.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
# Column-name convention in this dataset: 'cat*' = categorical, 'cont*' = numeric.
args['cat_feats'] = [c for c in np.sort(train_data.columns) if 'cat' in c]
args['num_feats'] = [c for c in np.sort(train_data.columns) if 'cont' in c]
# Distinct values per categorical column; determines each one-hot block's width.
args['categories'] = {c: np.unique(train_data[c]) for c in args['cat_feats']}
testset, _ = load_data(test_data,cv=False,target=False,**args)
trainset, validset, traintargets, validtargets = load_data(train_data,cv=True,target=True,**args)
# Normalization statistics are computed on the training split only.
args['mean'],args['std'] = get_stats(trainset)
val_pred = {}
test_pred = {}
# Bagged SVRs
# Start Python-level allocation tracking; the peak reported at the end is the
# number the issue compares across scikit-learn versions.
import tracemalloc
tracemalloc.start()
print('Bagged SVRs...')
# 0.19.2 180MB
# 0.20.3 276
# SVR arguments
args = {**args,**{
    'kernel': 'linear',
    'n_estimators': 3,
    'max_samples': 20000,
    'max_features':50
    }}
random_init(**args)
# Bagging ensemble of linear SVRs; each estimator is fit on at most
# max_samples rows and max_features columns of the (standardized) training set.
svrs = BaggingRegressor(SVR(kernel=args['kernel'],C=1.0, epsilon=0.2),
                        n_estimators=args['n_estimators'],max_samples=args['max_samples'],max_features=args['max_features'])
# Standardize with the training-set mean/std before fitting and predicting.
svrs.fit(((trainset-args['mean'])/args['std']).numpy(),traintargets.numpy())
val_pred['svrs'] = svrs.predict(((validset-args['mean'])/args['std']).numpy())
test_pred['svrs'] = svrs.predict(((testset-args['mean'])/args['std']).numpy())
# Report traced allocations; 'peak' is the headline memory figure for the bug report.
currentSVR, peakSVR = tracemalloc.get_traced_memory()
print("SVR current memory usage is {",currentSVR /1024/1024,"}MB; SVR Peak memory was :{",peakSVR / 1024/1024,"}MB")

Expected Results

same memory usage

Actual Results

Newer versions use more memory.

Versions

1.0.1, 0.20.3, 0.19.2

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

      Morty Proxy This is a proxified and sanitized view of the page, visit original site.