Logistic Regression using PyTorch

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
2
down vote

favorite

I want to get familiar with PyTorch and decided to implement a simple neural network that is essentially a logistic regression classifier to solve the Dogs vs. Cats problem.

I move 5000 random examples out of the 25000 in total to the test set, so the train/test split is 80/20.

I was able to achieve an accuracy of 59-60% on the train set and about 56-58% on the test set. However, I don't think that is a good result for logistic regression. I'm not sure whether I've made mistakes in the data loading and training routines, since this was basically my first exposure to PyTorch.

Is there any way to improve the performance of logistic regression on this dataset?

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.autograd import Variable
from torchvision import datasets, transforms
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt

'''
Implementation of the logistic regression classifier which solves the problem of recognition of cats and dogs.

The dataset can be found here:
 https://www.kaggle.com/c/dogs-vs-cats
'''

'''
Splits train and test data into to the following folder structure:
---/train
------/cat
------/dog
---/test
------/cat
------/dog

Randomly selects 5000 images as test examples (2500 for each class).
'''


def split_dogs_and_cats(source, test_per_class=2500, images_per_class=12500):
    """Move the Dogs vs. Cats images into train/test class folders.

    Files in ``source`` are expected to be named ``<tag>.<number>.<ext>``,
    for example ``cat.42.jpg``, where ``<tag>`` is ``cat`` or ``dog``.  For
    each class, ``test_per_class`` distinct numbers are drawn at random from
    ``range(images_per_class)``; a file whose number was drawn is moved to
    ``./test/<tag>``, every other recognized file is moved to
    ``./train/<tag>``.  Files that do not follow the naming scheme (hidden
    files, sub-directories, unknown tags) are left where they are.

    Args:
        source: Directory that holds the unsplit images.
        test_per_class: How many image numbers per class go to the test set.
        images_per_class: Size of the number range the test numbers are
            drawn from.

    Raises:
        ValueError: If ``test_per_class`` is larger than
            ``images_per_class`` (raised by ``random.sample``).
    """
    test_dirs = {'cat': './test/cat', 'dog': './test/dog'}
    train_dirs = {'cat': './train/cat', 'dog': './train/dog'}

    files = os.listdir(source)
    # exist_ok keeps a second run from failing on the already created folders.
    for folder in list(test_dirs.values()) + list(train_dirs.values()):
        os.makedirs(folder, exist_ok=True)

    # Sets give O(1) membership tests inside the per-file loop below.
    test_numbers = {
        tag: set(random.sample(range(images_per_class), test_per_class))
        for tag in test_dirs
    }

    for file in files:
        parts = file.split('.')
        if len(parts) != 3 or parts[0] not in test_dirs:
            continue
        tag = parts[0]
        try:
            number = int(parts[1])
        except ValueError:
            continue

        if number in test_numbers[tag]:
            dst = test_dirs[tag]
        else:
            dst = train_dirs[tag]
        shutil.move(os.path.join(source, file), os.path.join(dst, file))


# Split the raw images only when the train/test folders are missing, so that
# re-running the script does not try to create the folders and move the
# files a second time.
if not (os.path.isdir('./train') and os.path.isdir('./test')):
    split_dogs_and_cats('./dataset')

batch_size = 32
image_size = 128


# Resize every image to image_size x image_size, convert it to a tensor and
# normalize each colour channel with the given mean / std.


transformation = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


# Load the data from the ./train and ./test folder trees.


train_data = datasets.ImageFolder(root='./train', transform=transformation)
test_data = datasets.ImageFolder(root='./test', transform=transformation)

train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, num_workers=4)
# Evaluation metrics do not depend on sample order, so the test set is read
# without shuffling.
test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False, num_workers=4)


#Let's check that we loaded the data correctly.



def imshow(inp, title=None):
    """Show a normalized image tensor in a matplotlib figure.

    Args:
        inp: Tensor whose axes are reordered with ``transpose((1, 2, 0))``
            before plotting (presumably channels-first image data).
        title: Optional title for the plot; omitted when ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Move the first axis to the end, as expected by plt.imshow.
    image = inp.numpy().transpose((1, 2, 0))
    # Undo the mean/std normalization and clamp to the displayable range.
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Short pause so that the figure gets a chance to be drawn.
    plt.pause(0.001)


# Label encoding: 1 - dog, 0 - cat (ImageFolder numbers the class folders in
# sorted order).


# Fetch one training batch, tile its images into a single grid and display
# the grid with the batch's label tensor as the plot title.
inputs, classes = next(iter(train_data_loader))

out = torchvision.utils.make_grid(inputs)

imshow(out, title=classes)

'''
Class that represents logistic regression.
'''


class NeuralNetwork(nn.Module):
    """Logistic regression written as one linear layer followed by a sigmoid.

    Args:
        input_size: Number of features of a flattened sample.  When omitted,
            ``3 * image_size * image_size`` is used, based on the
            module-level ``image_size``.
    """

    def __init__(self, input_size=None):
        super(NeuralNetwork, self).__init__()
        if input_size is None:
            input_size = 3 * image_size * image_size
        self.layer = nn.Linear(input_size, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for each sample.

        Args:
            x: Tensor whose last dimension has ``input_size`` entries.

        Returns:
            The input's leading dimensions with the trailing size-1 output
            dimension removed, e.g. ``(batch,)`` for ``(batch, input_size)``.
        """
        # Only the trailing output axis is squeezed.  A bare squeeze() would
        # also drop the batch axis for a batch of one sample, yielding a
        # 0-dim tensor whose shape no longer matches the target's.
        return torch.sigmoid(self.layer(x)).squeeze(-1)


# Module-level model instance and loss object; both are looked up by name
# inside run().  BCELoss is applied to the sigmoid outputs of the network.
network = NeuralNetwork()
criterion = nn.BCELoss()

'''
Main procedure.
Firstly, we train our classifier then test.
'''


def _evaluate(data_loader, label):
    """Print the average loss and the accuracy of ``network`` on a data set.

    Args:
        data_loader: Loader yielding ``(data, target)`` batches; the length
            of its ``dataset`` is used as the number of samples.
        label: Name of the data split shown in the report, e.g. ``'Test'``.
    """
    total_loss = 0.0
    correct = 0
    # Nothing is optimized here, so gradient tracking is switched off.
    with torch.no_grad():
        for data, target in data_loader:
            data = data.view(-1, image_size * image_size * 3)
            output = network(data)
            # The criterion yields a per-batch mean (default reduction of
            # nn.BCELoss); weighting it by the batch size makes the division
            # by the data set size below a true per-sample average.
            total_loss += criterion(output, target.float()).item() * len(data)
            pred = output.ge(0.5).long()
            correct += pred.eq(target.long()).sum().item()

    total = len(data_loader.dataset)
    print('\n{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        label, total_loss / total, correct, total, 100. * correct / total))


def run(learning_rate, epochs=2):
    """Train the module-level ``network`` and report test/train metrics.

    The network is optimized with Adam on ``train_data_loader``; the loss is
    printed every tenth batch.  After training, the average loss and the
    accuracy are printed for the test set and then for the train set.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        epochs: Number of passes over the training data.
    """
    optimizer = optim.Adam(network.parameters(), lr=learning_rate)

    network.train()
    for epoch in range(1, epochs + 1):
        for batch_idx, (data, target) in enumerate(train_data_loader):
            # Flatten every image into one feature vector.
            data = data.view(-1, image_size * image_size * 3)
            optimizer.zero_grad()
            output = network(data)
            cost = criterion(output, target.float())
            cost.backward()
            optimizer.step()

            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_data_loader.dataset),
                    100. * batch_idx / len(train_data_loader), cost.item()))

    network.eval()
    _evaluate(test_data_loader, 'Test')
    _evaluate(train_data_loader, 'Train')


# Script entry point: train and evaluate with a learning rate of 1e-4.
run(0.0001)

edited May 27 at 18:11

asked May 26 at 21:20

False Promise

1336

Have you tried checking the accuracy with a well-known implementation of logistic regression, such as the one in scikit-learn? If you get similar results, then the problem is probably not your implementation but the features/model complexity. scikit-learn's logistic regression uses regularization by default, which may improve your model's ability to generalize and reduce overfitting.
– mochi
May 28 at 0:38

@mochi Sorry for the late response, I had a lot of work last week. I've tried the built-in sklearn logistic regression classifier, and even with regularization it performs 1-2% worse than my PyTorch implementation on both the train and test sets. Thanks for the suggestion!
– False Promise
Jun 2 at 23:47

add a comment |

up vote
2
down vote

favorite

I want to get familiar with PyTorch and decided to implement a simple neural network that is essentially a logistic regression classifier to solve the Dogs vs. Cats problem.

I move 5000 random examples out of the 25000 in total to the test set, so the train/test split is 80/20.

Is there any way to improve the performance of logistic regression on this dataset?

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.autograd import Variable
from torchvision import datasets, transforms
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt

'''
Implementation of the logistic regression classifier which solves the problem of recognition of cats and dogs.

The dataset can be found here:
 https://www.kaggle.com/c/dogs-vs-cats
'''

'''
Splits train and test data into to the following folder structure:
---/train
------/cat
------/dog
---/test
------/cat
------/dog

Randomly selects 5000 images as test examples (2500 for each class).
'''


def split_dogs_and_cats(source, test_per_class=2500, images_per_class=12500):
    """Move the Dogs vs. Cats images into train/test class folders.

    Files in ``source`` are expected to be named ``<tag>.<number>.<ext>``,
    for example ``cat.42.jpg``, where ``<tag>`` is ``cat`` or ``dog``.  For
    each class, ``test_per_class`` distinct numbers are drawn at random from
    ``range(images_per_class)``; a file whose number was drawn is moved to
    ``./test/<tag>``, every other recognized file is moved to
    ``./train/<tag>``.  Files that do not follow the naming scheme (hidden
    files, sub-directories, unknown tags) are left where they are.

    Args:
        source: Directory that holds the unsplit images.
        test_per_class: How many image numbers per class go to the test set.
        images_per_class: Size of the number range the test numbers are
            drawn from.

    Raises:
        ValueError: If ``test_per_class`` is larger than
            ``images_per_class`` (raised by ``random.sample``).
    """
    test_dirs = {'cat': './test/cat', 'dog': './test/dog'}
    train_dirs = {'cat': './train/cat', 'dog': './train/dog'}

    files = os.listdir(source)
    # exist_ok keeps a second run from failing on the already created folders.
    for folder in list(test_dirs.values()) + list(train_dirs.values()):
        os.makedirs(folder, exist_ok=True)

    # Sets give O(1) membership tests inside the per-file loop below.
    test_numbers = {
        tag: set(random.sample(range(images_per_class), test_per_class))
        for tag in test_dirs
    }

    for file in files:
        parts = file.split('.')
        if len(parts) != 3 or parts[0] not in test_dirs:
            continue
        tag = parts[0]
        try:
            number = int(parts[1])
        except ValueError:
            continue

        if number in test_numbers[tag]:
            dst = test_dirs[tag]
        else:
            dst = train_dirs[tag]
        shutil.move(os.path.join(source, file), os.path.join(dst, file))


# Split the raw images only when the train/test folders are missing, so that
# re-running the script does not try to create the folders and move the
# files a second time.
if not (os.path.isdir('./train') and os.path.isdir('./test')):
    split_dogs_and_cats('./dataset')

batch_size = 32
image_size = 128


# Resize every image to image_size x image_size, convert it to a tensor and
# normalize each colour channel with the given mean / std.


transformation = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


# Load the data from the ./train and ./test folder trees.


train_data = datasets.ImageFolder(root='./train', transform=transformation)
test_data = datasets.ImageFolder(root='./test', transform=transformation)

train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, num_workers=4)
# Evaluation metrics do not depend on sample order, so the test set is read
# without shuffling.
test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False, num_workers=4)


#Let's check that we loaded the data correctly.



def imshow(inp, title=None):
    """Show a normalized image tensor in a matplotlib figure.

    Args:
        inp: Tensor whose axes are reordered with ``transpose((1, 2, 0))``
            before plotting (presumably channels-first image data).
        title: Optional title for the plot; omitted when ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Move the first axis to the end, as expected by plt.imshow.
    image = inp.numpy().transpose((1, 2, 0))
    # Undo the mean/std normalization and clamp to the displayable range.
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Short pause so that the figure gets a chance to be drawn.
    plt.pause(0.001)


# Label encoding: 1 - dog, 0 - cat (ImageFolder numbers the class folders in
# sorted order).


# Fetch one training batch, tile its images into a single grid and display
# the grid with the batch's label tensor as the plot title.
inputs, classes = next(iter(train_data_loader))

out = torchvision.utils.make_grid(inputs)

imshow(out, title=classes)

'''
Class that represents logistic regression.
'''


class NeuralNetwork(nn.Module):
    """Logistic regression written as one linear layer followed by a sigmoid.

    Args:
        input_size: Number of features of a flattened sample.  When omitted,
            ``3 * image_size * image_size`` is used, based on the
            module-level ``image_size``.
    """

    def __init__(self, input_size=None):
        super(NeuralNetwork, self).__init__()
        if input_size is None:
            input_size = 3 * image_size * image_size
        self.layer = nn.Linear(input_size, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for each sample.

        Args:
            x: Tensor whose last dimension has ``input_size`` entries.

        Returns:
            The input's leading dimensions with the trailing size-1 output
            dimension removed, e.g. ``(batch,)`` for ``(batch, input_size)``.
        """
        # Only the trailing output axis is squeezed.  A bare squeeze() would
        # also drop the batch axis for a batch of one sample, yielding a
        # 0-dim tensor whose shape no longer matches the target's.
        return torch.sigmoid(self.layer(x)).squeeze(-1)


# Module-level model instance and loss object; both are looked up by name
# inside run().  BCELoss is applied to the sigmoid outputs of the network.
network = NeuralNetwork()
criterion = nn.BCELoss()

'''
Main procedure.
Firstly, we train our classifier then test.
'''


def _evaluate(data_loader, label):
    """Print the average loss and the accuracy of ``network`` on a data set.

    Args:
        data_loader: Loader yielding ``(data, target)`` batches; the length
            of its ``dataset`` is used as the number of samples.
        label: Name of the data split shown in the report, e.g. ``'Test'``.
    """
    total_loss = 0.0
    correct = 0
    # Nothing is optimized here, so gradient tracking is switched off.
    with torch.no_grad():
        for data, target in data_loader:
            data = data.view(-1, image_size * image_size * 3)
            output = network(data)
            # The criterion yields a per-batch mean (default reduction of
            # nn.BCELoss); weighting it by the batch size makes the division
            # by the data set size below a true per-sample average.
            total_loss += criterion(output, target.float()).item() * len(data)
            pred = output.ge(0.5).long()
            correct += pred.eq(target.long()).sum().item()

    total = len(data_loader.dataset)
    print('\n{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        label, total_loss / total, correct, total, 100. * correct / total))


def run(learning_rate, epochs=2):
    """Train the module-level ``network`` and report test/train metrics.

    The network is optimized with Adam on ``train_data_loader``; the loss is
    printed every tenth batch.  After training, the average loss and the
    accuracy are printed for the test set and then for the train set.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        epochs: Number of passes over the training data.
    """
    optimizer = optim.Adam(network.parameters(), lr=learning_rate)

    network.train()
    for epoch in range(1, epochs + 1):
        for batch_idx, (data, target) in enumerate(train_data_loader):
            # Flatten every image into one feature vector.
            data = data.view(-1, image_size * image_size * 3)
            optimizer.zero_grad()
            output = network(data)
            cost = criterion(output, target.float())
            cost.backward()
            optimizer.step()

            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_data_loader.dataset),
                    100. * batch_idx / len(train_data_loader), cost.item()))

    network.eval()
    _evaluate(test_data_loader, 'Test')
    _evaluate(train_data_loader, 'Train')


# Script entry point: train and evaluate with a learning rate of 1e-4.
run(0.0001)

edited May 27 at 18:11

asked May 26 at 21:20

False Promise

1336

Have you tried checking the accuracy with a well-known implementation of logistic regression, such as the one in scikit-learn? If you get similar results, then the problem is probably not your implementation but the features/model complexity. scikit-learn's logistic regression uses regularization by default, which may improve your model's ability to generalize and reduce overfitting.
– mochi
May 28 at 0:38

@mochi Sorry for the late response, I had a lot of work last week. I've tried the built-in sklearn logistic regression classifier, and even with regularization it performs 1-2% worse than my PyTorch implementation on both the train and test sets. Thanks for the suggestion!
– False Promise
Jun 2 at 23:47

add a comment |

up vote
2
down vote

favorite

I want to get familiar with PyTorch and decided to implement a simple neural network that is essentially a logistic regression classifier to solve the Dogs vs. Cats problem.

I move 5000 random examples out of the 25000 in total to the test set, so the train/test split is 80/20.

Is there any way to improve the performance of logistic regression on this dataset?

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.autograd import Variable
from torchvision import datasets, transforms
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt

'''
Implementation of the logistic regression classifier which solves the problem of recognition of cats and dogs.

The dataset can be found here:
 https://www.kaggle.com/c/dogs-vs-cats
'''

'''
Splits train and test data into to the following folder structure:
---/train
------/cat
------/dog
---/test
------/cat
------/dog

Randomly selects 5000 images as test examples (2500 for each class).
'''


def split_dogs_and_cats(source, test_per_class=2500, images_per_class=12500):
    """Move the Dogs vs. Cats images into train/test class folders.

    Files in ``source`` are expected to be named ``<tag>.<number>.<ext>``,
    for example ``cat.42.jpg``, where ``<tag>`` is ``cat`` or ``dog``.  For
    each class, ``test_per_class`` distinct numbers are drawn at random from
    ``range(images_per_class)``; a file whose number was drawn is moved to
    ``./test/<tag>``, every other recognized file is moved to
    ``./train/<tag>``.  Files that do not follow the naming scheme (hidden
    files, sub-directories, unknown tags) are left where they are.

    Args:
        source: Directory that holds the unsplit images.
        test_per_class: How many image numbers per class go to the test set.
        images_per_class: Size of the number range the test numbers are
            drawn from.

    Raises:
        ValueError: If ``test_per_class`` is larger than
            ``images_per_class`` (raised by ``random.sample``).
    """
    test_dirs = {'cat': './test/cat', 'dog': './test/dog'}
    train_dirs = {'cat': './train/cat', 'dog': './train/dog'}

    files = os.listdir(source)
    # exist_ok keeps a second run from failing on the already created folders.
    for folder in list(test_dirs.values()) + list(train_dirs.values()):
        os.makedirs(folder, exist_ok=True)

    # Sets give O(1) membership tests inside the per-file loop below.
    test_numbers = {
        tag: set(random.sample(range(images_per_class), test_per_class))
        for tag in test_dirs
    }

    for file in files:
        parts = file.split('.')
        if len(parts) != 3 or parts[0] not in test_dirs:
            continue
        tag = parts[0]
        try:
            number = int(parts[1])
        except ValueError:
            continue

        if number in test_numbers[tag]:
            dst = test_dirs[tag]
        else:
            dst = train_dirs[tag]
        shutil.move(os.path.join(source, file), os.path.join(dst, file))


# Split the raw images only when the train/test folders are missing, so that
# re-running the script does not try to create the folders and move the
# files a second time.
if not (os.path.isdir('./train') and os.path.isdir('./test')):
    split_dogs_and_cats('./dataset')

batch_size = 32
image_size = 128


# Resize every image to image_size x image_size, convert it to a tensor and
# normalize each colour channel with the given mean / std.


transformation = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


# Load the data from the ./train and ./test folder trees.


train_data = datasets.ImageFolder(root='./train', transform=transformation)
test_data = datasets.ImageFolder(root='./test', transform=transformation)

train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, num_workers=4)
# Evaluation metrics do not depend on sample order, so the test set is read
# without shuffling.
test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False, num_workers=4)


#Let's check that we loaded the data correctly.



def imshow(inp, title=None):
    """Show a normalized image tensor in a matplotlib figure.

    Args:
        inp: Tensor whose axes are reordered with ``transpose((1, 2, 0))``
            before plotting (presumably channels-first image data).
        title: Optional title for the plot; omitted when ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Move the first axis to the end, as expected by plt.imshow.
    image = inp.numpy().transpose((1, 2, 0))
    # Undo the mean/std normalization and clamp to the displayable range.
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Short pause so that the figure gets a chance to be drawn.
    plt.pause(0.001)


# Label encoding: 1 - dog, 0 - cat (ImageFolder numbers the class folders in
# sorted order).


# Fetch one training batch, tile its images into a single grid and display
# the grid with the batch's label tensor as the plot title.
inputs, classes = next(iter(train_data_loader))

out = torchvision.utils.make_grid(inputs)

imshow(out, title=classes)

'''
Class that represents logistic regression.
'''


class NeuralNetwork(nn.Module):
    """Logistic regression written as one linear layer followed by a sigmoid.

    Args:
        input_size: Number of features of a flattened sample.  When omitted,
            ``3 * image_size * image_size`` is used, based on the
            module-level ``image_size``.
    """

    def __init__(self, input_size=None):
        super(NeuralNetwork, self).__init__()
        if input_size is None:
            input_size = 3 * image_size * image_size
        self.layer = nn.Linear(input_size, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for each sample.

        Args:
            x: Tensor whose last dimension has ``input_size`` entries.

        Returns:
            The input's leading dimensions with the trailing size-1 output
            dimension removed, e.g. ``(batch,)`` for ``(batch, input_size)``.
        """
        # Only the trailing output axis is squeezed.  A bare squeeze() would
        # also drop the batch axis for a batch of one sample, yielding a
        # 0-dim tensor whose shape no longer matches the target's.
        return torch.sigmoid(self.layer(x)).squeeze(-1)


# Module-level model instance and loss object; both are looked up by name
# inside run().  BCELoss is applied to the sigmoid outputs of the network.
network = NeuralNetwork()
criterion = nn.BCELoss()

'''
Main procedure.
Firstly, we train our classifier then test.
'''


def _evaluate(data_loader, label):
    """Print the average loss and the accuracy of ``network`` on a data set.

    Args:
        data_loader: Loader yielding ``(data, target)`` batches; the length
            of its ``dataset`` is used as the number of samples.
        label: Name of the data split shown in the report, e.g. ``'Test'``.
    """
    total_loss = 0.0
    correct = 0
    # Nothing is optimized here, so gradient tracking is switched off.
    with torch.no_grad():
        for data, target in data_loader:
            data = data.view(-1, image_size * image_size * 3)
            output = network(data)
            # The criterion yields a per-batch mean (default reduction of
            # nn.BCELoss); weighting it by the batch size makes the division
            # by the data set size below a true per-sample average.
            total_loss += criterion(output, target.float()).item() * len(data)
            pred = output.ge(0.5).long()
            correct += pred.eq(target.long()).sum().item()

    total = len(data_loader.dataset)
    print('\n{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        label, total_loss / total, correct, total, 100. * correct / total))


def run(learning_rate, epochs=2):
    """Train the module-level ``network`` and report test/train metrics.

    The network is optimized with Adam on ``train_data_loader``; the loss is
    printed every tenth batch.  After training, the average loss and the
    accuracy are printed for the test set and then for the train set.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        epochs: Number of passes over the training data.
    """
    optimizer = optim.Adam(network.parameters(), lr=learning_rate)

    network.train()
    for epoch in range(1, epochs + 1):
        for batch_idx, (data, target) in enumerate(train_data_loader):
            # Flatten every image into one feature vector.
            data = data.view(-1, image_size * image_size * 3)
            optimizer.zero_grad()
            output = network(data)
            cost = criterion(output, target.float())
            cost.backward()
            optimizer.step()

            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_data_loader.dataset),
                    100. * batch_idx / len(train_data_loader), cost.item()))

    network.eval()
    _evaluate(test_data_loader, 'Test')
    _evaluate(train_data_loader, 'Train')


# Script entry point: train and evaluate with a learning rate of 1e-4.
run(0.0001)

edited May 27 at 18:11

asked May 26 at 21:20

False Promise

1336

I want to get familiar with PyTorch and decided to implement a simple neural network that is essentially a logistic regression classifier to solve the Dogs vs. Cats problem.

I move 5000 random examples out of the 25000 in total to the test set, so the train/test split is 80/20.

Is there any way to improve the performance of logistic regression on this dataset?

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.autograd import Variable
from torchvision import datasets, transforms
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt

'''
Implementation of the logistic regression classifier which solves the problem of recognition of cats and dogs.

The dataset can be found here:
 https://www.kaggle.com/c/dogs-vs-cats
'''

'''
Splits train and test data into to the following folder structure:
---/train
------/cat
------/dog
---/test
------/cat
------/dog

Randomly selects 5000 images as test examples (2500 for each class).
'''


def split_dogs_and_cats(source, test_per_class=2500, images_per_class=12500):
    """Move the Dogs vs. Cats images into train/test class folders.

    Files in ``source`` are expected to be named ``<tag>.<number>.<ext>``,
    for example ``cat.42.jpg``, where ``<tag>`` is ``cat`` or ``dog``.  For
    each class, ``test_per_class`` distinct numbers are drawn at random from
    ``range(images_per_class)``; a file whose number was drawn is moved to
    ``./test/<tag>``, every other recognized file is moved to
    ``./train/<tag>``.  Files that do not follow the naming scheme (hidden
    files, sub-directories, unknown tags) are left where they are.

    Args:
        source: Directory that holds the unsplit images.
        test_per_class: How many image numbers per class go to the test set.
        images_per_class: Size of the number range the test numbers are
            drawn from.

    Raises:
        ValueError: If ``test_per_class`` is larger than
            ``images_per_class`` (raised by ``random.sample``).
    """
    test_dirs = {'cat': './test/cat', 'dog': './test/dog'}
    train_dirs = {'cat': './train/cat', 'dog': './train/dog'}

    files = os.listdir(source)
    # exist_ok keeps a second run from failing on the already created folders.
    for folder in list(test_dirs.values()) + list(train_dirs.values()):
        os.makedirs(folder, exist_ok=True)

    # Sets give O(1) membership tests inside the per-file loop below.
    test_numbers = {
        tag: set(random.sample(range(images_per_class), test_per_class))
        for tag in test_dirs
    }

    for file in files:
        parts = file.split('.')
        if len(parts) != 3 or parts[0] not in test_dirs:
            continue
        tag = parts[0]
        try:
            number = int(parts[1])
        except ValueError:
            continue

        if number in test_numbers[tag]:
            dst = test_dirs[tag]
        else:
            dst = train_dirs[tag]
        shutil.move(os.path.join(source, file), os.path.join(dst, file))


# Split the raw images only when the train/test folders are missing, so that
# re-running the script does not try to create the folders and move the
# files a second time.
if not (os.path.isdir('./train') and os.path.isdir('./test')):
    split_dogs_and_cats('./dataset')

batch_size = 32
image_size = 128


# Resize every image to image_size x image_size, convert it to a tensor and
# normalize each colour channel with the given mean / std.


transformation = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


# Load the data from the ./train and ./test folder trees.


train_data = datasets.ImageFolder(root='./train', transform=transformation)
test_data = datasets.ImageFolder(root='./test', transform=transformation)

train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, num_workers=4)
# Evaluation metrics do not depend on sample order, so the test set is read
# without shuffling.
test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False, num_workers=4)


#Let's check that we loaded the data correctly.



def imshow(inp, title=None):
    """Show a normalized image tensor in a matplotlib figure.

    Args:
        inp: Tensor whose axes are reordered with ``transpose((1, 2, 0))``
            before plotting (presumably channels-first image data).
        title: Optional title for the plot; omitted when ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Move the first axis to the end, as expected by plt.imshow.
    image = inp.numpy().transpose((1, 2, 0))
    # Undo the mean/std normalization and clamp to the displayable range.
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Short pause so that the figure gets a chance to be drawn.
    plt.pause(0.001)


# Label encoding: 1 - dog, 0 - cat (ImageFolder numbers the class folders in
# sorted order).


# Fetch one training batch, tile its images into a single grid and display
# the grid with the batch's label tensor as the plot title.
inputs, classes = next(iter(train_data_loader))

out = torchvision.utils.make_grid(inputs)

imshow(out, title=classes)

'''
Class that represents logistic regression.
'''


class NeuralNetwork(nn.Module):
    """Logistic regression written as one linear layer followed by a sigmoid.

    Args:
        input_size: Number of features of a flattened sample.  When omitted,
            ``3 * image_size * image_size`` is used, based on the
            module-level ``image_size``.
    """

    def __init__(self, input_size=None):
        super(NeuralNetwork, self).__init__()
        if input_size is None:
            input_size = 3 * image_size * image_size
        self.layer = nn.Linear(input_size, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for each sample.

        Args:
            x: Tensor whose last dimension has ``input_size`` entries.

        Returns:
            The input's leading dimensions with the trailing size-1 output
            dimension removed, e.g. ``(batch,)`` for ``(batch, input_size)``.
        """
        # Only the trailing output axis is squeezed.  A bare squeeze() would
        # also drop the batch axis for a batch of one sample, yielding a
        # 0-dim tensor whose shape no longer matches the target's.
        return torch.sigmoid(self.layer(x)).squeeze(-1)


# Module-level model instance and loss object; both are looked up by name
# inside run().  BCELoss is applied to the sigmoid outputs of the network.
network = NeuralNetwork()
criterion = nn.BCELoss()

'''
Main procedure.
Firstly, we train our classifier then test.
'''


def _evaluate(data_loader, label):
    """Print the average loss and the accuracy of ``network`` on a data set.

    Args:
        data_loader: Loader yielding ``(data, target)`` batches; the length
            of its ``dataset`` is used as the number of samples.
        label: Name of the data split shown in the report, e.g. ``'Test'``.
    """
    total_loss = 0.0
    correct = 0
    # Nothing is optimized here, so gradient tracking is switched off.
    with torch.no_grad():
        for data, target in data_loader:
            data = data.view(-1, image_size * image_size * 3)
            output = network(data)
            # The criterion yields a per-batch mean (default reduction of
            # nn.BCELoss); weighting it by the batch size makes the division
            # by the data set size below a true per-sample average.
            total_loss += criterion(output, target.float()).item() * len(data)
            pred = output.ge(0.5).long()
            correct += pred.eq(target.long()).sum().item()

    total = len(data_loader.dataset)
    print('\n{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        label, total_loss / total, correct, total, 100. * correct / total))


def run(learning_rate, epochs=2):
    """Train the module-level ``network`` and report test/train metrics.

    The network is optimized with Adam on ``train_data_loader``; the loss is
    printed every tenth batch.  After training, the average loss and the
    accuracy are printed for the test set and then for the train set.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        epochs: Number of passes over the training data.
    """
    optimizer = optim.Adam(network.parameters(), lr=learning_rate)

    network.train()
    for epoch in range(1, epochs + 1):
        for batch_idx, (data, target) in enumerate(train_data_loader):
            # Flatten every image into one feature vector.
            data = data.view(-1, image_size * image_size * 3)
            optimizer.zero_grad()
            output = network(data)
            cost = criterion(output, target.float())
            cost.backward()
            optimizer.step()

            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_data_loader.dataset),
                    100. * batch_idx / len(train_data_loader), cost.item()))

    network.eval()
    _evaluate(test_data_loader, 'Test')
    _evaluate(train_data_loader, 'Train')


# Script entry point: train and evaluate with a learning rate of 1e-4.
run(0.0001)

edited May 27 at 18:11

asked May 26 at 21:20

False Promise

1336

edited May 27 at 18:11

asked May 26 at 21:20

False Promise

1336

asked May 26 at 21:20

False Promise

1336

asked May 26 at 21:20

False Promise

1336

Have you tried checking the accuracy with a well-known implementation of logistic regression, such as the one in scikit-learn? If you get similar results, then the problem is probably not your implementation but the features/model complexity. scikit-learn's logistic regression uses regularization by default, which may improve your model's ability to generalize and reduce overfitting.
– mochi
May 28 at 0:38

@mochi Sorry for the late response, I had a lot of work last week. I've tried the built-in sklearn logistic regression classifier, and even with regularization it performs 1-2% worse than my PyTorch implementation on both the train and test sets. Thanks for the suggestion!
– False Promise
Jun 2 at 23:47

add a comment |

Have you tried checking the accuracy with a well-known implementation of logistic regression, such as the one in scikit-learn? If you get similar results, then the problem is probably not your implementation but the features/model complexity. scikit-learn's logistic regression uses regularization by default, which may improve your model's ability to generalize and reduce overfitting.
– mochi
May 28 at 0:38

@mochi Sorry for the late response, I had a lot of work last week. I've tried the built-in sklearn logistic regression classifier, and even with regularization it performs 1-2% worse than my PyTorch implementation on both the train and test sets. Thanks for the suggestion!
– False Promise
Jun 2 at 23:47

Have you tried checking the accuracy with a well-known implementation of logistic regression, such as the one in scikit-learn? If you get similar results, then the problem is probably not your implementation but the features/model complexity. scikit-learn's logistic regression uses regularization by default, which may improve your model's ability to generalize and reduce overfitting.
– mochi
May 28 at 0:38

@mochi Sorry for the late response, I had a lot of work last week. I've tried the built-in sklearn logistic regression classifier, and even with regularization it performs 1-2% worse than my PyTorch implementation on both the train and test sets. Thanks for the suggestion!
– False Promise
Jun 2 at 23:47

add a comment |

active

oldest

votes

Your Answer

StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f195240%2flogistic-regression-using-pytorch%23new-answer', 'question_page');

);

Post as a guest

Name

active

oldest

votes

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr