In his book Machine Learning in Action, Peter Harrington provides an iterative solution for the parameter estimation of logistic regression. I use pandas and ggplot to implement a recursive alternative. Compared with the iterative method, the recursion costs more space but may improve performance; a sketch of the iterative version is included after the listing for comparison.

# -*- coding: utf-8 -*-
"""
Use recursion and gradient ascent to solve logistic regression in Python
"""
import pandas as pd
from ggplot import *
def sigmoid(inX):
return 1.0/(1+exp(-inX))
def grad_ascent(dataMatrix, labelMat, cycle):
    """
    A function that uses gradient ascent recursively to calculate the coefficients
    """
    if not isinstance(cycle, int) or cycle < 0:
        raise ValueError("cycle must be a non-negative integer")
    m, n = shape(dataMatrix)
    alpha = 0.001
    if cycle == 0:
        # Base case: start from a weight vector of ones
        return ones((n, 1))
    else:
        # Recursive case: one gradient-ascent step on top of the previous weights
        weights = grad_ascent(dataMatrix, labelMat, cycle - 1)
        h = sigmoid(dataMatrix * weights)
        errors = labelMat - h
        return weights + alpha * dataMatrix.transpose() * errors
def plot(vector):
    """
    A function to use ggplot to visualize the result
    """
    # Decision boundary: solve w0 + w1*x + w2*y = 0 for y
    x = arange(-3, 3, 0.1)
    y = (-vector[0] - vector[1] * x) / vector[2]
    new = pd.DataFrame()
    new['x'] = x
    new['y'] = array(y).flatten()
    # Treat the class label as categorical so it can be used for colouring
    infile.classlab = infile.classlab.astype(str)
    p = ggplot(aes(x='x', y='y', colour='classlab'), data=infile) + geom_point()
    return p + geom_line(aes(x='x', y='y'), data=new)
# Use pandas to manipulate data
if __name__ == '__main__':
    # Read the test data set from the book's GitHub repository
    infile = pd.read_csv("https://raw.githubusercontent.com/pbharrin/machinelearninginaction/master/Ch05/testSet.txt",
                         sep='\t', header=None, names=['x', 'y', 'classlab'])
    infile['one'] = 1                             # intercept column
    mat1 = mat(infile[['one', 'x', 'y']])         # design matrix
    mat2 = mat(infile['classlab']).transpose()    # response vector
    result1 = grad_ascent(mat1, mat2, 500)
    print(plot(result1))
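
For comparison, here is a minimal sketch of the iterative version of the same update. It assumes the same sigmoid function and the same dataMatrix/labelMat inputs as above; the function name grad_ascent_iter is my own and is not part of the book's code.

def grad_ascent_iter(dataMatrix, labelMat, cycle):
    """
    A minimal iterative counterpart of grad_ascent: the same
    gradient-ascent update applied in a loop instead of through
    recursive calls.
    """
    m, n = shape(dataMatrix)
    alpha = 0.001
    weights = ones((n, 1))
    for _ in range(cycle):
        h = sigmoid(dataMatrix * weights)
        errors = labelMat - h
        weights = weights + alpha * dataMatrix.transpose() * errors
    return weights

With the same data and cycle = 500, both versions should return the same coefficients. The loop avoids growing the call stack, whereas the recursive version needs one stack frame per iteration; since Python's default recursion limit is about 1000, a larger cycle would require raising it with sys.setrecursionlimit.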