Simple Linear Regression in Python




Simple Linear Regression


Problem statement: Find a linear relationship between the independent variable (years of service) and the dependent variable (salary).

Download The Dataset

Download The Code File


Variables:

Independent Variable : years_of_service

Dependent Variable : salary


#Importing the libraries

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd



# Importing the Dataset
# 'Salaries.csv' is given as a relative path, so make the folder that holds
# the file the console's current working directory before running this.
dataset = pd.read_csv('Salaries.csv')

# Features: every column except the last one (stays 2-D).
# Target: the column at position 1 (1-D).
# NOTE(review): presumably the file has exactly two columns
# (years_of_service, salary) - confirm against the CSV.
feature_columns = dataset.iloc[:, :-1]
target_column = dataset.iloc[:, 1]
X, y = feature_columns.values, target_column.values








# Splitting the dataset into the Training set and Test set

# train_test_split lives in sklearn.model_selection. The former
# sklearn.cross_validation module was deprecated in scikit-learn 0.18 and
# removed in 0.20, so importing from it fails on current releases.
from sklearn.model_selection import train_test_split

# Hold out one third of the rows for testing. The fraction is written with a
# float literal so it cannot collapse to 0 under integer division.
# No random_state is passed, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1.0 / 3)












# Fitting Simple Linear Regression to the Training set
from sklearn.linear_model import LinearRegression

# fit() returns the fitted estimator, so construction and training are chained.
regressor = LinearRegression().fit(X_train, y_train)

# Predicting the Test set results
y_pred = regressor.predict(X_test)




# Visualising the Training set results
# Model output for the training inputs; these points trace the fitted line.
train_fit = regressor.predict(X_train)

plt.scatter(X_train, y_train, color='green')  # observed training points
plt.plot(X_train, train_fit, color='red')     # fitted regression line
plt.title('Experience vs Salary (train set)')
plt.xlabel('experience')
plt.ylabel('salary')
plt.show()








# Visualising the Test set results
# The line is drawn from the training inputs on purpose: the model is a single
# straight line, so it is the same line whichever inputs are used to draw it.
fitted_line = regressor.predict(X_train)

plt.scatter(X_test, y_test, color='red')         # held-out observations
plt.plot(X_train, fitted_line, color='yellow')   # fitted regression line
plt.title('Experience vs Salary (test set)')
plt.xlabel('experience')
plt.ylabel('salary')
plt.show()