Linear Regression Using RStudio

Subhodip Pal
4 min readNov 27, 2020

> mydata=read.csv(file.choose(), header = T)

> View(mydata)

> attach(mydata)

> #check the dimensions of the data set

> dim(mydata)

[1] 545 13

> #to check the variable names

> names(mydata)

[1] "price" "area" "bedrooms" "bathrooms" "stories"

[6] "mainroad" "guestroom" "basement" "hotwaterheating" "airconditioning"

[11] "parking" "prefarea" "furnishingstatus"

> plot(price~area)

> abline(lm(price~area))

> #Simple Linear Regression

> #DV (dependent variable) = price; IDV (independent variable) = area

> sreg= lm(price~area)

> summary(sreg)


Call:

lm(formula = price ~ area)


Residuals:

Min 1Q Median 3Q Max

-4867112 -1022228 -200135 683027 7484838


Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) 2.387e+06 1.745e+05 13.68 <2e-16 ***

area 4.620e+02 3.123e+01 14.79 <2e-16 ***

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1581000 on 543 degrees of freedom

Multiple R-squared: 0.2873, Adjusted R-squared: 0.286

F-statistic: 218.9 on 1 and 543 DF, p-value: < 2.2e-16

> plot(area,price, main = "scatter plot")

> abline(sreg)

> plot(sreg)

> par(mfrow=c(2,2))

> plot(sreg)

> #Multiple Linear Regression

> mreg = lm(price~area+bedrooms+bathrooms+stories+mainroad+guestroom+basement+hotwaterheating+airconditioning)

> summary(mreg)


Call:

lm(formula = price ~ area + bedrooms + bathrooms + stories + mainroad + guestroom + basement + hotwaterheating + airconditioning)


Residuals:

Min 1Q Median 3Q Max

-2790328 -642601 -20387 582368 5931692


Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) -573514.10 247246.32 -2.320 0.020738 *

area 303.30 24.47 12.392 < 2e-16 ***

bedrooms 164594.14 76756.38 2.144 0.032453 *

bathrooms 1041793.18 109119.50 9.547 < 2e-16 ***

stories 445078.09 67889.83 6.556 1.30e-10 ***

mainroad 663026.18 147891.26 4.483 9.00e-06 ***

guestroom 318748.08 139464.36 2.286 0.022673 *

basement 507993.18 114628.11 4.432 1.13e-05 ***

hotwaterheating 919690.70 235479.04 3.906 0.000106 ***

airconditioning 951148.06 113990.62 8.344 6.14e-16 ***

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1134000 on 535 degrees of freedom

Multiple R-squared: 0.6385, Adjusted R-squared: 0.6324

F-statistic: 105 on 9 and 535 DF, p-value: < 2.2e-16

> par(mfrow=c(2,2))

> plot(mreg)

> #Durbin-Watson test for autocorrelation

> install.packages("lmtest")

> library(lmtest)

Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

as.Date, as.Date.numeric

Warning messages:

1: package ‘lmtest’ was built under R version 4.0.3

2: package ‘zoo’ was built under R version 4.0.3

> dwtest(mreg)

Durbin-Watson test

data: mreg

DW = 1.1538, p-value < 2.2e-16

alternative hypothesis: true autocorrelation is greater than 0

> #VIF to check multicollinearity

> install.packages("car")

> library(car)

Loading required package: carData

Warning message:

package ‘car’ was built under R version 4.0.3

> car::vif(mreg)

area bedrooms bathrooms stories mainroad guestroom

1.193222 1.357446 1.271540 1.467056 1.124429 1.205831

basement hotwaterheating airconditioning

1.267445 1.028391 1.189285

> hist(price)

> hist(price, freq = F)

> lines(density(price))

