import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math

titanic_data= pd.read_csv(r"C:\Users\huzaifa\Downloads\titanicdataset-traincsv\train.csv")
titanic_data.head(10)

print("# of passengers in original data: " +str(len(titanic_data.index)))

# of passengers in original data: 891

# Analyzing data 550 did not survived 300 survived
sns.countplot(x="Survived", data=titanic_data)

<matplotlib.axes._subplots.AxesSubplot at 0x2640abdad08>

sns.countplot(x="Survived", hue="Sex", data=titanic_data)

<matplotlib.axes._subplots.AxesSubplot at 0x2640c4d0508>

sns.countplot(x="Survived", hue="Pclass", data=titanic_data)

<matplotlib.axes._subplots.AxesSubplot at 0x2640c551408>

titanic_data['Age'].plot.hist()

<matplotlib.axes._subplots.AxesSubplot at 0x2640c5c8d08>

titanic_data['Fare'].plot.hist(bins=20, figsize=(10,5))

<matplotlib.axes._subplots.AxesSubplot at 0x2640c6ce448>

titanic_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB

sns.countplot(x="SibSp", data=titanic_data)

<matplotlib.axes._subplots.AxesSubplot at 0x2640c783208>

#data wrangling 
titanic_data.isnull()

titanic_data.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

sns.heatmap(titanic_data.isnull(), yticklabels=False, cmap="viridis")

<matplotlib.axes._subplots.AxesSubplot at 0x2640ca0be88>

#drop age columns
sns.boxplot(x="Pclass",y="Age", data=titanic_data)

<matplotlib.axes._subplots.AxesSubplot at 0x2640c9eb308>

titanic_data.head(5)

titanic_data.drop("Cabin", axis=1, inplace=True)

titanic_data.head(5)

titanic_data.dropna(inplace=True)

sns.heatmap(titanic_data.isnull(), yticklabels=False, cbar=False)

<matplotlib.axes._subplots.AxesSubplot at 0x2640fce2bc8>

titanic_data.isnull().sum()

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Embarked       0
dtype: int64

titanic_data.head(2)

sex=pd.get_dummies(titanic_data['Sex'],drop_first=True)
sex.head(5)

embark = pd.get_dummies(titanic_data['Embarked'],drop_first=True)
embark.head(5)

pcl = pd.get_dummies(titanic_data['Pclass'],drop_first=True)
pcl.head(5)

#concatenate
titanic_data=pd.concat([titanic_data,sex,embark,pcl],axis=1)

titanic_data.head(5)

titanic_data.drop(['Sex','Embarked','PassengerId','Name','Ticket','Pclass'],axis=1,inplace=True)

titanic_data.head(5)

#training dataset
x=titanic_data.drop("Survived", axis=1)
y=titanic_data["Survived"]

from sklearn.model_selection import train_test_split

 X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)

from sklearn.linear_model import LogisticRegression

logmodel=LogisticRegression()

logmodel.fit(X_train,y_train)

C:\Users\huzaifa\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

prediction = logmodel.predict(X_test)

#accuracy
from sklearn.metrics import classification_report

classification_report(y_test,prediction)

'              precision    recall  f1-score   support\n\n           0       0.81      0.83      0.82       126\n           1       0.75      0.72      0.73        88\n\n    accuracy                           0.79       214\n   macro avg       0.78      0.77      0.78       214\nweighted avg       0.78      0.79      0.78       214\n'

from sklearn.metrics import confusion_matrix

confusion_matrix(y_test,prediction)

array([[105,  21],
       [ 25,  63]], dtype=int64)

from sklearn.metrics import accuracy_score

accuracy_score(y_test,prediction)*100

78.50467289719626

>

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	0	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	0	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	0	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	0	373450	8.0500	NaN	S
5	6	0	3	Moran, Mr. James	male	NaN	0	0	330877	8.4583	NaN	Q
6	7	0	1	McCarthy, Mr. Timothy J	male	54.0	0	0	17463	51.8625	E46	S
7	8	0	3	Palsson, Master. Gosta Leonard	male	2.0	3	1	349909	21.0750	NaN	S
8	9	1	3	Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)	female	27.0	0	2	347742	11.1333	NaN	S
9	10	1	2	Nasser, Mrs. Nicholas (Adele Achem)	female	14.0	1	0	237736	30.0708	NaN	C

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	NaN	S

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	S

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Embarked	male	S	3
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	S	1	1	1
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C	0	0	0
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	S	0	1	1
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	S	0	1	0
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	S	1	1	1

	Survived	Age	SibSp	Fare	male	S	3
0	0	22.0	1	7.2500	1	1	1
1	1	38.0	1	71.2833	0	0	0
2	1	26.0	0	7.9250	0	1	1
3	1	35.0	1	53.1000	0	1	0
4	0	35.0	0	8.0500	1	1	1

Search This Blog

Computer Science World

titanix data analysis edureka

Comments

Post a Comment

Popular posts from this blog

gamee

pima-indians-diabetes.csv

Interview Preparation Kit

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
0	False	False	False	False	False	False	False	False	False	False	True	False
1	False	False	False	False	False	False	False	False	False	False	False	False
2	False	False	False	False	False	False	False	False	False	False	True	False
3	False	False	False	False	False	False	False	False	False	False	False	False
4	False	False	False	False	False	False	False	False	False	False	True	False
...	...	...	...	...	...	...	...	...	...	...	...	...
886	False	False	False	False	False	False	False	False	False	False	True	False
887	False	False	False	False	False	False	False	False	False	False	False	False
888	False	False	False	False	False	True	False	False	False	False	True	False
889	False	False	False	False	False	False	False	False	False	False	False	False
890	False	False	False	False	False	False	False	False	False	False	True	False