Blog

Daily Learning

Day Next

import pandas as pd

## Series: (1-dimensional)

# A Series is a one-dimensional labelled array; with no explicit index the
# labels default to 0..n-1.
series = pd.Series(
    data=["Jigyasu", "Mayank", "Pankaj", "Suraj", "Hardik", "Saurabh"],
)

series

series_1 = pd.Series(
    data=["Black", "Pink", "White", "Red", "Green", "Blue"],
)

series_1

## DataFrame: (2-dimensional)

# Each dict key becomes a column label; the two Series are aligned on their
# shared default index.
fav_color = pd.DataFrame(data={"Name": series, "Fav. Color": series_1})
fav_color

## Import data

# Load the car sales table from a CSV file in the working directory.
Car_Data = pd.read_csv(filepath_or_buffer="car-sales.csv")

Car_Data

## Exporting Dataframe

# index=False keeps the row labels out of the exported file, so reading it
# back does not produce an extra unnamed column.
Car_Data.to_csv(path_or_buf="Exported_Car_Data", index=False)

# Round-trip check: read back the file that was just written.
Ex_Car_Data = pd.read_csv(filepath_or_buffer="Exported_Car_Data")
Ex_Car_Data

## Describing Data

# Attributes (no parentheses: these are properties, not method calls)
Car_Data.dtypes  # dtype of every column

Car_Data.columns  # column labels

Car_Column = Car_Data.columns
Car_Column

Car_Data.index  # row labels

# Methods
Car_Data.describe()  # summary statistics of the numeric columns

Car_Data.info()  # prints a concise summary (dtypes, non-null counts)

# Column-wise totals.  NOTE: for text columns "sum" means string
# concatenation, so only the numeric totals are meaningful here.
Car_Data.sum(axis=0)

# Aggregations on a single column.
Car_Data.loc[:, "Odometer (KM)"].mean()

Car_Data.loc[:, "Odometer (KM)"].sum()

# Number of rows.
Car_Data.shape[0]

## Selection and viewing

# First rows: five by default, or as many as requested.
Car_Data.head()

Car_Data.head(n=7)

# Last rows, same convention.
Car_Data.tail()

Car_Data.tail(n=3)

# Single row whose index label is 3, returned as a Series.
Car_Data.loc[3]

iloc: selects rows by integer position (0-based), regardless of the index labels, whereas loc selects rows by index label.

# Position-based slice: rows at positions 0, 1 and 2 (end is exclusive).
Car_Data.iloc[:3]

# Label-based slice: every row up to AND including the label 3.
Car_Data.loc[:3]

# Two equivalent ways to pick one column: bracket and attribute access.
Car_Data["Make"]

Car_Data.Make

# Boolean filtering: keep the rows for which the mask is True.
Car_Data[Car_Data["Make"].eq("Toyota")]

Car_Data[Car_Data["Odometer (KM)"].ge(50000)]

Car_Data

# Frequency table of Make (rows) against Doors (columns).
pd.crosstab(index=Car_Data["Make"], columns=Car_Data["Doors"])

Car_Data["Odometer (KM)"].mean()

# Line plot and histogram of the odometer readings.
Car_Data["Odometer (KM)"].plot()

Car_Data["Odometer (KM)"].hist()

# "Price" is still text at this point (it is cleaned through the .str
# accessor below), and plotting a non-numeric Series raises
# "TypeError: no numeric data to plot".  It is therefore converted first and
# plotted only afterwards.
Car_Data

Car_Data["Price"]

# Remove every "$" and "," and convert the remainder to float.  The pattern is
# a raw string so that "\$" reaches the regex engine verbatim instead of being
# an invalid Python string escape (a SyntaxWarning on recent Python versions).
Car_Data["Price"] = (
    Car_Data["Price"].str.replace(r"[\$,]", "", regex=True).astype(float)
)

Car_Data

Car_Data["Price"].plot()

Car_Data["Price"].hist()

## Manipulating Data

# Preview only: the lower-cased copy is not stored anywhere.
Car_Data["Make"].str.lower()

# String methods return a new Series, so the result has to be assigned back
# to the column for the change to persist.
Car_Data["Make"] = Car_Data["Make"].str.upper()

Car_Data

Car_Data["Colour"] = Car_Data["Colour"].str.lower()
Car_Data

# Second data set; judging by the file name it contains missing values.
Car_Data_Missing = pd.read_csv(
    filepath_or_buffer="car-sales-missing-data.csv",
)
Car_Data_Missing

# mean() skips NaN entries by default.
Car_Data_Missing["Odometer"].mean()

Car_Data_Missing

# Remove every row that has at least one missing value, modifying the frame
# in place (no reassignment needed).
Car_Data_Missing.dropna(axis=0, how="any", inplace=True)
Car_Data_Missing

Car_Data

# Column from a Series: values are aligned on the index, so any row of the
# table without a matching label in the Series receives NaN.
series = pd.Series([5, 5, 4, 5, 6, 4, 5, 4])

series

Car_Data["Seats"] = series

Car_Data

# Fill the gaps with the column mean.  The result is assigned back instead of
# calling fillna(inplace=True) on Car_Data["Seats"]: that form operates on a
# selected column object, triggers a chained-assignment FutureWarning on
# pandas 2.x and leaves the DataFrame unchanged under Copy-on-Write.
Car_Data["Seats"] = Car_Data["Seats"].fillna(Car_Data["Seats"].mean())

Car_Data

# A plain list is assigned positionally and must contain exactly one value per
# row; when the lengths differ pandas raises ValueError.  This six-element
# list is kept as a demonstration of that failure, but the error is reported
# instead of aborting the rest of the script.
li = [4.2, 5.6, 4.9, 7.5, 6.4, 8.2]
try:
    Car_Data["Milage"] = li
except ValueError as err:
    print(f"Cannot assign a list of {len(li)} values as a column: {err}")
Car_Data

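A pandas Series does not need to have the same length as the DataFrame (rows without a matching index label are filled with NaN), but a Python list must have exactly as many elements as the DataFrame has rows.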

# Column from a plain list: one value per row, assigned positionally.
li = [4.2, 5.6, 4.9, 7.5, 6.4, 8.2, 7.5, 6.4, 5.0, 10.9]
Car_Data["Milage"] = li
Car_Data

# Column computed from two existing columns (element-wise division).
Car_Data["Fuel Used"] = Car_Data["Odometer (KM)"].div(Car_Data["Milage"])
Car_Data

# A scalar is broadcast to every row.
Car_Data["No. of wheels"] = 4
Car_Data

Car_Data["Pollution Reciept"] = True
Car_Data

# Throw-away column, added only to demonstrate removal below.
Car_Data["Sample"] = "Has to remove"
Car_Data

# Remove the column again, modifying the frame in place.
Car_Data.drop(columns="Sample", inplace=True)

Car_Data

# frac=1 samples 100% of the rows, i.e. returns them in random order while
# keeping each row's original index label.
Car_Data_Shuffled = Car_Data.sample(frac=1)
Car_Data_Shuffled

# Second data set, used to practise shuffling and index handling.
Insurance = pd.read_csv(
    filepath_or_buffer="Student insurance E-card details. (2).csv",
)
Insurance.head()

Insurance

# Shuffle all rows (frac=1 means 100% of them).
Insurance_Random = Insurance.sample(frac=1)

Insurance_Random

# Written without index=False, so the shuffled index labels are stored as the
# first column of the output file.
Insurance_Random.to_csv(path_or_buf="Insurance_Random")

# Random 3% sample of the original (unshuffled) table.
Insurance.sample(frac=0.03)

# reset_index() returns a new frame and is not stored here.  By default the
# old labels are kept in a new "index" column ...
Insurance_Random.reset_index()

# ... while drop=True discards them.
Insurance_Random.reset_index(drop=True)

Car_Data

# Divide every reading by 1.6 in one vectorised operation instead of calling a
# Python lambda once per row through apply(); the resulting values are the
# same.
# NOTE(review): 1.6 looks like a km-to-miles factor, yet the column keeps its
# "Odometer (KM)" label - confirm the intended unit.  Re-running this
# statement divides the values again.
Car_Data["Odometer (KM)"] = Car_Data["Odometer (KM)"] / 1.6
Car_Data