import pandas as pd
- DataFrame
- Series
arabalar = pd.Series(["BMW", "Toyota","Honda"])
arabalar
0 BMW
1 Toyota
2 Honda
dtype: object
renkler = pd.Series(["Kırmızı", "Mavi", "Sarı"])
renkler
0 Kırmızı
1 Mavi
2 Sarı
dtype: object
df = pd.DataFrame({"araba":arabalar, "renk": renkler})
df
araba | renk | |
---|---|---|
0 | BMW | Kırmızı |
1 | Toyota | Mavi |
2 | Honda | Sarı |
df["araba"]
0 BMW
1 Toyota
2 Honda
Name: araba, dtype: object
# Alternative (disabled): load the data from a remote CSV instead of the local Excel file.
# arac_satislar = pd.read_csv("https://raw.githubusercontent.com/mrdbourke/zero-to-mastery-ml/master/data/car-sales.csv")
arac_satislar = pd.read_excel("car-sales.xlsx")
arac_satislar
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | $4,000.00 |
1 | Honda | Red | 87899 | 4 | $5,000.00 |
2 | Toyota | Blue | 32549 | 3 | $7,000.00 |
3 | BMW | Black | 11179 | 5 | $22,000.00 |
4 | Nissan | White | 213095 | 4 | $3,500.00 |
5 | Toyota | Green | 99213 | 4 | $4,500.00 |
6 | Honda | Blue | 45698 | 4 | $7,500.00 |
7 | Honda | Blue | 54738 | 4 | $7,000.00 |
8 | Toyota | White | 60000 | 4 | $6,250.00 |
9 | Nissan | White | 31600 | 4 | $9,700.00 |
arac_satislar.describe()
Odometer (KM) | Doors | |
---|---|---|
count | 10.000000 | 10.000000 |
mean | 78601.400000 | 4.000000 |
std | 61983.471735 | 0.471405 |
min | 11179.000000 | 3.000000 |
25% | 35836.250000 | 4.000000 |
50% | 57369.000000 | 4.000000 |
75% | 96384.500000 | 4.000000 |
max | 213095.000000 | 5.000000 |
arac_satislar.columns
Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')
arac_satislar.head(2)
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | $4,000.00 |
1 | Honda | Red | 87899 | 4 | $5,000.00 |
arac_satislar.tail(2)
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
8 | Toyota | White | 60000 | 4 | $6,250.00 |
9 | Nissan | White | 31600 | 4 | $9,700.00 |
len(arac_satislar)
10
arac_satislar.shape
(10, 5)
arac_satislar.iloc[5:7]
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
5 | Toyota | Green | 99213 | 4 | $4,500.00 |
6 | Honda | Blue | 45698 | 4 | $7,500.00 |
arac_satislar[["Make", "Price"]]
Make | Price | |
---|---|---|
0 | Toyota | $4,000.00 |
1 | Honda | $5,000.00 |
2 | Toyota | $7,000.00 |
3 | BMW | $22,000.00 |
4 | Nissan | $3,500.00 |
5 | Toyota | $4,500.00 |
6 | Honda | $7,500.00 |
7 | Honda | $7,000.00 |
8 | Toyota | $6,250.00 |
9 | Nissan | $9,700.00 |
arac_satislar[["Make", "Price"]].iloc[3:6]
Make | Price | |
---|---|---|
3 | BMW | $22,000.00 |
4 | Nissan | $3,500.00 |
5 | Toyota | $4,500.00 |
arac_satislar
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | $4,000.00 |
1 | Honda | Red | 87899 | 4 | $5,000.00 |
2 | Toyota | Blue | 32549 | 3 | $7,000.00 |
3 | BMW | Black | 11179 | 5 | $22,000.00 |
4 | Nissan | White | 213095 | 4 | $3,500.00 |
5 | Toyota | Green | 99213 | 4 | $4,500.00 |
6 | Honda | Blue | 45698 | 4 | $7,500.00 |
7 | Honda | Blue | 54738 | 4 | $7,000.00 |
8 | Toyota | White | 60000 | 4 | $6,250.00 |
9 | Nissan | White | 31600 | 4 | $9,700.00 |
arac_satislar["Odometer (KM)"]>100000
0 True
1 False
2 False
3 False
4 True
5 False
6 False
7 False
8 False
9 False
Name: Odometer (KM), dtype: bool
arac_satislar[arac_satislar["Odometer (KM)"]>100000]
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | $4,000.00 |
4 | Nissan | White | 213095 | 4 | $3,500.00 |
arac_satislar[(arac_satislar["Odometer (KM)"]<100000) & (arac_satislar["Doors"]==4)]
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
1 | Honda | Red | 87899 | 4 | $5,000.00 |
5 | Toyota | Green | 99213 | 4 | $4,500.00 |
6 | Honda | Blue | 45698 | 4 | $7,500.00 |
7 | Honda | Blue | 54738 | 4 | $7,000.00 |
8 | Toyota | White | 60000 | 4 | $6,250.00 |
9 | Nissan | White | 31600 | 4 | $9,700.00 |
# Strip the currency symbol from the price text.
# regex=False makes "$" a literal character; when str.replace treats the
# pattern as a regex, "$" is the end-of-string anchor and nothing is removed.
arac_satislar["Price"] = arac_satislar["Price"].str.replace("$", "", regex=False)
arac_satislar
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 4,000.00 |
1 | Honda | Red | 87899 | 4 | 5,000.00 |
2 | Toyota | Blue | 32549 | 3 | 7,000.00 |
3 | BMW | Black | 11179 | 5 | 22,000.00 |
4 | Nissan | White | 213095 | 4 | 3,500.00 |
5 | Toyota | Green | 99213 | 4 | 4,500.00 |
6 | Honda | Blue | 45698 | 4 | 7,500.00 |
7 | Honda | Blue | 54738 | 4 | 7,000.00 |
8 | Toyota | White | 60000 | 4 | 6,250.00 |
9 | Nissan | White | 31600 | 4 | 9,700.00 |
# Drop the trailing ".00" and the thousands separators, then cast the text to int.
# regex=False keeps ".00" literal: as a regex, "." matches any character, so
# ",00" inside "4,000.00" would be removed too and the value would become "40".
arac_satislar["Price"] = (
    arac_satislar["Price"]
    .str.replace(".00", "", regex=False)
    .str.replace(",", "", regex=False)
    .astype(int)
)
arac_satislar.describe()
Odometer (KM) | Doors | Price | |
---|---|---|---|
count | 10.000000 | 10.000000 | 10.000000 |
mean | 78601.400000 | 4.000000 | 7645.000000 |
std | 61983.471735 | 0.471405 | 5379.407753 |
min | 11179.000000 | 3.000000 | 3500.000000 |
25% | 35836.250000 | 4.000000 | 4625.000000 |
50% | 57369.000000 | 4.000000 | 6625.000000 |
75% | 96384.500000 | 4.000000 | 7375.000000 |
max | 213095.000000 | 5.000000 | 22000.000000 |
arac_satislar
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 4000 |
1 | Honda | Red | 87899 | 4 | 5000 |
2 | Toyota | Blue | 32549 | 3 | 7000 |
3 | BMW | Black | 11179 | 5 | 22000 |
4 | Nissan | White | 213095 | 4 | 3500 |
5 | Toyota | Green | 99213 | 4 | 4500 |
6 | Honda | Blue | 45698 | 4 | 7500 |
7 | Honda | Blue | 54738 | 4 | 7000 |
8 | Toyota | White | 60000 | 4 | 6250 |
9 | Nissan | White | 31600 | 4 | 9700 |
pd.get_dummies(arac_satislar["Make"])
BMW | Honda | Nissan | Toyota | |
---|---|---|---|---|
0 | False | False | False | True |
1 | False | True | False | False |
2 | False | False | False | True |
3 | True | False | False | False |
4 | False | False | True | False |
5 | False | False | False | True |
6 | False | True | False | False |
7 | False | True | False | False |
8 | False | False | False | True |
9 | False | False | True | False |
df_one_hot_enc=pd.get_dummies(arac_satislar, columns=["Make","Colour"])
df_one_hot_enc
Odometer (KM) | Doors | Price | Make_BMW | Make_Honda | Make_Nissan | Make_Toyota | Colour_Black | Colour_Blue | Colour_Green | Colour_Red | Colour_White | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 150043 | 4 | 4000 | False | False | False | True | False | False | False | False | True |
1 | 87899 | 4 | 5000 | False | True | False | False | False | False | False | True | False |
2 | 32549 | 3 | 7000 | False | False | False | True | False | True | False | False | False |
3 | 11179 | 5 | 22000 | True | False | False | False | True | False | False | False | False |
4 | 213095 | 4 | 3500 | False | False | True | False | False | False | False | False | True |
5 | 99213 | 4 | 4500 | False | False | False | True | False | False | True | False | False |
6 | 45698 | 4 | 7500 | False | True | False | False | False | True | False | False | False |
7 | 54738 | 4 | 7000 | False | True | False | False | False | True | False | False | False |
8 | 60000 | 4 | 6250 | False | False | False | True | False | False | False | False | True |
9 | 31600 | 4 | 9700 | False | False | True | False | False | False | False | False | True |
df_one_hot_enc = df_one_hot_enc.astype(int)
df_one_hot_enc
Odometer (KM) | Doors | Price | Make_BMW | Make_Honda | Make_Nissan | Make_Toyota | Colour_Black | Colour_Blue | Colour_Green | Colour_Red | Colour_White | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 150043 | 4 | 4000 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
1 | 87899 | 4 | 5000 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
2 | 32549 | 3 | 7000 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
3 | 11179 | 5 | 22000 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
4 | 213095 | 4 | 3500 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
5 | 99213 | 4 | 4500 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
6 | 45698 | 4 | 7500 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
7 | 54738 | 4 | 7000 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
8 | 60000 | 4 | 6250 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
9 | 31600 | 4 | 9700 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
arac_satislar
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 4000 |
1 | Honda | Red | 87899 | 4 | 5000 |
2 | Toyota | Blue | 32549 | 3 | 7000 |
3 | BMW | Black | 11179 | 5 | 22000 |
4 | Nissan | White | 213095 | 4 | 3500 |
5 | Toyota | Green | 99213 | 4 | 4500 |
6 | Honda | Blue | 45698 | 4 | 7500 |
7 | Honda | Blue | 54738 | 4 | 7000 |
8 | Toyota | White | 60000 | 4 | 6250 |
9 | Nissan | White | 31600 | 4 | 9700 |
# Work on an independent copy: plain assignment would only alias
# arac_satislar, so the edits made through df would silently change it too.
df = arac_satislar.copy()
# Overwrite a single cell, addressed by row label and column name.
df.at[3, "Make"] = "bmw"
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 4000 |
1 | Honda | Red | 87899 | 4 | 5000 |
2 | Toyota | Blue | 32549 | 3 | 7000 |
3 | bmw | Black | 11179 | 5 | 22000 |
4 | Nissan | White | 213095 | 4 | 3500 |
5 | Toyota | Green | 99213 | 4 | 4500 |
6 | Honda | Blue | 45698 | 4 | 7500 |
7 | Honda | Blue | 54738 | 4 | 7000 |
8 | Toyota | White | 60000 | 4 | 6250 |
9 | Nissan | White | 31600 | 4 | 9700 |
df["Wheels"]=4
df
Make | Colour | Odometer (KM) | Doors | Price | Wheels | |
---|---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 4000 | 4 |
1 | Honda | Red | 87899 | 4 | 5000 | 4 |
2 | Toyota | Blue | 32549 | 3 | 7000 | 4 |
3 | bmw | Black | 11179 | 5 | 22000 | 4 |
4 | Nissan | White | 213095 | 4 | 3500 | 4 |
5 | Toyota | Green | 99213 | 4 | 4500 | 4 |
6 | Honda | Blue | 45698 | 4 | 7500 | 4 |
7 | Honda | Blue | 54738 | 4 | 7000 | 4 |
8 | Toyota | White | 60000 | 4 | 6250 | 4 |
9 | Nissan | White | 31600 | 4 | 9700 | 4 |
df=df.drop("Wheels", axis=1)
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 4000 |
1 | Honda | Red | 87899 | 4 | 5000 |
2 | Toyota | Blue | 32549 | 3 | 7000 |
3 | bmw | Black | 11179 | 5 | 22000 |
4 | Nissan | White | 213095 | 4 | 3500 |
5 | Toyota | Green | 99213 | 4 | 4500 |
6 | Honda | Blue | 45698 | 4 | 7500 |
7 | Honda | Blue | 54738 | 4 | 7000 |
8 | Toyota | White | 60000 | 4 | 6250 |
9 | Nissan | White | 31600 | 4 | 9700 |
df=df.drop(3, axis=0)
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 4000 |
1 | Honda | Red | 87899 | 4 | 5000 |
2 | Toyota | Blue | 32549 | 3 | 7000 |
4 | Nissan | White | 213095 | 4 | 3500 |
5 | Toyota | Green | 99213 | 4 | 4500 |
6 | Honda | Blue | 45698 | 4 | 7500 |
7 | Honda | Blue | 54738 | 4 | 7000 |
8 | Toyota | White | 60000 | 4 | 6250 |
9 | Nissan | White | 31600 | 4 | 9700 |
# Multiply every price by 40 with one vectorised operation instead of
# calling a Python lambda once per row.
df["Price"] = df["Price"] * 40
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 160000 |
1 | Honda | Red | 87899 | 4 | 200000 |
2 | Toyota | Blue | 32549 | 3 | 280000 |
4 | Nissan | White | 213095 | 4 | 140000 |
5 | Toyota | Green | 99213 | 4 | 180000 |
6 | Honda | Blue | 45698 | 4 | 300000 |
7 | Honda | Blue | 54738 | 4 | 280000 |
8 | Toyota | White | 60000 | 4 | 250000 |
9 | Nissan | White | 31600 | 4 | 388000 |
# Walk the frame row by row and print each row's index label, make and price.
for index, satir in df.iterrows():
    marka = satir["Make"]
    fiyat = satir["Price"]
    print(index, f"marka: {marka}, fiyat: {fiyat}")
0 marka: Toyota, fiyat: 160000
1 marka: Honda, fiyat: 200000
2 marka: Toyota, fiyat: 280000
4 marka: Nissan, fiyat: 140000
5 marka: Toyota, fiyat: 180000
6 marka: Honda, fiyat: 300000
7 marka: Honda, fiyat: 280000
8 marka: Toyota, fiyat: 250000
9 marka: Nissan, fiyat: 388000
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 160000 |
1 | Honda | Red | 87899 | 4 | 200000 |
2 | Toyota | Blue | 32549 | 3 | 280000 |
4 | Nissan | White | 213095 | 4 | 140000 |
5 | Toyota | Green | 99213 | 4 | 180000 |
6 | Honda | Blue | 45698 | 4 | 300000 |
7 | Honda | Blue | 54738 | 4 | 280000 |
8 | Toyota | White | 60000 | 4 | 250000 |
9 | Nissan | White | 31600 | 4 | 388000 |
import numpy as np
# Price holds integers at this point; cast it to float explicitly so the
# column can store NaN without relying on pandas' deprecated silent upcast.
df["Price"] = df["Price"].astype(float)
# Mark the price of the row labelled 5 as missing.
df.at[5, "Price"] = np.nan
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 160000.0 |
1 | Honda | Red | 87899 | 4 | 200000.0 |
2 | Toyota | Blue | 32549 | 3 | 280000.0 |
4 | Nissan | White | 213095 | 4 | 140000.0 |
5 | Toyota | Green | 99213 | 4 | NaN |
6 | Honda | Blue | 45698 | 4 | 300000.0 |
7 | Honda | Blue | 54738 | 4 | 280000.0 |
8 | Toyota | White | 60000 | 4 | 250000.0 |
9 | Nissan | White | 31600 | 4 | 388000.0 |
df=df.dropna()
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 160000.0 |
1 | Honda | Red | 87899 | 4 | 200000.0 |
2 | Toyota | Blue | 32549 | 3 | 280000.0 |
4 | Nissan | White | 213095 | 4 | 140000.0 |
6 | Honda | Blue | 45698 | 4 | 300000.0 |
7 | Honda | Blue | 54738 | 4 | 280000.0 |
8 | Toyota | White | 60000 | 4 | 250000.0 |
9 | Nissan | White | 31600 | 4 | 388000.0 |
df.at[6,"Price"]=np.nan
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 160000.0 |
1 | Honda | Red | 87899 | 4 | 200000.0 |
2 | Toyota | Blue | 32549 | 3 | 280000.0 |
4 | Nissan | White | 213095 | 4 | 140000.0 |
6 | Honda | Blue | 45698 | 4 | NaN |
7 | Honda | Blue | 54738 | 4 | 280000.0 |
8 | Toyota | White | 60000 | 4 | 250000.0 |
9 | Nissan | White | 31600 | 4 | 388000.0 |
df["Price"] = df["Price"].fillna(df["Price"].mean())
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
0 | Toyota | White | 150043 | 4 | 160000.000000 |
1 | Honda | Red | 87899 | 4 | 200000.000000 |
2 | Toyota | Blue | 32549 | 3 | 280000.000000 |
4 | Nissan | White | 213095 | 4 | 140000.000000 |
6 | Honda | Blue | 45698 | 4 | 242571.428571 |
7 | Honda | Blue | 54738 | 4 | 280000.000000 |
8 | Toyota | White | 60000 | 4 | 250000.000000 |
9 | Nissan | White | 31600 | 4 | 388000.000000 |
df.sort_values(by=["Price"])
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
4 | Nissan | White | 213095 | 4 | 140000.000000 |
0 | Toyota | White | 150043 | 4 | 160000.000000 |
1 | Honda | Red | 87899 | 4 | 200000.000000 |
6 | Honda | Blue | 45698 | 4 | 242571.428571 |
8 | Toyota | White | 60000 | 4 | 250000.000000 |
2 | Toyota | Blue | 32549 | 3 | 280000.000000 |
7 | Honda | Blue | 54738 | 4 | 280000.000000 |
9 | Nissan | White | 31600 | 4 | 388000.000000 |
df=df.sort_values(by=["Doors", "Price"], ascending=[False, True])
df
Make | Colour | Odometer (KM) | Doors | Price | |
---|---|---|---|---|---|
4 | Nissan | White | 213095 | 4 | 140000.000000 |
0 | Toyota | White | 150043 | 4 | 160000.000000 |
1 | Honda | Red | 87899 | 4 | 200000.000000 |
6 | Honda | Blue | 45698 | 4 | 242571.428571 |
8 | Toyota | White | 60000 | 4 | 250000.000000 |
7 | Honda | Blue | 54738 | 4 | 280000.000000 |
9 | Nissan | White | 31600 | 4 | 388000.000000 |
2 | Toyota | Blue | 32549 | 3 | 280000.000000 |
fiyatlar=[100,200,300]
urunler=["1. urun", "2. urun", "3. urun"]
df=pd.DataFrame([fiyatlar,urunler])
df
0 | 1 | 2 | |
---|---|---|---|
0 | 100 | 200 | 300 |
1 | 1. urun | 2. urun | 3. urun |
liste=[["1. urun", 100], ["2. urun", 200]]
df=pd.DataFrame(liste, columns=["Ürün Adı", "Fiyat"])
df
Ürün Adı | Fiyat | |
---|---|---|
0 | 1. urun | 100 |
1 | 2. urun | 200 |
sozluk= {"urun adı":["urun1","urun2"], "fiyat":[100,200]}
df=pd.DataFrame(sozluk)
df
urun adı | fiyat | |
---|---|---|
0 | urun1 | 100 |
1 | urun2 | 200 |
# Save the frame to an Excel workbook at the relative path "yeni.xlsx".
# NOTE(review): to_excel also writes the row index as a leading column unless
# index=False is passed — confirm that is wanted.
df.to_excel("yeni.xlsx")