import pandas as pd
# Core pandas data structures:
# - DataFrame
# - Series
# One-dimensional labelled array holding three car makes.
arabalar = pd.Series(data=["BMW", "Toyota", "Honda"])
arabalar
0 BMW
1 Toyota
2 Honda
dtype: object
# One-dimensional labelled array holding three colour names.
renkler = pd.Series(data=["Kırmızı", "Mavi", "Sarı"])
renkler
0 Kırmızı
1 Mavi
2 Sarı
dtype: object
# Combine the two Series into a table; each keyword becomes a column label.
df = pd.DataFrame(dict(araba=arabalar, renk=renkler))
df
| araba | renk | |
|---|---|---|
| 0 | BMW | Kırmızı |
| 1 | Toyota | Mavi |
| 2 | Honda | Sarı |
# Pull out the "araba" column as a Series (all rows, one column label).
df.loc[:, "araba"]
0 BMW
1 Toyota
2 Honda
Name: araba, dtype: object
# Load the car-sales table from the local Excel workbook.
# Alternative source left by the author (CSV over HTTP):
# pd.read_csv("https://raw.githubusercontent.com/mrdbourke/zero-to-mastery-ml/master/data/car-sales.csv")
arac_satislar = pd.read_excel(io="car-sales.xlsx")
arac_satislar
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | $4,000.00 |
| 1 | Honda | Red | 87899 | 4 | $5,000.00 |
| 2 | Toyota | Blue | 32549 | 3 | $7,000.00 |
| 3 | BMW | Black | 11179 | 5 | $22,000.00 |
| 4 | Nissan | White | 213095 | 4 | $3,500.00 |
| 5 | Toyota | Green | 99213 | 4 | $4,500.00 |
| 6 | Honda | Blue | 45698 | 4 | $7,500.00 |
| 7 | Honda | Blue | 54738 | 4 | $7,000.00 |
| 8 | Toyota | White | 60000 | 4 | $6,250.00 |
| 9 | Nissan | White | 31600 | 4 | $9,700.00 |
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
arac_satislar.describe()
| Odometer (KM) | Doors | |
|---|---|---|
| count | 10.000000 | 10.000000 |
| mean | 78601.400000 | 4.000000 |
| std | 61983.471735 | 0.471405 |
| min | 11179.000000 | 3.000000 |
| 25% | 35836.250000 | 4.000000 |
| 50% | 57369.000000 | 4.000000 |
| 75% | 96384.500000 | 4.000000 |
| max | 213095.000000 | 5.000000 |
# Column labels of the frame.
arac_satislar.columns
Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')
# First two rows of the table, selected by position.
arac_satislar.iloc[:2]
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | $4,000.00 |
| 1 | Honda | Red | 87899 | 4 | $5,000.00 |
# Last two rows of the table, selected by position.
arac_satislar.iloc[-2:]
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 8 | Toyota | White | 60000 | 4 | $6,250.00 |
| 9 | Nissan | White | 31600 | 4 | $9,700.00 |
# Number of rows in the table.
arac_satislar.shape[0]
10
# (number of rows, number of columns) as a tuple.
arac_satislar.shape
(10, 5)
# Positional slice: rows at positions 5 and 6 (the end position 7 is excluded).
arac_satislar.iloc[5:7]
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 5 | Toyota | Green | 99213 | 4 | $4,500.00 |
| 6 | Honda | Blue | 45698 | 4 | $7,500.00 |
# Keep every row but only the Make and Price columns.
arac_satislar.loc[:, ["Make", "Price"]]
| Make | Price | |
|---|---|---|
| 0 | Toyota | $4,000.00 |
| 1 | Honda | $5,000.00 |
| 2 | Toyota | $7,000.00 |
| 3 | BMW | $22,000.00 |
| 4 | Nissan | $3,500.00 |
| 5 | Toyota | $4,500.00 |
| 6 | Honda | $7,500.00 |
| 7 | Honda | $7,000.00 |
| 8 | Toyota | $6,250.00 |
| 9 | Nissan | $9,700.00 |
# Rows at positions 3..5 first, then narrow to the Make and Price columns.
arac_satislar.iloc[3:6][["Make", "Price"]]
| Make | Price | |
|---|---|---|
| 3 | BMW | $22,000.00 |
| 4 | Nissan | $3,500.00 |
| 5 | Toyota | $4,500.00 |
# Display the whole table again.
arac_satislar
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | $4,000.00 |
| 1 | Honda | Red | 87899 | 4 | $5,000.00 |
| 2 | Toyota | Blue | 32549 | 3 | $7,000.00 |
| 3 | BMW | Black | 11179 | 5 | $22,000.00 |
| 4 | Nissan | White | 213095 | 4 | $3,500.00 |
| 5 | Toyota | Green | 99213 | 4 | $4,500.00 |
| 6 | Honda | Blue | 45698 | 4 | $7,500.00 |
| 7 | Honda | Blue | 54738 | 4 | $7,000.00 |
| 8 | Toyota | White | 60000 | 4 | $6,250.00 |
| 9 | Nissan | White | 31600 | 4 | $9,700.00 |
# Boolean Series: True where the odometer reading is above 100000.
arac_satislar["Odometer (KM)"].gt(100000)
0 True
1 False
2 False
3 False
4 True
5 False
6 False
7 False
8 False
9 False
Name: Odometer (KM), dtype: bool
# Keep only the rows whose odometer reading is above 100000.
arac_satislar.loc[arac_satislar["Odometer (KM)"].gt(100000)]
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | $4,000.00 |
| 4 | Nissan | White | 213095 | 4 | $3,500.00 |
# Rows that satisfy both conditions: odometer below 100000 and exactly 4 doors.
arac_satislar.loc[
    arac_satislar["Odometer (KM)"].lt(100000) & arac_satislar["Doors"].eq(4)
]
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 1 | Honda | Red | 87899 | 4 | $5,000.00 |
| 5 | Toyota | Green | 99213 | 4 | $4,500.00 |
| 6 | Honda | Blue | 45698 | 4 | $7,500.00 |
| 7 | Honda | Blue | 54738 | 4 | $7,000.00 |
| 8 | Toyota | White | 60000 | 4 | $6,250.00 |
| 9 | Nissan | White | 31600 | 4 | $9,700.00 |
# Remove the currency symbol from the Price strings.
# "$" is a regex metacharacter and the default of `regex` differs between
# pandas versions, so request a literal replacement explicitly.
arac_satislar["Price"] = arac_satislar["Price"].str.replace("$", "", regex=False)
arac_satislar
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 4,000.00 |
| 1 | Honda | Red | 87899 | 4 | 5,000.00 |
| 2 | Toyota | Blue | 32549 | 3 | 7,000.00 |
| 3 | BMW | Black | 11179 | 5 | 22,000.00 |
| 4 | Nissan | White | 213095 | 4 | 3,500.00 |
| 5 | Toyota | Green | 99213 | 4 | 4,500.00 |
| 6 | Honda | Blue | 45698 | 4 | 7,500.00 |
| 7 | Honda | Blue | 54738 | 4 | 7,000.00 |
| 8 | Toyota | White | 60000 | 4 | 6,250.00 |
| 9 | Nissan | White | 31600 | 4 | 9,700.00 |
# Turn the Price strings (e.g. "4,000.00") into integers.
# Both replacements are literal: as a regex, ".00" would match ANY character
# followed by "00" (e.g. the ",00" inside "4,000.00"), corrupting the value.
arac_satislar["Price"] = (
    arac_satislar["Price"]
    .str.replace(".00", "", regex=False)  # drop the trailing cents
    .str.replace(",", "", regex=False)    # drop the thousands separator
)
arac_satislar["Price"] = arac_satislar["Price"].astype(int)
# Price is numeric now, so it appears in the summary statistics as well.
arac_satislar.describe()
| Odometer (KM) | Doors | Price | |
|---|---|---|---|
| count | 10.000000 | 10.000000 | 10.000000 |
| mean | 78601.400000 | 4.000000 | 7645.000000 |
| std | 61983.471735 | 0.471405 | 5379.407753 |
| min | 11179.000000 | 3.000000 | 3500.000000 |
| 25% | 35836.250000 | 4.000000 | 4625.000000 |
| 50% | 57369.000000 | 4.000000 | 6625.000000 |
| 75% | 96384.500000 | 4.000000 | 7375.000000 |
| max | 213095.000000 | 5.000000 | 22000.000000 |
# Display the table again; Price now holds integers.
arac_satislar
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 4000 |
| 1 | Honda | Red | 87899 | 4 | 5000 |
| 2 | Toyota | Blue | 32549 | 3 | 7000 |
| 3 | BMW | Black | 11179 | 5 | 22000 |
| 4 | Nissan | White | 213095 | 4 | 3500 |
| 5 | Toyota | Green | 99213 | 4 | 4500 |
| 6 | Honda | Blue | 45698 | 4 | 7500 |
| 7 | Honda | Blue | 54738 | 4 | 7000 |
| 8 | Toyota | White | 60000 | 4 | 6250 |
| 9 | Nissan | White | 31600 | 4 | 9700 |
# One-hot encode the Make column: one indicator column per distinct make.
pd.get_dummies(arac_satislar["Make"])
| BMW | Honda | Nissan | Toyota | |
|---|---|---|---|---|
| 0 | False | False | False | True |
| 1 | False | True | False | False |
| 2 | False | False | False | True |
| 3 | True | False | False | False |
| 4 | False | False | True | False |
| 5 | False | False | False | True |
| 6 | False | True | False | False |
| 7 | False | True | False | False |
| 8 | False | False | False | True |
| 9 | False | False | True | False |
# One-hot encode Make and Colour inside the full frame; the remaining
# columns are carried over and the new columns are prefixed with the
# source column name (Make_..., Colour_...).
df_one_hot_enc = pd.get_dummies(data=arac_satislar, columns=["Make", "Colour"])
df_one_hot_enc
| Odometer (KM) | Doors | Price | Make_BMW | Make_Honda | Make_Nissan | Make_Toyota | Colour_Black | Colour_Blue | Colour_Green | Colour_Red | Colour_White | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 150043 | 4 | 4000 | False | False | False | True | False | False | False | False | True |
| 1 | 87899 | 4 | 5000 | False | True | False | False | False | False | False | True | False |
| 2 | 32549 | 3 | 7000 | False | False | False | True | False | True | False | False | False |
| 3 | 11179 | 5 | 22000 | True | False | False | False | True | False | False | False | False |
| 4 | 213095 | 4 | 3500 | False | False | True | False | False | False | False | False | True |
| 5 | 99213 | 4 | 4500 | False | False | False | True | False | False | True | False | False |
| 6 | 45698 | 4 | 7500 | False | True | False | False | False | True | False | False | False |
| 7 | 54738 | 4 | 7000 | False | True | False | False | False | True | False | False | False |
| 8 | 60000 | 4 | 6250 | False | False | False | True | False | False | False | False | True |
| 9 | 31600 | 4 | 9700 | False | False | True | False | False | False | False | False | True |
# Cast every column to int so the indicator columns are shown as 0/1.
df_one_hot_enc = df_one_hot_enc.astype(int)
df_one_hot_enc
| Odometer (KM) | Doors | Price | Make_BMW | Make_Honda | Make_Nissan | Make_Toyota | Colour_Black | Colour_Blue | Colour_Green | Colour_Red | Colour_White | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 150043 | 4 | 4000 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 1 | 87899 | 4 | 5000 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 32549 | 3 | 7000 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
| 3 | 11179 | 5 | 22000 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 4 | 213095 | 4 | 3500 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 5 | 99213 | 4 | 4500 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
| 6 | 45698 | 4 | 7500 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 7 | 54738 | 4 | 7000 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 8 | 60000 | 4 | 6250 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 9 | 31600 | 4 | 9700 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
# Display the source table; get_dummies returned new frames and did not alter it.
arac_satislar
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 4000 |
| 1 | Honda | Red | 87899 | 4 | 5000 |
| 2 | Toyota | Blue | 32549 | 3 | 7000 |
| 3 | BMW | Black | 11179 | 5 | 22000 |
| 4 | Nissan | White | 213095 | 4 | 3500 |
| 5 | Toyota | Green | 99213 | 4 | 4500 |
| 6 | Honda | Blue | 45698 | 4 | 7500 |
| 7 | Honda | Blue | 54738 | 4 | 7000 |
| 8 | Toyota | White | 60000 | 4 | 6250 |
| 9 | Nissan | White | 31600 | 4 | 9700 |
# Take an independent copy: a plain `df = arac_satislar` only binds a second
# name to the same object, so the cell assignment would also modify
# arac_satislar.
df = arac_satislar.copy()
# Overwrite a single cell, addressed by row label 3 and column label "Make".
df.at[3, "Make"] = "bmw"
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 4000 |
| 1 | Honda | Red | 87899 | 4 | 5000 |
| 2 | Toyota | Blue | 32549 | 3 | 7000 |
| 3 | bmw | Black | 11179 | 5 | 22000 |
| 4 | Nissan | White | 213095 | 4 | 3500 |
| 5 | Toyota | Green | 99213 | 4 | 4500 |
| 6 | Honda | Blue | 45698 | 4 | 7500 |
| 7 | Honda | Blue | 54738 | 4 | 7000 |
| 8 | Toyota | White | 60000 | 4 | 6250 |
| 9 | Nissan | White | 31600 | 4 | 9700 |
# Add a new column; the scalar 4 is broadcast to every row.
df["Wheels"]=4
df
| Make | Colour | Odometer (KM) | Doors | Price | Wheels | |
|---|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 4000 | 4 |
| 1 | Honda | Red | 87899 | 4 | 5000 | 4 |
| 2 | Toyota | Blue | 32549 | 3 | 7000 | 4 |
| 3 | bmw | Black | 11179 | 5 | 22000 | 4 |
| 4 | Nissan | White | 213095 | 4 | 3500 | 4 |
| 5 | Toyota | Green | 99213 | 4 | 4500 | 4 |
| 6 | Honda | Blue | 45698 | 4 | 7500 | 4 |
| 7 | Honda | Blue | 54738 | 4 | 7000 | 4 |
| 8 | Toyota | White | 60000 | 4 | 6250 | 4 |
| 9 | Nissan | White | 31600 | 4 | 9700 | 4 |
# Remove the Wheels column; drop() returns a new frame, so rebind df.
df = df.drop(columns="Wheels")
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 4000 |
| 1 | Honda | Red | 87899 | 4 | 5000 |
| 2 | Toyota | Blue | 32549 | 3 | 7000 |
| 3 | bmw | Black | 11179 | 5 | 22000 |
| 4 | Nissan | White | 213095 | 4 | 3500 |
| 5 | Toyota | Green | 99213 | 4 | 4500 |
| 6 | Honda | Blue | 45698 | 4 | 7500 |
| 7 | Honda | Blue | 54738 | 4 | 7000 |
| 8 | Toyota | White | 60000 | 4 | 6250 |
| 9 | Nissan | White | 31600 | 4 | 9700 |
# Remove the row whose index label is 3; the remaining labels are not renumbered.
df = df.drop(index=3)
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 4000 |
| 1 | Honda | Red | 87899 | 4 | 5000 |
| 2 | Toyota | Blue | 32549 | 3 | 7000 |
| 4 | Nissan | White | 213095 | 4 | 3500 |
| 5 | Toyota | Green | 99213 | 4 | 4500 |
| 6 | Honda | Blue | 45698 | 4 | 7500 |
| 7 | Honda | Blue | 54738 | 4 | 7000 |
| 8 | Toyota | White | 60000 | 4 | 6250 |
| 9 | Nissan | White | 31600 | 4 | 9700 |
# Multiply every price by 40. Vectorized arithmetic on the column gives the
# same values as apply(lambda x: x*40) without a Python call per element.
df["Price"] = df["Price"] * 40
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 160000 |
| 1 | Honda | Red | 87899 | 4 | 200000 |
| 2 | Toyota | Blue | 32549 | 3 | 280000 |
| 4 | Nissan | White | 213095 | 4 | 140000 |
| 5 | Toyota | Green | 99213 | 4 | 180000 |
| 6 | Honda | Blue | 45698 | 4 | 300000 |
| 7 | Honda | Blue | 54738 | 4 | 280000 |
| 8 | Toyota | White | 60000 | 4 | 250000 |
| 9 | Nissan | White | 31600 | 4 | 388000 |
# Walk the frame row by row and print each car's make and price.
# iterrows() yields (index label, row Series) pairs. The loop body has to be
# indented; without indentation the statement is a syntax error.
for index, satir in df.iterrows():
    marka = satir["Make"]
    fiyat = satir["Price"]
    print(index, f"marka: {marka}, fiyat: {fiyat}")
0 marka: Toyota, fiyat: 160000
1 marka: Honda, fiyat: 200000
2 marka: Toyota, fiyat: 280000
4 marka: Nissan, fiyat: 140000
5 marka: Toyota, fiyat: 180000
6 marka: Honda, fiyat: 300000
7 marka: Honda, fiyat: 280000
8 marka: Toyota, fiyat: 250000
9 marka: Nissan, fiyat: 388000
# Display df again.
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 160000 |
| 1 | Honda | Red | 87899 | 4 | 200000 |
| 2 | Toyota | Blue | 32549 | 3 | 280000 |
| 4 | Nissan | White | 213095 | 4 | 140000 |
| 5 | Toyota | Green | 99213 | 4 | 180000 |
| 6 | Honda | Blue | 45698 | 4 | 300000 |
| 7 | Honda | Blue | 54738 | 4 | 280000 |
| 8 | Toyota | White | 60000 | 4 | 250000 |
| 9 | Nissan | White | 31600 | 4 | 388000 |
import numpy as np

# Price is an integer column here and NaN is a float. Convert the column to
# float explicitly instead of relying on implicit upcasting during item
# assignment, which recent pandas versions deprecate.
df["Price"] = df["Price"].astype(float)
# Mark the price in the row labelled 5 as missing.
df.at[5, "Price"] = np.nan
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 160000.0 |
| 1 | Honda | Red | 87899 | 4 | 200000.0 |
| 2 | Toyota | Blue | 32549 | 3 | 280000.0 |
| 4 | Nissan | White | 213095 | 4 | 140000.0 |
| 5 | Toyota | Green | 99213 | 4 | NaN |
| 6 | Honda | Blue | 45698 | 4 | 300000.0 |
| 7 | Honda | Blue | 54738 | 4 | 280000.0 |
| 8 | Toyota | White | 60000 | 4 | 250000.0 |
| 9 | Nissan | White | 31600 | 4 | 388000.0 |
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how="any")
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 160000.0 |
| 1 | Honda | Red | 87899 | 4 | 200000.0 |
| 2 | Toyota | Blue | 32549 | 3 | 280000.0 |
| 4 | Nissan | White | 213095 | 4 | 140000.0 |
| 6 | Honda | Blue | 45698 | 4 | 300000.0 |
| 7 | Honda | Blue | 54738 | 4 | 280000.0 |
| 8 | Toyota | White | 60000 | 4 | 250000.0 |
| 9 | Nissan | White | 31600 | 4 | 388000.0 |
# Mark the price in the row labelled 6 as missing.
df.loc[6, "Price"] = np.nan
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 160000.0 |
| 1 | Honda | Red | 87899 | 4 | 200000.0 |
| 2 | Toyota | Blue | 32549 | 3 | 280000.0 |
| 4 | Nissan | White | 213095 | 4 | 140000.0 |
| 6 | Honda | Blue | 45698 | 4 | NaN |
| 7 | Honda | Blue | 54738 | 4 | 280000.0 |
| 8 | Toyota | White | 60000 | 4 | 250000.0 |
| 9 | Nissan | White | 31600 | 4 | 388000.0 |
# Replace missing prices with the mean of the prices that are present
# (mean() skips NaN by default); existing values are kept as they are.
df["Price"] = df["Price"].where(df["Price"].notna(), df["Price"].mean())
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 0 | Toyota | White | 150043 | 4 | 160000.000000 |
| 1 | Honda | Red | 87899 | 4 | 200000.000000 |
| 2 | Toyota | Blue | 32549 | 3 | 280000.000000 |
| 4 | Nissan | White | 213095 | 4 | 140000.000000 |
| 6 | Honda | Blue | 45698 | 4 | 242571.428571 |
| 7 | Honda | Blue | 54738 | 4 | 280000.000000 |
| 8 | Toyota | White | 60000 | 4 | 250000.000000 |
| 9 | Nissan | White | 31600 | 4 | 388000.000000 |
# Rows ordered by ascending price; returns a new frame and leaves df untouched.
df.sort_values("Price")
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 4 | Nissan | White | 213095 | 4 | 140000.000000 |
| 0 | Toyota | White | 150043 | 4 | 160000.000000 |
| 1 | Honda | Red | 87899 | 4 | 200000.000000 |
| 6 | Honda | Blue | 45698 | 4 | 242571.428571 |
| 8 | Toyota | White | 60000 | 4 | 250000.000000 |
| 2 | Toyota | Blue | 32549 | 3 | 280000.000000 |
| 7 | Honda | Blue | 54738 | 4 | 280000.000000 |
| 9 | Nissan | White | 31600 | 4 | 388000.000000 |
# Two-level sort: most doors first, then cheapest first within equal door counts.
df = df.sort_values(
    by=["Doors", "Price"],
    ascending=[False, True],
)
df
| Make | Colour | Odometer (KM) | Doors | Price | |
|---|---|---|---|---|---|
| 4 | Nissan | White | 213095 | 4 | 140000.000000 |
| 0 | Toyota | White | 150043 | 4 | 160000.000000 |
| 1 | Honda | Red | 87899 | 4 | 200000.000000 |
| 6 | Honda | Blue | 45698 | 4 | 242571.428571 |
| 8 | Toyota | White | 60000 | 4 | 250000.000000 |
| 7 | Honda | Blue | 54738 | 4 | 280000.000000 |
| 9 | Nissan | White | 31600 | 4 | 388000.000000 |
| 2 | Toyota | Blue | 32549 | 3 | 280000.000000 |
# A list of lists is read row-wise: each inner list becomes one row, so the
# result has 2 rows and 3 columns with default integer labels.
fiyatlar = [100, 200, 300]
urunler = ["1. urun", "2. urun", "3. urun"]
df = pd.DataFrame(data=[fiyatlar, urunler])
df
| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | 100 | 200 | 300 |
| 1 | 1. urun | 2. urun | 3. urun |
# Each inner list is one record (product name, price); column labels are
# supplied separately.
liste = [
    ["1. urun", 100],
    ["2. urun", 200],
]
df = pd.DataFrame(data=liste, columns=["Ürün Adı", "Fiyat"])
df
| Ürün Adı | Fiyat | |
|---|---|---|
| 0 | 1. urun | 100 |
| 1 | 2. urun | 200 |
# Dictionary keys become column labels and each list supplies that column's values.
sozluk = {
    "urun adı": ["urun1", "urun2"],
    "fiyat": [100, 200],
}
df = pd.DataFrame.from_dict(sozluk)
df
| urun adı | fiyat | |
|---|---|---|
| 0 | urun1 | 100 |
| 1 | urun2 | 200 |
# Write the frame to the Excel workbook "yeni.xlsx" (relative path).
df.to_excel("yeni.xlsx")