Aug 15, 2021

[Python] statistics with Pandas DataFrame

import pandas as pd 

# Sample records: one dict per student holding a name and two exam scores.
ng_list = [
    {"name": "Jake", "math": 61, "chemistry": 80},
    {"name": "Annie", "math": 78, "chemistry": 90},
    {"name": "Jane", "math": 71, "chemistry": 54},
    {"name": "Sam", "math": 75, "chemistry": 74},
    {"name": "Ben", "math": 46, "chemistry": 64},
    {"name": "Sky", "math": 38, "chemistry": 77},
]

# Create the data frame: each dict becomes a row, each key a column.
df = pd.DataFrame(ng_list)
print(df, "\n")

# Row-wise sum: add the two score columns element by element and store
# the result in a new "total" column.
df["total"] = df["math"] + df["chemistry"]
print(df, "\n")

# Column-wise sum: total of all math scores.
print(df["math"].sum(), "\n")

# Conditional mean: average of the math scores that lie in [60, 80]
# (both bounds inclusive). The boolean mask selects the rows, "math"
# selects the column.
mean_math_bet60_80 = df.loc[(df["math"] >= 60) & (df["math"] <= 80), "math"].mean()
print(mean_math_bet60_80, "\n")

# Common statistics (count, mean, std, min, quartiles, max) for the
# numerical columns -- describe() picks numeric columns by default.
print(df.describe(), "\n")

# Common statistics (count, unique, top, freq) for the non-numerical
# columns. Excluding numeric columns, rather than including only "object",
# keeps string columns in the summary whether pandas stores them as
# object dtype or as a dedicated string dtype.
print(df.describe(exclude=["number"]), "\n")


# functions for other statistics:
# max, min: maximum, minimum values
# count: number of non-missing values
# sem: standard error of the mean
# mode: most frequently occurring value(s)
# quantile: quantile, e.g., 10% quantile --> df['math'].quantile(0.1)
# corr: correlation between two columns, e.g., df['math'].corr(df['chemistry'])

No comments:

Post a Comment