"""Basic descriptive statistics on a small pandas DataFrame of exam scores."""

import pandas as pd

# One record per student; the dict keys become the DataFrame columns.
ng_list = [
    {"name": "Jake", "math": 61, "chemistry": 80},
    {"name": "Annie", "math": 78, "chemistry": 90},
    {"name": "Jane", "math": 71, "chemistry": 54},
    {"name": "Sam", "math": 75, "chemistry": 74},
    {"name": "Ben", "math": 46, "chemistry": 64},
    {"name": "Sky", "math": 38, "chemistry": 77},
]

# Create the data frame.
df = pd.DataFrame(ng_list)
print(df, "\n")

# Sum for each row: element-wise addition of the two score columns.
df["total"] = df["math"] + df["chemistry"]
print(df, "\n")

# Sum for a single column.
print(df["math"].sum(), "\n")

# Mean with a condition: average math score of students scoring 60..80
# (both bounds inclusive). The boolean mask selects the rows, "math" the column.
mean_math_bet60_80 = df.loc[
    (df["math"] >= 60) & (df["math"] <= 80), "math"
].mean()
print(mean_math_bet60_80, "\n")

# Common statistics for numerical columns.
print(df.describe(), "\n")

# Common statistics for non-numerical columns.
print(df.describe(include='object'), "\n")

# Functions for other statistics:
#   max, min: maximum, minimum values
#   count:    number of values
#   sem:      standard error of the mean
#   mode:     most frequently appearing value
#   quantile: quantile, e.g. the 10% quantile --> df['math'].quantile(0.1)
#   corr:     correlation between two columns,
#             e.g. df['math'].corr(df['chemistry'])
Aug 15, 2021
[Python] statistics with Pandas DataFrame
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment