Pandas Groupby
# Usage example: compute several aggregates per (col1, col2) group and
# combine them into a single flat summary table.
import numpy as np
import pandas as pd

# To create the dataframe: ten rows of two string key columns followed by
# four random numeric columns. It is built first so the groupby below has
# data to work on.
keys = np.array(
    [
        ["A", "B"],
        ["A", "B"],
        ["A", "B"],
        ["A", "B"],
        ["C", "D"],
        ["C", "D"],
        ["C", "D"],
        ["E", "F"],
        ["E", "F"],
        ["G", "H"],
    ]
)
value_cols = ["col3", "col4", "col5", "col6"]
df = pd.DataFrame(
    np.hstack([keys, np.random.randn(10, 4).round(2)]),
    columns=["col1", "col2", *value_cols],
)
# np.hstack of a string array with a float array yields an all-string array,
# so the numeric columns have to be cast back to float explicitly.
df[value_cols] = df[value_cols].astype(float)

gb = df.groupby(["col1", "col2"])

# Group sizes as a one-column frame; the other aggregates are joined onto it
# via the shared (col1, col2) index.
counts = gb.size().to_frame(name="counts")

# Each aggregate is renamed before joining so that the two statistics taken
# from col4 (median and min) do not collide on the column name.
summary = (
    counts.join(gb.agg({"col3": "mean"}).rename(columns={"col3": "col3_mean"}))
    .join(gb.agg({"col4": "median"}).rename(columns={"col4": "col4_median"}))
    .join(gb.agg({"col4": "min"}).rename(columns={"col4": "col4_min"}))
    .reset_index()  # turn the group keys back into ordinary columns
)
Comment grouper en pandas
# Import pandas under its conventional alias.
import pandas as pd

# Create the dataframe from the CSV file; the relative path "nba.csv" is
# resolved against the current working directory.
df = pd.read_csv("nba.csv")

# Print the dataframe. A bare `df` expression only renders in a REPL or
# notebook, so print() is used to make the output appear in a script too.
print(df)
Groupby Pandas
python groupby