-
pandas - 1 | Base Line / python 기초 코드 | 2022. 7. 1. 22:04
# --- pandas Series basics ---------------------------------------------------
# Notebook-style walkthrough: a bare expression on its own line only displays
# a value in an interactive session and has no effect when run as a script.
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# A Series built from a plain list gets the default integer index.
list_data = [1, 2, 3, 4, 5]
example_obj = Series(data=list_data)
example_obj

# The same values, this time labelled with an explicit string index.
list_data = [1, 2, 3, 4, 5]
list_name = ["a", "b", "c", "d", "e"]
example_obj = Series(data=list_data, index=list_name)
example_obj

# Built from a dict: the keys become the index; dtype and name are requested
# explicitly.
dict_data = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
example_obj = Series(dict_data, dtype=np.float32, name="example_data")
example_obj

# Read one element by label, then overwrite it.
example_obj["a"]
example_obj["a"] = 3.2
example_obj

# The underlying array and the index object.
example_obj.values
example_obj.index

# Both the Series and its index can carry a name.
example_obj.name = "number"
example_obj.index.name = "alphabet"
example_obj

# The requested index contains labels ("f", "g", "h") that are not keys of the
# dict, so those positions are filled with NaN.
dict_data_1 = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
indexes = ["a", "b", "c", "d", "e", "f", "g", "h"]
series_obj_1 = Series(dict_data_1, index=indexes)
series_obj_1

# DataFrame: a data table built by collecting Series; fundamentally 2-D.
# --- pandas DataFrame basics ------------------------------------------------
# Notebook-style walkthrough: bare expressions only display a value in an
# interactive session.
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Any'],
    'last_name': ['Miler', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
    'age': [42, 52, 36, 24, 73],
    'city': ['San Francisco', 'Baltimore', 'Miami', 'Douglas', 'Boston'],
}

# `columns` selects and orders the dict keys that become columns.
df = pd.DataFrame(raw_data, columns=['first_name', 'last_name', 'age', 'city'])
df

# "debt" is not a key of raw_data, so that column is created with no values.
df = DataFrame(
    raw_data,
    columns=["first_name", "last_name", "age", "city", "debt"],
)

# A column can be read as an attribute or by key.
df.first_name
df["first_name"]
df

# loc selects by index label.
df.loc[1]

# iloc selects by integer position.
df["age"].iloc[1:]

# With a non-sequential integer index the two differ: loc[:3] runs up to and
# including the label 3, while iloc[:3] takes the first three positions.
s = pd.Series(np.nan, index=[49, 48, 47, 46, 45, 1, 2, 3, 4, 5])
s.loc[:3]
s.iloc[:3]

# Assign through the key rather than the attribute: attribute assignment only
# updates a column when that column already exists.
df["debt"] = df.age > 40
df

df.T          # transpose
df.values     # underlying array
df.to_csv()   # with no path given, the CSV text is returned as a string

del df["debt"]
df

# Slicing the frame directly slices rows.
df[:3]
df

# Load a spreadsheet; the path is relative to the working directory.
url = 'exceltasks/excel-comp-data.xlsx'
df = pd.read_excel(url)
df.head()
# --- Selection, dropping rows/columns, arithmetic, map ----------------------
# Continues with `df` (loaded from the spreadsheet) and `np` from the section
# above; neither is defined in this section.
from pandas import DataFrame, Series

# Column selection followed by a row slice.
df["account"][:3]
account_series = df["account"]
account_series[:3]
account_series[[0, 1, 2]]                  # select by a list of index labels
account_series[account_series < 250000]    # boolean mask

# Promote the account column to the index, then remove the column itself.
df.index = df["account"]
del df["account"]
df.head()

df[["name", "street"]][:2]
df.iloc[:, :2]
# NOTE(review): these labels must exist among the account numbers in the
# spreadsheet — confirm against the data file.
df.loc[[211829, 320563], ["name", "street"]]

# Reset to a 0..n-1 index sized from the data instead of a hard-coded length.
df.index = list(range(len(df)))
df.head()

# drop returns a new frame; `df` itself is left unchanged.
df.drop(1)
df.drop([0, 1, 2, 3])
df.drop("city", axis=1)
# Several columns can be dropped at once: df.drop(["city", "state"], axis=1)

# Series arithmetic aligns on index labels; labels present on only one side
# produce NaN.
s1 = Series(range(1, 6), index=list("abcde"))
s1
# NOTE(review): the label "e" appears twice in this index — confirm that the
# duplicate is intentional.
s2 = Series(range(5, 11), index=list("bcedef"))
s2
s1.add(s2)
s1 + s2

# DataFrame arithmetic aligns on both rows and columns.
df1 = DataFrame(np.arange(9).reshape(3, 3), columns=list("abc"))
df1
df2 = DataFrame(np.arange(16).reshape(4, 4), columns=list("abcd"))
df2
df1 + df2
df1.add(df2, fill_value=0)   # treat a value missing on one side as 0

# DataFrame with Series: `+` matches the Series index against the column
# labels; add(..., axis=0) matches it against the row index instead.
df = DataFrame(np.arange(16).reshape(4, 4), columns=list("abcd"))
df
s2 = Series(np.arange(10, 14))
s2
df + s2
df.add(s2, axis=0)

# map applies a function to every element.
s1 = Series(np.arange(10))
s1.head(5)
s1.map(lambda x: x ** 2).head(5)
# --- map / replace / apply and descriptive statistics -----------------------
# Continues with `s1`, `Series`, `np` and `pd` from the sections above; none
# of them is defined in this section.

# map with a dict: values that are not keys of the dict become NaN.
z = {1: 'A', 2: 'B', 3: 'C'}
s1.map(z).head(5)

# map with another Series: each value is looked up in that Series' index.
s2 = Series(np.arange(10, 20))
s1.map(s2).head(5)

df = pd.read_csv("wages.csv")
df.head()
df.sex.unique()

# Encode every row. Assigning only a .head() here would leave all rows after
# the fifth as NaN, because assignment aligns on the index.
df["sex_code"] = df.sex.map({"male": 0, "female": 1})

# Assign the result back instead of calling replace(inplace=True) on the
# column accessor, which does not reliably write through to `df`.
df["sex"] = df["sex"].replace(["male", "female"], [0, 1])
df.head(5)

df_info = df[["earn", "height", "age"]]
df_info.head()

# apply passes each column (a Series) to the function.
f = lambda x: x.max() - x.min()
df_info.apply(f)
df_info.sum()
df_info.apply(sum)

# applymap works element by element.
# NOTE(review): recent pandas releases deprecate applymap in favour of
# DataFrame.map — switch once the minimum supported version allows it.
f = lambda x: -x
df_info.applymap(f).head(5)

f = lambda x: -x
df_info["earn"].apply(f).head(5)

# Start again from the raw file.
df = pd.read_csv("wages.csv")
df.head()
df.describe()
df.race.unique()

# Number the distinct race labels in order of first appearance and use that
# numbering as an integer encoding.
race_labels = df["race"].unique()
dict(enumerate(race_labels))
key = race_labels.tolist()
value = list(range(len(key)))
value, key
df["race"] = df["race"].replace(to_replace=key, value=value)
df.head(5)

# Column-wise totals.
df.sum(axis=0)

# Row-wise totals and correlations only make sense for numeric columns, and
# recent pandas raises on mixed dtypes, so restrict to numeric columns first.
numeric_df = df.select_dtypes(include="number")
numeric_df.sum(axis=1)

df.isnull()
df.isnull().sum()
df.sort_values(["age", "earn"], ascending=True).head(10)

df.age.corr(df.earn)
df.age.cov(df.earn)
numeric_df.corrwith(df.earn)
numeric_df.corr()
# --- groupby: split / apply / combine ---------------------------------------
# Notebook-style walkthrough: bare expressions only display a value in an
# interactive session.
import dateutil.parser  # also binds the top-level `dateutil` name
import numpy as np
import pandas as pd

# data
# NOTE(review): one entry is spelled 'kings' (lower case) and therefore forms
# its own group, separate from 'Kings' — confirm that this is intended.
ipl_data = {
    'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings', 'kings',
             'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
    'Rank': [1, 2, 2, 3, 3, 4, 1, 1, 2, 4, 1, 2],
    'Year': [2014, 2015, 2014, 2015, 2014, 2015,
             2016, 2017, 2016, 2014, 2015, 2017],
    'Points': [876, 789, 863, 673, 741, 812, 756, 788, 694, 701, 804, 690],
}
df = pd.DataFrame(ipl_data)
df

df.groupby("Team")["Points"].sum()
df.groupby(["Team", "Year"])["Points"].sum()
df

# Grouping by two keys yields a Series with a two-level index.
h_index = df.groupby(["Team", "Year"])["Points"].sum()
h_index
h_index.index
h_index["Devils":"Kings"]
h_index.unstack()
h_index.swaplevel()
h_index.swaplevel().sort_index(level=0)

# Aggregate per index level. Series.sum(level=...) is gone from current
# pandas; grouping by the level is the supported spelling.
h_index.groupby(level=0).sum()
h_index.groupby(level=1).sum()

# Iterating a groupby yields (key, sub-frame) pairs.
grouped = df.groupby("Team")
for name, group in grouped:
    print(name)
    print(group)

grouped.get_group("Devils")

# String aggregation names replace the builtin / NumPy callables, which
# recent pandas warns about when they are passed to agg.
grouped.agg("sum")
grouped.agg("mean")
grouped['Points'].agg(["sum", "mean", "std"])
df

# transform returns a result aligned with the original rows.
score = lambda x: (x)
grouped.transform(score)
df

score = lambda x: (x.max())
grouped.transform(score)
df

# Standardise each value within its group.
score = lambda x: (x - x.mean()) / x.std()
grouped.transform(score)

# filter keeps only the groups for which the predicate is true.
df.groupby('Team').filter(lambda x: len(x) >= 3)

df_phone = pd.read_csv("phone_data.csv")

# Parse the date strings. The original self-assignment left the column
# untouched even though dateutil was imported for this step.
# NOTE(review): dayfirst=True assumes day/month/year strings — confirm
# against phone_data.csv.
df_phone['date'] = df_phone['date'].apply(dateutil.parser.parse, dayfirst=True)

df_phone.groupby('month')['duration'].sum()
df_phone[df_phone['item'] == 'call'].groupby('network')['duration'].sum()
df_phone.groupby(['month', 'item'])['date'].count()
df_phone.groupby(['month', 'item'])['date'].count().unstack()
df_phone.groupby('month', as_index=False).agg({"duration": "sum"})

# A dict passed to agg selects the aggregation(s) applied to each column.
df_phone.groupby(['month', 'item']).agg({
    'duration': "sum",
    'network_type': "count",
    'date': 'first',
})
df_phone.groupby(['month', 'item']).agg({
    'duration': ["min", "max", "sum"],
    'network_type': "count",
    'date': ["min", 'first', 'nunique'],
})

# Several aggregations on one column produce two-level column labels; drop
# the outer level so only the aggregation names remain.
grouped = df_phone.groupby('month').agg({"duration": ["min", "max", "mean"]})
grouped.columns = grouped.columns.droplevel(level=0)
# rename returns a renamed copy; `grouped` itself is unchanged.
grouped.rename(columns={
    "min": "min_duration",
    "max": "max_duration",
    "mean": "mean_duration",
})
# --- pivot tables, merge / concat, SQLite and file persistence --------------
# Continues with `pd` and `df_phone` from the sections above.
import sqlite3

# Pivot: rows keyed by (month, item), one column per network.
df_phone.pivot_table(
    ["duration"],
    index=[df_phone.month, df_phone.item],
    columns=df_phone.network,
    aggfunc="sum",
    fill_value=0,
)

df_movie = pd.read_csv("movie_rating.csv")
df_movie.head()

# crosstab and pivot_table both build a critic x title rating matrix.
pd.crosstab(
    index=df_movie.critic,
    columns=df_movie.title,
    values=df_movie.rating,
    aggfunc="first",
).fillna(0)
df_movie.pivot_table(
    ["rating"],
    index=df_movie.critic,
    columns=df_movie.title,
    aggfunc="sum",
    fill_value=0,
)

# NOTE(review): df_a and df_b are not defined in this section; they must
# already exist and share a 'subject_id' column — confirm where they are built.
pd.merge(df_a, df_b, left_on='subject_id', right_on='subject_id')
pd.merge(df_a, df_b, on='subject_id', how='left')
pd.merge(df_a, df_b, on='subject_id', how='right')
pd.merge(df_a, df_b, on='subject_id', how='outer')   # was `on'subject_id'`
pd.merge(df_a, df_b, on='subject_id', how='inner')
pd.merge(df_a, df_b, right_index=True, left_index=True)

# Stack rows.
df_new = pd.concat([df_a, df_b])
df_new.reset_index()
# DataFrame.append is gone from current pandas; concat is the replacement.
pd.concat([df_a, df_b])

# Place the frames side by side.
df_new = pd.concat([df_a, df_b], axis=1)
df_new.reset_index()

# Read tables out of a SQLite database.
conn = sqlite3.connect("./data/flights.db")
cur = conn.cursor()
cur.execute("select*from airlines limit 5;")
results = cur.fetchall()
results

df_airplines = pd.read_sql_query("select*from airlines;", conn)
df_airports = pd.read_sql_query("select*from airports;", conn)
df_routes = pd.read_sql_query("select * from routes;", conn)
conn.close()  # every query has run; release the database handle

# Round-trip through pickle.
df_routes.to_pickle("./data/df_routes.pickle")
df_routes_pickle = pd.read_pickle("./data/df_routes.pickle")
df_routes_pickle.head()

# The workbook is only written when the writer is closed, so use it as a
# context manager.
with pd.ExcelWriter('./data/df_routes.xlsx', engine='xlsxwriter') as writer:
    df_routes.to_excel(writer, sheet_name='Sheet1')
reference : https://github.com/TeamLab/introduction_to_python_TEAMLAB_MOOC
'Base Line > python 기초 코드' 카테고리의 다른 글
torch check list (0) 2022.08.14 · pandas - 2 (0) 2022.07.12 · blender python script (0) 2022.06.07 · pcl + open3d (0) 2022.06.06 · pillow 이미지 rotation, flip (1) 2022.04.18