More Pandas Examples
Some examples are below
f1
def f1(location1):
'''importing files formatted as f1'''
xls_files=glob.glob(location1+'*.xls')
list_dfs=[]
for xls_file in xls_files:
xls_pd=pd.ExcelFile(xls_file)
df_init=xls_pd.parse(xls_pd.sheet_names[0],header=5)
df_init.drop(df_init.tail(2).index,inplace=True) # drop last n rows
df_init.drop(df_init.columns[-6:],axis=1,inplace=True) # drop last n columns
date=pd.to_datetime(df_init['Date'])
description=df_init['Description ']
amount=df_init.Amount.apply(lambda x:float(x.replace(',','')))
balance=df_init.Balance.apply(lambda x:float(x.replace(',','')))
credit=amount.apply(lambda x: x if (x>0) else 0)
debit=amount.apply(lambda x: abs(x) if (x<0) else 0)
df_parsed=DataFrame({'Date':date,'Description':description,
'Credit':credit,'Debit':debit,'Balance':balance})
list_dfs.append(df_parsed)
return list_dfs
f2
def f2(location2):
'''importing files formatted as f2'''
xls_files=glob.glob(location2+'*.xls')
list_dfs=[]
for xls_file in xls_files:
xls_pd=pd.ExcelFile(xls_file)
df_init=xls_pd.parse(xls_pd.sheet_names[0],header=4)
df_init.drop(df_init.tail(3).index,inplace=True) # drop last n rows
df_init.drop(df_init.columns[-5:],axis=1,inplace=True) # drop last n columns
opening_balance=0
date=pd.to_datetime(df_init['Value Date'],format='%d/%m/%Y')
description=df_init['Description ']
amount=df_init['Amount']
credit=amount.apply(lambda x: x if (x>0) else 0)
debit=amount.apply(lambda x: abs(x) if (x<0) else 0)
B=[0]*len(df_init)
B[0]=opening_balance+credit.iloc[0]-debit.iloc[0]
for i in range(1,len(df_init)):
B[i]=B[i-1]-debit.loc[i]+credit.loc[i]
balance=Series(B)
df_parsed=DataFrame({'Date':date,'Description':description,
'Credit':credit,'Debit':debit,'Balance':balance})
list_dfs.append(df_parsed)
return list_dfs
f3
def f3(location3):
'''importing files formatted as f3'''
csv_files=glob.glob(location3+'*.csv')
list_dfs=[]
for csv_file in csv_files:
df_init=pd.read_csv(csv_file,header=2)
df_init.drop(df_init.tail(1).index,inplace=True) # drop last n rows
df_init.drop(df_init.columns[-1:],axis=1,inplace=True) # drop last n columns
df_init.fillna(value=0,inplace=True)
df_init=df_init[['Value Date','Transaction Details','Credit','Debit','Balance']]
df_init.columns=['Date','Description','Credit','Debit','Balance']
date=pd.to_datetime(df_init['Date'],format='%d-%m-%Y')
df_init.drop(columns=['Date'],inplace=True)
df_parsed=df_init.assign(Date=date)
df_parsed=df_parsed[['Date','Description','Credit','Debit','Balance']]
list_dfs.append(df_parsed)
return list_dfs
f4
def f4(location4):
'''importing files formatted as f4'''
csv_files=glob.glob(location4+'*.csv')
# reading files
list_dfs=[]
for csv_file in csv_files:
df_init=pd.read_csv(csv_file)
df_init=df_init.iloc[4:-2,0:-2]
df_init.drop(columns=['Payment Due Date'],inplace=True)
df_init.columns=['Date','Description','Amount']
df_init.reset_index(drop=True,inplace=True)
opening_balance=df_init['Amount'].apply(lambda x: -float(x) if x[0:2] !='CR' else float(x[2:])).iloc[0]
closing_balance=df_init['Amount'].apply(lambda x: -float(x) if x[0:2] !='CR' else float(x[2:])).iloc[-1]
df_init.drop([0],inplace=True)
df_init.drop(df_init.tail(1).index,inplace=True)
df_init.reset_index(drop=True,inplace=True)
# parsing
date=pd.to_datetime(df_init['Date'].apply(lambda x: str(current_year)+x[-5:]),format='%Y%d/%m')
description=df_init['Description']
amount=df_init['Amount'].apply(lambda x: float(x) if x[0:2] !='CR' else -float(x[2:]))
debit=amount.apply(lambda x: x if (x>0) else 0)
credit=amount.apply(lambda x: abs(x) if (x<0) else 0)
B=[0]*len(df_init)
B[0]=opening_balance+credit.iloc[0]-debit.iloc[0]
for i in range(1,len(df_init)):
B[i]=B[i-1]-debit.loc[i]+credit.loc[i]
balance=Series(B)
df_parsed=DataFrame({'Date':date,'Description':description,
'Credit':credit,'Debit':debit,'Balance':balance})
hasDec= (df_parsed['Date'].apply(lambda x: x.month)==12).any()
if hasDec:
a=iter(df_parsed['Date'].apply(lambda x: x.month))
nextelem=next(a)
status=[] # status of each record
found=False # thiselem=Dec, nextelem=Jan
problem=False # report True if found
n=1
while n<len(df_parsed):
if found:
problem=True
thiselem, nextelem = nextelem, next(a)
if thiselem==12 and nextelem==1:
found=True
status.append(problem)
n=n+1
status.append(status[-1])
S=Series(status)
keepgoing=True
while keepgoing:
reply=input('[D] delete December and before, [J] delete January and after, [N] do Nothing > ')
if reply=='D':
df_parsed_trimmed=df_parsed[S]
keepgoing=False
elif reply=='J':
df_parsed_trimmed=df_parsed[~S]
keepgoing=False
elif reply=='N':
keepgoing=False
df_parsed_trimmed=df_parsed
else:
keepgoing=True
list_dfs.append(df_parsed_trimmed)
else:
list_dfs.append(df_parsed)
return list_dfs