pandas操作Excel
# pandas读取Exlce常用操作
import os
import pandas as pd
from pandas.core.frame import DataFrame
from pandas._typing import IntStrT
import datetime
def read_excel(path, sheet_name=0, index_col=None, header=None, dtype=None, na_values=None, comment='#'):
"""
读取Exlce文件数据
:param path: Exlce文件路径
:param sheet_name: 指定sheet
:param index_col: 指定列索引(序号)
:param header: 指定表头索引
:param dtype: 指定列的数据格式 {'age': float}
:param na_values: 自定义缺失值 eg: 将列中张三变为NAN: {'name':"张三"}
:param comment: 指定Excel中的注释符号,如 #号,读取后数据变为NAN
:return:
"""
return pd.read_excel(path, sheet_name=sheet_name, index_col=index_col, dtype=dtype, na_values=na_values,
comment=comment)
def info(_pd):
return _pd.info
def count_row(_pd: pd):
"""
返回总行数
:param _pd: pandas实例
:return:
"""
return _pd.index.stop
def count_col(_pd):
# shape为一个元祖,分别为行数和列数,默认第一行是表头不算行数
shape = _pd.shape
return shape[1]
def get_index_col(_pd, filterEmpty=False):
"""
获取列索引
:param _pd: pandas实例
:param filterEmpty: 是否去空
:return:
{
col1_name: 0,
col2_name: 1,
col3_name: 2
}
"""
columnsArray = _pd.columns.values
columns = {}
columnsIdx = 0
for item in columnsArray:
if filterEmpty and "Unnamed:" in item:
continue
columns[item] = columnsIdx
columnsIdx += 1
# print("columns", columns)
return columns
def get_col_val(_pd):
"""
获取列值
:param _pd: pandas实例
:return: 数组 列值
"""
return _pd.columns.values
def get_rows_array(_pd):
"""
将每行数据以数组形式返回
:param _pd: pandas实例
:return:
"""
rowData = []
rowIdx = 0
while rowIdx < count_row(_pd):
columnIdx = 0
baseData = []
while columnIdx < count_col(_pd):
baseData.append(_pd.iat[rowIdx, columnIdx])
columnIdx += 1
rowData.append(baseData)
rowIdx += 1
return rowData
def get_rows_obj(_pd, filterEmpty=False):
"""
将每行数据以对象形式返回
:param _pd: pandas实例
:param filterEmpty: 是否去空
:return:
"""
indexCol = get_index_col(_pd, filterEmpty=filterEmpty)
rowData = []
rowIdx = 0
while rowIdx < count_row(_pd):
rowObj = {}
for k, v in indexCol.items():
rowObj[k] = _pd.iat[rowIdx, v]
rowData.append(rowObj)
rowIdx += 1
return rowData
def rows_obj_to_array(rows_obj):
"""
将对象形式的数据转为数组返回
:param rows_obj:
:return: 元组(列索引,行数据)
"""
rowArrayData = []
colData = []
for item in rows_obj:
rowData = []
for k, v in item.items():
if k not in colData:
colData.append(k) # 列值
rowData.append(v)
rowArrayData.append(rowData)
return colData, rowArrayData
def to_excel(rowArrayData, columns=[], outputPath='', isObjData=False):
"""
将数据输出为excel文件
:param rowArrayData: 每行数据,数组形式
:param columns: 列索引
:param outputPath: 输出地址
:param isObjData: rowArrayData是否是对象形式的数据
:return:
"""
outPath = outputPath
if len(outPath) <= 0:
tDate = datetime.datetime.now()
tDate = tDate.strftime("%Y_%m_%d_%H_%M_%S")
outPath = os.getcwd() + '/outPath' + tDate + '.xlsx'
rowArrayList = rowArrayData
if isObjData:
rowArrayList = rows_obj_to_array(rowArrayList)[1]
pd.DataFrame(rowArrayList, columns=columns).to_excel(outPath)
上次更新: 2022/08/23, 18:58:25