问题描述

现有大约160份水文年鉴数据Excel表格，包含水位、降水量、水文要素摘录表和逐日表，格式如下图所示。

需将其存储到数据库中，如下图所示。

问题分析

如此大量的数据，如果采用手动复制粘贴的方法，需要耗费大量时间。考虑到水文年鉴数据格式统一，可以采用Python对Excel表格数据进行提取，大致分为3步。

1. 将水文年鉴数据提取到对应的 NewStyle Excel 表中，数据组织形式如下图所示。
2. 将所有的 NewStyle Excel 表汇总成一个水文数据汇总表，如下图所示。
3. 在 Access 中导入各水文要素记录。

实现方法

逐日降水量

import xlrd
import xlwt
import calendar
import arrow
import os  # 输出该目录下所有子文件目录
def get_file_name(file_dir):
    """Collect the path of every file below ``file_dir``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.

    :param file_dir: directory to search
    :return: list of paths, each formed by joining the directory reported
        by ``os.walk`` with the file name
    """
    list_of_files = []
    for root, _dirs, files in os.walk(file_dir):
        list_of_files.extend(os.path.join(root, file) for file in files)
    return list_of_files


def isLeapYear(years):
    """Return the number of days in the given year.

    Despite the name, the result is a day count, not a boolean.

    :param years: year, int
    :return: 366 for a leap year, otherwise 365
    :raises TypeError: if ``years`` is not an int
    """
    # Raise explicitly instead of using ``assert`` so the check is still
    # performed when Python runs with -O.
    if not isinstance(years, int):
        raise TypeError("请输入整数年，如 2018")
    is_leap = (years % 4 == 0 and years % 100 != 0) or years % 400 == 0
    return 366 if is_leap else 365


def getAllDayPerYear(years):
    """Return every date of one year as ``YYYY-MM-DD`` strings.

    :param years: year; anything ``int()`` accepts, e.g. ``"2018"``
    :return: list of date strings from January 1 to December 31, in order
    """
    # Function-scope import keeps this block self-contained.
    from datetime import date, timedelta

    year = int(years)
    # Build the first day once instead of re-parsing it for every offset.
    first_day = date(year, 1, 1)
    return [
        (first_day + timedelta(days=offset)).isoformat()
        for offset in range(isLeapYear(year))
    ]


# Remove blank entries from a list using a list comprehension
def remove_space(list_of_data, index_of_space):
    """Return ``list_of_data`` without the elements at the given positions.

    :param list_of_data: sequence to filter; it is not modified
    :param index_of_space: iterable of integer positions to drop
    :return: new list holding the remaining elements in their original order
    """
    # A set makes each membership test O(1); testing against the list made
    # the whole filter quadratic.
    blank_positions = set(index_of_space)
    return [
        data
        for index, data in enumerate(list_of_data)
        if index not in blank_positions
    ]


# Convert the raw daily precipitation table into the form ArcHIGH accepts
def transform(file_name):
    """Convert one raw daily-precipitation workbook into a "NewStyle" workbook.

    Reads the first sheet of ``file_name`` (all indexes zero-based): the year
    from cell (row 1, column 2), the station code from cell (row 1, column 5)
    and the daily values of month ``m`` from column ``1 + m`` starting at
    row 4.  Every non-blank value becomes one output row holding station
    code, start date, end date (the following day) and precipitation.  The
    result is saved next to the input with "NewStyle" appended to the base
    name.

    :param file_name: path of the raw daily-precipitation ``.xls`` file
    :return: None
    """
    data = xlrd.open_workbook(file_name)
    table = data.sheets()[0]
    year = int(table.cell(1, 2).value)
    station_code = table.cell(1, 5).value

    # Daily values of the whole year, month after month.
    list_of_precipitation = []
    for month in range(1, 13):
        days = calendar.monthrange(year, month)[1]
        list_of_precipitation += table.col_values(1 + month)[4 : 4 + days]

    # Strip the "·" and "*" markers.  Only text cells can carry them; numeric
    # cells are passed through unchanged instead of failing on ``.replace``.
    list_of_precipitation = [
        value.replace("·", '').replace("*", '') if isinstance(value, str) else value
        for value in list_of_precipitation
    ]

    # Start dates are the days of the year; each end date is the next day,
    # so the last one is January 1 of the following year.
    start_date = getAllDayPerYear(str(year))
    end_date = start_date[1:] + [str(year + 1) + "-01-01"]

    # Drop the days whose precipitation cell is blank from all three lists.
    index_of_space = [
        index
        for index, precipitation in enumerate(list_of_precipitation)
        if precipitation == ""
    ]
    start_date = remove_space(start_date, index_of_space)
    end_date = remove_space(end_date, index_of_space)
    list_of_precipitation = remove_space(list_of_precipitation, index_of_space)

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("逐日降水量")
    for col, title in enumerate(("测站编码", "开始日期", "结束日期", "降水量(mm)")):
        sheet1.write(0, col, title)

    # One list per output column; data rows start below the header row.
    columns = (
        [station_code] * len(list_of_precipitation),
        start_date,
        end_date,
        list_of_precipitation,
    )
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)

    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")


# Main entry point
if __name__ == "__main__":
    print("===本程序将逐日降水量表转换为ArcHIGH能接受的形式NewStyle===")
    # Convert every daily-precipitation workbook found below the current
    # working directory.
    for path in get_file_name(os.getcwd()):
        if "逐日降水量表.xls" in path:
            transform(path)
    print("===转换完毕，请查看...逐日降水量表NewStyle.xls文件===")

逐日平均水位

import xlrd
import xlwt
import calendar
import arrow
import os  # 输出该目录下所有子文件目录
def get_file_name(file_dir):
    """Collect the path of every file below ``file_dir``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.

    :param file_dir: directory to search
    :return: list of paths, each formed by joining the directory reported
        by ``os.walk`` with the file name
    """
    list_of_files = []
    for root, _dirs, files in os.walk(file_dir):
        list_of_files.extend(os.path.join(root, file) for file in files)
    return list_of_files


def isLeapYear(years):
    """Return the number of days in the given year.

    Despite the name, the result is a day count, not a boolean.

    :param years: year, int
    :return: 366 for a leap year, otherwise 365
    :raises TypeError: if ``years`` is not an int
    """
    # Raise explicitly instead of using ``assert`` so the check is still
    # performed when Python runs with -O.
    if not isinstance(years, int):
        raise TypeError("请输入整数年，如 2018")
    is_leap = (years % 4 == 0 and years % 100 != 0) or years % 400 == 0
    return 366 if is_leap else 365


def getAllDayPerYear(years):
    """Return every date of one year as ``YYYY-MM-DD`` strings.

    :param years: year; anything ``int()`` accepts, e.g. ``"2018"``
    :return: list of date strings from January 1 to December 31, in order
    """
    # Function-scope import keeps this block self-contained.
    from datetime import date, timedelta

    year = int(years)
    # Build the first day once instead of re-parsing it for every offset.
    first_day = date(year, 1, 1)
    return [
        (first_day + timedelta(days=offset)).isoformat()
        for offset in range(isLeapYear(year))
    ]


# Convert the raw daily mean stage table into the form ArcHIGH accepts
def transform(file_name):
    """Convert one raw daily-mean-stage workbook into a "NewStyle" workbook.

    Reads the first sheet of ``file_name`` (all indexes zero-based): the year
    from cell (2, 1), the station code from cell (2, 4), the offset between
    the table's stage and the 85-datum stage from cell (2, 9), and the daily
    stages of month ``m`` from column ``1 + m`` starting at row 5.  The
    output holds, per day, the station code, the date, the 85-datum stage
    and the stage as tabulated; the datum-conversion text is written once
    into row 1 of the fifth column.  The result is saved next to the input
    with "NewStyle" appended to the base name.

    :param file_name: path of the raw daily-mean-stage ``.xls`` file
    :return: None
    """
    table = xlrd.open_workbook(file_name).sheets()[0]
    year = int(table.cell(2, 1).value)
    station_code = table.cell(2, 4).value

    # NOTE(review): eval() executes whatever text the cell contains; only
    # run this on workbooks from a trusted source.
    offset_text = table.cell(2, 9).value
    stage_change = eval(offset_text)
    # Datum-conversion description, concatenated from four header cells.
    datum = (
        table.cell(2, 6).value
        + offset_text
        + table.cell(2, 10).value
        + table.cell(2, 11).value
    )

    raw_stages = []
    for month in range(1, 13):
        month_length = calendar.monthrange(year, month)[1]
        raw_stages.extend(table.col_values(1 + month)[5 : 5 + month_length])

    # A value without "." holds only the decimals; prefix it with the
    # integer part of the most recent complete value.  The "X" marker is
    # removed afterwards.
    stage_of_year = []
    integer_part = '0'
    for text in raw_stages:
        if '.' in text:
            integer_part = text.split('.')[0]
        else:
            text = integer_part + '.' + text
        stage_of_year.append(text)
    stage_of_year = [text.replace("X", '') for text in stage_of_year]

    # Stage relative to the 85 datum, rounded to three decimals.
    list_of_stage_85 = [
        str(round(eval(text) + stage_change, 3)) for text in stage_of_year
    ]

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("逐日平均水位")
    titles = ("测站编码", "日期", "85基准水位(m)", "表内水位(m)", "基准面转换关系")
    for col, title in enumerate(titles):
        sheet1.write(0, col, title)
    sheet1.write(1, 4, datum)

    days_of_year = getAllDayPerYear(str(year))
    columns = (
        [station_code] * len(stage_of_year),
        days_of_year,
        list_of_stage_85,
        stage_of_year,
    )
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)

    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")


# Main entry point
if __name__ == "__main__":
    print("===本程序将逐日平均水位表转换为ArcHIGH能接受的形式NewStyle===")
    # Convert every daily-mean-stage workbook found below the current
    # working directory, echoing each path before it is processed.
    for path in get_file_name(os.getcwd()):
        if "逐日平均水位表.xls" not in path:
            continue
        print(path)
        transform(path)
    print("===转换完毕，请查看...逐日平均水位表NewStyle.xls文件===")

逐日水面蒸发量

import xlrd
import xlwt
import calendar
import arrow
import os
import re# 输出该目录下所有子文件目录
def get_file_name(file_dir):
    """Collect the path of every file below ``file_dir``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.

    :param file_dir: directory to search
    :return: list of paths, each formed by joining the directory reported
        by ``os.walk`` with the file name
    """
    list_of_files = []
    for root, _dirs, files in os.walk(file_dir):
        list_of_files.extend(os.path.join(root, file) for file in files)
    return list_of_files


def isLeapYear(years):
    """Return the number of days in the given year.

    Despite the name, the result is a day count, not a boolean.

    :param years: year, int
    :return: 366 for a leap year, otherwise 365
    :raises TypeError: if ``years`` is not an int
    """
    # Raise explicitly instead of using ``assert`` so the check is still
    # performed when Python runs with -O.
    if not isinstance(years, int):
        raise TypeError("请输入整数年，如 2018")
    is_leap = (years % 4 == 0 and years % 100 != 0) or years % 400 == 0
    return 366 if is_leap else 365


def getAllDayPerYear(years):
    """Return every date of one year as ``YYYY-MM-DD`` strings.

    :param years: year; anything ``int()`` accepts, e.g. ``"2018"``
    :return: list of date strings from January 1 to December 31, in order
    """
    # Function-scope import keeps this block self-contained.
    from datetime import date, timedelta

    year = int(years)
    # Build the first day once instead of re-parsing it for every offset.
    first_day = date(year, 1, 1)
    return [
        (first_day + timedelta(days=offset)).isoformat()
        for offset in range(isLeapYear(year))
    ]


# Convert the raw daily water-surface evaporation table into the form ArcHIGH accepts
def transform(file_name):
    """Convert one raw daily-evaporation workbook into a "NewStyle" workbook.

    Reads the first sheet of ``file_name`` (all indexes zero-based).  Year
    and station code are taken from the text of cell (1, 0): the text is
    split on ":" and " ", piece 1 is the year and piece 3 the station code,
    both converted to int.  The daily values of month ``m`` come from column
    ``1 + m`` starting at row 4.  The output holds, per day, the station
    code, the date and the evaporation value, and is saved next to the input
    with "NewStyle" appended to the base name.

    :param file_name: path of the raw daily-evaporation ``.xls`` file
    :return: None
    """
    table = xlrd.open_workbook(file_name).sheets()[0]

    # The header information sits in the left-most cell of a merged range.
    header_parts = re.split('[: ]', table.cell(1, 0).value)
    year = int(header_parts[1])
    station_code = int(header_parts[3])

    evaporation_of_year = []
    for month in range(1, 13):
        month_length = calendar.monthrange(year, month)[1]
        evaporation_of_year.extend(
            table.col_values(1 + month)[4 : 4 + month_length]
        )

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("逐日水面蒸发量")
    for col, title in enumerate(("测站编码", "日期", "水面蒸发量(mm)")):
        sheet1.write(0, col, title)

    days_of_year = getAllDayPerYear(str(year))
    columns = (
        [station_code] * len(evaporation_of_year),
        days_of_year,
        evaporation_of_year,
    )
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)

    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")


# Main entry point
if __name__ == "__main__":
    print("===本程序将逐日水面蒸发量表转换为ArcHIGH能接受的形式NewStyle===")
    # Convert every daily-evaporation workbook found below the current
    # working directory.
    for path in get_file_name(os.getcwd()):
        if "逐日水面蒸发量表.xls" in path:
            transform(path)
    print("===转换完毕，请查看...逐日水面蒸发量表NewStyle.xls文件===")

降水量摘录

import xlrd
import xlwt
import os
import datetime# 输出该目录下所有子文件目录
def get_file_name(file_dir):
    """Collect the path of every file below ``file_dir``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.

    :param file_dir: directory to search
    :return: list of paths, each formed by joining the directory reported
        by ``os.walk`` with the file name
    """
    collected = []
    for folder, _subfolders, names in os.walk(file_dir):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected


# Remove blank entries from a list using a list comprehension
def remove_space(list_of_data, index_of_space):
    """Return ``list_of_data`` without the elements at the given positions.

    :param list_of_data: sequence to filter; it is not modified
    :param index_of_space: iterable of integer positions to drop
    :return: new list holding the remaining elements in their original order
    """
    # A set makes each membership test O(1); testing against the list made
    # the whole filter quadratic.
    blank_positions = set(index_of_space)
    return [
        data
        for index, data in enumerate(list_of_data)
        if index not in blank_positions
    ]


# The precipitation excerpt table uses 24 o'clock as an end time, which is
# not a valid time; it is turned into 0 o'clock of the next day
def change_time_type(year, month, day):
    """Return midnight of the day after the given date as text.

    :param year: year, int
    :param month: month, int
    :param day: day of the month, int
    :return: the following day at 00:00:00, formatted ``%Y-%m-%d %H:%M:%S``
    """
    midnight = datetime.datetime(year, month, day)
    following = midnight + datetime.timedelta(days=1)
    return following.strftime("%Y-%m-%d %H:%M:%S")


# Convert the precipitation excerpt table into the form ArcHIGH accepts
def transform(file_name):
    """Convert one precipitation-excerpt workbook into a "NewStyle" workbook.

    Reads the first sheet of ``file_name`` (all indexes zero-based): the year
    from cell (1, 2), the station code from cell (1, 5) and the page count
    from the second whitespace-separated token of cell (1, 17).  Each page
    occupies 54 sheet rows, of which 50 rows starting at row 4 are read, and
    holds four groups of five columns: month, day, start hour, end hour and
    precipitation.  Records whose precipitation cell is blank are dropped.
    The output holds station code, start time, end time and precipitation,
    and is saved next to the input with "NewStyle" appended to the base name.

    :param file_name: path of the precipitation-excerpt ``.xls`` file
    :return: None
    """
    table = xlrd.open_workbook(file_name).sheets()[0]
    year = table.cell(1, 2).value
    station_code = table.cell(1, 5).value
    pages = int(table.cell(1, 17).value.split()[1])

    list_of_month = []
    list_of_day = []
    list_of_start_time = []
    list_of_end_time = []
    list_of_precipitation = []
    # Walk the sheet page by page and, within a page, group by group.
    for page in range(pages):
        rows = slice(4 + 54 * page, 4 + 50 + 54 * page)
        for col in range(4):
            first_col = 5 * col
            list_of_month += table.col_values(first_col)[rows]
            list_of_day += table.col_values(first_col + 1)[rows]
            list_of_start_time += table.col_values(first_col + 2)[rows]
            list_of_end_time += table.col_values(first_col + 3)[rows]
            list_of_precipitation += table.col_values(first_col + 4)[rows]

    # Positions without a precipitation value are removed from every list
    # so that the lists stay aligned.
    index_of_space = [
        index
        for index, precipitation in enumerate(list_of_precipitation)
        if precipitation == ""
    ]
    list_of_month = remove_space(list_of_month, index_of_space)
    list_of_day = remove_space(list_of_day, index_of_space)
    list_of_start_time = remove_space(list_of_start_time, index_of_space)
    list_of_end_time = remove_space(list_of_end_time, index_of_space)
    list_of_precipitation = remove_space(list_of_precipitation, index_of_space)

    list_of_start_date_time = []
    list_of_end_date_time = []
    # Month and day are only filled in where they change, so the last seen
    # values are carried forward.
    current_month = "0"
    current_day = "0"
    stamp = "{}-{}-{} {}:00:00"
    records = zip(list_of_month, list_of_day, list_of_start_time, list_of_end_time)
    for month, day, start_time, end_time in records:
        # The month can only change on a row that also carries a new day.
        if day != "":
            current_day = day
            if month != "":
                current_month = month
        date_parts = (int(year), int(current_month), int(current_day))
        start_date_time = stamp.format(*date_parts, int(start_time))
        # An end time of 24 is written as 0 o'clock of the next day.
        # NOTE(review): that branch yields zero-padded text while the other
        # timestamps are not padded.
        if end_time == 24:
            end_date_time = change_time_type(*date_parts)
        else:
            end_date_time = stamp.format(*date_parts, int(end_time))
        list_of_start_date_time.append(start_date_time)
        list_of_end_date_time.append(end_date_time)

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("降水量摘录")
    for col, title in enumerate(("测站编码", "开始时间", "结束时间", "降水量(mm)")):
        sheet1.write(0, col, title)

    columns = (
        [station_code] * len(list_of_precipitation),
        list_of_start_date_time,
        list_of_end_date_time,
        list_of_precipitation,
    )
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)

    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")


# Main entry point
if __name__ == "__main__":
    print("===本程序将降水量摘录表转换为ArcHIGH能接受的形式NewStyle===")
    # Convert every precipitation-excerpt workbook found below the current
    # working directory.
    for path in get_file_name(os.getcwd()):
        if "降水量摘录表.xls" in path:
            transform(path)
    print("===转换完毕，请查看...降水量摘录表NewStyle.xls文件===")

洪水水位摘录

import xlrd
import xlwt
import os
import re# 输出该目录下所有子文件目录
def get_file_name(file_dir):
    """Collect the path of every file below ``file_dir``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.

    :param file_dir: directory to search
    :return: list of paths, each formed by joining the directory reported
        by ``os.walk`` with the file name
    """
    collected = []
    for folder, _subfolders, names in os.walk(file_dir):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected


# Remove blank entries from a list using a list comprehension
def remove_space(list_of_data, index_of_space):
    """Return ``list_of_data`` without the elements at the given positions.

    :param list_of_data: sequence to filter; it is not modified
    :param index_of_space: iterable of integer positions to drop
    :return: new list holding the remaining elements in their original order
    """
    # A set makes each membership test O(1); testing against the list made
    # the whole filter quadratic.
    blank_positions = set(index_of_space)
    return [
        data
        for index, data in enumerate(list_of_data)
        if index not in blank_positions
    ]


# Convert the flood stage excerpt table into the form ArcHIGH accepts
def transform(file_name):
    """Convert one flood-stage-excerpt workbook into a "NewStyle" workbook.

    Reads the first sheet of ``file_name`` (all indexes zero-based): the year
    from cell (1, 2), the station code from cell (1, 5) and the page count
    from the first run of digits in the first whitespace-separated token of
    cell (1, 17).  The datum offset and the datum-conversion text come from
    the companion daily-mean-stage workbook, whose path is obtained by
    replacing "洪水水位摘录表" with "逐日平均水位表" in ``file_name``.  Each
    page occupies 54 sheet rows, of which 50 rows starting at row 4 are read,
    and holds five groups of four columns: month, day, time and stage.
    Records with a blank stage are dropped.  The output holds station code,
    date-time, 85-datum stage and tabulated stage; it is saved next to the
    input with "NewStyle" appended to the base name.

    :param file_name: path of the flood-stage-excerpt ``.xls`` file
    :return: None
    """
    data = xlrd.open_workbook(file_name)
    table = data.sheets()[0]
    year = table.cell(1, 2).value
    station_code = table.cell(1, 5).value

    # Datum information lives in the matching daily-mean-stage workbook.
    data1 = xlrd.open_workbook(file_name.replace("洪水水位摘录表", "逐日平均水位表"))
    table1 = data1.sheets()[0]
    # NOTE(review): eval() executes whatever text the cell contains; only
    # run this on workbooks from a trusted source.
    stage_change = eval(table1.cell(2, 9).value)
    datum = (
        table1.cell(2, 6).value
        + table1.cell(2, 9).value
        + table1.cell(2, 10).value
        + table1.cell(2, 11).value
    )

    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    pages = int(re.findall(r"\d+", table.cell(1, 17).value.split()[0])[0])

    list_of_month = []
    list_of_day = []
    list_of_time = []
    list_of_stage = []
    # Read date, time and stage page by page and, within a page, group by
    # group.
    for page in range(pages):
        rows = slice(4 + 54 * page, 4 + 50 + 54 * page)
        for col in range(5):
            list_of_month += table.col_values(0 + 4 * col)[rows]
            list_of_day += table.col_values(1 + 4 * col)[rows]
            list_of_time += table.col_values(2 + 4 * col)[rows]
            list_of_stage += table.col_values(3 + 4 * col)[rows]

    # Positions without a stage value are removed from every list so that
    # the lists stay aligned.
    index_of_space = [
        index for index, stage in enumerate(list_of_stage) if stage == ""
    ]
    list_of_month = remove_space(list_of_month, index_of_space)
    list_of_day = remove_space(list_of_day, index_of_space)
    list_of_time = remove_space(list_of_time, index_of_space)
    list_of_stage = remove_space(list_of_stage, index_of_space)

    list_of_date_time = []
    # Month and day are only filled in where they change, so the last seen
    # values are carried forward.
    current_month = "0"
    current_day = "0"
    for month, day, time in zip(list_of_month, list_of_day, list_of_time):
        # The month can only change on a row that also carries a new day.
        if day != "":
            current_day = day
            if month != "":
                current_month = month
        # A numeric cell is a whole hour; other values are used as they are.
        if isinstance(time, float):
            time = str(int(time)) + ":00"
        date_time = "{}-{}-{} {}:00".format(
            int(year), int(current_month), int(current_day), time
        )
        list_of_date_time.append(date_time)

    # A value without "." holds only the decimals; prefix it with the
    # integer part of the most recent complete value.
    stage_int = '0'
    for index, stage in enumerate(list_of_stage):
        if '.' in stage:
            stage_int = stage.split('.')[0]
        else:
            stage = stage_int + '.' + stage
        list_of_stage[index] = stage

    # Stage relative to the 85 datum, rounded to three decimals.
    list_of_stage_85 = [
        str(round(eval(stage) + stage_change, 3)) for stage in list_of_stage
    ]

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("洪水水位摘录")
    titles = ("测站编码", "日期时间", "85基准水位(m)", "表内水位(m)", "基准面转换关系")
    for col, title in enumerate(titles):
        sheet1.write(0, col, title)
    sheet1.write(1, 4, datum)

    columns = (
        [station_code] * len(list_of_stage),
        list_of_date_time,
        list_of_stage_85,
        list_of_stage,
    )
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)

    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")


# Main entry point
if __name__ == "__main__":
    print("===本程序将洪水水位摘录表转换为ArcHIGH能接受的形式NewStyle===")
    # Convert every flood-stage-excerpt workbook found below the current
    # working directory.
    for path in get_file_name(os.getcwd()):
        if "洪水水位摘录表.xls" in path:
            transform(path)
    print("===转换完毕，请查看...洪水水位摘录表NewStyle.xls文件===")

洪水水文要素摘录

import xlrd
import xlwt
import os
import re# 输出该目录下所有子文件目录
def get_file_name(file_dir):
    """Collect the path of every file below ``file_dir``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.

    :param file_dir: directory to search
    :return: list of paths, each formed by joining the directory reported
        by ``os.walk`` with the file name
    """
    collected = []
    for folder, _subfolders, names in os.walk(file_dir):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected


# Remove blank entries from a list using a list comprehension
def remove_space(list_of_data, index_of_space):
    """Return ``list_of_data`` without the elements at the given positions.

    :param list_of_data: sequence to filter; it is not modified
    :param index_of_space: iterable of integer positions to drop
    :return: new list holding the remaining elements in their original order
    """
    # A set makes each membership test O(1); testing against the list made
    # the whole filter quadratic.
    blank_positions = set(index_of_space)
    return [
        data
        for index, data in enumerate(list_of_data)
        if index not in blank_positions
    ]


# Convert the flood hydrological elements excerpt table into the form ArcHIGH accepts
def transform(file_name):
    """Convert one flood-elements-excerpt workbook into a "NewStyle" workbook.

    Reads the first sheet of ``file_name`` (all indexes zero-based): the year
    from cell (2, 2), the station code from cell (2, 6) and the page count
    from the first run of digits in the first whitespace-separated token of
    cell (2, 18).  The datum offset and the datum-conversion text come from
    the companion daily-mean-stage workbook, whose path is obtained by
    replacing "洪水水文要素摘录表(二要素)" with "逐日平均水位表" in
    ``file_name``.  Each page occupies 55 sheet rows, of which 50 rows
    starting at row 5 are read, and holds four groups of five columns:
    month, day, time, stage and discharge.  Records with a blank stage are
    dropped.  The output holds station code, date-time, 85-datum stage,
    discharge and tabulated stage; it is saved next to the input with
    "NewStyle" appended to the base name.

    :param file_name: path of the flood-elements-excerpt ``.xls`` file
    :return: None
    """
    data = xlrd.open_workbook(file_name)
    table = data.sheets()[0]
    year = table.cell(2, 2).value
    station_code = table.cell(2, 6).value

    # Datum information lives in the matching daily-mean-stage workbook.
    data1 = xlrd.open_workbook(
        file_name.replace("洪水水文要素摘录表(二要素)", "逐日平均水位表")
    )
    table1 = data1.sheets()[0]
    # NOTE(review): eval() executes whatever text the cell contains; only
    # run this on workbooks from a trusted source.
    stage_change = eval(table1.cell(2, 9).value)
    datum = (
        table1.cell(2, 6).value
        + table1.cell(2, 9).value
        + table1.cell(2, 10).value
        + table1.cell(2, 11).value
    )

    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    pages = int(re.findall(r"\d+", table.cell(2, 18).value.split()[0])[0])

    list_of_month = []
    list_of_day = []
    list_of_time = []
    list_of_stage = []
    list_of_discharge = []
    # Read date, time, stage and discharge page by page and, within a page,
    # group by group.
    for page in range(pages):
        rows = slice(5 + 55 * page, 5 + 50 + 55 * page)
        for col in range(4):
            list_of_month += table.col_values(0 + 5 * col)[rows]
            list_of_day += table.col_values(1 + 5 * col)[rows]
            list_of_time += table.col_values(2 + 5 * col)[rows]
            list_of_stage += table.col_values(3 + 5 * col)[rows]
            list_of_discharge += table.col_values(4 + 5 * col)[rows]

    # Positions without a stage value are removed from every list so that
    # the lists stay aligned.
    index_of_space = [
        index for index, stage in enumerate(list_of_stage) if stage == ""
    ]
    list_of_month = remove_space(list_of_month, index_of_space)
    list_of_day = remove_space(list_of_day, index_of_space)
    list_of_time = remove_space(list_of_time, index_of_space)
    list_of_stage = remove_space(list_of_stage, index_of_space)
    list_of_discharge = remove_space(list_of_discharge, index_of_space)

    list_of_date_time = []
    # Month and day are only filled in where they change, so the last seen
    # values are carried forward.
    current_month = "0"
    current_day = "0"
    for month, day, time in zip(list_of_month, list_of_day, list_of_time):
        # The month can only change on a row that also carries a new day.
        if day != "":
            current_day = day
            if month != "":
                current_month = month
        # A numeric cell is a whole hour; other values are used as they are.
        if isinstance(time, float):
            time = str(int(time)) + ":00"
        date_time = "{}-{}-{} {}:00".format(
            int(year), int(current_month), int(current_day), time
        )
        list_of_date_time.append(date_time)

    # A value without "." holds only the decimals; prefix it with the
    # integer part of the most recent complete value.
    stage_int = '0'
    for index, stage in enumerate(list_of_stage):
        if '.' in stage:
            stage_int = stage.split('.')[0]
        else:
            stage = stage_int + '.' + stage
        list_of_stage[index] = stage

    # Stage relative to the 85 datum, rounded to three decimals.
    list_of_stage_85 = [
        str(round(eval(stage) + stage_change, 3)) for stage in list_of_stage
    ]

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("洪水水文要素摘录")
    titles = (
        "测站编码",
        "日期时间",
        "85基准水位(m)",
        "流量(m3/s)",
        "表内水位(m)",
        "基准面转换关系",
    )
    for col, title in enumerate(titles):
        sheet1.write(0, col, title)
    sheet1.write(1, 5, datum)

    columns = (
        [station_code] * len(list_of_stage),
        list_of_date_time,
        list_of_stage_85,
        list_of_discharge,
        list_of_stage,
    )
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)

    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")


# Main entry point
if __name__ == "__main__":
    print("===本程序将洪水水文要素摘录表(二要素)转换为ArcHIGH能接受的形式NewStyle===")
    # Convert every flood-elements-excerpt workbook found below the current
    # working directory.
    for file_name in get_file_name(os.getcwd()):
        if "洪水水文要素摘录表(二要素).xls" in file_name:
            transform(file_name)
    # The completion message used to read "洪洪水…" (duplicated character).
    print("===转换完毕，请查看...洪水水文要素摘录表(二要素)NewStyle.xls文件===")

水文数据汇总

import xlrd
import xlwt
import os  #====将所有的NewStyle提取到一张汇总表上====# 输出该目录下所有子文件目录
def get_file_name(file_dir):
    """Collect the path of every file below ``file_dir``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.

    :param file_dir: directory to search
    :return: list of paths, each formed by joining the directory reported
        by ``os.walk`` with the file name
    """
    collected = []
    for folder, _subfolders, names in os.walk(file_dir):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected


def extract_to_summary(file_name, sheet, cols, sheet_num):
    """Append the records of one NewStyle workbook to a summary sheet.

    The first ``cols`` columns of the workbook's first sheet, without the
    header row, are written to ``sheet`` below the rows already used.  The
    number of used rows is tracked in the module-level list ``rows_sum``,
    which is read and updated at index ``sheet_num``.

    :param file_name: path of the NewStyle ``.xls`` file to read
    :param sheet: summary sheet to write to
    :param cols: number of leading columns to copy
    :param sheet_num: index of this summary sheet's counter in ``rows_sum``
    :return: None
    """
    table = xlrd.open_workbook(file_name).sheets()[0]
    # Every row except the header is a record.
    record_count = table.nrows - 1
    first_free_row = 1 + rows_sum[sheet_num]
    for col in range(cols):
        values = table.col_values(col)[1:]
        for row, value in enumerate(values, first_free_row):
            sheet.write(row, col, value)
    rows_sum[sheet_num] += record_count


# Main entry point
if __name__ == "__main__":
    print("===本程序将所有的NewStyle提取到一张水文数据汇总表上===")
    list_of_files = get_file_name(os.getcwd())

    # Rows already written to each summary sheet; extract_to_summary reads
    # and updates this module-level list.
    rows_sum = [0, 0, 0, 0, 0, 0]

    # One entry per summary sheet: sheet title, header cells, number of
    # columns to copy, and the table name that NewStyle file names contain.
    layout = (
        ("逐日降水量", ("STCD", "TMBEG", "TMEND", "R"), 4, "逐日降水量表"),
        ("逐日平均水位", ("STCD", "TM", "Z"), 3, "逐日平均水位表"),
        ("逐日水面蒸发量", ("STCD", "TM", "E"), 3, "逐日水面蒸发量表"),
        ("降水量摘录", ("STCD", "TMBEG", "TMEND", "R"), 4, "降水量摘录表"),
        ("洪水水位摘录", ("STCD", "TM", "Z"), 3, "洪水水位摘录表"),
        ("洪水水文要素摘录", ("STCD", "TM", "Z", "Q"), 4, "洪水水文要素摘录表(二要素)"),
    )

    output = xlwt.Workbook()
    list_of_sheets = []
    for title, headers, _cols, _table_name in layout:
        sheet = output.add_sheet(title)
        for col, header in enumerate(headers):
            sheet.write(0, col, header)
        list_of_sheets.append(sheet)

    # Route every NewStyle workbook to the summary sheet of its table type.
    for file_name in list_of_files:
        for index, (_title, _headers, cols, table_name) in enumerate(layout):
            if table_name + "NewStyle" in file_name:
                extract_to_summary(file_name, list_of_sheets[index], cols, index)

    output.save("水文数据汇总表.xls")
    print("===转换完毕，请查看水文数据汇总表.xls文件===")

所有文件目录

import xlrd
import xlwt
import os  # 输出该目录下所有子文件目录
def get_file_name(file_dir):
    """Collect the name of every file below ``file_dir``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included.  Only the bare file names are returned,
    without their directory.

    :param file_dir: directory to search
    :return: list of file names
    """
    collected = []
    for _folder, _subfolders, names in os.walk(file_dir):
        collected.extend(names)
    return collected


# Main entry point
if __name__ == "__main__":
    print("===打印目录中所有子文件===")
    # Keep only the original files, i.e. names without "NewStyle".
    list_of_origin_files = [
        name for name in get_file_name(os.getcwd()) if "NewStyle" not in name
    ]
    print('\n'.join(list_of_origin_files))

    # Write the names into a one-column workbook below a header cell.
    output = xlwt.Workbook()
    sheet1 = output.add_sheet("目录名")
    sheet1.write(0, 0, "目录名")
    for row, name in enumerate(list_of_origin_files, 1):
        sheet1.write(row, 0, name)
    output.save("目录名.xls")

数据文件及程序

本地目录：笔记本电脑D:\Projects\数据清洗\杭州南排\第一次任务\提供杭州林水局，办公室电脑E:\Projects\数据清洗\杭州南排

【Python】08 通用水文年鉴数据提取方法相关推荐

【2020】10 水文年鉴
系列文章 [2020]02 记录生活 [2020]03 从中二走向理性 [2020]04 期盼开学 [2020]05 毕业论文 [2020]06 毕业 [2020]07 留校 [2020]08 回家 ...
Python分布式通用爬虫（4）
Python分布式通用爬虫(4):处理并统一时间格式划分.py文件部分处理并统一时间格式普通时间转换代码展示佛历时间转换代码展示缅甸语时间转换代码展示其他需要替换月份的时间转换代码 ...
入门科普：Python、R、大数据、云计算最全学习资源都在这里
导读:本文写给有抱负的新兴数据科学家.知道各种专业知识的程序员,还有那些不懂任何编程技巧的初学者.本文提供了简单的教程和可实践的分析,而不是理论.我还试图将Python与R结合起来,为学习者提供对比的 ...
list可以存放python中任意类型的数据_Python中常见的数据类型小结
Python提供多种数据类型来存放数据项集合,主要包括序列(列表list和元组tuple),映射(如字典dict),集合(set),下面对这几种一一介绍: 一序列 1.列表list 列表是一种有序的 ...
Python 连接开放航空交通数据，轻松构建航班跟踪应用！
作者 | geomatics 译者 | 弯月,责编 | 郑丽媛头图 | CSDN 下载自视觉中国出品 | CSDN(ID:CSDNnews) 以下为译文: 我喜欢跟踪位置这个主题,因为感觉像是我有 ...
python爬取网页json数据_python爬取json数据库
手把手教你使用Python抓取QQ音乐数据(第一弹) [一.项目目标] 获取 QQ 音乐指定歌手单曲排行指定页数的歌曲的歌名.专辑名.播放链接. 由浅入深,层层递进,非常适合刚入门的同学练手. [二. ...
云计算和python学哪个_大数据、云计算该如何学习？
大数据之Linux+大数据开发篇阶段一.大数据.云计算 - Hadoop大数据开发技术课程一.大数据运维之Linux基础本部分是基础课程,帮大家进入大数据领域打好Linux基础,以便更好地学习H ...
python 语音特征提取_使用python实现语音文件的特征提取方法
概述语音识别是当前人工智能的比较热门的方向,技术也比较成熟,各大公司也相继推出了各自的语音助手机器人,如百度的小度机器人.阿里的天猫精灵等.语音识别算法当前主要是由rnn.lstm.dnn-hmm等 ...
python脚本之将mysql数据转发给第三方udp网络端口
python脚本之将mysql数据转发给第三方udp网络端口前言代码实现原文地址:https://program-park.github.io/2021/03/12/python_2/ 前言最 ...

【Python】08 通用水文年鉴数据提取方法