在数字化转型时代,Python已成为办公自动化的核心技术。本文将深入解析7个经过经典案例,涵盖文件处理、数据分析、报告生成等核心场景,每个案例均可直接应用于实际工作环境。
案例1:Excel数据清洗与合并
import pandas as pd
from pathlib import Path
def excel_clean_merge(folder_path, output_name):
all_data = []
for file in Path(folder_path).glob('*.xlsx'):
df = pd.read_excel(file, skiprows=2) # 跳过标题行
df['Source'] = file.name
all_data.append(df)
merged = pd.concat(all_data, ignore_index=True)
# 关键清洗步骤
merged.dropna(subset=['OrderID'], inplace=True)
merged['Amount'] = merged['Amount'].astype(float)
# 保存处理结果
merged.to_excel(f"{output_name}.xlsx", index=False)
# 使用示例
excel_clean_merge('/data/reports', 'Q1_Consolidated_Report')
案例2:邮件自动批量发送
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import csv
def send_bulk_emails(config_file, template_file):
# 读取配置文件
with open(config_file) as f:
reader = csv.DictReader(f)
contacts = list(reader)
# 加载模板
with open(template_file) as f:
template = f.read()
server = smtplib.SMTP('smtp.office365.com', 587)
server.starttls()
server.login('your_email@domain.com', 'secure_password')
for contact in contacts:
msg = MIMEMultipart()
personalized = template.format(**contact)
msg.attach(MIMEText(personalized, 'html'))
msg['Subject'] = f"重要通知:{contact['name']}的专属更新"
server.sendmail('noreply@domain.com', contact['email'], msg.as_string())
server.quit()
# 使用前需配置SMTP服务和联系人CSV
# send_bulk_emails('contacts.csv', 'template.html')
重要安全提示: 使用环境变量存储邮箱密码 os.getenv('EMAIL_PASSWORD')
案例3:PDF批量信息提取
import PyPDF2
import re
def extract_pdf_data(file_path):
result = []
with open(file_path, 'rb') as pdf_file:
reader = PyPDF2.PdfReader(pdf_file)
for page in reader.pages:
text = page.extract_text()
# 提取发票关键信息
inv_pattern = r'Invoice No:\s*(\w+)'
amount_pattern = r'Total Due:\s*(\$\d+\.\d{2})'
if match := re.search(inv_pattern, text):
invoice_num = match.group(1)
if match := re.search(amount_pattern, text):
amount = match.group(1)
result.append((invoice_num, amount))
return result
# 批量处理示例
for pdf in Path().glob('invoices/*.pdf'):
print(extract_pdf_data(str(pdf)))
案例4:自动化日报生成
from openpyxl import load_workbook
from datetime import datetime
def auto_daily_report(data_file):
wb = load_workbook(data_file)
ws = wb.active
today = datetime.now().strftime("%Y-%m-%d")
# 动态获取最新数据区域
max_row = ws.max_row
daily_sales = ws.cell(row=max_row, column=3).value
report = f"""
╭──────────────────────────╮
│ 每日销售报告 ({today}) │
├──────────────────────────┤
│ 当日销售额:${daily_sales} │
╰──────────────────────────╯
"""
# 写入日志文件
with open(f'daily_report_{today}.txt', 'w') as f:
f.write(report)
return report
print(auto_daily_report('sales_database.xlsx'))
案例5:远程文件自动同步(AWS S3)
import boto3
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class SyncHandler(FileSystemEventHandler):
def on_modified(self, event):
s3 = boto3.client('s3')
s3.upload_file(event.src_path, 'my-bucket', f'backups/{event.src_path}')
observer = Observer()
observer.schedule(SyncHandler(), path='/critical_docs', recursive=True)
observer.start() # 后台运行
案例6:会议自动时间安排
import win32com.client
import pandas as pd
def schedule_meetings(participants_file):
outlook = win32com.client.Dispatch('Outlook.Application')
df = pd.read_excel(participants_file)
for _, row in df.iterrows():
appt = outlook.CreateItem(1) # 1=olAppointmentItem
appt.Subject = "季度业务复盘会议"
appt.Start = row['PreferredTime'] + " 14:00"
appt.Duration = 60
appt.Location = "Teams会议"
appt.MeetingStatus = 1# olMeeting
appt.Recipients.Add(row['Email'])
appt.Save()
案例7:构建自动化工作流
通过Airflow实现任务调度:
from airflow import DAG
from airflow.operators.python import PythonOperator
from datetime import datetime
def run_daily_tasks():
excel_clean_merge()
generate_reports()
send_notifications()
with DAG('office_auto',
start_date=datetime(2023, 1, 1),
schedule_interval='@daily') as dag:
main_task = PythonOperator(
task_id='daily_workflow',
python_callable=run_daily_tasks
)
结语
Python自动化办公的核心价值在于释放创造力。本文介绍了7个常见的案例供大家学习,希望大家有所收获。