# pip install pypdf2 pypdf3
# Read a PDF document (PyPDF2)
def read_pdf(path):
    """Print each page's text, then the document metadata and page count.

    Args:
        path: Location of the PDF file to read.

    Returns:
        None. Everything is written to standard output.
    """
    # pip install pypdf2
    from PyPDF2 import PdfReader

    with open(path, 'rb') as stream:
        reader = PdfReader(stream)
        metadata = reader.metadata
        page_count = len(reader.pages)
        # Text extraction happens while the file is still open.
        for page in reader.pages:
            print(page.extract_text())
        # Sample output: {'/Producer': 'Microsoft® Word 2021', '/Creator': 'Microsoft® Word 2021', '/CreationDate': "D:20240509101719+08'00'", '/ModDate': "D:20240509101719+08'00'"}
        print(metadata)
        # Sample output: 1
        print(page_count)
# Read a PDF document, method 2 (pdfplumber)
def read_pdf2(path):
    """Print the page list and the text of every page using pdfplumber.

    Two equivalent ways of walking the pages are shown one after the other,
    and both run, so each page's text is printed twice.

    Args:
        path: Location of the PDF file to read.

    Returns:
        None. Everything is written to standard output.
    """
    # pip install pdfplumber
    import pdfplumber

    with pdfplumber.open(path) as document:
        pages = document.pages
        print(pages)
        # Variant 1: positional access by page index.
        for index, _ in enumerate(pages):
            current = pages[index]
            print(current.extract_text())
        # Variant 2 (alternative): iterate over the pages directly.
        for current in pages:
            print(current.extract_text())
# Merge PDFs
def merger_pdf(path1, path2, outPath):
    """Concatenate two PDFs into one file.

    All pages of ``path1`` are written first, followed by all pages of
    ``path2``.

    Args:
        path1: Path of the first input PDF.
        path2: Path of the second input PDF.
        outPath: Path of the merged PDF to create (opened in ``'wb'`` mode,
            so an existing file is overwritten).

    Returns:
        None.
    """
    from PyPDF2 import PdfReader, PdfWriter

    writer = PdfWriter()
    # Both inputs are opened in a single `with` so their handles are always
    # closed (the previous version leaked them), and they stay open until
    # the merged output has been written.
    with open(path1, 'rb') as first, open(path2, 'rb') as second:
        for handle in (first, second):
            reader = PdfReader(handle)
            for page in reader.pages:
                writer.add_page(page)
        with open(outPath, 'wb') as out:
            writer.write(out)
# Split a PDF
def split_pdf(path, outDir):
    """Write every page of a PDF to its own single-page PDF file.

    Output files are named ``f"{outDir}拆分PDF_{i}.pdf"`` where ``i`` is the
    zero-based page index. ``outDir`` is used as a plain string prefix, so it
    needs to end with a path separator to place the files inside a directory.

    Args:
        path: Path of the PDF to split.
        outDir: Prefix (typically a directory path ending in ``/``) for the
            generated files.

    Returns:
        None.
    """
    from PyPDF2 import PdfReader, PdfWriter

    # The source handle is now closed deterministically; it is kept open
    # for the whole loop because each page is written while iterating.
    with open(path, 'rb') as source:
        reader = PdfReader(source)
        for i, page in enumerate(reader.pages):
            writer = PdfWriter()
            writer.add_page(page)
            with open(f"{outDir}拆分PDF_{i}.pdf", 'wb') as out:
                writer.write(out)
# Encrypt a PDF
def jia_mi(path, outPath, password='123456', src_password=None):
    """Copy a PDF into a new, password-protected PDF.

    Args:
        path: Path of the input PDF.
        outPath: Path of the encrypted PDF to create (opened in ``'wb'``
            mode, so an existing file is overwritten).
        password: Password set on the output file. Defaults to ``'123456'``,
            the value that was previously hard-coded.
        src_password: Password used to decrypt the input when it is already
            encrypted. Defaults to ``password``, which matches the earlier
            behaviour of using the same value for both.

    Returns:
        None.
    """
    from PyPDF2 import PdfReader, PdfWriter

    if src_password is None:
        src_password = password

    # The input handle is closed when the block exits (it used to be leaked)
    # and stays open until the output has been written.
    with open(path, 'rb') as source:
        reader = PdfReader(source)
        if reader.is_encrypted:  # only needed when the input has a password
            reader.decrypt(src_password)
        writer = PdfWriter()
        writer.encrypt(password)  # set the password of the output file
        for page in reader.pages:
            writer.add_page(page)
        with open(outPath, 'wb') as out:
            writer.write(out)
if __name__ == '__main__':
    # Demo entry point: uncomment one of the calls below to try the other
    # helpers; by default only the encryption example runs.
    # read_pdf('./办公自动化/files/违章通知书.pdf')
    # read_pdf2('./办公自动化/files/违章通知书.pdf')
    # merger_pdf('./办公自动化/files/违章通知书.pdf', './办公自动化/files/违章通知书.pdf', './办公自动化/files/合并PDF.pdf')
    # split_pdf('./办公自动化/files/合并PDF.pdf', './办公自动化/files/')
    jia_mi(
        './办公自动化/files/违章通知书.pdf',
        './办公自动化/files/加密PDF.pdf',
    )