第3章 高效办公文件管理
统计文本中字符出现的频率
from collections import Counter

# Characters excluded from the frequency count (punctuation marks and newline).
punctuation = ',。!?、()【】<>《》=:+-*——“”...\n'

# Raw string: the backslash is a Windows path separator, not an escape sequence.
with open(r'Doc\doc.txt', encoding='utf8', mode='r') as file_object:
    # Feed every non-excluded character straight into the Counter instead of
    # collecting them in an intermediate list (the old list was also named
    # `list`, which shadowed the builtin).
    counter = Counter(
        char
        for line in file_object
        for char in line
        if char not in punctuation
    )
print(counter)
判断文件类型
将文件头的前 8 个字节打印出来,即可据此判断文件类型。
# Print the first 8 bytes (the file signature) of each sample document.
# Raw strings keep the Windows backslashes from being parsed as escape
# sequences (`\K` and `\R` are invalid escapes in a normal string literal).
sample_paths = (
    r'Doc\KCA-KCP培训课件名称.xlsx',
    r'Doc\R6-15 TOAST行外存储v0.13.docx',
)
for sample_path in sample_paths:
    # Binary mode: the header must be read as raw bytes, not decoded text.
    with open(sample_path, mode='rb') as file_object:
        content = file_object.read(8)
    print(content)
| 文件类型 | 文件头 |
|---|---|
| Office97-2003 | D0CF11E0A1BA1AE1 |
| Office2021 | 504B030414000600 |
| JPEG(.jpg) | FFD8FF |
| PNG | 89504E47 |
| GIF(.gif) | 47494638 |
| PDF(.pdf) | 255044462D312E |
| AVI(.avi) | 41564920 |
| ZIP Archive(.zip) | 504B0304 |
| RAR Archive(.rar) | 52617221 |
遍历文件目录
import os

# Raw string: the backslashes are Windows path separators, not escapes.
path = r"D:\SynologyDrive\永辉金融"
# Walk the whole tree under `path`. The os.walk loop must be active: it is
# what binds folName and filenames for the inner loop.
for folName, subfolders, filenames in os.walk(path):
    for filename in filenames:
        print(folName, filename)
import os

# Raw string: the backslashes are Windows path separators, not escapes.
path = r"D:\SynologyDrive\永辉金融\项目交付\数据库业务ER模型图示例.png.list"
print(os.path.split(path))     # (head, tail) pair
print(os.path.basename(path))  # final path component only
print(os.path.splitext(path))  # (root, extension); only the last suffix is split off
# Build a path for the same file name inside a sub-folder of the current directory.
print(os.path.join(os.getcwd(), '子文件夹', os.path.basename(path)))
获取文件属性
import os
import time

# Raw string: the backslashes are Windows path separators, not escapes.
path = r"D:\SynologyDrive\永辉金融\项目交付\数据库业务ER模型图示例.png"
# Each os.path.get*time call returns seconds since the epoch;
# time.ctime turns that into a readable local-time string.
print(time.ctime(os.path.getctime(path)))  # creation time on Windows
print(time.ctime(os.path.getmtime(path)))  # last modification time
print(time.ctime(os.path.getatime(path)))  # last access time
这里的创建时间并不是指文件内容的原创时间;如果文件是从别处复制过来的,创建时间就是复制的时间。
删除小文件
import os

# Files smaller than this many bytes count as "small" and are deleted.
SIZE_LIMIT = 2000
# Files with this extension are always kept, whatever their size.
KEEP_EXTENSION = '.txt'

os.chdir('./Doc')
print(os.getcwd())
print(os.listdir())
for file in os.listdir():
    path = os.path.abspath(file)
    filesize = os.path.getsize(path)
    extension = os.path.splitext(path)[-1]
    print(extension)
    # Delete *small* files only: the comparison used to be `>`, which removed
    # the large files instead. Sub-directories are skipped because os.remove
    # cannot delete them, and `and` replaces the bitwise `&`.
    if (
        os.path.isfile(path)
        and filesize < SIZE_LIMIT
        and extension != KEEP_EXTENSION
    ):
        # NOTE: os.remove deletes permanently; nothing goes to a recycle bin.
        os.remove(file)
Warning
os.remove 会直接删除文件,不会进入回收站,所以要小心。
os.path.splitext(path)[-1]是提取文件的后缀名。
批量更名
import os

os.chdir('./Doc')
path = os.getcwd()
# Rename every file under the current directory from "<stem><ext>" to
# "2022<stem>2022<ext>".
for foldName, subfolders, filenames in os.walk(path):
    for filename in filenames:
        abspath = os.path.join(foldName, filename)
        old_name = os.path.basename(abspath)
        print(old_name)
        # Split stem and extension once and rebuild the name explicitly.
        # str.replace(extension, ...) would rewrite every occurrence of the
        # extension text and, for files without an extension (empty string),
        # would insert '2022' between every character of the name.
        stem, extension = os.path.splitext(filename)
        new_name = '2022' + stem + '2022' + extension
        new_abspath = os.path.join(foldName, new_name)
        print(new_abspath)
        os.renames(abspath, new_abspath)