深入理解shutil模块
copyfileobj、copyfile、copymode、copystat
copy、copy2、copytree
rmtree
move
shutil
- python中提供的高级文件操作库
- 官方文档
复制
实现一个简单的复制
# Write sample text to a.txt, then copy it into b.txt by hand.
with open('a.txt', 'w+') as f1:
    f1.write('abcd\n1234')
    f1.flush()
    f1.seek(0)  # rewind the file pointer so read() starts at the beginning
    # f1 has to stay open while it is read, so the second `with` is nested.
    with open('b.txt', 'w+') as f2:
        f2.write(f1.read())
copyfileobj和copyfile
copyfileobj(fsrc, fdst[, length])
- 文件对象的复制,fsrc和fdst是open打开的文件对象,复制内容
- fdst要求可写
- length 指定缓冲区(buffer)的大小,即每次读取的数据量;不指定(为 0)时使用默认的 COPY_BUFSIZE
# Source of copyfileobj (excerpt from CPython's shutil.py)
import os

# shutil.py defines this flag at module level; it is repeated here so the
# excerpt can run on its own.
_WINDOWS = os.name == 'nt'
# Default chunk size: 1 MiB on Windows, 64 KiB elsewhere.
COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 64 * 1024


def copyfileobj(fsrc, fdst, length=0):
    """copy data from file-like object fsrc to file-like object fdst

    fsrc must provide read(size) and fdst must provide write(data).
    length is the chunk size passed to each read; a falsy value (the
    default 0) selects COPY_BUFSIZE.  Returns None.
    """
    # Localize variable access to minimize overhead.
    if not length:
        length = COPY_BUFSIZE
    fsrc_read = fsrc.read
    fdst_write = fdst.write
    while True:
        buf = fsrc_read(length)
        if not buf:
            # An empty read means the source is exhausted.
            break
        fdst_write(buf)
# Using copyfileobj
import shutil

with open('a.txt', 'w+') as f1:
    f1.write('abcd\n1234')
    f1.flush()
    f1.seek(0)  # rewind so copyfileobj reads from the start of the file
    # Both file objects must be open at the time of the copy.
    with open('b.txt', 'w+') as f2:
        shutil.copyfileobj(f1, f2)
copyfile(src, dst, *, follow_symlinks=True)
- 复制文件内容,不含元数据
- src、dst为文件的路径字符串
- 本质上调用的就是copyfileobj,所以只是以二进制方式复制内容,不带元数据,其源码如下:
def copyfile(src, dst, *, follow_symlinks=True):
    """Abridged excerpt of shutil.copyfile: copy file contents, no metadata.

    src and dst are file paths.  Raises SameFileError when both refer to
    the same file.  When follow_symlinks is false and src is a symlink, a
    new symlink is created at dst instead of copying the target's data.
    Returns dst.
    """
    # ... (code omitted in this excerpt)
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))
    # ... (code omitted in this excerpt)
    if not follow_symlinks and _islink(src):
        os.symlink(os.readlink(src), dst)
    else:
        # Both files are opened in binary mode, so the bytes are copied as-is.
        with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
            # ... (code omitted in this excerpt)
            copyfileobj(fsrc, fdst)
    return dst
# Using copyfile (a.txt must already exist; c.txt is created or overwritten)
import shutil

shutil.copyfile('a.txt', 'c.txt')
copymode和copystat
-
copymode(src, dst, *, follow_symlinks=True)
- 仅复制权限,源码如下:
def copymode(src, dst, *, follow_symlinks=True):
    """Copy mode bits from src to dst.

    If follow_symlinks is not set, symlinks aren't followed if and only
    if both `src` and `dst` are symlinks.  If `lchmod` isn't available
    (e.g. Linux) this method does nothing.

    """
    sys.audit("shutil.copymode", src, dst)

    if not follow_symlinks and _islink(src) and os.path.islink(dst):
        # Operate on the links themselves, which needs os.lchmod.
        if hasattr(os, 'lchmod'):
            stat_func, chmod_func = os.lstat, os.lchmod
        else:
            return
    else:
        stat_func, chmod_func = _stat, os.chmod

    st = stat_func(src)
    # S_IMODE keeps only the permission bits of st_mode.
    chmod_func(dst, stat.S_IMODE(st.st_mode))
# Copy only the permission bits from test1 onto test.
shutil.copymode('test1','test')
查看并比较test1和test的权限
-
copystat(src, dst, *, follow_symlinks=True)
- 复制元数据,stat包含权限
# Copy the metadata (stat info, which includes the permission bits) from test1 onto test.
shutil.copystat('test1','test')
查看并比较test1和test的权限
copy和copy2
copy(src, dst, *, follow_symlinks=True)
- 复制文件内容和权限(mode bits),不复制其他元数据(如创建时间和修改时间)
- 本质上调用的是copyfile和copymode,其源码如下:
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    The destination may be a directory.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    if os.path.isdir(dst):
        # Copying into a directory keeps the source file's base name.
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst, follow_symlinks=follow_symlinks)
    copymode(src, dst, follow_symlinks=follow_symlinks)
    return dst
copy2(src, dst, *, follow_symlinks=True)
- copy2 比copy多了复制全部元数据
- 本质上是调用了copyfile和copystat,其源码如下:
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and metadata. Return the file's destination.

    Metadata is copied with copystat(). Please see the copystat function
    for more information.

    The destination may be a directory.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    if os.path.isdir(dst):
        # Copying into a directory keeps the source file's base name.
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst, follow_symlinks=follow_symlinks)
    # The only difference from copy(): copystat instead of copymode.
    copystat(src, dst, follow_symlinks=follow_symlinks)
    return dst
copytree
copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=False, dirs_exist_ok=False)
- 递归复制目录。默认使用copy2,也就是说复制了更多的元数据
- src、dst必须是目录,src必须存在;dst默认必须不存在(dirs_exist_ok=True时允许dst已存在)
- ignore 用于设置不需要拷贝的内容:需要传入一个函数(可调用对象),其返回值是需要忽略的名称集合(如 set 类型)。源码解读:

def _copytree(entries, src, dst, symlinks, ignore, copy_function,
              ignore_dangling_symlinks, dirs_exist_ok=False):
    if ignore is not None:
        ignored_names = ignore(os.fspath(src), [x.name for x in entries])  # ignore是可调用(callable)对象
    else:
        ignored_names = set()  # 集合
    ......
    for srcentry in entries:
        if srcentry.name in ignored_names:
            continue
        ......
            elif srcentry.is_dir():
                copytree(srcobj, dstname, symlinks, ignore, copy_function,
                         dirs_exist_ok=dirs_exist_ok)  # 递归调用copytree,完成遍历
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcobj, dstname)  # 即默认调用copy2
        ......
- ignore设置不需要拷贝的内容,使用示例
# 将a文件夹下的b文件夹(除了a开头的文件)拷贝到当前目录c文件夹中
import shutil
from pathlib import Path
def ignore(src, names):
    """Return the set of entry names that copytree should skip.

    src is the directory currently being copied (unused here) and names
    lists the entries inside it.  Every name starting with 'a' is
    selected for exclusion.
    """
    s = set()
    for name in names:
        if name.startswith('a'):
            # Names collected here are filtered out by copytree, i.e. NOT copied.
            s.add(name)
    return s
def ignore1(src, names):
    """Return the set of names starting with 'a' (set-comprehension version).

    src is the directory being copied (unused); names lists its entries.
    The returned names are the ones copytree will skip.
    """
    return {name for name in names if name.startswith('a')}
def ignore2(src, names):
    """Return the set of names starting with 'a' (filter-based version).

    src is the directory being copied (unused); names lists its entries.
    The returned names are the ones copytree will skip.
    """
    return set(filter(lambda x: x.startswith('a'), names))
# Build the sample tree a/b containing a.txt and b.txt.
p = Path('a/b')
p.mkdir(parents=True, exist_ok=True)
for filename in ('a.txt', 'b.txt'):
    (p / filename).touch()
# Copy a/ into ./c; ignore2 drops every entry whose name starts with 'a'.
destination = Path.cwd() / 'c'
shutil.copytree('a/', destination, ignore=ignore2)
复制使用结论
- 如果需要拷贝更多元数据可使用copy2
- 如果需要递归复制,可使用copytree
删除
rmtree
shutil.rmtree(path, ignore_errors=False, onerror=None)
- 递归删除。如同rm -rf一样危险,慎用
- 它不是原子操作,有可能删除错误,就会中断,已经删除的就删除了
- ignore_errors 为 True 时忽略错误;为 False 或省略(omitted)时 onerror 生效
- onerror 为 callable,接受三个参数:function、path 和 excinfo(Python 3.12 起新增 onexc 参数,onerror 已被弃用)
移动
move
move(src, dst, copy_function=copy2)
- 递归移动文件、目录到目标,返回目标
- 本身使用的是 os.rename方法
- 如果无法直接rename(例如目标在另一个文件系统上):目录用copytree复制后再删除源目录,文件用copy_function复制后再删除源文件
- 默认使用copy2方法
参考
- magedu