-
Notifications
You must be signed in to change notification settings - Fork 1
/
file-stable
executable file
·144 lines (114 loc) · 4.14 KB
/
file-stable
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/python3
# written by Bart Alewijnse
'''
Prints (once) the absolute filenames for files that have not changed size/ctime in a while
...under a given directory
...that match a given shellglob
Meant to feed things to xargs, e.g.
file-stable '*-[0-9][0-9][0-9][0-9].mrc' . | xargs -t -n 1 -P 3 pigz -3 -k
...to only compress certain files (that we know will compress).
When you run this on files that are actively being copied in, the interval can be low.
If you don't know exactly how often files are written to (e.g. a preallocated tomo stack), you may want it higher.
TODO:
- distinguish 'ignore because not file', so we can add a 'changed again' feature later
- verbose option
- 'quiet on stderr' option
'''
import time
import os
import stat
import sys
import fnmatch
# Optional nicety: show the script's own name as the process title.
# setproctitle is a third-party module, so its absence is silently tolerated.
try:
    import setproctitle
except ImportError:
    pass
else:
    setproctitle.setproctitle(os.path.basename(sys.argv[0]))
# Seconds the main loop sleeps between two scans of the directory trees.
interval_sec = 10
# Full filename -> os.stat() result recorded by consider_stable() the last time it examined that file.
ffn_lastseen = {} # filename -> stat_object
# Full filenames to skip on later scans. Used as a set (values are always True);
# entries are added when stat fails, when the path is not a regular file,
# and once a stable file has been printed.
ffn_ignore = {}
# Flag consulted by printerr(); a false value silences its stderr output.
verbose = 1
def printerr(s, newline=True):
    ''' Write a diagnostic message to stderr and flush it immediately.
        Does nothing when the module-level `verbose` flag is false.

        s:       the message. Non-string objects (e.g. an exception instance)
                 are converted with str() instead of raising TypeError.
        newline: when true (the default), a '\\n' is appended to the message.
    '''
    if not verbose:
        return
    text = s if isinstance(s, str) else str(s)
    if newline:
        text += '\n'
    sys.stderr.write(text)
    # flush so diagnostics appear in order even if stderr happens to be buffered
    sys.stderr.flush()
def consider_stable(ffn):
    ''' Given a filename and the stat info remembered from earlier calls (ffn_lastseen),
        decide whether the file has stopped changing.

        Return:
          True   regular file whose size and ctime are equal to those recorded on the previous call
          False  regular file that is seen for the first time, or that changed since the previous call
          None   'not applicable' (distinct from False): os.stat() failed or the path is not
                 a regular file. The path is also added to ffn_ignore.
    '''
    try:
        stob = os.stat(ffn)
    except OSError as e: # assume broken symlink. Could actually check that
        printerr('Ignoring %r (%r)'%(ffn,e))
        ffn_ignore[ffn] = True
        return None

    # only look at regular files (no directory structure)
    if not stat.S_ISREG(stob.st_mode):
        ffn_ignore[ffn] = True
        return None

    # Always remember the newest observation; the next call compares against it.
    last_stob = ffn_lastseen.get(ffn)
    ffn_lastseen[ffn] = stob

    if last_stob is None:
        # new file: we need a second observation before we can call it stable
        return False

    return (last_stob.st_size == stob.st_size and
            last_stob.st_ctime == stob.st_ctime)
def _walk_matches(fdirs, pat):
    ''' Walk every directory tree in `fdirs` once and report the files matching `pat`.

        Yields one (n_filenames, matches) tuple per directory visited by os.walk():
          n_filenames  number of file entries in that directory (matching or not)
          matches      full paths of the entries whose basename matches the shell glob `pat`

        A path is reported at most once per call, even when the trees in `fdirs` overlap.
        Otherwise the same file would be examined twice within one scan and be
        mistaken for 'unchanged since the previous scan'.
    '''
    already_reported = set()
    for dirname in fdirs:
        for root, _subdirs, filenames in os.walk(dirname):
            matches = []
            for fn in filenames:
                if not fnmatch.fnmatch(fn, pat):
                    continue
                ffn = os.path.join(root, fn)
                if ffn in already_reported:
                    continue
                already_reported.add(ffn)
                matches.append(ffn)
            yield len(filenames), matches


def main(argv=None):
    ''' Command line entry point: scan forever, printing each stable file once.

        argv: argument list in sys.argv layout (program name, glob, directories...).
              Defaults to sys.argv.

        With fewer than two arguments the usage text is written to stderr and the
        process exits with status 2. stdout stays clean because it is meant to be
        piped into xargs. Otherwise this function loops until the process is killed.
    '''
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        progname = os.path.basename(argv[0]) if argv else 'file-stable'
        sys.stderr.write("Usage: %s glob directory [directory...]\n" % progname)
        sys.exit(2)

    pat = argv[1]
    fdirs = sorted({os.path.abspath(dirname) for dirname in argv[2:]})

    printerr( "Searching in %r"%fdirs )
    printerr( "Matching on %r"%pat )

    while True:
        printerr( "===================== iteration =====================" )
        count_dir = 0
        count_file = 0
        count_stat = 0

        for n_filenames, matches in _walk_matches(fdirs, pat):
            count_dir += 1
            count_file += n_filenames
            for ffn in matches:
                if ffn in ffn_ignore:
                    continue
                count_stat += 1
                if consider_stable(ffn):
                    # flush: stdout is usually a pipe (block-buffered), and the
                    # consumer should get each filename as soon as it is known
                    print(ffn, flush=True)
                    ffn_ignore[ffn] = True  # print each file only once

        printerr( "%d listdirs, %d filenames seen, %d files statted"%(count_dir, count_file, count_stat) )
        printerr( "Sleeping %d seconds"%interval_sec )
        time.sleep( interval_sec )


if __name__ == '__main__':
    main()