-
Notifications
You must be signed in to change notification settings - Fork 1
/
file-estimate-homediruse
executable file
·349 lines (295 loc) · 12.5 KB
/
file-estimate-homediruse
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
#!/usr/bin/python3
'''
Summarizes each user's homedir - used space, recent files.
Written by Bart Alewijnse
'''
import os
import datetime
import time
import stat
import math
def sfloat(f, fixedwidth=10, aligndigit=4, removetrail=1, digits=None, extradigits=0, estyleabove=100000):
    """Format a number showing only one or two significant digits.

    Idea: when studying a screen full of numbers, show only a couple of
    significant digits at any scale, so that the length and alignment of
    each figure indicate its scale and the column is easy to skim.

    f            the number to format
    fixedwidth   minimum width the result is right-justified to
                 (int/float, or a string that is used as the width in a
                 '%<width>s' format); anything falsy (0 aside) disables padding
    aligndigit   pad the right-hand side with spaces so that decimal points
                 (or exponents) of differently-scaled numbers line up;
                 0 disables this
    removetrail  strip trailing zeroes (and a then-dangling dot)
    digits       number of decimals; None picks one based on the magnitude
    extradigits  added to the chosen number of decimals (may be negative)
    estyleabove  magnitudes above this are shown in '6.1e+07' style

    Returns the formatted string. Non-exponent output starts with a space
    or a minus sign (the '% f' sign flag).
    """
    # (exclusive upper bound on abs(f), number of decimals to show).
    # Simple to read and tweak; anything at or above the last bound is
    # shown as an integer.
    magnitude_decimals = (
        (.00001, 7),
        (.0001,  6),
        (.001,   5),
        (.01,    4),
        (.5,     3),
        (7,      2),
        (100,    1),
    )

    if isinstance(fixedwidth, (int, float)):
        fixedwidth = str(int(fixedwidth))

    af = abs(f)
    if af > estyleabove:
        # '6.1e+07' style (in part to keep it shortish so that fixedwidth makes sense)
        ret = '%.1e' % f
        # There is a digit before the e and the exponent may end in a 0;
        # neither should be stripped.
        removetrail = 0
    else:
        if digits is None:
            digits = 0  # exact zero, or large enough to show as an integer
            if af != 0.:
                for upper, decimals in magnitude_decimals:
                    if af < upper:
                        digits = decimals
                        break
        # negative extradigits could push this below zero
        digits = max(0, digits + extradigits)
        ret = ('%% .%df' % digits) % f

    if removetrail and '.' in ret:
        # remove trailing zeroes, and the dot if that is what remains
        ret = ret.rstrip('0')
        if ret.endswith('.'):
            ret = ret[:-1]

    if aligndigit:
        if 'e' in ret:
            # characters after the exponent's sign
            have = len(ret) - ret.rindex('e') - 2
            want = aligndigit - 1
        elif '.' in ret:
            # the dot plus the decimals after it
            have = len(ret) - ret.rindex('.')
            want = aligndigit
        else:
            # no dot at all: count existing trailing spaces (plus one),
            # and reserve room for the dot that other rows will have
            have = len(ret) - len(ret.rstrip(' ')) + 1
            want = aligndigit + 1
        if have < want:
            ret += ' ' * (want - have)

    if fixedwidth:
        ret = ('%% %ss' % fixedwidth) % ret
    return ret
def kmg(bytes, kilo=1000, append='', thresh=15, rstrip0=1, extradigits=0):
    """Readable size formatter.

    With the default decimal/SI kilo:
        kmg(3429873278462) == '3.4T'
        kmg(342987327)     == '343M'
        kmg(34298)         == '34K'
        '%sB' % kmg(2342342324, append=' ') == '2.3 GB'
    With binary kilos (kilo=1024):
        kmg(3429873278462, kilo=1024) == '3.1T'
        kmg(342987327, kilo=1024)     == '327M'
        kmg(34298, kilo=1024)         == '33K'

    bytes        the size to format (may be negative)
    kilo         1000 for decimal/SI units, 1024 for binary units
    append       text placed between the number and the unit letter
    thresh       scaled values below this magnitude get one decimal,
                 e.g. 1.3G but 15G
    rstrip0      strip trailing zeroes after the decimal point (and the dot)
    extradigits  extra decimals to show when a unit letter is used

    Returns the number as a string, followed by `append` and, when the
    value is large enough, one of K/M/G/T/P.
    """
    mega = kilo * kilo
    giga = mega * kilo
    tera = giga * kilo
    peta = tera * kilo
    # (unit letter, unit size, fraction of the unit at which we switch to it)
    units = (
        ('P', peta, 0.80),
        ('T', tera, 0.80),
        ('G', giga, 0.95),
        ('M', mega, 0.9),
        ('K', kilo, 0.85),
    )

    magnitude = abs(bytes)
    showval = bytes
    showdigits = 0
    for letter, unit, fraction in units:
        if magnitude > fraction * unit:
            showval = bytes / float(unit)
            # Compare the magnitude, so that negative sizes are rounded
            # the same way as positive ones.
            if abs(showval) < thresh:
                showdigits = 1 + extradigits
            else:
                showdigits = 0 + extradigits
            append += letter
            break

    # a negative extradigits must not produce an invalid '%.-1f' format
    showdigits = max(0, showdigits)
    ret = ('%%.%df' % showdigits) % showval
    if rstrip0 and '.' in ret:
        ret = ret.rstrip('0').rstrip('.')
    return ret + append
def nicetimelength(sec, long=False, joinon=' ', parts=2):
    """Describe a relative amount of time in human terms.

    e.g. nicetimelength(767)        == '12min 47sec',
         nicetimelength(2615958475) == '82yr 11mo',

    sec     seconds as float/int, or a datetime.timedelta
    long    use full unit names ('2 hours') instead of the fixed-width
            short form (' 2hr ')
    joinon  string placed between the parts
    parts   at most this many of the largest units are returned

    Returns the joined description; an empty string when sec is not
    positive.
    """
    if isinstance(sec, datetime.timedelta):
        sec = sec.total_seconds()

    # (singular, plural, short name, length in seconds), largest first.
    # Seconds are handled separately below.
    vals = (
        ('year',   'years',   'yr',  60.*60.*24.*365.),
        ('month',  'months',  'mo',  60.*60.*24.*30.6),
        ('week',   'weeks',   'wk',  60.*60.*24.*7),
        ('day',    'days',    'dy',  60.*60.*24.),
        ('hour',   'hours',   'hr',  60.*60.),
        ('minute', 'minutes', 'min', 60.),
    )

    ret = []
    left = sec
    # Fractions of a second only matter for very short lengths.
    roundme = left > 10

    for one, many, shorts, insec in vals:
        # >= so that an exact unit (e.g. 60 seconds) is reported in that
        # unit ('1min') rather than falling through to the smaller one.
        if left >= insec:
            howmany = int(left / insec)
            left -= howmany * insec
            if long:
                if howmany == 1:
                    ret.append('1 %s' % (one))
                else:
                    ret.append('%d %s' % (howmany, many))
            else:  # short form
                ret.append('%2d%-3s' % (howmany, shorts))

    if left > 0.:
        if roundme:
            if long:
                ret.append('%d seconds' % (left))
            else:
                ret.append('%dsec' % (left))
        else:
            if long:
                ret.append('%s seconds' % (sfloat(left, fixedwidth='')))
            else:
                ret.append('%ssec' % (sfloat(left, fixedwidth='', digits=2)))

    return joinon.join(ret[:parts])
# Roughly how many of the most interesting files to list per user.
show_file_amt = 14
# When true, dot-directories are not descended into and dot-files are
# skipped while walking a homedir (both are counted as 'ignored').
quick_dirty = False
# Usernames that are reported as whitelisted and not scanned.
whitelist_users=[]#'worker',]
# Files whose st_size is not larger than this are never listed.
minsize = 10000
# Time lengths in seconds. Of these, only `year` is referenced by the
# reporting code below (in the sort key).
minute = 60.
hour = 60*minute
day = 24*hour
week = 7*day
month = 30.6*day
year = 365*day
halfyear = year/2
def _looks_like_real_user(fields):
    """Return True when a split /etc/passwd line seems to describe a real account."""
    if len(fields) < 6:
        return False  # malformed / non-standard line
    try:
        uid = int(fields[2])
    except ValueError:
        return False  # e.g. NIS compat lines such as '+::::::'
    # UIDs for real accounts are usually >=1000; also require something
    # that looks like a homedir path.
    return uid > 500 and len(fields[5]) > 8 and 'home' in fields[5]


def _interest_sortkey(entry):
    """Sort key biased towards files both large and recent (more likely to be interesting data files)."""
    age_sec, _path, size = entry
    # this is probably the easiest to tweak
    return min(1.0, age_sec / year) * (1. / math.log(size, 100000))


def _scan_homedir(homedir):
    """Walk homedir and return (total_bytes, youngest_age, candidates, ignored, errors).

    candidates is a list of (age_sec, path_relative_to_homedir, size) for
    non-hidden files larger than minsize. youngest_age is None when no
    file could be examined.
    """
    candidates = []
    errors = 0
    ignored = 0
    total_bytes = 0
    youngest = None
    now = time.time()

    for rt, ds, fs in os.walk(homedir):
        if quick_dirty:
            # Don't descend into hidden directories. os.walk only honours
            # this when the list is modified in place.
            visible = [d for d in ds if not d.startswith('.')]
            ignored += len(ds) - len(visible)
            ds[:] = visible

        for fn in fs:
            if quick_dirty and fn.startswith('.'):
                # ignore hidden files (slightly more dangerous)
                ignored += 1
                continue

            fullpath = os.path.join(rt, fn)
            try:
                stob = os.stat(fullpath)
            except OSError:
                errors += 1
                continue

            size = stob.st_size
            total_bytes += size
            age_sec = now - stob.st_mtime
            if youngest is None or age_sec < youngest:
                youngest = age_sec

            # Hidden files are likelier to come from skel and be old,
            # and/or distort from everyday work (e.g. thumbnail cache),
            # so they count towards the size but are never listed.
            # Judge hiddenness on the path relative to the homedir, so a
            # dot-component in the homedir path itself doesn't hide everything.
            reln = os.path.relpath(fullpath, homedir)
            if reln.startswith('.') or '/.' in reln:
                continue

            # Don't report small files. Forces >=1 for the log in the
            # sort key (and who cares about empty files?)
            if size > max(1, minsize):
                candidates.append((age_sec, reln, size))

    return total_bytes, youngest, candidates, ignored, errors


def _report_user(username):
    """Print the size summary and a short list of interesting files for one user."""
    homedir = os.path.expanduser('~%s' % username).rstrip('/')
    if not os.path.exists(homedir):
        print(' INFO: homedir %s does not exist. Not a real user? Moved?' % homedir)
        return
    if not os.access(homedir, os.R_OK):
        print(' WARNING: Could not read homedir %r -- are we running as root?' % homedir)
        return

    total_bytes, youngest, candidates, ignored, errors = _scan_homedir(homedir)

    if ignored > 0:
        print(' INFO: Ignored %s hidden files/dirs' % (ignored))
    if errors > 0:
        print(' WARNING: Could not check %d files' % (errors))
    print(' INFO: Size: %sB' % (kmg(total_bytes)))
    if youngest is None:
        # nothing could be stat()ed, so there is no age to report
        print(' INFO: No files found')
    else:
        print(' INFO: Youngest file: %s' % nicetimelength(youngest))

    candidates.sort(key=_interest_sortkey)
    shown = candidates[:show_file_amt]
    if shown:
        print(' Some of the larger and recent files: ')
        for age, reln, _size in shown:
            print(" %-15s %s" % (nicetimelength(age), reln))
    else:
        print(' No recently modified files. ')
    print()


def main():
    """Report on the homedir of every real-looking account in /etc/passwd."""
    with open('/etc/passwd') as passwd:
        for entry in passwd:
            if ':' not in entry:
                continue
            fields = entry.split(':')
            if not _looks_like_real_user(fields):
                continue

            # Looks real enough, start reporting this homedir
            print()
            username = fields[0]
            print('User %r' % username)
            if username in whitelist_users:
                print(' is whitelisted')
                continue
            _report_user(username)


if __name__ == '__main__':
    main()