backuppc-deletefile : peaufinage (bis)
[auf-serveur.git] / backuppc-deletefile / jLib.pm
CommitLineData
cf225af2
P
1#============================================================= -*-perl-*-
2#
3# BackupPC::jLib package
4#
5# DESCRIPTION
6#
7# This library includes various supporting subroutines for use with BackupPC
8# functions used by BackupPC.
9# Some of the routines are variants/extensions of routines originally written
10# by Craig Barratt as part of the main BackupPC release.
11#
12# AUTHOR
13# Jeff Kosowsky
14#
15# COPYRIGHT
16# Copyright (C) 2008-2011 Jeff Kosowsky
17#
18# This program is free software; you can redistribute it and/or modify
19# it under the terms of the GNU General Public License as published by
20# the Free Software Foundation; either version 2 of the License, or
21# (at your option) any later version.
22#
23# This program is distributed in the hope that it will be useful,
24# but WITHOUT ANY WARRANTY; without even the implied warranty of
25# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26# GNU General Public License for more details.
27#
28# You should have received a copy of the GNU General Public License
29# along with this program; if not, write to the Free Software
30# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31#
32#========================================================================
33#
34# Version 0.4.0, released January 2011
35#
36#========================================================================
37
38package BackupPC::jLib;
39
40use strict;
41use vars qw($VERSION);
42$VERSION = '0.4.0';
43
44use warnings;
45use File::Copy;
46use File::Path;
47use File::Temp;
48use Fcntl; #Required for RW I/O masks
49
50use BackupPC::Lib;
51use BackupPC::Attrib;
52use BackupPC::FileZIO;
53use Data::Dumper; #Just used for debugging...
54
55no utf8;
56
57use constant _128KB => 131072;
58use constant _1MB => 1048576;
59use constant LINUX_BLOCK_SIZE => 4096;
60use constant TOO_BIG => 2097152; # 1024*1024*2 (2MB)
61
62require Exporter;
63our @ISA = qw(Exporter);
64our @EXPORT = qw(
65 %Conf $dryrun $errorcount
66 LINUX_BLOCK_SIZE TOO_BIG
67 printerr warnerr firstbyte
68 zFile2MD5 zFile2FullMD5
69 link_recursively_topool
70 jfileNameUnmangle
71 getattr read_attrib count_file_attribs
72 get_bakinfo get_file_attrib get_attrib_value get_attrib_type
73 write_attrib write_file_attrib
74 attrib
75 GetPoolLink jMakeFileLink
76 poolname2number renumber_pool_chain delete_pool_file
77 run_nightly
78 jcompare zcompare zcompare2
79 delete_files touch
80 jcopy jlink junlink jmkdir jmkpath jmake_path jrename jrmtree);
81
82#Global variables
83our %Conf;
84our $errorcount=0;
85our $dryrun=1; #global variable - set to 1 to be safe -- should be set to
86 #0 in actual program files if you don't want a dry-run
87 #None of the routines below will change/delete/write actual
88 #file data if this flag is set. The goal is to do everything but
89 #the final write to assist with debugging or cold feet :)
90
91sub printerr
92{
93 print "ERROR: " . $_[0];
94 $errorcount++;
95}
96
97sub warnerr
98{
99 $|++; # flush printbuffer first
100 warn "ERROR: " . $_[0];
101 $errorcount++;
102}
103
104# Returns the firstbyte of a file.
105# If coding $coding undefined or 0, return as unpacked 2 char hexadecimal
106# string. Otherwise, return as binary byte.
107# Return -1 on error.
108# Useful for checking the type of compressed file/checksum coding.
109sub firstbyte {
110 my ($file, $coding) = @_;
111 my $fbyte='';
112 sysopen(my $fh, $file, O_RDONLY) || return -1;
113 $fbyte = -1 unless sysread($fh, $fbyte, 1) == 1;
114 close($fh);
115 if (! defined($coding) || $coding == 0) {
116 $fbyte = unpack('H*',$fbyte); # Unpack as 2 char hexadecimal string
117 }
118 else {
119 $fbyte = vec($fbyte, 0, 8); # Return as binary byte
120 }
121 return $fbyte;
122}
123
124# Compute the MD5 digest of a compressed file. This is the compressed
125# file version of the Lib.pm function File2MD5.
126# For efficiency we don't use the whole file for big files
127# - for files <= 256K we use the file size and the whole file.
128# - for files <= 1M we use the file size, the first 128K and
129# the last 128K.
130# - for files > 1M, we use the file size, the first 128K and
131# the 8th 128K (ie: the 128K up to 1MB).
132# See the documentation for a discussion of the tradeoffs in
133# how much data we use and how many collisions we get.
134#
135# Returns the MD5 digest (a hex string) and the file size if suceeeds.
136# (or "" and error code if fails).
137# Note return for a zero size file is ("", 0).
138#
139# If $size < 0 then calculate size of file by fully decompressing
140# If $size = 0 then first try to read corresponding attrib file
141# (if it exists), if doesn't work then calculate by fully decompressing
142# IF $size >0 then use that as the size of the file
143#
144# If compreslvl is undefined then use the default compress level from
145# the config file
146
147sub zFile2MD5
148{
149 my($bpc, $md5, $name, $size, $compresslvl) = @_;
150
151 my ($fh, $rsize, $filesize, $md5size);
152
153 return ("", -1) unless -f $name;
154 return ("", 0) if (stat(_))[7] == 0; #Zero size file
155 $compresslvl = $Conf{CompressLevel} unless defined $compresslvl;
156 unless (defined ($fh = BackupPC::FileZIO->open($name, 0, $compresslvl))) {
157 printerr "Can't open $name\n";
158 return ("", -1);
159 }
160
161 my ($datafirst, $datalast);
162 my @data;
163 #First try to read up to the first 128K (131072 bytes)
164 if ( ($md5size = $fh->read(\$datafirst, _128KB)) < 0 ) { #Fist 128K
165 printerr "Can't read & decompress $name\n";
166 return ("", -1);
167 }
168
169 if ($md5size == _128KB) { # If full read, continue reading up to 1st MB
170 my $i=0;
171 #Read in up to 1MB (_1MB), 128K at a time and alternate between 2 data buffers
172 while ( ($rsize = $fh->read(\$data[(++$i)%2], _128KB)) == _128KB
173 && ($md5size += $rsize) < _1MB ) {}
174 $md5size +=$rsize if $rsize < _128KB; # Add back in partial read
175 $datalast = ($i > 1 ?
176 substr($data[($i-1)%2], $rsize, _128KB-$rsize) : '')
177 . substr($data[$i%2], 0 ,$rsize); #Last 128KB (up to 1MB)
178 }
179 if($md5size < _1MB) { #Already know the size because read it all (note don't do <=)
180 $filesize = $md5size;
181 } elsif($size > 0) { #Use given size
182 $filesize = $size;
183 } elsif($compresslvl == 0) { #Not compressed, so: size = actual size
184 $filesize = -s $name;
185 }elsif($size == 0) { # Try to find size from attrib file
186 $filesize = get_attrib_value($name, "size");
187 if(!defined($filesize)) {
188 warn "Can't read size of $name from attrib file so calculating manually\n";
189 }
190 }
191 if(!defined($filesize)) { #No choice but continue reading to find size
192 $filesize = $md5size;
193 while (($rsize = $fh->read(\($data[0]), _128KB)) > 0) {
194 $filesize +=$rsize;
195 }
196 }
197 $fh->close();
198
199 $md5->reset();
200 $md5->add($filesize);
201 $md5->add($datafirst);
202 $md5->add($datalast) if defined($datalast);
203 return ($md5->hexdigest, $filesize);
204}
205
206#Compute md5sum of the full data contents of a file.
207#If the file is compressed, calculate the md5sum of the inflated
208#version (using the zlib routines to uncompress the stream). Note this
209#gives the md5sum of the FULL file -- not just the partial file md5sum
210#above.
211sub zFile2FullMD5
212{
213 my($bpc, $md5, $name, $compresslvl) = @_;
214
215 my $fh;
216 my $data;
217
218 $compresslvl = $Conf{CompressLevel} unless defined $compresslvl;
219 unless (defined ($fh = BackupPC::FileZIO->open($name, 0, $compresslvl))) {
220 printerr "Can't open $name\n";
221 return -1;
222 }
223
224 $md5->reset();
225 while ($fh->read(\$data, 65536) > 0) {
226 $md5->add($data);
227 }
228
229 return $md5->hexdigest;
230}
231
232
233# Like MakeFileLink but for existing files where we don't have the
234# digest available. So compute digest and call MakeFileLink after
235# For each file, check if the file exists in $bpc->{TopDir}/pool.
236# If so, remove the file and make a hardlink to the file in
237# the pool. Otherwise, if the newFile flag is set, make a
238# hardlink in the pool to the new file.
239#
240# Returns 0 if a link should be made to a new file (ie: when the file
241# is a new file but the newFile flag is 0).
242# Returns 1 if a link to an existing file is made,
243# Returns 2 if a link to a new file is made (only if $newFile is set)
244# Returns negative on error.
245sub zMakeFileLink
246{
247 my($bpc, $md5, $name, $newFile, $compress) = @_;
248
249 $compress = $Conf{CompressLevel} unless defined $compress;
250
251 my ($d,$ret) = zFile2MD5($bpc, $md5, $name, 0, $compress);
252 return -5 if $ret < 0;
253 $bpc->MakeFileLink($name, $d, $newFile, $compress);
254}
255
256
257# Use the following to create a new file link ($copy) that links to
258# the same pool file as the original ($orig) file does (or
259# should). i.e. this doesn't assume that $orig is properly linked to
260# the pool. This is computationally more costly than just making the
261# link, but will avoid further corruption whereby you get archive
262# files with multiple links to each other but no link to pool.
263
264# First, check if $orig links to the pool and if not create a link
265# via MakeFileLink. Don't create a new pool entry if newFile is zero.
266# If a link either already exists from the original to the pool or if
267# one was successfully created, then simply link $copy to the same
268# pool entry. Otherwise, just copy (don't link) $orig to $copy
269# and leave it unlinked to the pool.
270
271# Note that checking whether $orig is linked to the pool is
272# cheaper than running MakeFileLink since we only need the md5sum
273# checksum.
274# Note we assume that the attrib entry for $orig (if it exists) is
275# correct, since we use that as a shortcut to determine the filesize
276# (via zFile2MD5)
277# Returns 1 if a link to an existing file is made,
278# Returns 2 if a link to a new file is made (only if $newFile is set)
279# Returns 0 if file was copied (either because MakeFileLink failed or
280# because newFile=0 and no existing pool match
281# Returns negative on error.
282sub zCopyFileLink
283{
284 my ($bpc, $orig, $copy, $newFile, $compress) = @_;
285 my $ret=1;
286 $compress = $Conf{CompressLevel} unless defined $compress;
287 my $md5 = Digest::MD5->new;
288 my ($md5sum, $md5ret) = zFile2MD5($bpc, $md5, $orig, 0, $compress);
289
290 #If $orig is already properly linked to the pool (or linkable to pool after running
291 #MakeFileLink on $orig) and HardLinkMax is not exceeded, then just link to $orig.
292 if($md5ret > 0) { #If valid md5sum and non-zero length file (so should be linked to pool)...
293 if((GetPoolLinkMD5($bpc, $orig, $md5sum, $compress, 0) == 1 || #If pool link already exists
294 ($ret = $bpc->MakeFileLink($orig, $md5sum, $newFile, $compress))> 0) #or creatable by MakeFileLink
295 && (stat($orig))[3] < $bpc->{Conf}{HardLinkMax}) { # AND (still) less than max links
296 return $ret if link($orig, $copy); # Then link from copy to orig
297 }
298 }
299 if(copy($orig, $copy) == 1) { #Otherwise first copy file then try to link the copy to pool
300 if($md5ret > 0 && ($ret = $bpc->MakeFileLink($copy, $md5sum, $newFile, $compress))> 0) {
301 return 2; #Link is to a new copy
302 }
303 printerr "Failed to link copied file to pool: $copy\n";
304 return 0; #Copy but not linked
305 }
306 die "Failed to link or copy file: $copy\n";
307 return -1;
308}
309
310# Copy $source to $target, recursing down directory trees as
311# needed. The 4th argument if non-zero, means (for files) use
312# zCopyFileLink to make sure that everything is linked properly to the
313# pool; otherwise, just do a simple link. The 5th argument $force,
314# will erase a target directory if already present, otherwise an error
315# is signalled. The final 6th argument is the CompressionLevel which
316# can be left out and will the be calculated from
317# bpc->Conf{CompressLevel}
318
319# Note the recursion is set up so that the return is negative if
320# error, positive if consistent with a valid pool link (i.e. zero
321# length file or directories are consistent too), and zero if
322# successful but not consistent with a valid pool. The overall result
323# is -1 if there is any error and otherwise the AND'ed result of the
324# operations. That means if the overall result is positive then the
325# whole tree is successfully linked to the pool, so the next time
326# around we can use a simple linking (i.e $linkcheck=0). Note $source
327# and $target must be full paths from root. Note: if files are not
328# compressed properly then you won't be able to link them to pool.
329sub link_recursively_topool
330{
331 my ($bpc, $source, $target, $linkcheck, $force, $compress) = @_;
332 my $ret=1;
333 die "Error: '$source' doesn't exist" unless -e $source;
334 if (-e $target) {
335 die "Error can't overwrite: $target (unless 'force' set)\n" unless $force;
336 die "Error can't remove: $target ($!)\n" unless rmtree($target, 0, 0);
337 }
338 if (-d $source) {
339 die "Error: mkdir failed to create new directory: $target ($!)\n"
340 unless jmkdir($target);
341 opendir( my $dh, $source) || die "Error: Could not open dir $source ($!)\n";
342 foreach my $elem (readdir($dh)) {
343 next if /^\.\.?$/; # skip dot files (. and ..)
344 my $newsource = "$source/$elem";
345 my $newtarget = "$target/$elem";
346 my $newret = link_recursively_topool($bpc, $newsource, $newtarget, $linkcheck, $force, $compress);
347 if ($newret < 0) { #Error so break out of loop & return
348 closedir $dh;
349 return -1
350 }
351 $ret = 0 if $newret == 0; #note $ret stays at 1 only if no elements return -1 or 0
352 }
353 closedir $dh;
354 return $ret;
355 }
356 elsif ($dryrun) {return 1} # Return before making changes to filesystem
357 elsif ( ! -s $source) { # zero size
358 copy($source, $target); #No link since zero size
359 }
360 elsif ($linkcheck) { #Makes sure first that source properly linked to pool
361 return(zCopyFileLink($bpc, $source, $target, 1, $compress));
362 }
363 else {#Otherwise, OK to perform simple link to source
364 return (link($source, $target) == 1 ? 1 : -1)
365 }
366}
367
368sub get_bakinfo
369{
370 my ($bakinfofile, $entry) = @_;
371 our %backupInfo = ();
372
373 $bakinfofile .= "/backupInfo";
374 warn "Can't read $bakinfofile\n" unless -f $bakinfofile;
375
376 unless (my $ret = do $bakinfofile) { # Load the backupInfo file
377 if ($@) {
378 warn "couldn't parse $bakinfofile: $@\n";
379 }
380 elsif (!defined $ret) {
381 warn "couldn't do $bakinfofile: $!\n";
382 }
383 elsif (! $ret) {
384 warn "couldn't run $bakinfofile\n";
385 }
386 }
387 my $value = $backupInfo{$entry};
388 warn "$bakinfofile is empty or missing entry for '$entry'\n"
389 unless defined $value;
390 return $value;
391}
392
393# Note: getattr($attr) =$attr->{files}
394# getattr($attr, $file) =$attr->{files}{$file}
395# getattr($attr, $file, $attribute) =$attr->{files}{$file}{$attribute}
396
397sub getattr
398{
399 my($attr, $fileName, $Attribute) = @_;
400 return $attr->{files}{$fileName}{$Attribute} if ( defined($Attribute) );
401 return $attr->{files}{$fileName} if ( defined($fileName) );
402 return $attr->{files};
403}
404
405
406#Reads in the attrib file for directory $_[1] and (optional alternative
407#attrib file name $_[2]) and #stores it in the hashref $_[0] passed to
408#the function
409#Returns -1 and a blank $_[0] hash ref if attrib file doesn't exist
410#already (not necessarily an error)
411#Dies if attrib file exists but can't be read in.
412sub read_attrib
413{
414#Note: $_[0] = hash reference to attrib object
415#SO DO NOT USE LOCAL VARIABLE FOR IT (i.e. don't do my $attr=$_[0]
416 $_[0] = BackupPC::Attrib->new({ compress => $Conf{CompressLevel} });
417
418# unless (defined $_[1]) { #JJK: DEBUGGING
419# print "read_attrib: \$_[1] undefined\n";
420# print Dumper @_;
421# }
422 return -1 unless -f attrib($_[1], $_[2]);
423 #This is not necessarily an error because dir may be empty
424
425 $_[0]->read($_[1],$_[2]) or
426 die "Error: Cannot read attrib file: " . attrib($_[1],$_[2]) . "\n";
427
428 return 1;
429}
430
431#Same as Lib.pm fileNameUnmangle but doesn't require
432#unneccessary '$bpc'
433sub jfileNameUnmangle {
434 my($name) = @_;
435
436 $name =~ s{/f}{/}g;
437 $name =~ s{^f}{};
438 $name =~ s{%(..)}{chr(hex($1))}eg;
439 return $name;
440}
441
442sub count_file_attribs
443{
444 my ($attrib) = @_;
445 return( scalar (keys (%{$attrib->get()})));
446}
447
448# Get attrib entry for $fullfilname. The corresponding hash is both returned and
449# also fills the hash reference (optionally) passed via $fileattrib.
450# If attrib file not present, return -1 (which may not be an error)
451# Returns -2 if not a mangled file
452# Dies if error
453sub get_file_attrib
454{
455 my ($fullfilename, $fileattrib) = @_;
456 $fullfilename =~ m{(.+)/(.+)}; #1=dir; $2=file
457 return -2 unless defined $2;
458
459 return -1 if read_attrib(my $attr, $1) < 0;
460
461 %{$fileattrib} = %{$attr->{files}{jfileNameUnmangle($2)}};
462 #Note unmangling removes initial 'f' AND undoes char mapping
463}
464
465# Returns value of attrib $key for $fullfilename (full path)
466# If not a mangled file or attrib file not present or there is not an
467# entry for the specificed key for the given file, then return 'undef'
468sub get_attrib_value
469{
470 my ($fullfilename, $key) = @_;
471 $fullfilename =~ m{(.+)/(.+)}; #1=dir; $2=file
472
473 return undef unless defined $2;
474 return undef if read_attrib(my $attr, $1) < 0;
475 return $attr->{files}{jfileNameUnmangle($2)}{$key};
476 #Note this returns undefined if key not present
477 #Note unmangling removes initial 'f' AND undoes char mapping
478}
479
480# Returns value of attrib type key for $fullfilename (full path)
481# If attrib file present but filename not an entry, return -1 [not an error if file nonexistent]
482# If no attrib file (but directory exists), return -2 [not an error if directory empty]
483# If directory non-existent, return -3
484# If attrib file present but not readble, return -4 [true error]
485# Note there may an entry even if file non-existent (e.g. type 10 = delete)
486sub get_attrib_type
487{
488 my ($fullfilename) = @_;
489 $fullfilename =~ m{(.+)/(.+)}; #1=dir; $2=file
490
491# unless (defined $1) { #JJK: DEBUGGING
492# print "get_attrib_type: \$1 undefined\n";
493# print Dumper @_;
494# }
495
496 return -3 unless -d $1;
497 return -2 unless -f attrib($1);
498 return -4 unless read_attrib(my $attr, $1) >= 0;
499 my $type = $attr->{files}{jfileNameUnmangle($2)}{type};
500 #Note unmangling removes initial 'f' AND undoes char mapping
501 return (defined($type) ? $type : -1);
502}
503
504# 4th argument $poollink says whether to write to file (0) or link to
505# pool (using MakeFileLink).
506# 5th argument tells what to do if no files in $attrib
507# (0=return error, 1=delete attrib file and return success)
508# 6th argument is an optional alternative name for the attrib file itself
509# Note does an unlink first since if there are hard links, we don't want
510# to modify them
511# Returns positive if successful, 0 if not
512# Specifically, 1 if linked to existing, 2 if linked to new,
513# 3 if written without linking, 4 if (empty) attrib file deleted
514
515sub write_attrib
516{
517 my ($bpc, $attrib, $dir, $poollink, $delempty, $attfilename) = @_;
518 die "Error: Cannot write to directory: $dir\n" unless -w $dir;
519
520# unless (defined $dir) { #JJK: DEBUGGING
521# print "write_attrib: \$dir undefined";
522# print Dumper @_;
523# }
524
525 my $attfilepath = attrib($dir, $attfilename);
526 return 1 if $dryrun; #Return before writing changes
527 die "Error: could not unlink old attrib file: $attfilepath\n"
528 if (-e $attfilepath && ! unlink($attfilepath)); #Delete old attrib file if exists cuz may be hard-linked
529 return 4 if(count_file_attribs($attrib) == 0 && $delempty); #No attribs left so leave it unlinked
530 die "Error: could not write to attrib file: $attfilepath\n"
531 unless ($attrib->write($dir, $attfilename)) == 1; #First write a copy without linking
532 my $ret=3;
533 if ($poollink) {
534 my $data = $attrib->writeData;
535 my $md5 = Digest::MD5->new;
536 my $digest;
537 if(($digest = $bpc->Buffer2MD5($md5, length($data), \$data)) ne -1
538 && ($ret = $bpc->MakeFileLink($attfilepath, $digest, 1, $Conf{CompressLevel})) <= 0) {
539 printerr "Can't link attrib file to pool: $attfilepath ($ret)\n";
540 }
541 }
542 return $ret;
543}
544
545# Write out $fileattrib for $file (without the mangle) to $dir/$attfilename (or
546# to the default attribute file for $dir if $attfilename is undef)
547# Reads in existing attrib file if pre-existing
548# 4th argument $poollink says whether to write to file (0) or link to
549# pool (using MakeFileLink).
550# Returns positive if successful, 0 if not
551# Specifically, 1 if linked to existing, 2 if linked to new,
552# 3 if written without linking
553sub write_file_attrib
554{
555 my ($bpc, $dir, $file, $fileattrib, $poollink, $attfilename) = @_; #Note $fileattrib is a hashref
556 my $ret=0;
557
558 read_attrib(my $attr, $dir, $attfilename); #Read in existing attrib file if it exists
559 $ret = write_attrib($bpc, $attr, $dir, $poollink, 0, $attfilename)
560 if $attr->set($file, $fileattrib) > 0;
561
562# unless (defined $dir) { #JJK: DEBUGGING
563# print "write_file_attrib: \$dir undefined\n";
564# print Dumper @_;
565# }
566
567 die "Error writing to '$file' entry to attrib file: " . attrib($dir, $attfilename) . "\n" unless $ret > 0;
568 return $ret;
569}
570
571sub attrib
572{
573 return (defined($_[1]) ? "$_[0]/$_[1]" : "$_[0]/attrib");
574}
575
576# Modified version of MakeFileLink including:
577# 1. Efficiency/clarity improvements
578# 2. Calls GetPoolLink to find candidate link targets.
579# 2. For non-compressed files, uses my improved jcompare comparison algorithm
580# 3. For compressed files, uses zcompare2 which compares only the compressed
581# data sans first-byte header & potential rsync digest trailer. This allows
582# for matches even if one file has rsync digest and other does not
583# 4. Moves file before unlinking in case subsequent link fails and needs to be
584# undone
585# 5. Added 6th input parameter to return pointer to the pool link name
586# 6. Extended meaning of newFile flag
587# 0 = Don't creat new pool file (as before)
588# 1 = Create new pool file IF no other links to source file
589# (this was the previous behavior for whenever newFile was set)
590# >2 = Create new pool file EVEN if source file has more than one link
591# (this will by extension link other things linked to the source
592# also to the pool -- which means that the pool might not clean
593# if it links to things outside of the pc directory -- so
594# use carefully
595# 7. Includes 'j' versions of file routines to allow dryrun
596# 8. Added check to see if already in pool and if so returns 3
597
598# For each file, check if the file exists in $bpc->{TopDir}/pool.
599# If so, remove the file and make a hardlink to the file in
600# the pool. Otherwise, if the newFile flag is set, make a
601# hardlink in the pool to the new file.
602#
603# Returns 0 if a link should be made to a new file (ie: when the file
604# is a new file but the newFile flag is 0).
605# JJK: actually also if $name has nlinks >1 regardless of newFile flag
606# Returns 1 if a link to an existing file is made,
607# Returns 2 if a link to a new file is made (only if $newFile is set)
608# Return 3 if first finds that already linked to ipool
609# Returns negative on error.
610
611sub jMakeFileLink
612{
613 my($bpc, $name, $d, $newFile, $compress, $linkptr) = @_;
614
615 my $poollink;
616 my $result=GetPoolLinkMD5($bpc, $name, $d, $compress, 1, \$poollink);
617 $$linkptr = $poollink if defined($linkptr) && $result > 0;
618
619 if($result == 1){ #Already linked to the pool
620 return 3;
621 }elsif($result == 2) { #Matches existing, linkable pool file
622 my $tempname = mktemp("$name.XXXXXXXXXXXXXXXX");
623 return -5 unless jrename($name, $tempname); #Temorarily save
624 if(!jlink($poollink, $name)) { #Link pool to source
625 jrename($tempname, $name); #Restore if can't link
626 return -3;
627 }
628 junlink($tempname); #Safe to remove the original
629 return 1;
630 }elsif($result == 3) {
631 if(defined($newFile) && #No link or match
632 ($newFile > 1 || ($newFile == 1 && (stat($name))[3] == 1 ))) {
633 $poollink =~ m|(.*)/|;
634 jmkpath($1, 0, 0777) unless -d $1 ;
635 return -4 unless jlink($name, $poollink);
636 return 2;
637 } else { #New link should have been made but told not to
638 return 0;
639 }
640 }else {
641 return -6; #Error from GetPoolLink call
642 }
643}
644
645# GetPoolLink
646# GetPoolLinkMD5
647#Find the pool/cpool file corresponding to file $name.
648#1. First iterates entire chain to see if *same inode* is present. I.e. if
649# already linked to the pool. If so, it returns the first instance.
650# Return = 1 and $Poolpathptr = name of the hard-linked match
651#2. If $compareflg is set, then iterate through again this time looking for
652# file *content* matches (this is much slower).
653# If so, it returns the first instance with Nlinks < HardLinkMax
654# Return = 2 and $Poolpathptr = name of the content match
655#3. Finally, if not linked (and also not matched if $compareflg set)
656# Return=3 and $$poolpathptr = first empty chain
657#Note: Return zero if zero size file
658# Return negative if error.
659#Note: if chain has multiple copies of the file, then it returns the first linked
660#match if present and if none and $compareflag set then the first content match
661sub GetPoolLink
662{
663 my($bpc, $md5, $name, $compress, $compareflg, $poolpathptr) = @_;
664
665 $compress = $bpc->{Conf}{CompressLevel} unless defined $compress;
666
667 my ($md5sum , $ret) = defined($compress) && $compress > 0 ?
668 zFile2MD5($bpc, $md5, $name, 0, $compress) :
669 $bpc->File2MD5($md5, $name);
670
671 return 0 if $ret == 0; #Zero-sized file
672 return -3 unless $ret >0;
673
674 GetPoolLinkMD5($bpc, $name, $md5sum, $compress, $compareflg, $poolpathptr);
675}
676
677sub GetPoolLinkMD5
678{
679 my($bpc, $name, $md5sum, $compress, $compareflg, $poolpathptr) = @_;
680 my($poolbase, $i);
681
682 return -1 unless -f $name;
683 my $inode = (stat(_))[1]; #Uses stat from -f
684 return 0 if (stat(_))[7] == 0; #Zero-size (though shouldn't really happen since
685 #md5sum input not defined for zero sized files
686
687 $compress = $bpc->{Conf}{CompressLevel} unless defined $compress;
688
689 return -2 unless
690 defined($poolbase = $bpc->MD52Path($md5sum, $compress));
691
692 #1st iteration looking for matching inode
693 $$poolpathptr = $poolbase;
694 for($i=0; -f $$poolpathptr; $i++) { #Look for linked copy (inode match)
695 return 1 if ((stat(_))[1] == $inode);
696 $$poolpathptr = $poolbase . '_' . $i; #Iterate
697 }
698
699 return 3 unless $compareflg; #No inode match
700
701 #Optional 2nd iteration looking for matching content
702 my $compare = defined($compress) && $compress > 0 ? \&zcompare2 : \&jcompare;
703 $$poolpathptr = $poolbase;
704 for(my $j=0; $j<$i; $j++ ) { #Look for content match
705 return 2 if (stat($$poolpathptr))[3] < $bpc->{Conf}{HardLinkMax} &&
706 !$compare->($name, $$poolpathptr);
707 $$poolpathptr = $poolbase . '_' . $j; #Iterate
708 }
709 # No matching existing pool entry - $$poolpathptr is first empty chain element
710 return 3;
711}
712
713#Convert pool name to constant length string consisting
714#of 32 hex digits for the base plus 6 (left) zero padded digits for
715#the chain suffix (suffixes are incremented by 1 so that no suffix
716#records as 0). Note this accomodates chains up to 10^6 long.
717#Use a number bigger than 6 if you have longer chains
718#Useful if you want to order (e.g., sort) pool file names numerically
719sub poolname2number
720{
721 $_[0] =~ m|(.*/)?([^_]*)(_(.*))?|;
722 my $suffix = defined($4) ? $4+1 : 0;
723 return sprintf("%s%06s", $2, $suffix)
724}
725
726# Renumber pool chain holes starting at $firsthole and continue up the chain
727# to fill up # to $maxgap holes (which need not be contiguous).
728# If $maxgap is not specified it defaults to 1 (sufficient to cover one
729# deletion - i.e. hole -- which may be $file itself)
730# If $firsthole exists, it is an error. Use delete_pool_file instead,
731# if you want to delete first before renumbering.
732# Return 1 on success; Negative on failure
733sub renumber_pool_chain
734{
735 my ($firsthole, $maxholes) = @_;
736
737 $maxholes = 1 unless defined $maxholes;
738
739 my ($base, $sufx);
740 if($firsthole =~ m|(.*_)([0-9]+)|) {
741 $base = $1;
742 $sufx = $2;
743 }else{
744 $base = $firsthole . "_";
745 $sufx = -1; #Corresponds to no suffix
746 }
747
748 my $nextsufx = $sufx+1;
749 my $nextfile = $base . $nextsufx;
750 while($nextsufx - $sufx <= $maxholes) {
751 if(-e $nextfile) { #Non-hole so move/renumber
752 if(-e $firsthole || ! jrename($nextfile, $firsthole)) {
753 warn "Error: Couldn't rename pool file: $nextfile --> $firsthole\n";
754 return -2;
755 }
756# print "Renumbering: $nextfile --> $firsthole\n";
757 $firsthole = $base . (++$sufx); #Move hole up the chain
758 }
759 $nextfile = $base . (++$nextsufx);
760 }
761 return 1;
762}
763
764# Delete pool file (if it exists) and regardless renumber
765# chain to fill hole left by file. Fill up to $maxholes
766# above $file (including the hole left by $file) where
767# $maxholes defaults to 1.
768# If $file doesn't exist, then it just fills the hole
769# left by $file.
770# Return 1 on success; Negative on failure
771sub delete_pool_file
772{
773 my ($file, $maxholes) = @_;
774
775 if(-e $file && !junlink($file)) { #Try to delete if exists
776 warn "Error: Couldn't unlink pool file: $file\n";
777 return -3;
778 }
779 return(renumber_pool_chain($file,$maxholes)); #Returns -1/-2 on fail
780}
781
782# Run BackupPC_nightly
783sub run_nightly
784{
785 my ($bpc) = @_;
786 my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
787 if ($err) {
788 printerr "BackupPC_nightly: can't connect to server ($err)...\n";
789 return($err);
790 }
791 if ((my $reply = $bpc->ServerMesg("BackupPC_nightly run")) eq "ok\n" ) {
792 $bpc->ServerMesg("log $0: called for BackupPC_nightly run...");
793 print "BackupPC_nightly scheduled to run...\n";
794 return 0;
795 }
796 else {
797 printerr "BackupPC_nightly ($reply)...\n";
798 return $reply;
799 }
800}
801
802
803# Rewrite of compare function (in File::Compare) since it messes up on
804# weird filenames with spaces and things (since it combines the "<"
805# and the filename in a single string). Also streamline the code by
806# getting rid of of extra fluff and removing code for comparing text
807# files while we are at. The basic algorithm remains the classic one
808# for comparing 2 raw files.
809# Returns 0 if equal, 1 if not equal. And negative if error.
810sub jcompare {
811 my ($file1,$file2,$size) = @_;
812 my ($fh1open, $fh2open, $fh1size);
813 my $ret=0;
814
815 local (*FH1, *FH2);
816 unless (($fh1open = open(FH1, "<", $file1)) &&
817 ($fh2open = open(FH2, "<", $file2))) {
818 $ret = -1;
819 goto compare_return;
820 }
821 binmode FH1;
822 binmode FH2;
823 if (($fh1size = -s FH1) != (-s FH2)) {
824 $ret=1;
825 goto compare_return;
826 }
827
828 unless (defined($size) && $size > 0) {
829 $size = $fh1size;
830 $size = LINUX_BLOCK_SIZE if $size < LINUX_BLOCK_SIZE;
831 $size = TOO_BIG if $size > TOO_BIG;
832 }
833
834 my $data1 = my $data2 = '';
835 my ($r1,$r2);
836 while(defined($r1 = read(FH1,$data1,$size)) && $r1 > 0) {
837 unless (defined($r2 = read(FH2,$data2,$r1)) && $r2 == $r1
838 && $data2 eq $data1) {
839 $ret=1;
840 goto compare_return;
841 }
842 }
843 $ret=1 if defined($r2 = read(FH2,$data2,LINUX_BLOCK_SIZE)) && $r2 > 0;
844 $ret =-2 if $r1 < 0 || $r2 < 0; # Error on read
845
846 compare_return:
847 close(FH1) if $fh1open;
848 close(FH2) if $fh2open;
849 return $ret;
850}
851
852# Version of compare for BackupPC compressed files. Compares inflated
853# (uncompressed) versions of files. The BackupPC::FileZIO subroutine
854# is used to read in the files instead of just raw open & read. Note
855# this version of compare is relatively slow since you must use zlib
856# to decompress the streams that are being compared. Also, there is no
857# shortcut to just first compare the filesize since the size is not
858# known until you finish reading the file.
859sub zcompare {
860 my ($file1, $file2, $compress)=@_;
861 my ($fh1, $fh2);
862 my $ret=0;
863
864 $compress =1 unless defined $compress;
865 unless ((defined ($fh1 = BackupPC::FileZIO->open($file1, 0, $compress))) &&
866 (defined ($fh2 = BackupPC::FileZIO->open($file2, 0, $compress)))) {
867 $ret = -1;
868 goto zcompare_return;
869 }
870 my $data1 = my $data2 = '';
871 my ($r1, $r2);
872 while ( ($r1 = $fh1->read(\$data1, 65536)) > 0 ) {
873 unless ((($r2 = $fh2->read(\$data2, $r1)) == $r1)
874 && $data1 eq $data2) {
875 $ret=1;
876 goto zcompare_return;
877 }
878 }
879 $ret =1 if ($r2 = $fh2->read(\$data2, LINUX_BLOCK_SIZE)) > 0; #see if anything left...
880 $ret =-1 if $r1 < 0 || $r2 < 0; # Error on read
881
882 zcompare_return:
883 $fh1->close() if defined $fh1;
884 $fh2->close() if defined $fh2;
885 return $ret;
886}
887
888# Second alternative version that combines the raw speed of jcompare
889# with the ability to compare compressed files in zcompare. This
890# routine effectively should compare two compressed files just as fast
891# as the raw jcompare. The compare algorithm strips off the first byte
892# header and the appended rsync checksum digest info and then does a
893# raw compare on the intervening raw compressed data. Since no zlib
894# inflation (decompression) is done it should be faster than the
895# zcompare algorithm which requires inflation. Also since zlib
896# compression is well-defined and lossless for any given compression
897# level and block size, the inflated versions are identical if and
898# only if the deflated (compressed) versions are identical, once the
899# header (first byte) and trailers (rsync digest) are stripped
900# off. Note that only the data between the header and trailers have
901# this uniqe mapping. Indeed, since BackupPC only adds the checksum
902# the second time a file is needed, it is possible that the compressed
903# value will change with time (even though the underlying data is
904# unchanged) due to differences in the envelope. Note, to some extent
905# this approach assumes that the appended digest info is correct (at
906# least to the extent of properly indicating the block count and hence
907# compressed data size that will be compared)
908sub zcompare2 {
909 my ($file1,$file2,$size) = @_;
910 my ($fh1, $fh2, $fh1open, $fh2open);
911 my $Too_Big = 1024 * 1024 * 2;
912 my $ret=0;
913
914 unless (($fh1open = open($fh1, "<", $file1)) &&
915 ($fh2open = open($fh2, "<", $file2))) {
916 $ret = -1;
917 goto zcompare2_return;
918 }
919 binmode $fh1;
920 binmode $fh2;
921
922 my $fh1size = -s $fh1;
923 my $fh2size = -s $fh2;
924
925 my $data1 = my $data2 = '';
926 unless (read($fh1, $data1, 1) == 1 && # Read first byte
927 read($fh2, $data2, 1) == 1) {
928 $ret = -1;
929 goto zcompare2_return;
930 }
931 if (vec($data1, 0, 8) == 0xd6 || vec($data1, 0, 8) == 0xd7) {
932 return -2 unless ( defined(seek($fh1, -8, 2)) );
933 return -3 unless ( read($fh1, $data1, 4) == 4 );
934 $fh1size -= 20 * unpack("V", $data1) + 49;
935 # [0xb3 separator byte] + 20 * NUM_BLOCKS + DIGEST_FILE(32) + DIGEST_INFO (16)
936 # where each of NUM_BLOCKS is a 20 byte Block digest consisting of 4 bytes of
937 # Adler32 and the full 16 byte (128bit) MD4 file digest (checksum).
938 # DIGEST_FILE is 2 copies of the full 16 byte (128bit) MD4 digest
939 # (one for protocol <=26 and the second for protocol >=27)
940 # DIGEST_INFO is metadata consisting of a pack("VVVV") encoding of the
941 # block size (should be 2048), the checksum seed, length of the
942 # block digest (NUM_BLOCKS), and the magic number (0x5fe3c289).
943 # Each is a 4 byte integer.
944 }
945
946 if (vec($data2, 0, 8) == 0xd6 || vec($data2, 0, 8) == 0xd7) {
947 return -2 unless ( defined(seek($fh2, -8, 2)) );
948 return -3 unless ( read($fh2, $data2, 4) == 4 );
949 $fh2size -= 20 * unpack("V", $data2) + 49;
950 }
951
952 if ($fh1size != $fh2size) {
953 $ret=1;
954 goto zcompare2_return;
955 }
956
957 seek($fh1,1,0) ; #skip first byte
958 seek($fh2,1,0) ; #skip first byte
959
960
961 my $bytesleft=--$fh1size; #Reduce by one since skipping first byte
962 $size = $fh1size unless defined($size) && $size > 0 && $size < $fh1size;
963 $size = $Too_Big if $size > $Too_Big;
964
965 my ($r1,$r2);
966 while(defined($r1 = read($fh1,$data1,$size))
967 && $r1 > 0) {
968 unless (defined($r2 = read($fh2,$data2,$r1))
969 && $data2 eq $data1) { #No need to test if $r1==$r2 since should be same size
970 $ret=1; #Plus if it doesn't, it will be picked up in the $data2 eq $data1 line
971 goto zcompare2_return;
972 }
973 $bytesleft -=$r1;
974 $size = $bytesleft if $bytesleft < $size;
975 }
976 $ret=1 if defined($r2 = read($fh2,$data2,$size)) && $r2 > 0;
977 #Note: The above line should rarely be executed since both files same size
978 $ret =-2 if $r1 < 0 || $r2 < 0; # Error on read
979
980 zcompare2_return:
981 close($fh1) if $fh1open;
982 close($fh2) if $fh2open;
983 return $ret;
984}
985
986#Routine to remove files (unlink) or directories (rmtree)
987# $fullfilename is full path name to file or directory
988# Returns number of files deleted in the listref $dirfileref if defined;
989sub delete_files
990{
991 my ($fullfilename, $dirfileref) = @_;
992 my $listdeleted = defined($dirfileref);
993 my $ret = -1;
994 my $output;
995 die "Error: '$fullfilename' doesn't exist or not writeable\n"
996 unless -w $fullfilename;
997 if(-f $fullfilename) {
998 $ret = junlink($fullfilename);
999 }
1000 elsif(-d $fullfilename) {
1001 $$dirfileref = "" if $listdeleted;
1002 open(local *STDOUT, '>', $dirfileref) #redirect standard out to capture output of rmtree
1003 if $listdeleted;
1004 $ret = jrmtree($fullfilename, $listdeleted, 1);
1005 }
1006 return $ret;
1007}
1008
1009#Touch command (modified version of Unix notion)
1010#First argument is the (full)filepath Second argument if defined means
1011#file is only created if not existent but access and modification
1012#times not changed if already existent.
1013#Returns 1 on success, -1 on error;
1014sub touch
1015{
1016 unless (-e $_[0]) { #Create if non-existent
1017 return -1 unless defined(sysopen(my $fh, $_[0], O_CREAT));
1018 close $fh;
1019 }
1020 my $time = time();
1021 utime($time, $time, $_[0]) unless defined $_[1];
1022 return 1;
1023}
1024
1025#Simple wrappers to protect when just doing dry runs
1026sub jcopy
1027{
1028 return 1 if $dryrun;
1029 copy @_;
1030}
1031
1032sub jlink
1033{
1034 return 1 if $dryrun;
1035 link $_[0], $_[1];
1036}
1037
1038sub junlink
1039{
1040 return 1 if $dryrun;
1041 unlink @_;
1042}
1043
1044sub jmkdir
1045{
1046 return 1 if $dryrun;
1047 mkdir @_;
1048}
1049
1050sub jmkpath
1051{
1052 return 1 if $dryrun;
1053 mkpath @_;
1054}
1055
1056sub jmake_path
1057{
1058 return 1 if $dryrun;
1059 mkpath @_;
1060}
1061
1062sub jrename
1063{
1064 return 1 if $dryrun;
1065 rename $_[0], $_[1];
1066}
1067
1068sub jrmtree
1069{
1070 return 1 if $dryrun;
1071 rmtree @_;
1072}
1073
10741;