From cf225af2332c70504fb2289775061f16c19a137c Mon Sep 17 00:00:00 2001 From: Progfou Date: Tue, 31 Jul 2012 16:38:41 +0700 Subject: [PATCH] =?utf8?q?backuppc-deletefile=20:=20premi=C3=A8re=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- backuppc-deletefile/BackupPC_deleteFile.pl | 1041 +++++++++++++++++++++++++++ backuppc-deletefile/debian/README | 8 + backuppc-deletefile/debian/changelog | 5 + backuppc-deletefile/debian/compat | 1 + backuppc-deletefile/debian/control | 15 + backuppc-deletefile/debian/copyright | 39 + backuppc-deletefile/debian/dirs | 2 + backuppc-deletefile/debian/install | 2 + backuppc-deletefile/debian/rules | 11 + backuppc-deletefile/jLib.pm | 1074 ++++++++++++++++++++++++++++ 10 files changed, 2198 insertions(+) create mode 100644 backuppc-deletefile/BackupPC_deleteFile.pl create mode 100644 backuppc-deletefile/debian/README create mode 100644 backuppc-deletefile/debian/changelog create mode 100644 backuppc-deletefile/debian/compat create mode 100644 backuppc-deletefile/debian/control create mode 100644 backuppc-deletefile/debian/copyright create mode 100644 backuppc-deletefile/debian/dirs create mode 100644 backuppc-deletefile/debian/docs create mode 100644 backuppc-deletefile/debian/install create mode 100755 backuppc-deletefile/debian/rules create mode 100644 backuppc-deletefile/jLib.pm diff --git a/backuppc-deletefile/BackupPC_deleteFile.pl b/backuppc-deletefile/BackupPC_deleteFile.pl new file mode 100644 index 0000000..8f0da72 --- /dev/null +++ b/backuppc-deletefile/BackupPC_deleteFile.pl @@ -0,0 +1,1041 @@ +#!/usr/bin/perl +#============================================================= -*-perl-*- +# +# BackupPC_deleteFile.pl: Delete one or more files/directories from +# a range of hosts, backups, and shares +# +# DESCRIPTION +# See below for detailed description of what it does and how it works +# +# AUTHOR +# Jeff Kosowsky +# +# COPYRIGHT +# Copyright (C) 2008, 2009 Jeff Kosowsky +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +#======================================================================== +# +# Version 0.1.5, released Dec 2009 +# +#======================================================================== +# CHANGELOG +# 0.1 (Nov 2008) - First public release +# 0.1.5 (Dec 2009) - Minor bug fixes +# Ability to abort/skip/force hard link deletion +#======================================================================== +# Program logic is as follows: +# +# 1. First construct a hash of hashes of 3 arrays and 2 hashes that +# encapsulates the structure of the full and incremental backups +# for each host. This hash is called: +# %backupsHoHA{}{} +# where the keys are: "ante", "post", "baks", "level", "vislvl" +# with the first 3 keys having arrays as values and the final 2 +# keys having hashes as values. 
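+#    As a purely illustrative sketch (hypothetical host name and backup
+#    numbers, not from any real pool), one host's entry might look as
+#    follows for backups 100(level 0), 101(1), 102(2), 103(3) with a
+#    delete range covering only backup 102:
+#
+#      $backupsHoHA{myhost} = {
+#          ante   => [100, 101],   # level 0, then level 1
+#          baks   => [102],        # backups files get deleted from
+#          post   => [103],        # later backups inheriting from baks
+#          level  => {100 => 0, 101 => 1, 102 => 2, 103 => 3},
+#          vislvl => {0 => 100, 1 => 101, 2 => 102},
+#      };
+#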
+#    This pre-step is done since this same structure can be re-used
+#    when deleting multiple files and dirs (with potential wildcards)
+#    across multiple shares, backups, and hosts. The component arrays
+#    and hashes which are unique per host are constructed as follows:
+#
+#    - Start by constructing the simple hash %LevelH whose keys map
+#      backup numbers to incremental backup levels based on the
+#      information in the corresponding backupInfo file.
+#
+#    - Then, for each host selected, determine the list (@Baks) of
+#      individual backups from which files are to be deleted based on
+#      bakRange and the actual existing backups.
+#
+#    - Based on this list determine the list of direct antecedent
+#      backups (@Ante) that have strictly increasing backup levels
+#      starting with the previous level 0 backup. This list thus
+#      begins with the previous level zero backup and ends with the
+#      last backup before @Baks that has a lower incremental level
+#      than the first member of @Baks. Note: this list may be empty if
+#      @Baks starts with a full (level 0) backup. Note: there is at
+#      most one (and should in general be exactly one) incremental
+#      backup per level in this list starting with level 0.
+#
+#    - Similarly, construct the list of direct descendants (@Post) of
+#      the elements of @Baks that have strictly decreasing backup
+#      levels starting with the first incremental backup after @Baks
+#      and continuing until we reach a backup whose level is less than
+#      or equal to the level of the lowest incremental backup in @Baks
+#      (which may or may not be a level 0 backup). Again this list may
+#      be empty if the first backup after @Baks is lower than the
+#      level of all backups in @Baks. Also, again, there is at most
+#      one backup per level.
+#
+#    - Note that by construction, @Ante is stored in ascending order
+#      and furthermore each backup number has a strictly ascending
+#      incremental level. Similarly, @Post is stored in strictly
+#      ascending order but its successive elements have monotonically
+#      non-increasing incremental levels. Also, the last element of
+#      @Ante has an incremental level lower than the first element of
+#      @Baks and the last element of @Post has an incremental
+#      level higher than the lowest level of @Baks. This is all
+#      because anything else neither affects nor is affected by
+#      deletions in @Baks. In contrast, note that @Baks can have
+#      any pattern of increasing, decreasing, or repeated incremental
+#      levels.
+#
+#    - Finally, create the second hash (%VislvlH) which has keys equal
+#      to levels and values equal to the most recent backup with that
+#      level in @Baks or @Ante that could potentially still be visible
+#      in @Post. So, since we need to keep @Post unchanged, we need to
+#      make sure that whatever showed through into @Post before the
+#      deletions still shows through after deletion. Specifically, we
+#      may need to move/copy files (or directories) and set delete
+#      attributes to make sure that nothing more or less is visible in
+#      @Post after the deletions.
+#
+# 2. Second, for each host, combine the share names (and/or shell
+#    regexs) and list of file names (and/or shell regexs) with the
+#    backup ranges @Ante and @Baks to glob for all files that need
+#    either to be deleted from @Baks or blocked from view by setting a
+#    type=10 delete attribute. If a directory is on the list and
+#    the remove directory flag (-r) is not set, then directories are
+#    skipped (and an error is logged).
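+#    (Hypothetical example of the globbing: with -h - -n 100-102 -s -
+#    and the mangled file argument 'fhome/fuser/fjunk*', the program
+#    globs $TopDir/pc/<host>/{100,101,102}/f*/fhome/fuser/fjunk* for
+#    each host to collect the candidate paths.)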
+#    If any of these files (or dirs) are or contain hard links (either
+#    type hard link or a hard link "target") then they are skipped and
+#    logged since hard links cannot easily be deleted/copied/moved
+#    (since the other links will be affected). Duplicate entries and
+#    entries that are a subtree of another entry are rationalized and
+#    combined.
+#
+# 3. Third, for each host and for each relevant candidate file
+#    deletion, start going successively through the @Ante, @Baks, and
+#    @Post chains to determine which files and attributes need to be
+#    deleted, cleared, or copied/linked to @Post.
+#
+#    - Start by going through @Ante, in ascending order, to construct
+#      two visibility hashes. The first hash, %VisibleAnte, is used to
+#      mark whether or not a file in @Ante may be visible from @Baks
+#      from a higher incremental level. The presence of a file sets
+#      the value of the hash while intervening delete type=10 or the
+#      lack of a parent directory resets the value to invisible
+#      (-1). Later, when we get to @Baks, we will need to make these
+#      invisible to complete our deletion effect.
+#
+#      The second hash, %VisibleAnteBaks, (whose construction
+#      continues when we iterate through @Baks) determines whether or
+#      not a file from @Ante or @Baks was originally visible from
+#      @Post. And if a file was visible, then the backup number of
+#      that file is stored in the value of the hash. Later, we will
+#      use this hash to copy/link files from @Ante and @Baks into
+#      @Post to preserve its pre-deletion state.
+#
+#      Note that at each level, there is at *most* one backup from
+#      @Ante that is visible from @Baks (coded by %VisibleAnte) and
+#      similarly there is at *most* one backup from @Ante and @Baks
+#      combined that is visible from @Post (coded by
+#      %VisibleAnteBaks).
+#
+#    - Next, go through @Baks to mark for deletion any instances of the
+#      file that are present. Then set the attrib type to type=10
+#      (delete) if %VisibleAnte indicates that a file from @Ante would
+#      otherwise be visible at that level. Otherwise, clear the attrib
+#      and mark it for deletion. Similarly, once the type=10 delete has
+#      been set, all higher level elements of @Baks can have their file
+#      attribs cleared whether they originally indicated a file type or
+#      a delete type (i.e. no need for 2 layers of delete attribs).
+#
+#    - Finally, go through the list of @Post in ascending order. If
+#      there is no file and no delete flag present, then use the
+#      information coded in %VisibleAnteBaks to determine whether we
+#      need to link/copy over a version of the file previously stored
+#      in @Ante and/or @Baks (along with the corresponding file attrib
+#      entry) or whether we need to set a type=10 delete
+#      attribute. Conversely, if originally there was a type=10 delete
+#      attribute, then by construction of @Post, the delete type is no
+#      longer needed since the deletion will now occur in one of its
+#      antecedents in @Baks, so we need to clear the delete type from
+#      the attrib entry.
+#
+# 4. Finally, after all the files for a given host have been marked
+#    for deletion, moving/copying, or attribute changes, loop through
+#    and execute the changes. Deletions are looped first by host and
+#    then by backup number and then alphabetically by filepath.
+#
+#    Files are deleted by unlinking (recursively via rmtree for
+#    directories). Files are "copied" to @Post by first attempting to
+#    link to pool (either using an existing link or by creating a new
+#    pool entry) and if not successful then by copying. Directories
+#    are done recursively.
+#    Attributes are either cleared (deleted) or set to type=10 delete
+#    or copied over to @Post. Whenever an attribute file needs to be
+#    written, first an attempt is made to link to pool (or create a
+#    new pool entry and link if not present). Otherwise, the attribute
+#    is just written. Empty attribute files are deleted. The attribute
+#    writes to filesystem are done once per directory per backup
+#    (except for the moves).
+#
+# 5. As a last step, optionally BackupPC_nightly is called to clean up
+#    the pool, provided you set the -c flag and that the BackupPC
+#    daemon is running. Note that this routine itself does NOT touch
+#    the pool.
+
+# Debugging & Verification:
+
+# This program is instrumented to give you plenty of "output" to see
+# all the subtleties of what is being deleted (or moved) and what is
+# not. The seemingly simple rules of "inheritance" of incrementals
+# hide a lot of complexity (and special cases) when you try to delete
+# a file in the middle of a backup chain.
+#
+# To see what is happening during the "calculate_deletes" stage, which
+# is the heart of the algorithm in terms of determining what happens
+# to what, it is best to use DEBUG level 2 or higher (-d 2). Then for
+# every host and for every (unique) top-level file or directory
+# scheduled for deletion, you will see the complete chain of how the
+# program walks sequentially through @Ante, @Baks, and @Post.
+# For each file, you first see a line of the form:
+#   LOOKING AT: [hostname] [@Ante chain] [@Baks chain] [@Post chain] <file>
+#
+# Followed by a triad of lines for each of the backups in the chain of the form:
+#   ANTE[baknum](baklevel) [file code] [attribute code]
+#   BAKS[baknum](baklevel) [file code] [attribute code] [action flag]
+#   POST[baknum](baklevel) [file code] [attribute code] [action flag]
+#
+# where the file code is one of:
+#   F = File present at that baklevel and to be deleted (if in @Baks)
+#       (or f if in @Ante or @Post and potentially visible)
+#   D = Dir present at that baklevel and to be deleted (if in @Baks)
+#       (or d if in @Ante or @Post and potentially visible)
+#   - = File not present at that baklevel
+#   X = Parent directory not present at that baklevel
+#       (or x if in @Ante or @Post)
+# and the attribute code is one of:
+#   n = Attribute type key (if present)
+#   - = If no attribute for the file (implies no file)
+# and the action flag is one of the following: (only applies to @Baks & @Post)
+#   C = Clear attribute (if attribute was previously present)
+#   D = Set to type 10 delete (if not already set)
+#   Mn = Move file/dir here from backup 'n' (@Post only)
+#
+# More detail on the individual actions can be obtained by increasing
+# the debugging level.
+#
+# The other interesting output is the result of the "execute_deletes"
+# stage which shows what actually happens. Here, for every host and
+# every backup of that host, you see what happens on a file by
+# file level.
+# The output is of the form:
+#   [hostname][@Ante chain] [@Baks chain] [@Post chain]
+#   **BACKUP: [hostname][baknum](baklevel)
+#     [hostname][baknum] <filepath> [file code][attribute code]
+#
+# where the file code is one of:
+#   F = Single file deleted
+#   D(n) = Directory deleted with total of 'n' file/dir deletes
+#          (including the directory)
+#   - = Nothing deleted
+# and the attribute code is one of:
+#   C = Attribute cleared
+#   D = Attribute set to type 10 delete
+#   d = Attribute left alone with type 10 delete
+#   - = Attrib (otherwise) unchanged [shouldn't happen]
+# and the (optional) move code is: (applies only to @Post)
+#   n->m = File/dir moved by *linking* to pool from backup 'n' to 'm'
+#   n=>m = File/dir moved by *copying* from backup 'n' to 'm'
+# Finally, since the files are sorted alphabetically by name and
+# directory, we only need to actually write the attribute file after
+# we finish making all the delete/clear changes in a directory.
+# This is coded as:
+#   [hostname][baknum] <dir>/attrib [-][attribute code]
+#
+# where the attribute code is one of:
+#   W = Attribute file *linked* to pool successfully
+#   w = Attribute file *copied* to filesystem successfully
+#   R = Empty attribute file removed from filesystem
+#   X = Error writing attribute file
+#========================================================================
+
+use strict;
+use warnings;
+
+use File::Find;
+use File::Glob ':glob';
+use Data::Dumper;  #Just used for debugging...
+
+use lib "/usr/share/BackupPC/lib";
+use BackupPC::Lib;
+use BackupPC::jLib;
+use BackupPC::Attrib qw(:all);
+use BackupPC::FileZIO;
+use Getopt::Std;
+
+use constant S_HLINK_TARGET => 0400000;    # this file is hardlink target
+
+my $DeleteAttribH = { #Hash reference to attribute entry for deleted file
+    type  => BPC_FTYPE_DELETED, #10
+    mode  => 0,
+    uid   => 0,
+    gid   => 0,
+    size  => 0,
+    mtime => 0,
+};
+
+my %filedelsHoH;
+# Hash has the following structure:
+#   $filedelsHoH{$host}{$baknum}{$file} = <mask>
+# where the mask is an OR of the following elements
+
+use constant FILE_ATTRIB_COPY  => 0000001; # File and corresponding attrib copied/linked to new backup in @Post
+use constant FILE_DELETED      => 0000002; # File deleted (not moved)
+use constant ATTRIB_CLEARED    => 0000010; # File attrib cleared
+use constant ATTRIB_DELETETYPE => 0000020; # File attrib set to type=delete
+
+
+my $DEBUG; #Note setting here will override options value
+
+die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
+my $TopDir = $bpc->TopDir();
+chdir($TopDir); #Do this because 'find' will later try to return to working
+                #directory which may not be accessible if you are su backuppc
+
+
+(my $pc = "$TopDir/pc") =~ s|//*|/|g;
+%Conf = $bpc->Conf(); #Global variable defined in jLib.pm
+
+my %opts;
+if ( !getopts("h:n:s:lrH:mF:qtcd:u", \%opts) || defined($opts{u}) ||
+     !defined($opts{h}) || !defined($opts{n}) ||
+     (!defined($opts{s}) && defined($opts{m})) ||
+     (defined $opts{H} && $opts{H} !~ /^(0|abort|1|skip|2|force)$/) ||
+     (!$opts{l} && !$opts{F} && @ARGV < 1)) {
+    print STDERR <<EOF;
+usage: $0 [options] files/directories...
+
+  Required options:
+    -h <host>     Host (or - for all) from which path is offset
+    -n <range>    Range of successive backup numbers to delete.
+          N     delete files from backup N (only)
+          M-N   delete files from backups M-N (inclusive)
+          -M    delete files from all backups up to M (inclusive)
+          M-    delete files from all backups from M on (inclusive)
+          -     delete files from ALL backups
+          {N}   if one of the numbers is in braces, then interpret
+                as the N\'th backup counting from the *beginning*
+          [N]   if one of the numbers is in brackets, then interpret
+                as the N\'th backup counting from the *end*
+    -s <share>    Share name (or - for all) from which path is offset
+                  (don\'t include the 'f' mangle)
+                  NOTE: if unmangle option (-m) is not set then the share name
+                  is optional and if not specified then it must instead be
+                  included in mangled form as part of the file/directory names.
+
+  Optional options:
+    -l            Just list backups by host (with level noted in parentheses)
+    -r            Allow directories to be removed too (otherwise skips over directories)
+    -H <action>   Treatment of hard links contained in deletion tree:
+                  0|abort   abort with error=2 if hard links in tree [default]
+                  1|skip    Skip hard links or directories containing them
+                  2|force   delete anyway (BE WARNED: this may affect backup
+                            integrity if hard linked to files outside tree)
+    -m            Paths are unmangled (i.e. apply mangle to paths; doesn\'t apply to shares)
+    -F <file>     Read files/directories from <file> (or stdin if <file> = -)
+    -q            Don\'t show deletions
+    -t            Trial run -- do everything but deletions
+    -c            Clean up pool - schedule BackupPC_nightly to run (requires server running)
+                  Only runs if files were deleted
+    -d level      Turn on debug level
+    -u            Print this usage message...
+EOF
+exit(1);
+}
+
+my $hostopt = $opts{h};
+my $numopt = $opts{n};
+my $shareopt = $opts{s} || '';
+my $listopt = $opts{l} || 0;
+my $mangleopt = $opts{m} || 0;
+my $rmdiropt = $opts{r} || 0;
+my $fileopt = $opts{F} || 0;
+my $quietopt = $opts{q} || 0;
+$dryrun = $opts{t} || 0;  #global variable from jLib.pm
+my $runnightlyopt = $opts{c} || 0;
+
+my $hardopt = $opts{H} || 0;
+my $hardaction;
+if($hardopt =~ /^(1|skip)$/) {
+    $hardopt = 1;
+    $hardaction = "SKIPPING";
+}
+elsif($hardopt =~ /^(2|force)$/) {
+    $hardopt = 2;
+}
+else{
+    $hardopt = 0;
+    $hardaction = "ABORTING";
+}
+
+$DEBUG = ($opts{d} || 0 ) unless defined $DEBUG; #Override hard-coded definition unless set explicitly
+#$DEBUG && ($dryrun=1); #Uncomment if you want DEBUG to imply dry run
+#$dryrun=1; #JJK: Uncomment to hard-wire to always dry-run (paranoia)
+my $DRYRUN = ($dryrun == 0 ? "" : " DRY-RUN");
+
+
+# Fill hash with backup structure by host
+my %backupsHoHA;
+get_allhostbackups($hostopt, $numopt, \%backupsHoHA);
+if($listopt) {
+    print_backup_list(\%backupsHoHA);
+    exit;
+}
+
+my $shareregx_sh = my $shareregx_pl = $shareopt;
+if($shareopt eq '-') {
+    $shareregx_pl = "f[^/]+";
+    $shareregx_sh = "f*"; # For shell globbing
+}
+elsif($shareopt ne '') {
+    $shareregx_pl =~ s|//*|%2f|g; #Replace (one or more) '/' with %2f
+    $shareregx_sh = $shareregx_pl = "f" . $shareregx_pl;
+}
+
+#Combine share and file arg regexps
+my (@filelist, @sharearglist);
+if($fileopt) {
+    @filelist = read_file($fileopt);
+}
+else {
+    @filelist = @ARGV;
+}
+foreach my $file (@filelist) {
+    $file = $bpc->fileNameMangle($file) if $mangleopt; #Mangle filename
+    my $sharearg = "$shareregx_sh/$file";
+    $sharearg =~ s|//*|/|g; $sharearg =~ s|^/*||g; $sharearg =~ s|/*$||g;
+        # Remove double, leading, and trailing slashes
+    die "Error: Can't delete root share directory: $sharearg\n"
+        if ("$sharearg" =~ m|^[^/]*$|); #Avoid because dangerous...
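+    # (the guard above rejects patterns without a '/', e.g. a bare 'f*',
+    # since such a pattern would target an entire share tree)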
+    push(@sharearglist, $sharearg);
+}
+
+my $filesdeleted = my $totfilesdeleted = my $filescopied = 0;
+my $attrsdeleted = my $attrscleared = my $atfilesdeleted = 0;
+
+my $hrdlnkflg;
+foreach my $Host (keys %backupsHoHA) { #Loop through each host
+    $hrdlnkflg=0;
+    unless(@{$backupsHoHA{$Host}{baks}}) { #@baks is empty
+        print "[$Host] ***NO BACKUPS FOUND IN DELETE RANGE***\n" unless $quietopt;
+        next;
+    }
+    my @Ante = @{$backupsHoHA{$Host}{ante}};
+    my @Baks = @{$backupsHoHA{$Host}{baks}};
+    my @Post = @{$backupsHoHA{$Host}{post}};
+
+    print "[$Host][" . join(" ", @Ante) . "][" .
+        join(" ", @Baks) . "][" . join(" ", @Post) . "]\n" unless $quietopt;
+
+$DEBUG > 1 && (print "  ANTE[$Host]: " . join(" ", @Ante) ."\n");
+$DEBUG > 1 && (print "  BAKS[$Host]: " . join(" ", @Baks) ."\n");
+$DEBUG > 1 && (print "  POST[$Host]: " . join(" ", @Post) ."\n");
+
+    #We need to glob files that occur both in the delete list (@Baks) and
+    #in the antecedent list (@Ante) since antecedents affect presence of
+    #later incrementals.
+    my $numregx_sh = "{" . join(",", @Ante, @Baks) . "}";
+    my $pcHost = "$pc/$Host";
+    my @filepathlist;
+
+    foreach my $sharearg (@sharearglist) {
+        #Glob for all (relevant) file paths for host across @Baks & @Ante backups
+#JJK   @filepathlist = (@filepathlist, <$pcHost/$numregx_sh/$sharearg>);
+        @filepathlist = (@filepathlist, bsd_glob("$pcHost/$numregx_sh/$sharearg"));
+    }
+    #Now use a hash to collapse into unique file keys (with host & backup number stripped off)
+    my %fileH;
+    foreach my $filepath (@filepathlist) {
+        next unless -e $filepath; #Skip non-existent files (note if no wildcard in path, globbing
+                                  #will always return the file name even if it doesn't exist)
+        $filepath =~ m|^$pcHost/[0-9]+/+(.*)|;
+        $fileH{$1}++; #Note ++ used to set the keys
+    }
+    unless(%fileH) {
+$DEBUG && print "  LOOKING AT: [$Host] [" . join(" ", @Ante) . "][" . join(" ", @Baks) . "][" . join(" ", @Post) . "] **NO DELETIONS ON THIS HOST**\n\n";
+        next;
+    }
+    my $lastfile="///"; #dummy starting point; no real file can match since duplicate '/'s were eliminated
+    foreach my $File (sort keys %fileH) { #Iterate through sorted files
+        # First build an array of filepaths based on ascending backup numbers in
+        # @Baks. Also, do a quick check for directories.
+        next if $File =~ m|^$lastfile/|; # next if current file is in a subdirectory of previous file
+        $lastfile = $File;
+        #Now create list of paths to search for hardlinks
+        my @Pathlist = ();
+        foreach my $Baknum (@Ante) { #Need to include @Ante in hardlink search
+            my $Filepath = "$pc/$Host/$Baknum/$File";
+            next unless -e $Filepath;
+            push (@Pathlist, $Filepath);
+        }
+        my $dirflag=0;
+        foreach my $Baknum (@Baks) {
+            my $Filepath = "$pc/$Host/$Baknum/$File";
+            next unless -e $Filepath;
+            if (-d $Filepath && !$rmdiropt) {
+                $dirflag=1; #Only enforce directory check in @Baks because only deleting there
+                printerr "Skipping directory `$Host/*/$File` since -r flag not set\n\n";
+                last;
+            }
+            push (@Pathlist, $Filepath);
+        }
+        next if $dirflag;
+        next unless(@Pathlist); #Probably shouldn't get here since by construction a path should exist
+                                #for at least one of the elements of @Ante or @Baks
+        #Now check to see if there are any hard links in the @Pathlist
+        find(\&find_is_hlink, @Pathlist ) unless $hardopt == 2; #Unless force
+        exit 2 if $hrdlnkflg && $hardopt == 0; #abort
+        next if $hrdlnkflg;
+$DEBUG && print "  LOOKING AT: [$Host] [" . join(" ", @Ante) . "][" . join(" ", @Baks) . "][" . join(" ", @Post) .
"] $File\n"; + calculate_deletes($Host, $File, \$backupsHoHA{$Host}, \$filedelsHoH{$Host}, !$quietopt); +$DEBUG && print "\n"; + } + execute_deletes($Host, \$backupsHoHA{$Host}, \$filedelsHoH{$Host}, !$quietopt); +} + +print "\nFiles/directories deleted: $filesdeleted($totfilesdeleted) Files/directories copied: $filescopied\n" unless $quietopt; +print "Delete attrib set: $attrsdeleted Attributes cleared: $attrscleared\n" unless $quietopt; +print "Empty attrib files deleted: $atfilesdeleted Errors: $errorcount\n" unless $quietopt; +run_nightly($bpc) if (!$dryrun && $runnightlyopt); +exit; + +#Set $hrdlnkflg=1 if find a hard link (including "targets") +# Short-circuit/prune find as soon as hard link found. +sub find_is_hlink +{ + if($hrdlnkflg) { + $File::Find::prune = 1; #Prune search if hard link already found + #i.e. don't go any deeper (but still will finish the current level) + } + elsif($File::Find::name eq $File::Find::topdir #File + && -f && m|f.*| + &&( get_jtype($File::Find::name) & S_HLINK_TARGET)) { + # Check if file has type hard link (or hard link target) Note: we + # could have used this test recursively on all files in the + # directory tree, but it would be VERY SLOW since we would need to + # read the attrib file for every file in every + # subdirectory. Instead, we only use this method when we are + # searching directly for a file at the top leel + # (topdir). Otherwise, we use the method below that just + # recursively searches for the attrib file and reads that + # directly. + $hrdlnkflg = 1; + print relpath($File::Find::name) . ": File is a hard link. $hardaction...\n\n"; + } + elsif (-d && -e attrib($File::Find::name)) { #Directory + # Read through attrib file hash table in each subdirectory in tree to + # find files that are hard links (including 'targets'). Fast + # because only need to open attrib file once per subdirectory to test + # all the files in the directory. + read_attrib(my $attr, $File::Find::name); + foreach my $file (keys (%{$attr->get()})) { #Look through all file hash entries + if (${$attr->get($file)}{type} == 1 || #Hard link + (${$attr->get($file)}{mode} & S_HLINK_TARGET)) { #Hard link target + $hrdlnkflg = 1; + $File::Find::topdir =~ m|^$pc/([^/]+)/([0-9]+)/(.*)|; +# print relpath($File::Find::topdir) . +# print relpath($File::Find::name) . +# ": Directory contains hard link: $file'. $hardaction...\n\n"; + print "[$1][$2] $3: Directory contains hard link: " . + substr($File::Find::name, length($File::Find::topdir)) . + "/f$file ... $hardaction...\n\n"; + + last; #Stop readin attrib file...hard link found + } + } + } +} + +# Main routine for figuring out what files/dirs in @baks get deleted +# and/or copied/linked to @post along with which attrib entries are +# cleared or set to delete type in both the @baks and @post backupchains. +sub calculate_deletes +{ + my ($hostname, $filepath, $backupshostHAref, $filedelsHref, $verbose) = @_; + my @ante = @{$$backupshostHAref->{ante}}; + my @baks = @{$$backupshostHAref->{baks}}; + my @post = @{$$backupshostHAref->{post}}; + my %Level = %{$$backupshostHAref->{level}}; + my %Vislvl = %{$$backupshostHAref->{vislvl}}; + my $pchost = "$pc/$hostname"; + + #We first need to look down the direct antecedent chain in @ante + #to determine whether earlier versions of the file exist and if so + #at what level of incrementals will they be visible. 
+    #A file in the @ante chain is potentially visible later in the @baks
+    #chain at the given level (or higher) if there is no intervening
+    #type=10 (delete) attrib in the chain. If there is already a type=10
+    #attrib in the @ante chain then the file will be invisible in the
+    #@baks chain at the same level or higher of incremental backups.
+
+    #Recall that the elements of @ante by construction have *strictly*
+    #increasing backup levels, so the visibility scope decreases
+    #as you go down the chain.
+
+    #We first iterate up the @ante chain and construct a hash
+    #(%VisibleAnte) whose entry at each level is either a backup number
+    #or -1 depending on whether there is a file or a type=10 delete
+    #attrib at that level. For any level at which there is no
+    #antecedent, the corresponding entry of %VisibleAnte remains undef.
+
+    my %VisibleAnte; # $VisibleAnte{$level} is equal to -1 if nothing visible from @Ante at the given level,
+                     # i.e. if either there was a type=delete at that level or if that level was blank but
+                     # there was a type=delete at a lower level without an intervening file.
+                     # Otherwise, it is set to the backup number of the file that was visible at that level.
+                     # This hash is used to determine where we need to add type=10 delete attributes to
+                     # @baks to keep the files still present in @ante from poking through into the
+                     # deleted @baks region.
+
+    my %VisibleAnteBaks; # This hash is very similar but now we construct it all the way through @Baks to
+                         # determine what was ORIGINALLY visible to the elements of @post since we may
+                         # need to copy/link files forward to @post if they have been deleted from @baks or
+                         # if they are now blocked by a new type=delete attribute in @baks.
+
+    $VisibleAnte{0} = $VisibleAnteBaks{0} = -1; #Starts as invisible until first file appears
+    $filepath =~ m|(.*)/|;
+    foreach my $prevbaknum (@ante) {
+        my $prevbakfile = "$pchost/$prevbaknum/$filepath";
+        my $level = $Level{$prevbaknum};
+        my $type = get_attrib_type($prevbakfile);
+        my $nodir = ($type == -3 ? 1 : 0); #Note type = -3 if dir non-existent
+        printerr "Attribute file unreadable: $prevbaknum/$filepath\n" if $type == -4;
+
+        #Determine what is visible to @Baks and to @Post
+        if($type == BPC_FTYPE_DELETED || $nodir) { #Not visible if deleted type or no parent dir
+            $VisibleAnte{$level} = $VisibleAnteBaks{$level} = -1; #always update
+            $VisibleAnteBaks{$level} = -1
+                if defined($Vislvl{$level}) && $Vislvl{$level} == $prevbaknum;
+                #only update if this is the most recent backup at this level visible from @post
+        }
+        elsif (-r $prevbakfile) { #File exists so visible at this level
+            $VisibleAnte{$level} = $prevbaknum; # always update because @ante is in strictly increasing order
+            $VisibleAnteBaks{$level} = $prevbaknum
+                if defined($Vislvl{$level}) && $Vislvl{$level} == $prevbaknum;
+                #Only update if this will be visible from @post (may be blocked later by @baks)
+        }
+
+$DEBUG > 1 && print "    ANTE[$prevbaknum]($level) $hostname/$prevbaknum/$filepath [" . (-f $prevbakfile ? "f" : (-d $prevbakfile ? "d": ($nodir ? "x" : "-"))) . "][" . ($type >=0 ? $type : "-") . "]\n";
+    }
+
+    #Next, iterate down @baks to schedule file/dirs for deletion
+    #and/or for clearing/changing file attrib entry based on the
+    #status of the visibility flag at that level (or below) and the
+    #presence of $filepath in the backup.
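+    #(Illustration with made-up numbers: if @ante holds a level-0 backup
+    #that contains the file and @baks holds a level-1 and a level-2
+    #backup, then deleting the file from the level-1 backup also requires
+    #setting a type=10 delete attrib there; otherwise the level-0 copy
+    #would "shine through" to the level-2 backup and into @post.)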
+ #The status of what we do to the file and what we do to the attribute is stored in + #the hash ref %filedelsHref + my $minbaklevel = $baks[0]; + foreach my $currbaknum (@baks) { + my $currbakfile = "$pchost/$currbaknum/$filepath"; + my $level = $Level{$currbaknum}; + my $type = get_attrib_type($currbakfile); + my $nodir = ($type == -3 ? 1 : 0); #Note type = -3 if dir non-existent + printerr "Attribute file unreadable: $currbaknum/$filepath\n" if $type == -4; + my $actionflag = "-"; my $printstring = "";#Used for debugging statements only + + #Determine what is visible to @Post; also set file for deletion if present + if($type == BPC_FTYPE_DELETED || $nodir) { #Not visible if deleted type or no parent dir + $VisibleAnteBaks{$level} = -1 + if defined $Vislvl{$level} && $Vislvl{$level} == $currbaknum; #update if visible from @post + } + elsif (-r $currbakfile ) { + $VisibleAnteBaks{$level} = $currbaknum + if defined($Vislvl{$level}) && $Vislvl{$level} == $currbaknum; #update if visible + $$filedelsHref->{$currbaknum}{$filepath} |= FILE_DELETED; +$DEBUG > 2 && ($printstring .= " [$currbaknum] Adding to delete list: $hostname/$currbaknum/$filepath\n"); + } + + #Determine whether deleted file attribs should be cleared or set to type 10=delete + if(!$nodir && $level <= $minbaklevel && last_visible_backup($level, \%VisibleAnte) >= 0) { + #Existing file in @ante will shine through since nothing in @baks is blocking + #Note if $level > $minbaklevel then we will already be shielding it with a previous @baks element + $minbaklevel = $level; + if ($type != BPC_FTYPE_DELETED) { # Set delete type if not already of type delete + $$filedelsHref->{$currbaknum}{$filepath} |= ATTRIB_DELETETYPE; + $actionflag="D"; +$DEBUG > 2 && ($printstring .= " [$currbaknum] Set attrib to type=delete: $hostname/$currbaknum/$filepath\n"); + } + } + elsif ($type >=0) { #No antecedent from @Ante will shine through since already blocked. + #So if there is an attribute type there, we should clear the attribute since + #nothing need be there + $$filedelsHref->{$currbaknum}{$filepath} |= ATTRIB_CLEARED; + $actionflag="C"; +$DEBUG > 2 && ($printstring .= " [$currbaknum] Clear attrib file entry: $hostname/$currbaknum/$filepath\n"); + } +$DEBUG > 1 && print " BAKS[$currbaknum]($level) $hostname/$currbaknum/$filepath [" . (-f $currbakfile ? "F" : (-d $currbakfile ? "D": ($nodir ? "X" : "-"))) . "][" . ($type>=0 ? $type : "-") . "][$actionflag]\n"; +$DEBUG >3 && print $printstring; + } + +#Finally copy over files as necessary to make them appropriately visible to @post +#Recall again that successive elements of @post are strictly lower in level. +#Therefore, each element of @post either already has a file entry or it +#inherits its entry from the previously deleted backups. + foreach my $nextbaknum (@post) { + my $nextbakfile = "$pchost/$nextbaknum/$filepath"; + my $level = $Level{$nextbaknum}; + my $type = get_attrib_type($nextbakfile); + my $nodir = ($type == -3 ? 
1 : 0); #Note type = -3 if dir non-existent + printerr "Attribute file unreadable: $nextbaknum/$filepath\n" if $type == -4; + my $actionflag = "-"; my $printstring = ""; #Used for debugging statements only + + #If there is a previously visible file from @Ante or @Post that used to shine through (but won't now + #because either in @Ante and blocked by @Post deletion or deleted from @Post) and if nothing in @Post + # is blocking (i.e directory exists, no file there, and no delete type), then we need to copy/link + #the file forward + if ((my $delnum = last_visible_backup($level, \%VisibleAnteBaks)) >= 0 && + $type != BPC_FTYPE_DELETED && !$nodir && !(-r $nextbakfile)) { + #First mark that last visible source file in @Ante or @Post gets copied + $$filedelsHref->{$delnum}{$filepath} |= FILE_ATTRIB_COPY; + #Note still keep the FILE_DELETED attrib because we may still need to delete the source + #after moving if the source was in @baks + #Second tell the target where it gets its source + $$filedelsHref->{$nextbaknum}{$filepath} = ($delnum+1) << 6; # + #Store the source in higher bit numbers to avoid overlapping with our flags. Add 1 so as to + #be able to distinguish empty (non stored) path from backup #0. +$DEBUG > 2 && ($printstring .= " [$nextbaknum] Moving file and attrib from backup $delnum: $filepath\n"); + $actionflag = "M$delnum"; + } + elsif ($type == BPC_FTYPE_DELETED) { + # File has a delete attrib that is now no longer necessary since + # every element of @post by construction has a deleted immediate predecessor in @baks + $$filedelsHref->{$nextbaknum}{$filepath} |= ATTRIB_CLEARED; +$DEBUG > 2 && ($printstring .= " [$nextbaknum] Clear attrib file entry: $hostname/$nextbaknum/$filepath\n"); + $actionflag = "C"; + } +$DEBUG >1 && print " POST[$nextbaknum]($level) $hostname/$nextbaknum/$filepath [" . (-f $nextbakfile ? "f" : (-d $nextbakfile ? "d": ($nodir ? "x" : "-"))) . "][" . ($type >= 0 ? $type : "-") . "][$actionflag]\n"; +$DEBUG >3 && print $printstring; + } +} + +sub execute_deletes +{ + my ($hostname, $backupshostHAref, $filedelsHref, $verbose) = @_; + my @ante = @{$$backupshostHAref->{ante}}; + my @baks = @{$$backupshostHAref->{baks}}; + my @post = @{$$backupshostHAref->{post}}; + my %Level = %{$$backupshostHAref->{level}}; + + my $pchost = "$pc/$hostname"; + foreach my $backnum (@ante, @baks, @post) { + #Note the only @ante action is copying over files + #Note the only @post action is clearing the file attribute + print "**BACKUP: [$hostname][$backnum]($Level{$backnum})\n"; + my $prevdir=0; + my ($attr, $dir, $file); + foreach my $filepath (sort keys %{$$filedelsHref->{$backnum}}) { + my $VERBOSE = ($verbose ? 
"" : "[$hostname][$backnum] $filepath:"); + my $delfilelist; + my $filestring = my $attribstring = '-'; + my $movestring = my $printstring = ''; + $filepath =~ m|(.*)/f(.*)|; + $dir = "$pchost/$backnum/$1"; + my $dirstem = $1; + $file = $2; + if($dir ne $prevdir) { #New directory - we only need to read/write the atrrib file once per dir + write_attrib_out($bpc, $attr, $prevdir, $verbose) + if $prevdir; #Write out previous $attr + die "Error: can't write attribute file to directory: $dir" unless -w $dir; + read_attrib($attr, $dir); #Read in new attribute + $prevdir = $dir; + } + + my $action = $$filedelsHref->{$backnum}{$filepath}; + if($action & FILE_ATTRIB_COPY) { + my %sourceattr; + get_file_attrib("$pchost/$backnum/$filepath", \%sourceattr); + my $checkpoollinks = 1; #Don't just blindly copy or link - make sure linked to pool + foreach my $nextbaknum (@post) { + my ($ret1, $ret2); + next unless (defined($$filedelsHref->{$nextbaknum}{$filepath}) && + ($$filedelsHref->{$nextbaknum}{$filepath} >> 6) - 1 == $backnum); + #Note: >>6 followed by decrement of 1 recovers the backup number encoding + #Note: don't delete or clear/delete source attrib now because we may need to move + #several copies - so file deletion and attribute clear/delete is done after moving + + + if(($ret1=link_recursively_topool ($bpc, "$pchost/$backnum/$filepath", + "$pchost/$nextbaknum/$filepath", + $checkpoollinks, 1)) >= 0 + && ($ret2=write_file_attrib($bpc, "$pchost/$nextbaknum/$dirstem", $file, \%sourceattr, 1)) > 0){ + #First move files by linking them to pool recursively and then copy attributes + $checkpoollinks = 0 if $ret1 > 0; #No need to check pool links next time if all ok now + $movestring .= "," unless $movestring eq ''; + $movestring .= "$backnum" . ($ret1 == 1 ? "->" : "=>") . "$nextbaknum\n"; + $filescopied++; + } + else { + $action = 0; #If error moving, cancel the subsequent file and attrib deletion/clearing + junlink("$pchost/$nextbaknum/$filepath"); #undo partial move + if($ret1 <0) { + $printstring .= "$VERBOSE FAILED TO MOVE FILE/DIR: $backnum-->$nextbaknum -- UNDOING PARTIAL MOVE\n"; + } + else { + $printstring .= "$VERBOSE FAILED TO WRITE NEW ATTRIB FILE IN $nextbaknum AFTER MOVING FILE/DIR: $backnum-->$nextbaknum FROM $backnum -- UNDOING MOVE\n"; + } + next; # Skip to next move + } + } + } + if ($action & FILE_DELETED) { #Note delete follows moving + my $isdir = (-d "$pchost/$backnum/$filepath" ? 1 : 0); + my $numdeletes = delete_files("$pchost/$backnum/$filepath", \$delfilelist); + if($numdeletes > 0) { + $filestring = ($isdir ? "D$numdeletes" : "F" ); + $filesdeleted++; + $totfilesdeleted +=$numdeletes; + if($delfilelist) { + $delfilelist =~ s!(\n|^)(unlink|rmdir ) *$pchost/$backnum/$filepath(\n|$)!!g; #Remove top directory + $delfilelist =~ s!^(unlink|rmdir ) *$pc/! !gm; #Remove unlink/rmdir prefix + } + } + else { + $printstring .= "$VERBOSE FILE FAILED TO DELETE ($numdeletes)\n"; + } + } + if ($action & ATTRIB_CLEARED) { #And attrib changing follows file moving & deletion... 
+            $attr->delete($file);
+            $attribstring = "C";
+            $attrscleared++;
+
+        }
+        elsif($action & ATTRIB_DELETETYPE) {
+            if (defined($attr->get($file)) && ${$attr->get($file)}{type} == BPC_FTYPE_DELETED) {
+                $attribstring = "d";
+            }
+            else {
+                $attr->set($file, $DeleteAttribH); # Set file to deleted type (10)
+                $attribstring = "D";
+                $attrsdeleted++;
+            }
+        }
+        print "  [$hostname][$backnum]$filepath [$filestring][$attribstring] $movestring$DRYRUN\n"
+            if $verbose && ($filestring ne '-' || $attribstring ne '-' || $movestring ne '');
+        print $delfilelist . "\n" if $verbose && $delfilelist;
+        print $printstring;
+    }
+    write_attrib_out($bpc, $attr, $dir, $verbose)
+        if $prevdir; #Write out last attribute
+    }
+}
+
+sub write_attrib_out
+{
+    my ($bpc, $attr, $dir, $verbose) = @_;
+    my $ret;
+    my $numattribs = count_file_attribs($attr);
+    die "Error writing to attrib file for $dir\n"
+        unless ($ret = write_attrib($bpc, $attr, $dir, 1, 1)) > 0;
+    $dir =~ m|^$pc/([^/]*)/([^/]*)/(.*)|;
+    $atfilesdeleted++ if $ret==4;
+    print "  [$1][$2]$3/attrib [-]" .
+        ($ret==4 ? "[R]" : ($ret==3 ? "[w]" : ($ret > 0 ? "[W]" : "[X]")))
+        ."$DRYRUN\n" if $verbose;
+    return $ret;
+}
+
+#If an earlier file is visible at this level, return the backup number where the file was last present.
+#Otherwise return -1 (occurs if there was an intervening type=10 or if a file never existed)
+sub last_visible_backup
+{
+    my ($numlvl, $Visiblebackref) = @_;
+    my $lvl = --$numlvl; #For visibility look at one less than current level and lower
+
+    return -1 unless $lvl >= 0;
+    do {
+        return ($Visiblebackref->{$numlvl} = $Visiblebackref->{$lvl}) #Set & return
+            if defined($Visiblebackref->{$lvl});
+    } while($lvl--);
+    return -1; #This shouldn't happen since we initialize $Visiblebackref->{0} = -1;
+}
+
+# Get the modified type from the attrib file,
+# which I define as:
+#   type + (type == BPC_FTYPE_HARDLINK ? S_HLINK_TARGET : (mode & S_HLINK_TARGET))
+# i.e. you get both the type and whether it is either an hlink
+# or an hlink-target
+sub get_jtype
+{
+    my ($fullfilename) = @_;
+    my %fileattrib;
+
+    return 100 if get_file_attrib($fullfilename, \%fileattrib) <= 0;
+    my $type = $fileattrib{type};
+    my $mode = $fileattrib{mode};
+    $type + ($type == BPC_FTYPE_HARDLINK ?
+ S_HLINK_TARGET : ($mode & S_HLINK_TARGET)); +} + +#Set elements of the hash backupsHoHA which is a mixed HoHA and HoHoH +#containing backup structure for each host in hostregex_sh + +# Elements are: +# backupsHoHA{$host}{baks} - chain (array) of consecutive backups numbers +# whose selected files we will be deleting +# backupsHoHA{$host}{ante} - chain (array) of backups directly antecedent +# to those in 'baks' - these are all "parents" of all elemenst +# of 'baks' [in descending numerical order and strictly descending +# increment order] +# backupsHoHA{$host}{post} - chain (array) of backups that are incremental +# backups of elements of 'baks' - these must all be "children" of +# all element of 'baks' [in ascending numerical order and strictly +# descending increment order] +# backupsHoHA{$host}{level}{$n} - level of backup $n +# backupsHoHA{$host}{vislvl}{$level} - highest (most recent) backup number in (@ante, @baks) with $level +# Note: this determines which backups from (@ante, @baks) are potentially visible from @post + +sub get_allhostbackups +{ + my ($hostregx_sh, $numregx, $backupsHoHAref) = @_; + + + die "$0: bad host name '$hostregx_sh'\n" + if ( $hostregx_sh !~ m|^([-\w\.\s*]+)$| || $hostregx_sh =~ m{(^|/)\.\.(/|$)} ); + $hostregx_sh = "*" if ($hostregx_sh eq '-'); # For shell globbing + + die "$0: bad backup number range '$numopt'\n" + if ( $numregx !~ m!^((\d*)|{(\d+)}|\[(\d+)\])-((\d*)|{(\d+)}|\[(\d+)\])$|(\d+)$! ); + + my $startnum=0; + my $endnum = 99999999; + if(defined $2 && $2 ne '') {$startnum = $2;} + elsif(defined $9) {$startnum = $endnum = $9;} + if(defined $6 && $6 ne ''){$endnum=$6}; + die "$0: bad dump range '$numopt'\n" + if ( $startnum < 0 || $startnum > $endnum); + my $startoffsetbeg = $3; + my $endoffsetbeg = $7; + my $startoffsetend = $4; + my $endoffsetend = $8; + + my @allbaks = bsd_glob("$pc/$hostregx_sh/[0-9]*/backupInfo"); + #Glob for list of valid backup paths + for (@allbaks) { #Convert glob to hash of backups and levels + m|.*/(.*)/([0-9]+)/backupInfo$|; # $1=host $2=baknum + my $level = get_bakinfo("$pc/$1/$2", "level"); + $backupsHoHAref->{$1}{level}{$2} = $level + if defined($level) && $level >=0; # Include if backup level defined + } + + foreach my $hostname (keys %{$backupsHoHAref}) { #Loop through each host + #Note: need to initialize the following before we assign reference shortcuts + #Note {level} already defined + @{$backupsHoHAref->{$hostname}{ante}} = (); + @{$backupsHoHAref->{$hostname}{baks}} = (); + @{$backupsHoHAref->{$hostname}{post}} = (); + %{$backupsHoHAref->{$hostname}{vislvl}} = (); + + #These are all references + my $anteA= $backupsHoHAref->{$hostname}{ante}; + my $baksA= $backupsHoHAref->{$hostname}{baks}; + my $postA= $backupsHoHAref->{$hostname}{post}; + my $levelH= $backupsHoHAref->{$hostname}{level}; + my $vislvlH= $backupsHoHAref->{$hostname}{vislvl}; + + my @baklist = (sort {$a <=> $b} keys %{$levelH}); #Sorted list of backups for current host + $startnum = $baklist[$startoffsetbeg-1] || 99999999 if defined $startoffsetbeg; + $endnum = $baklist[$endoffsetbeg-1] || 99999999 if defined $endoffsetbeg; + $startnum = $baklist[$#baklist - $startoffsetend +1] || 0 if defined $startoffsetend; + $endnum = $baklist[$#baklist - $endoffsetend +1] || 0 if defined $endoffsetend; + + my $minbaklevel = my $minvislevel = 99999999; + my @before = my @after = (); + #NOTE: following written for clarity, not speed + foreach my $baknum (reverse @baklist) { #Look backwards through list of backups + #Loop through reverse sorted list of 
backups for current host
+        my $level = $$levelH{$baknum};
+        if($baknum <= $endnum) {
+            $$vislvlH{$level} = $baknum if $level < $minvislevel;
+            $minvislevel = $level if $level < $minvislevel;
+        }
+        if($baknum >= $startnum && $baknum <= $endnum) {
+            unshift(@{$baksA}, $baknum); #sorted in increasing order
+            $minbaklevel = $level if $level < $minbaklevel;
+        }
+        push (@before, $baknum) if $baknum < $startnum; #sorted in decreasing order
+        unshift(@after, $baknum) if $baknum > $endnum; #sorted in increasing order
+    }
+    next unless @{$baksA}; # Nothing to delete on this host
+
+    my $oldlevel = $$levelH{$$baksA[0]}; # i.e. level of first backup in baksA
+    for (@before) {
+        #Find all direct antecedents until the preceding level 0 and push on anteA
+        if ($$levelH{$_} < $oldlevel) {
+            unshift(@{$anteA}, $_); #Antecedents are in increasing order with strictly increasing level
+            last if $$levelH{$_} == 0;
+            $oldlevel = $$levelH{$_};
+        }
+    }
+    $oldlevel = 99999999;
+    for (@after) {
+        # Find all successors that are immediate children of elements of @baks
+        if ($$levelH{$_} <= $oldlevel) { # Can have multiple descendants at the same level
+            last if $$levelH{$_} <= $minbaklevel; #Not a successor because dips below minimum
+            push(@{$postA}, $_); #Descendants are in increasing order with non-increasing level
+            $oldlevel = $$levelH{$_};
+        }
+    }
+    }
+}
+
+# Print the @Baks list along with the level of each backup in parentheses
+sub print_backup_list
+{
+    my ($backupsHoHAref) = @_;
+
+    foreach my $hostname (sort keys %{$backupsHoHAref}) { #Loop through each host
+        print "$hostname: ";
+        foreach my $baknum (@{$backupsHoHAref->{$hostname}{baks}}) {
+            print "$baknum($backupsHoHAref->{$hostname}{level}{$baknum}) ";
+        }
+        print "\n";
+    }
+}
+
+#Read in external file and return list of lines of file
+sub read_file
+{
+    my ($file) = @_;
+    my $fh;
+    my @lines;
+
+    if($file eq '-') {
+        $fh = *STDIN;
+    }
+    else {
+        die "ERROR: Can't open: $file\n" unless open($fh, "<", $file);
+    }
+    while(<$fh>) {
+        chomp;
+        next if m|^\s*$| || m|^#|;
+        push(@lines, $_);
+    }
+    close($fh) unless $file eq '-'; #Don't close STDIN
+    return @lines;
+}
+
+
+# Strip off the leading $TopDir/pc portion of path
+sub relpath
+{
+    substr($_[0],1+length($pc));
+}
+
+
+sub min
+{
+    $_[0] < $_[1] ? $_[0] : $_[1];
+}
+
diff --git a/backuppc-deletefile/debian/README b/backuppc-deletefile/debian/README
new file mode 100644
index 0000000..19224a1
--- /dev/null
+++ b/backuppc-deletefile/debian/README
@@ -0,0 +1,8 @@
+backuppc-deletefile
+-------------------
+
+This package makes it easy to install the solution proposed on this site:
+
+  http://sourceforge.net/apps/mediawiki/backuppc/index.php?title=BackupPC_DeleteFile
+
+ -- Progfou  Tue, 31 Jul 2012 15:41:52 +0700
diff --git a/backuppc-deletefile/debian/changelog b/backuppc-deletefile/debian/changelog
new file mode 100644
index 0000000..63c6b52
--- /dev/null
+++ b/backuppc-deletefile/debian/changelog
@@ -0,0 +1,5 @@
+backuppc-deletefile (1.0) unstable; urgency=low
+
+  * First packaged version.
+
+ -- Progfou  Tue, 31 Jul 2012 15:37:04 +0700
diff --git a/backuppc-deletefile/debian/compat b/backuppc-deletefile/debian/compat
new file mode 100644
index 0000000..7f8f011
--- /dev/null
+++ b/backuppc-deletefile/debian/compat
@@ -0,0 +1 @@
+7
diff --git a/backuppc-deletefile/debian/control b/backuppc-deletefile/debian/control
new file mode 100644
index 0000000..99f97de
--- /dev/null
+++ b/backuppc-deletefile/debian/control
@@ -0,0 +1,15 @@
+Source: backuppc-deletefile
+Section: utils
+Priority: optional
+Maintainer: Progfou
+Build-Depends: debhelper (>= 7)
+Standards-Version: 3.8.4
+Homepage: http://sourceforge.net/apps/mediawiki/backuppc/index.php?title=BackupPC_DeleteFile
+
+Package: backuppc-deletefile
+Architecture: all
+Depends: ${misc:Depends}, backuppc
+Description: tool to delete files from BackupPC backups
+ This package provides the BackupPC_DeleteFile tool, which properly
+ deletes files from within BackupPC backups.
+
diff --git a/backuppc-deletefile/debian/copyright b/backuppc-deletefile/debian/copyright
new file mode 100644
index 0000000..c9baea6
--- /dev/null
+++ b/backuppc-deletefile/debian/copyright
@@ -0,0 +1,39 @@
+This work was packaged for Debian by:
+
+    Progfou on Tue, 31 Jul 2012 15:45:39 +0700
+
+It was downloaded from http://sourceforge.net/apps/mediawiki/backuppc/index.php?title=BackupPC_DeleteFile
+
+Upstream Author(s):
+
+    Jeff Kosowsky
+
+Copyright:
+
+    Copyright (C) 2008-2011 Jeff Kosowsky
+
+License:
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this package; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+On Debian systems, the complete text of the GNU General Public
+License can be found in `/usr/share/common-licenses/GPL-2'.
+
+The Debian packaging is:
+
+    Copyright (C) 2012 Agence universitaire de la Francophonie - www.auf.org
+
+and is licensed under the GPL version 3,
+see `/usr/share/common-licenses/GPL-3'.
diff --git a/backuppc-deletefile/debian/dirs b/backuppc-deletefile/debian/dirs
new file mode 100644
index 0000000..2c3d1d1
--- /dev/null
+++ b/backuppc-deletefile/debian/dirs
@@ -0,0 +1,2 @@
+/usr/share/backuppc/bin
+/usr/share/backuppc/lib
diff --git a/backuppc-deletefile/debian/docs b/backuppc-deletefile/debian/docs
new file mode 100644
index 0000000..e69de29
diff --git a/backuppc-deletefile/debian/install b/backuppc-deletefile/debian/install
new file mode 100644
index 0000000..9db3c64
--- /dev/null
+++ b/backuppc-deletefile/debian/install
@@ -0,0 +1,2 @@
+BackupPC_deleteFile.pl /usr/share/backuppc/bin
+jLib.pm /usr/share/backuppc/lib
diff --git a/backuppc-deletefile/debian/rules b/backuppc-deletefile/debian/rules
new file mode 100755
index 0000000..706b324
--- /dev/null
+++ b/backuppc-deletefile/debian/rules
@@ -0,0 +1,11 @@
+#!/usr/bin/make -f
+# -*- makefile -*-
+
+# Uncomment this to turn on verbose mode.
+#export DH_VERBOSE=1
+
+# This has to be exported to make some magic below work.
+export DH_OPTIONS + +%: + dh $@ diff --git a/backuppc-deletefile/jLib.pm b/backuppc-deletefile/jLib.pm new file mode 100644 index 0000000..30b710f --- /dev/null +++ b/backuppc-deletefile/jLib.pm @@ -0,0 +1,1074 @@ +#============================================================= -*-perl-*- +# +# BackupPC::jLib package +# +# DESCRIPTION +# +# This library includes various supporting subroutines for use with BackupPC +# functions used by BackupPC. +# Some of the routines are variants/extensions of routines originally written +# by Craig Barratt as part of the main BackupPC release. +# +# AUTHOR +# Jeff Kosowsky +# +# COPYRIGHT +# Copyright (C) 2008-2011 Jeff Kosowsky +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +#======================================================================== +# +# Version 0.4.0, released January 2011 +# +#======================================================================== + +package BackupPC::jLib; + +use strict; +use vars qw($VERSION); +$VERSION = '0.4.0'; + +use warnings; +use File::Copy; +use File::Path; +use File::Temp; +use Fcntl; #Required for RW I/O masks + +use BackupPC::Lib; +use BackupPC::Attrib; +use BackupPC::FileZIO; +use Data::Dumper; #Just used for debugging... + +no utf8; + +use constant _128KB => 131072; +use constant _1MB => 1048576; +use constant LINUX_BLOCK_SIZE => 4096; +use constant TOO_BIG => 2097152; # 1024*1024*2 (2MB) + +require Exporter; +our @ISA = qw(Exporter); +our @EXPORT = qw( + %Conf $dryrun $errorcount + LINUX_BLOCK_SIZE TOO_BIG + printerr warnerr firstbyte + zFile2MD5 zFile2FullMD5 + link_recursively_topool + jfileNameUnmangle + getattr read_attrib count_file_attribs + get_bakinfo get_file_attrib get_attrib_value get_attrib_type + write_attrib write_file_attrib + attrib + GetPoolLink jMakeFileLink + poolname2number renumber_pool_chain delete_pool_file + run_nightly + jcompare zcompare zcompare2 + delete_files touch + jcopy jlink junlink jmkdir jmkpath jmake_path jrename jrmtree); + +#Global variables +our %Conf; +our $errorcount=0; +our $dryrun=1; #global variable - set to 1 to be safe -- should be set to + #0 in actual program files if you don't want a dry-run + #None of the routines below will change/delete/write actual + #file data if this flag is set. The goal is to do everything but + #the final write to assist with debugging or cold feet :) + +sub printerr +{ + print "ERROR: " . $_[0]; + $errorcount++; +} + +sub warnerr +{ + $|++; # flush printbuffer first + warn "ERROR: " . $_[0]; + $errorcount++; +} + +# Returns the firstbyte of a file. +# If coding $coding undefined or 0, return as unpacked 2 char hexadecimal +# string. Otherwise, return as binary byte. +# Return -1 on error. +# Useful for checking the type of compressed file/checksum coding. 
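+# Example (hypothetical pool path and byte values): for a compressed
+# pool file, firstbyte("$TopDir/cpool/1/2/3/<hash>", 0) would typically
+# return "78" (zlib deflate streams start with 0x78), or "d7" once
+# rsync checksums have been appended/cached by BackupPC.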
+sub firstbyte {
+    my ($file, $coding) = @_;
+    my $fbyte='';
+    sysopen(my $fh, $file, O_RDONLY) || return -1;
+    $fbyte = -1 unless sysread($fh, $fbyte, 1) == 1;
+    close($fh);
+    if (! defined($coding) || $coding == 0) {
+        $fbyte = unpack('H*',$fbyte); # Unpack as 2 char hexadecimal string
+    }
+    else {
+        $fbyte = vec($fbyte, 0, 8); # Return as binary byte
+    }
+    return $fbyte;
+}
+
+# Compute the MD5 digest of a compressed file. This is the compressed
+# file version of the Lib.pm function File2MD5.
+# For efficiency we don't use the whole file for big files
+#   - for files <= 256K we use the file size and the whole file.
+#   - for files <= 1M we use the file size, the first 128K and
+#     the last 128K.
+#   - for files > 1M, we use the file size, the first 128K and
+#     the 8th 128K (i.e. the 128K up to 1MB).
+# See the documentation for a discussion of the tradeoffs in
+# how much data we use and how many collisions we get.
+#
+# Returns the MD5 digest (a hex string) and the file size if it succeeds
+# (or "" and an error code if it fails).
+# Note the return for a zero size file is ("", 0).
+#
+# If $size < 0 then calculate size of file by fully decompressing
+# If $size = 0 then first try to read corresponding attrib file
+#    (if it exists); if that doesn't work then calculate by fully decompressing
+# If $size > 0 then use that as the size of the file
+#
+# If $compresslvl is undefined then use the default compress level from
+# the config file
+
+sub zFile2MD5
+{
+    my($bpc, $md5, $name, $size, $compresslvl) = @_;
+
+    my ($fh, $rsize, $filesize, $md5size);
+
+    return ("", -1) unless -f $name;
+    return ("", 0) if (stat(_))[7] == 0; #Zero size file
+    $compresslvl = $Conf{CompressLevel} unless defined $compresslvl;
+    unless (defined ($fh = BackupPC::FileZIO->open($name, 0, $compresslvl))) {
+        printerr "Can't open $name\n";
+        return ("", -1);
+    }
+
+    my ($datafirst, $datalast);
+    my @data;
+    #First try to read up to the first 128K (131072 bytes)
+    if ( ($md5size = $fh->read(\$datafirst, _128KB)) < 0 ) { #First 128K
+        printerr "Can't read & decompress $name\n";
+        return ("", -1);
+    }
+
+    if ($md5size == _128KB) { # If full read, continue reading up to 1st MB
+        my $i=0;
+        #Read in up to 1MB (_1MB), 128K at a time and alternate between 2 data buffers
+        while ( ($rsize = $fh->read(\$data[(++$i)%2], _128KB)) == _128KB
+                && ($md5size += $rsize) < _1MB ) {}
+        $md5size +=$rsize if $rsize < _128KB; # Add back in partial read
+        $datalast = ($i > 1 ?
+                     substr($data[($i-1)%2], $rsize, _128KB-$rsize) : '')
+            . substr($data[$i%2], 0 ,$rsize); #Last 128KB (up to 1MB)
+    }
+    if($md5size < _1MB) { #Already know the size because read it all (note don't do <=)
+        $filesize = $md5size;
+    } elsif($size > 0) { #Use given size
+        $filesize = $size;
+    } elsif($compresslvl == 0) { #Not compressed, so: size = actual size
+        $filesize = -s $name;
+    } elsif($size == 0) { # Try to find size from attrib file
+        $filesize = get_attrib_value($name, "size");
+        if(!defined($filesize)) {
+            warn "Can't read size of $name from attrib file so calculating manually\n";
+        }
+    }
+    if(!defined($filesize)) { #No choice but to continue reading to find size
+        $filesize = $md5size;
+        while (($rsize = $fh->read(\($data[0]), _128KB)) > 0) {
+            $filesize +=$rsize;
+        }
+    }
+    $fh->close();
+
+    $md5->reset();
+    $md5->add($filesize);
+    $md5->add($datafirst);
+    $md5->add($datalast) if defined($datalast);
+    return ($md5->hexdigest, $filesize);
+}
+
+#Compute md5sum of the full data contents of a file.
+#Compute the md5sum of the full data contents of a file.
+#If the file is compressed, calculate the md5sum of the inflated
+#version (using the zlib routines to uncompress the stream). Note this
+#gives the md5sum of the FULL file -- not just the partial file md5sum
+#above.
+sub zFile2FullMD5
+{
+    my($bpc, $md5, $name, $compresslvl) = @_;
+
+    my $fh;
+    my $data;
+
+    $compresslvl = $Conf{CompressLevel} unless defined $compresslvl;
+    unless (defined ($fh = BackupPC::FileZIO->open($name, 0, $compresslvl))) {
+        printerr "Can't open $name\n";
+        return -1;
+    }
+
+    $md5->reset();
+    while ($fh->read(\$data, 65536) > 0) {
+        $md5->add($data);
+    }
+    $fh->close();
+
+    return $md5->hexdigest;
+}
+
+# Like MakeFileLink but for existing files where we don't have the
+# digest available. So compute the digest first and then call MakeFileLink.
+# For each file, check if the file exists in $bpc->{TopDir}/pool.
+# If so, remove the file and make a hardlink to the file in
+# the pool. Otherwise, if the newFile flag is set, make a
+# hardlink in the pool to the new file.
+#
+# Returns 0 if a link should be made to a new file (ie: when the file
+#   is a new file but the newFile flag is 0).
+# Returns 1 if a link to an existing file is made,
+# Returns 2 if a link to a new file is made (only if $newFile is set)
+# Returns negative on error.
+sub zMakeFileLink
+{
+    my($bpc, $md5, $name, $newFile, $compress) = @_;
+
+    $compress = $Conf{CompressLevel} unless defined $compress;
+
+    my ($d,$ret) = zFile2MD5($bpc, $md5, $name, 0, $compress);
+    return -5 if $ret < 0;
+    $bpc->MakeFileLink($name, $d, $newFile, $compress);
+}
+
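+# Usage sketch (illustrative; $bpc, $md5 and $file as in the zFile2MD5
+# example above):
+#
+#   my $r = zMakeFileLink($bpc, $md5, $file, 1, undef);
+#   printerr "can't pool-link $file ($r)\n" if $r < 0;
+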
+# Use the following to create a new file link ($copy) that links to
+# the same pool file as the original ($orig) file does (or
+# should). i.e. this doesn't assume that $orig is properly linked to
+# the pool. This is computationally more costly than just making the
+# link, but will avoid further corruption whereby you get archive
+# files with multiple links to each other but no link to pool.
+
+# First, check if $orig links to the pool and if not create a link
+# via MakeFileLink. Don't create a new pool entry if newFile is zero.
+# If a link either already exists from the original to the pool or if
+# one was successfully created, then simply link $copy to the same
+# pool entry. Otherwise, just copy (don't link) $orig to $copy
+# and leave it unlinked to the pool.
+
+# Note that checking whether $orig is linked to the pool is
+# cheaper than running MakeFileLink since we only need the md5sum
+# checksum.
+# Note we assume that the attrib entry for $orig (if it exists) is
+# correct, since we use that as a shortcut to determine the filesize
+# (via zFile2MD5)
+# Returns 1 if a link to an existing file is made,
+# Returns 2 if a link to a new file is made (only if $newFile is set)
+# Returns 0 if the file was copied (either because MakeFileLink failed or
+#   because newFile=0 and there was no existing pool match)
+# Returns negative on error.
+sub zCopyFileLink
+{
+    my ($bpc, $orig, $copy, $newFile, $compress) = @_;
+    my $ret=1;
+    $compress = $Conf{CompressLevel} unless defined $compress;
+    my $md5 = Digest::MD5->new;
+    my ($md5sum, $md5ret) = zFile2MD5($bpc, $md5, $orig, 0, $compress);
+
+    #If $orig is already properly linked to the pool (or linkable to pool after running
+    #MakeFileLink on $orig) and HardLinkMax is not exceeded, then just link to $orig.
+    if($md5ret > 0) { #If valid md5sum and non-zero length file (so should be linked to pool)...
+        if((GetPoolLinkMD5($bpc, $orig, $md5sum, $compress, 0) == 1 || #If pool link already exists
+            ($ret = $bpc->MakeFileLink($orig, $md5sum, $newFile, $compress)) > 0) #or creatable by MakeFileLink
+           && (stat($orig))[3] < $bpc->{Conf}{HardLinkMax}) { # AND (still) less than max links
+            return $ret if link($orig, $copy); # Then link from copy to orig
+        }
+    }
+    if(copy($orig, $copy) == 1) { #Otherwise first copy file then try to link the copy to pool
+        if($md5ret > 0 && ($ret = $bpc->MakeFileLink($copy, $md5sum, $newFile, $compress)) > 0) {
+            return 2; #Link is to a new copy
+        }
+        printerr "Failed to link copied file to pool: $copy\n";
+        return 0; #Copied but not linked
+    }
+    die "Failed to link or copy file: $copy\n";
+    return -1;
+}
+
+# Copy $source to $target, recursing down directory trees as
+# needed. The 4th argument, if non-zero, means (for files) use
+# zCopyFileLink to make sure that everything is linked properly to the
+# pool; otherwise, just do a simple link. The 5th argument, $force,
+# will erase a target directory if already present, otherwise an error
+# is signalled. The final 6th argument is the compression level, which
+# if left out will be taken from $bpc->Conf{CompressLevel}.
+
+# Note the recursion is set up so that the return is negative if
+# error, positive if consistent with a valid pool link (i.e. zero
+# length file or directories are consistent too), and zero if
+# successful but not consistent with a valid pool. The overall result
+# is -1 if there is any error and otherwise the AND'ed result of the
+# operations. That means if the overall result is positive then the
+# whole tree is successfully linked to the pool, so the next time
+# around we can use a simple linking (i.e. $linkcheck=0). Note $source
+# and $target must be full paths from root. Note: if files are not
+# compressed properly then you won't be able to link them to pool.
+sub link_recursively_topool
+{
+    my ($bpc, $source, $target, $linkcheck, $force, $compress) = @_;
+    my $ret=1;
+    die "Error: '$source' doesn't exist\n" unless -e $source;
+    if (-e $target) {
+        die "Error: can't overwrite: $target (unless 'force' set)\n" unless $force;
+        die "Error: can't remove: $target ($!)\n" unless jrmtree($target, 0, 0);
+        #jrmtree so that dry runs don't actually delete anything
+    }
+    if (-d $source) {
+        die "Error: mkdir failed to create new directory: $target ($!)\n"
+            unless jmkdir($target);
+        opendir( my $dh, $source) || die "Error: Could not open dir $source ($!)\n";
+        foreach my $elem (readdir($dh)) {
+            next if $elem =~ /^\.\.?$/; # skip the . and .. entries
+            my $newsource = "$source/$elem";
+            my $newtarget = "$target/$elem";
+            my $newret = link_recursively_topool($bpc, $newsource, $newtarget, $linkcheck, $force, $compress);
+            if ($newret < 0) { #Error so break out of loop & return
+                closedir $dh;
+                return -1;
+            }
+            $ret = 0 if $newret == 0; #note $ret stays at 1 only if no elements return -1 or 0
+        }
+        closedir $dh;
+        return $ret;
+    }
+    elsif ($dryrun) {return 1} # Return before making changes to filesystem
+    elsif ( ! -s $source) { # zero size
+        return (copy($source, $target) == 1 ? 1 : -1); #No link since zero size
+    }
+    elsif ($linkcheck) { #Makes sure first that source properly linked to pool
+        return(zCopyFileLink($bpc, $source, $target, 1, $compress));
+    }
+    else { #Otherwise, OK to perform simple link to source
+        return (link($source, $target) == 1 ? 1 : -1);
+    }
+}
+
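+# Usage sketch (illustrative; both paths are hypothetical and, as noted
+# above, must be full paths from root):
+#
+#   my $r = link_recursively_topool($bpc, "/path/to/source/dir",
+#                                   "/path/to/target/dir", 1, 0, undef);
+#   printerr "copy of tree failed\n" if $r < 0;
+#   print "whole tree is pool-linked\n" if $r > 0;
+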
+sub get_bakinfo
+{
+    my ($bakinfofile, $entry) = @_;
+    our %backupInfo = ();
+
+    $bakinfofile .= "/backupInfo";
+    warn "Can't read $bakinfofile\n" unless -f $bakinfofile;
+
+    unless (my $ret = do $bakinfofile) { # Load the backupInfo file
+        if ($@) {
+            warn "couldn't parse $bakinfofile: $@\n";
+        }
+        elsif (!defined $ret) {
+            warn "couldn't do $bakinfofile: $!\n";
+        }
+        elsif (! $ret) {
+            warn "couldn't run $bakinfofile\n";
+        }
+    }
+    my $value = $backupInfo{$entry};
+    warn "$bakinfofile is empty or missing entry for '$entry'\n"
+        unless defined $value;
+    return $value;
+}
+
+# Note: getattr($attr)                    = $attr->{files}
+#       getattr($attr, $file)             = $attr->{files}{$file}
+#       getattr($attr, $file, $attribute) = $attr->{files}{$file}{$attribute}
+
+sub getattr
+{
+    my($attr, $fileName, $Attribute) = @_;
+    return $attr->{files}{$fileName}{$Attribute} if ( defined($Attribute) );
+    return $attr->{files}{$fileName} if ( defined($fileName) );
+    return $attr->{files};
+}
+
+#Reads in the attrib file for directory $_[1] (with an optional alternative
+#attrib file name in $_[2]) and stores it in the hashref $_[0] passed to
+#the function.
+#Returns -1 and a blank $_[0] hash ref if the attrib file doesn't exist
+#already (not necessarily an error).
+#Dies if the attrib file exists but can't be read in.
+sub read_attrib
+{
+#Note: $_[0] = hash reference to attrib object
+#SO DO NOT USE A LOCAL VARIABLE FOR IT (i.e. don't do my $attr=$_[0])
+    $_[0] = BackupPC::Attrib->new({ compress => $Conf{CompressLevel} });
+
+#   unless (defined $_[1]) { #JJK: DEBUGGING
+#       print "read_attrib: \$_[1] undefined\n";
+#       print Dumper @_;
+#   }
+    return -1 unless -f attrib($_[1], $_[2]);
+    #This is not necessarily an error because the dir may be empty
+
+    $_[0]->read($_[1],$_[2]) or
+        die "Error: Cannot read attrib file: " . attrib($_[1],$_[2]) . "\n";
+
+    return 1;
+}
+
+#Same as Lib.pm fileNameUnmangle but doesn't require an
+#unnecessary '$bpc'
+sub jfileNameUnmangle {
+    my($name) = @_;
+
+    $name =~ s{/f}{/}g;
+    $name =~ s{^f}{};
+    $name =~ s{%(..)}{chr(hex($1))}eg;
+    return $name;
+}
+
+sub count_file_attribs
+{
+    my ($attrib) = @_;
+    return( scalar (keys (%{$attrib->get()})));
+}
+
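+# Usage sketch (illustrative; $dir is a hypothetical backup directory
+# containing an attrib file):
+#
+#   if (read_attrib(my $attr, $dir) > 0) {
+#       my $n = count_file_attribs($attr);
+#       my $type = getattr($attr, "somefile", "type"); #unmangled file name
+#       print "$dir has $n attrib entries\n";
+#   }
+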
+# Get the attrib entry for $fullfilename. The corresponding hash is both
+# returned and also fills the hash reference (optionally) passed via
+# $fileattrib.
+# If the attrib file is not present, returns -1 (which may not be an error)
+# Returns -2 if not a mangled file
+# Dies on error
+sub get_file_attrib
+{
+    my ($fullfilename, $fileattrib) = @_;
+    $fullfilename =~ m{(.+)/(.+)}; #$1=dir; $2=file
+    return -2 unless defined $2;
+
+    return -1 if read_attrib(my $attr, $1) < 0;
+
+    my %fattrib = %{$attr->{files}{jfileNameUnmangle($2)}};
+    #Note unmangling removes initial 'f' AND undoes char mapping
+    %{$fileattrib} = %fattrib if defined $fileattrib; #hashref output is optional
+    return %fattrib;
+}
+
+# Returns value of attrib $key for $fullfilename (full path)
+# If not a mangled file, or the attrib file is not present, or there is no
+# entry for the specified key for the given file, then return 'undef'
+sub get_attrib_value
+{
+    my ($fullfilename, $key) = @_;
+    $fullfilename =~ m{(.+)/(.+)}; #$1=dir; $2=file
+
+    return undef unless defined $2;
+    return undef if read_attrib(my $attr, $1) < 0;
+    return $attr->{files}{jfileNameUnmangle($2)}{$key};
+    #Note this returns undef if the key is not present
+    #Note unmangling removes initial 'f' AND undoes char mapping
+}
+
+# Returns value of the attrib type key for $fullfilename (full path)
+# If attrib file present but filename not an entry, return -1 [not an error if file nonexistent]
+# If no attrib file (but directory exists), return -2 [not an error if directory empty]
+# If directory non-existent, return -3
+# If attrib file present but not readable, return -4 [true error]
+# Note there may be an entry even if the file is non-existent (e.g. type 10 = delete)
+sub get_attrib_type
+{
+    my ($fullfilename) = @_;
+    $fullfilename =~ m{(.+)/(.+)}; #$1=dir; $2=file
+
+#   unless (defined $1) { #JJK: DEBUGGING
+#       print "get_attrib_type: \$1 undefined\n";
+#       print Dumper @_;
+#   }
+
+    return -3 unless -d $1;
+    return -2 unless -f attrib($1);
+    return -4 unless read_attrib(my $attr, $1) >= 0;
+    my $type = $attr->{files}{jfileNameUnmangle($2)}{type};
+    #Note unmangling removes initial 'f' AND undoes char mapping
+    return (defined($type) ? $type : -1);
+}
+
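+# Usage sketch (illustrative; $pcdir is a hypothetical share directory under
+# the pc tree, with mangled ('f'-prefixed) path components):
+#
+#   my $type = get_attrib_type("$pcdir/fetc/fpasswd");
+#   print "marked deleted\n" if $type == 10; #type 10 is a delete attribute
+#   my $bytes = get_attrib_value("$pcdir/fetc/fpasswd", "size");
+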
+# 4th argument $poollink says whether to write to file (0) or link to
+# pool (using MakeFileLink).
+# 5th argument tells what to do if there are no files in $attrib
+# (0=return error, 1=delete attrib file and return success)
+# 6th argument is an optional alternative name for the attrib file itself
+# Note: does an unlink first because if there are hard links, we don't want
+# to modify them
+# Returns positive if successful, 0 if not
+# Specifically, 1 if linked to existing, 2 if linked to new,
+# 3 if written without linking, 4 if (empty) attrib file deleted
+
+sub write_attrib
+{
+    my ($bpc, $attrib, $dir, $poollink, $delempty, $attfilename) = @_;
+    die "Error: Cannot write to directory: $dir\n" unless -w $dir;
+
+#   unless (defined $dir) { #JJK: DEBUGGING
+#       print "write_attrib: \$dir undefined";
+#       print Dumper @_;
+#   }
+
+    my $attfilepath = attrib($dir, $attfilename);
+    return 1 if $dryrun; #Return before writing changes
+    die "Error: could not unlink old attrib file: $attfilepath\n"
+        if (-e $attfilepath && !unlink($attfilepath)); #Delete old attrib file if it exists because it may be hard-linked
+    return 4 if(count_file_attribs($attrib) == 0 && $delempty); #No attribs left so leave it unlinked
+    die "Error: could not write to attrib file: $attfilepath\n"
+        unless ($attrib->write($dir, $attfilename)) == 1; #First write a copy without linking
+    my $ret=3;
+    if ($poollink) {
+        my $data = $attrib->writeData;
+        my $md5 = Digest::MD5->new;
+        my $digest;
+        if(($digest = $bpc->Buffer2MD5($md5, length($data), \$data)) ne -1
+           && ($ret = $bpc->MakeFileLink($attfilepath, $digest, 1, $Conf{CompressLevel})) <= 0) {
+            printerr "Can't link attrib file to pool: $attfilepath ($ret)\n";
+        }
+    }
+    return $ret;
+}
+
+# Write out $fileattrib for $file (without the mangle) to $dir/$attfilename (or
+# to the default attribute file for $dir if $attfilename is undef)
+# Reads in the existing attrib file if pre-existing
+# 4th argument $poollink says whether to write to file (0) or link to
+# pool (using MakeFileLink).
+# Returns positive if successful, 0 if not
+# Specifically, 1 if linked to existing, 2 if linked to new,
+# 3 if written without linking
+sub write_file_attrib
+{
+    my ($bpc, $dir, $file, $fileattrib, $poollink, $attfilename) = @_; #Note $fileattrib is a hashref
+    my $ret=0;
+
+    read_attrib(my $attr, $dir, $attfilename); #Read in existing attrib file if it exists
+    $ret = write_attrib($bpc, $attr, $dir, $poollink, 0, $attfilename)
+        if $attr->set($file, $fileattrib) > 0;
+
+#   unless (defined $dir) { #JJK: DEBUGGING
+#       print "write_file_attrib: \$dir undefined\n";
+#       print Dumper @_;
+#   }
+
+    die "Error writing '$file' entry to attrib file: " . attrib($dir, $attfilename) . "\n" unless $ret > 0;
+    return $ret;
+}
+
+sub attrib
+{
+    return (defined($_[1]) ? "$_[0]/$_[1]" : "$_[0]/attrib");
+}
+
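+# Usage sketch (illustrative; writes a type=10 (delete) entry for the
+# hypothetical file "passwd" into $dir/attrib and links it to the pool;
+# the exact set of attribute keys shown is an assumption):
+#
+#   my $fileattrib = { type => 10, mode => 0, uid => 0, gid => 0,
+#                      size => 0, mtime => 0 };
+#   my $r = write_file_attrib($bpc, $dir, "passwd", $fileattrib, 1, undef);
+#   print "delete attribute written (code $r)\n";
+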
+# Modified version of MakeFileLink including:
+# 1. Efficiency/clarity improvements
+# 2. Calls GetPoolLink to find candidate link targets.
+# 3. For non-compressed files, uses my improved jcompare comparison algorithm
+# 4. For compressed files, uses zcompare2 which compares only the compressed
+#    data sans first-byte header & potential rsync digest trailer. This allows
+#    for matches even if one file has a rsync digest and the other does not
+# 5. Moves the file before unlinking in case the subsequent link fails and
+#    needs to be undone
+# 6. Added a 6th input parameter to return a pointer to the pool link name
+# 7. Extended the meaning of the newFile flag:
+#    0 = Don't create a new pool file (as before)
+#    1 = Create a new pool file IF there are no other links to the source file
+#        (this was the previous behavior for whenever newFile was set)
+#    >1 = Create a new pool file EVEN if the source file has more than one link
+#        (this will by extension link other things linked to the source
+#        also to the pool -- which means that the pool might not be clean
+#        if it links to things outside of the pc directory -- so
+#        use carefully)
+# 8. Includes 'j' versions of file routines to allow dry runs
+# 9. Added a check to see if already in pool and if so returns 3
+
+# For each file, check if the file exists in $bpc->{TopDir}/pool.
+# If so, remove the file and make a hardlink to the file in
+# the pool. Otherwise, if the newFile flag is set, make a
+# hardlink in the pool to the new file.
+#
+# Returns 0 if a link should be made to a new file (ie: when the file
+#   is a new file but the newFile flag is 0).
+# JJK: actually also if $name has nlinks >1, regardless of the newFile flag
+# Returns 1 if a link to an existing file is made,
+# Returns 2 if a link to a new file is made (only if $newFile is set)
+# Returns 3 if it first finds that the file is already linked to the pool
+# Returns negative on error.
+
+sub jMakeFileLink
+{
+    my($bpc, $name, $d, $newFile, $compress, $linkptr) = @_;
+
+    my $poollink;
+    my $result = GetPoolLinkMD5($bpc, $name, $d, $compress, 1, \$poollink);
+    $$linkptr = $poollink if defined($linkptr) && $result > 0;
+
+    if($result == 1) { #Already linked to the pool
+        return 3;
+    } elsif($result == 2) { #Matches existing, linkable pool file
+        my $tempname = mktemp("$name.XXXXXXXXXXXXXXXX");
+        return -5 unless jrename($name, $tempname); #Temporarily save
+        if(!jlink($poollink, $name)) { #Link pool to source
+            jrename($tempname, $name); #Restore if we can't link
+            return -3;
+        }
+        junlink($tempname); #Safe to remove the original
+        return 1;
+    } elsif($result == 3) {
+        if(defined($newFile) && #No link or match
+           ($newFile > 1 || ($newFile == 1 && (stat($name))[3] == 1 ))) {
+            $poollink =~ m|(.*)/|;
+            jmkpath($1, 0, 0777) unless -d $1;
+            return -4 unless jlink($name, $poollink);
+            return 2;
+        } else { #New link should have been made but we were told not to
+            return 0;
+        }
+    } else {
+        return -6; #Error from GetPoolLink call
+    }
+}
+
+# GetPoolLink
+# GetPoolLinkMD5
+#Find the pool/cpool file corresponding to file $name.
+#1. First iterates the entire chain to see if the *same inode* is present,
+#   i.e. if already linked to the pool. If so, it returns the first instance.
+#   Return = 1 and $$poolpathptr = name of the hard-linked match
+#2. If $compareflg is set, then iterate through again, this time looking for
+#   file *content* matches (this is much slower).
+#   If found, it returns the first instance with nlinks < HardLinkMax
+#   Return = 2 and $$poolpathptr = name of the content match
+#3. Finally, if not linked (and also not matched if $compareflg set)
+#   Return = 3 and $$poolpathptr = first empty chain slot
+#Note: Returns zero for a zero size file
+#      Returns negative on error.
+#Note: if the chain has multiple copies of the file, then it returns the first
+#linked match if present; if none and $compareflg is set, the first content match
+sub GetPoolLink
+{
+    my($bpc, $md5, $name, $compress, $compareflg, $poolpathptr) = @_;
+
+    $compress = $bpc->{Conf}{CompressLevel} unless defined $compress;
+
+    my ($md5sum, $ret) = defined($compress) && $compress > 0 ?
+        zFile2MD5($bpc, $md5, $name, 0, $compress) :
+        $bpc->File2MD5($md5, $name);
+
+    return 0 if $ret == 0; #Zero-sized file
+    return -3 unless $ret > 0;
+
+    GetPoolLinkMD5($bpc, $name, $md5sum, $compress, $compareflg, $poolpathptr);
+}
+
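+# Usage sketch (illustrative; $bpc, $md5 and $file as in the earlier
+# examples):
+#
+#   my $pooltarget;
+#   my $r = GetPoolLink($bpc, $md5, $file, undef, 1, \$pooltarget);
+#   # $r: 1=already hard-linked, 2=content match, 3=$pooltarget is the first
+#   # free slot in the chain, 0=zero-size file, negative=error
+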
+sub GetPoolLinkMD5
+{
+    my($bpc, $name, $md5sum, $compress, $compareflg, $poolpathptr) = @_;
+    my($poolbase, $i);
+
+    return -1 unless -f $name;
+    my $inode = (stat(_))[1]; #Uses stat from -f
+    return 0 if (stat(_))[7] == 0; #Zero-size (though this shouldn't really happen
+                                   #since md5sum input is not defined for zero sized files)
+
+    $compress = $bpc->{Conf}{CompressLevel} unless defined $compress;
+
+    return -2 unless
+        defined($poolbase = $bpc->MD52Path($md5sum, $compress));
+
+    #1st iteration looking for matching inode
+    $$poolpathptr = $poolbase;
+    for($i=0; -f $$poolpathptr; $i++) { #Look for linked copy (inode match)
+        return 1 if ((stat(_))[1] == $inode);
+        $$poolpathptr = $poolbase . '_' . $i; #Iterate
+    }
+
+    return 3 unless $compareflg; #No inode match
+
+    #Optional 2nd iteration looking for matching content
+    my $compare = defined($compress) && $compress > 0 ? \&zcompare2 : \&jcompare;
+    $$poolpathptr = $poolbase;
+    for(my $j=0; $j<$i; $j++ ) { #Look for content match
+        return 2 if (stat($$poolpathptr))[3] < $bpc->{Conf}{HardLinkMax} &&
+            !$compare->($name, $$poolpathptr);
+        $$poolpathptr = $poolbase . '_' . $j; #Iterate
+    }
+    # No matching existing pool entry - $$poolpathptr is the first empty chain slot
+    return 3;
+}
+
+#Convert a pool name to a constant length string consisting
+#of 32 hex digits for the base plus 6 (left) zero padded digits for
+#the chain suffix (suffixes are incremented by 1 so that no suffix
+#records as 0). Note this accommodates chains up to 10^6 long.
+#Use a number bigger than 6 if you have longer chains.
+#Useful if you want to order (e.g., sort) pool file names numerically
+sub poolname2number
+{
+    $_[0] =~ m|(.*/)?([^_]*)(_(.*))?|;
+    my $suffix = defined($4) ? $4+1 : 0;
+    return sprintf("%s%06s", $2, $suffix);
+}
+
+# Renumber pool chain holes starting at $firsthole and continue up the chain
+# to fill up to $maxholes holes (which need not be contiguous).
+# If $maxholes is not specified it defaults to 1 (sufficient to cover one
+# deletion -- i.e. one hole -- which may be $firsthole itself)
+# If $firsthole exists, it is an error. Use delete_pool_file instead
+# if you want to delete first before renumbering.
+# Return 1 on success; negative on failure
+sub renumber_pool_chain
+{
+    my ($firsthole, $maxholes) = @_;
+
+    $maxholes = 1 unless defined $maxholes;
+
+    my ($base, $sufx);
+    if($firsthole =~ m|(.*_)([0-9]+)|) {
+        $base = $1;
+        $sufx = $2;
+    }else{
+        $base = $firsthole . "_";
+        $sufx = -1; #Corresponds to no suffix
+    }
+
+    my $nextsufx = $sufx+1;
+    my $nextfile = $base . $nextsufx;
+    while($nextsufx - $sufx <= $maxholes) {
+        if(-e $nextfile) { #Non-hole so move/renumber
+            if(-e $firsthole || ! jrename($nextfile, $firsthole)) {
+                warn "Error: Couldn't rename pool file: $nextfile --> $firsthole\n";
+                return -2;
+            }
+#           print "Renumbering: $nextfile --> $firsthole\n";
+            $firsthole = $base . (++$sufx); #Move hole up the chain
+        }
+        $nextfile = $base . (++$nextsufx);
+    }
+    return 1;
+}
+
+# Delete pool file (if it exists) and regardless renumber the
+# chain to fill the hole left by the file. Fill up to $maxholes
+# above $file (including the hole left by $file) where
+# $maxholes defaults to 1.
+# If $file doesn't exist, then it just fills the hole
+# left by $file.
+# Return 1 on success; negative on failure
+sub delete_pool_file
+{
+    my ($file, $maxholes) = @_;
+
+    if(-e $file && !junlink($file)) { #Try to delete if exists
+        warn "Error: Couldn't unlink pool file: $file\n";
+        return -3;
+    }
+    return(renumber_pool_chain($file,$maxholes)); #Returns negative on fail
+}
+
+# Run BackupPC_nightly
+sub run_nightly
+{
+    my ($bpc) = @_;
+    my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
+    if ($err) {
+        printerr "BackupPC_nightly: can't connect to server ($err)...\n";
+        return($err);
+    }
+    if ((my $reply = $bpc->ServerMesg("BackupPC_nightly run")) eq "ok\n" ) {
+        $bpc->ServerMesg("log $0: called for BackupPC_nightly run...");
+        print "BackupPC_nightly scheduled to run...\n";
+        return 0;
+    }
+    else {
+        printerr "BackupPC_nightly ($reply)...\n";
+        return $reply;
+    }
+}
+
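+# Usage sketch (illustrative; $poolfile is a hypothetical (c)pool chain
+# member such as <md5>_2 in its pool subdirectory):
+#
+#   my $r = delete_pool_file($poolfile, 1); #unlink and close the hole
+#   warn "chain renumbering failed ($r)\n" if $r < 0;
+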
+# Rewrite of the compare function (in File::Compare) since it messes up on
+# weird filenames with spaces and things (because it combines the "<"
+# and the filename in a single string). Also streamline the code by
+# getting rid of extra fluff and removing the code for comparing text
+# files while we are at it. The basic algorithm remains the classic one
+# for comparing 2 raw files.
+# Returns 0 if equal, 1 if not equal. And negative on error.
+sub jcompare {
+    my ($file1,$file2,$size) = @_;
+    my ($fh1open, $fh2open, $fh1size);
+    my $ret=0;
+
+    local (*FH1, *FH2);
+    unless (($fh1open = open(FH1, "<", $file1)) &&
+            ($fh2open = open(FH2, "<", $file2))) {
+        $ret = -1;
+        goto compare_return;
+    }
+    binmode FH1;
+    binmode FH2;
+    if (($fh1size = -s FH1) != (-s FH2)) {
+        $ret=1;
+        goto compare_return;
+    }
+
+    unless (defined($size) && $size > 0) {
+        $size = $fh1size;
+        $size = LINUX_BLOCK_SIZE if $size < LINUX_BLOCK_SIZE;
+        $size = TOO_BIG if $size > TOO_BIG;
+    }
+
+    my $data1 = my $data2 = '';
+    my ($r1,$r2);
+    while(defined($r1 = read(FH1,$data1,$size)) && $r1 > 0) {
+        unless (defined($r2 = read(FH2,$data2,$r1)) && $r2 == $r1
+                && $data2 eq $data1) {
+            $ret=1;
+            goto compare_return;
+        }
+    }
+    $ret=1 if defined($r2 = read(FH2,$data2,LINUX_BLOCK_SIZE)) && $r2 > 0;
+    $ret=-2 if !defined($r1) || !defined($r2); # read returns undef on error
+
+  compare_return:
+    close(FH1) if $fh1open;
+    close(FH2) if $fh2open;
+    return $ret;
+}
+
+# Version of compare for BackupPC compressed files. Compares the inflated
+# (uncompressed) versions of the files. The BackupPC::FileZIO subroutine
+# is used to read in the files instead of just raw open & read. Note
+# this version of compare is relatively slow since you must use zlib
+# to decompress the streams that are being compared. Also, there is no
+# shortcut of first comparing the file sizes since the size is not
+# known until you finish reading the file.
+sub zcompare {
+    my ($file1, $file2, $compress)=@_;
+    my ($fh1, $fh2);
+    my $ret=0;
+
+    $compress = 1 unless defined $compress;
+    unless ((defined ($fh1 = BackupPC::FileZIO->open($file1, 0, $compress))) &&
+            (defined ($fh2 = BackupPC::FileZIO->open($file2, 0, $compress)))) {
+        $ret = -1;
+        goto zcompare_return;
+    }
+    my $data1 = my $data2 = '';
+    my ($r1, $r2);
+    while ( ($r1 = $fh1->read(\$data1, 65536)) > 0 ) {
+        unless ((($r2 = $fh2->read(\$data2, $r1)) == $r1)
+                && $data1 eq $data2) {
+            $ret=1;
+            goto zcompare_return;
+        }
+    }
+    $ret=1 if ($r2 = $fh2->read(\$data2, LINUX_BLOCK_SIZE)) > 0; #see if anything is left...
+    $ret=-1 if $r1 < 0 || $r2 < 0; # Error on read (FileZIO reads return negative)
+
+  zcompare_return:
+    $fh1->close() if defined $fh1;
+    $fh2->close() if defined $fh2;
+    return $ret;
+}
+
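+# Usage sketch (illustrative; file paths are hypothetical; both routines
+# return 0 when the contents match):
+#
+#   print "raw mismatch\n"     if jcompare($plainA, $plainB) == 1;
+#   print "content mismatch\n" if zcompare($zA, $zB, 3) == 1; #compress level 3
+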
+# Second alternative version that combines the raw speed of jcompare
+# with the ability to compare compressed files in zcompare. This
+# routine effectively should compare two compressed files just as fast
+# as the raw jcompare. The compare algorithm strips off the first byte
+# header and the appended rsync checksum digest info and then does a
+# raw compare on the intervening raw compressed data. Since no zlib
+# inflation (decompression) is done it should be faster than the
+# zcompare algorithm which requires inflation. Also since zlib
+# compression is well-defined and lossless for any given compression
+# level and block size, the inflated versions are identical if and
+# only if the deflated (compressed) versions are identical, once the
+# header (first byte) and trailers (rsync digest) are stripped
+# off. Note that only the data between the header and trailers have
+# this unique mapping. Indeed, since BackupPC only adds the checksum
+# the second time a file is needed, it is possible that the compressed
+# value will change with time (even though the underlying data is
+# unchanged) due to differences in the envelope. Note, to some extent
+# this approach assumes that the appended digest info is correct (at
+# least to the extent of properly indicating the block count and hence
+# the compressed data size that will be compared)
+sub zcompare2 {
+    my ($file1,$file2,$size) = @_;
+    my ($fh1, $fh2, $fh1open, $fh2open);
+    my $ret=0;
+
+    unless (($fh1open = open($fh1, "<", $file1)) &&
+            ($fh2open = open($fh2, "<", $file2))) {
+        $ret = -1;
+        goto zcompare2_return;
+    }
+    binmode $fh1;
+    binmode $fh2;
+
+    my $fh1size = -s $fh1;
+    my $fh2size = -s $fh2;
+
+    my $data1 = my $data2 = '';
+    unless (read($fh1, $data1, 1) == 1 && # Read first byte
+            read($fh2, $data2, 1) == 1) {
+        $ret = -1;
+        goto zcompare2_return;
+    }
+    if (vec($data1, 0, 8) == 0xd6 || vec($data1, 0, 8) == 0xd7) {
+        return -2 unless seek($fh1, -8, 2); #seek returns false on failure
+        return -3 unless read($fh1, $data1, 4) == 4;
+        $fh1size -= 20 * unpack("V", $data1) + 49;
+        # [0xb3 separator byte] + 20 * NUM_BLOCKS + DIGEST_FILE(32) + DIGEST_INFO (16)
+        # where each of NUM_BLOCKS is a 20 byte block digest consisting of 4 bytes of
+        # Adler32 and the full 16 byte (128bit) MD4 file digest (checksum).
+        # DIGEST_FILE is 2 copies of the full 16 byte (128bit) MD4 digest
+        # (one for protocol <=26 and the second for protocol >=27)
+        # DIGEST_INFO is metadata consisting of a pack("VVVV") encoding of the
+        # block size (should be 2048), the checksum seed, length of the
+        # block digest (NUM_BLOCKS), and the magic number (0x5fe3c289).
+        # Each is a 4 byte integer.
+    }
+
+    if (vec($data2, 0, 8) == 0xd6 || vec($data2, 0, 8) == 0xd7) {
+        return -2 unless seek($fh2, -8, 2); #seek returns false on failure
+        return -3 unless read($fh2, $data2, 4) == 4;
+        $fh2size -= 20 * unpack("V", $data2) + 49;
+    }
+
+    if ($fh1size != $fh2size) {
+        $ret=1;
+        goto zcompare2_return;
+    }
+
+    seek($fh1,1,0); #skip first byte
+    seek($fh2,1,0); #skip first byte
+
+    my $bytesleft = --$fh1size; #Reduce by one since skipping first byte
+    $size = $fh1size unless defined($size) && $size > 0 && $size < $fh1size;
+    $size = TOO_BIG if $size > TOO_BIG; #use the package-wide constant
+
+    my ($r1,$r2);
+    while(defined($r1 = read($fh1,$data1,$size))
+          && $r1 > 0) {
+        unless (defined($r2 = read($fh2,$data2,$r1))
+                && $data2 eq $data1) { #No need to test if $r1==$r2 since should be same size
+            $ret=1; #Plus if it doesn't, it will be picked up in the $data2 eq $data1 line
+            goto zcompare2_return;
+        }
+        $bytesleft -= $r1;
+        $size = $bytesleft if $bytesleft < $size;
+    }
+    $ret=1 if defined($r2 = read($fh2,$data2,$size)) && $r2 > 0;
+    #Note: The above line should rarely be executed since both files are the same size
+    $ret=-2 if !defined($r1) || !defined($r2); # read returns undef on error
+
+  zcompare2_return:
+    close($fh1) if $fh1open;
+    close($fh2) if $fh2open;
+    return $ret;
+}
+
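+# Usage sketch (illustrative; $cpoolA/$cpoolB are hypothetical compressed
+# pool files; zcompare2 never inflates them, it just strips the one-byte
+# header and any rsync digest trailer before the raw compare):
+#
+#   my $diff = zcompare2($cpoolA, $cpoolB);
+#   print "same underlying content\n" if $diff == 0;
+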
+#Routine to remove files (unlink) or directories (rmtree)
+# $fullfilename is the full path name to the file or directory
+# Returns the number of files deleted; if the scalar ref $dirfileref is
+# defined, the rmtree listing of what was deleted is captured into it
+sub delete_files
+{
+    my ($fullfilename, $dirfileref) = @_;
+    my $listdeleted = defined($dirfileref);
+    my $ret = -1;
+    die "Error: '$fullfilename' doesn't exist or not writeable\n"
+        unless -w $fullfilename;
+    if(-f $fullfilename) {
+        $ret = junlink($fullfilename);
+    }
+    elsif(-d $fullfilename) {
+        $$dirfileref = "" if $listdeleted;
+        open(local *STDOUT, '>', $dirfileref) #redirect stdout to capture the rmtree listing
+            if $listdeleted;
+        $ret = jrmtree($fullfilename, $listdeleted, 1);
+    }
+    return $ret;
+}
+
+#Touch command (modified version of the Unix notion)
+#The first argument is the (full) file path. If the second argument is
+#defined, the file is only created if nonexistent; access and modification
+#times are not changed if it already exists.
+#Returns 1 on success, -1 on error
+sub touch
+{
+    return 1 if $dryrun; #Honor the dry-run contract like the j* wrappers below
+    unless (-e $_[0]) { #Create if non-existent
+        return -1 unless defined(sysopen(my $fh, $_[0], O_CREAT));
+        close $fh;
+    }
+    my $time = time();
+    utime($time, $time, $_[0]) unless defined $_[1];
+    return 1;
+}
+
+#Simple wrappers to protect the filesystem when just doing dry runs
+sub jcopy
+{
+    return 1 if $dryrun;
+    copy @_;
+}
+
+sub jlink
+{
+    return 1 if $dryrun;
+    link $_[0], $_[1];
+}
+
+sub junlink
+{
+    return 1 if $dryrun;
+    unlink @_;
+}
+
+sub jmkdir
+{
+    return 1 if $dryrun;
+    mkdir @_;
+}
+
+sub jmkpath
+{
+    return 1 if $dryrun;
+    mkpath @_;
+}
+
+sub jmake_path
+{
+    return 1 if $dryrun;
+    mkpath @_;
+}
+
+sub jrename
+{
+    return 1 if $dryrun;
+    rename $_[0], $_[1];
+}
+
+sub jrmtree
+{
+    return 1 if $dryrun;
+    rmtree @_;
+}
+
+1;
-- 
1.7.10.4