@@ -20,23 +20,11 @@ var (
 	hashAlgorithm = "sha256"
 )
 
-// FileChecksums represents checksums into partially received file.
-type FileChecksums struct {
-	// Offset is the offset into the file.
-	Offset int64 `json:"offset"`
-	// Length is the length of content being hashed in the file.
-	Length int64 `json:"length"`
-	// Checksums is a list of checksums of BlockSize-sized blocks
-	// starting from Offset. The number of checksums is the Length
-	// divided by BlockSize rounded up to the nearest integer.
-	// All blocks except for the last one are guaranteed to be checksums
-	// over BlockSize-sized blocks.
-	Checksums []Checksum `json:"checksums"`
-	// Algorithm is the hashing algorithm used to compute checksums.
-	Algorithm string `json:"algorithm"` // always "sha256" for now
-	// BlockSize is the size of each block.
-	// The last block may be smaller than this, but never zero.
-	BlockSize int64 `json:"blockSize"` // always (64<<10) for now
-}
+// BlockChecksum represents the checksum for a single block.
+type BlockChecksum struct {
+	Checksum  Checksum `json:"checksum"`
+	Algorithm string   `json:"algo"` // always "sha256" for now
+	Size      int64    `json:"size"` // always (64<<10) for now
+}
 
 // Checksum is an opaque checksum that is comparable.
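For orientation, the following self-contained sketch (not part of the patch) shows how content can be split into 64 KiB blocks and hashed one block at a time, which is the shape of data the new BlockChecksum type describes. The blockChecksum type, exampleBlockSize constant, and hashBlocks helper below are illustrative stand-ins, not identifiers from this package.

package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"strings"
)

// blockChecksum is an illustrative stand-in for the BlockChecksum type above.
type blockChecksum struct {
	Checksum  [sha256.Size]byte
	Algorithm string
	Size      int64
}

const exampleBlockSize = 64 << 10 // mirrors blockSize in resume.go

// hashBlocks reads r in exampleBlockSize chunks and returns one checksum per
// block, the same shape of data that the new streaming HashPartialFile yields.
func hashBlocks(r io.Reader) ([]blockChecksum, error) {
	var out []blockChecksum
	b := make([]byte, exampleBlockSize)
	for {
		n, err := io.ReadFull(r, b)
		if n > 0 {
			out = append(out, blockChecksum{sha256.Sum256(b[:n]), "sha256", int64(n)})
		}
		switch err {
		case nil:
			continue
		case io.EOF, io.ErrUnexpectedEOF:
			return out, nil // no more full or partial blocks
		default:
			return out, err
		}
	}
}

func main() {
	cs, err := hashBlocks(strings.NewReader(strings.Repeat("x", 100000)))
	if err != nil {
		panic(err)
	}
	for i, c := range cs {
		fmt.Printf("block %d: size=%d sha256=%x\n", i, c.Size, c.Checksum[:8])
	}
}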
@@ -92,113 +80,89 @@ func (m *Manager) PartialFiles(id ClientID) (ret []string, err error) {
 	return ret, nil
 }
 
-// HashPartialFile hashes the contents of a partial file sent by id,
-// starting at the specified offset and for the specified length.
-// If length is negative, it hashes the entire file.
-// If the length exceeds the remaining file length, then it hashes until EOF.
-// If [FileHashes.Length] is less than length and no error occurred,
-// then it implies that all remaining content in the file has been hashed.
-func (m *Manager) HashPartialFile(id ClientID, baseName string, offset, length int64) (FileChecksums, error) {
+// HashPartialFile returns a function that hashes the next block in the file,
+// starting from the beginning of the file.
+// It returns (BlockChecksum{}, io.EOF) when the stream is complete.
+// It is the caller's responsibility to call close.
+func (m *Manager) HashPartialFile(id ClientID, baseName string) (next func() (BlockChecksum, error), close func() error, err error) {
 	if m == nil || m.opts.Dir == "" {
-		return FileChecksums{}, ErrNoTaildrop
+		return nil, nil, ErrNoTaildrop
 	}
+	noopNext := func() (BlockChecksum, error) { return BlockChecksum{}, io.EOF }
+	noopClose := func() error { return nil }
 	if m.opts.DirectFileMode && m.opts.AvoidFinalRename {
-		return FileChecksums{}, nil // resuming is not supported for users that peek at our file structure
+		return noopNext, noopClose, nil // resuming is not supported for users that peek at our file structure
 	}
 	dstFile, err := joinDir(m.opts.Dir, baseName)
 	if err != nil {
-		return FileChecksums{}, err
+		return nil, nil, err
 	}
 	f, err := os.Open(dstFile + id.partialSuffix())
 	if err != nil {
 		if os.IsNotExist(err) {
-			return FileChecksums{}, nil
+			return noopNext, noopClose, nil
 		}
-		return FileChecksums{}, redactError(err)
+		return nil, nil, redactError(err)
 	}
-	defer f.Close()
-	if _, err := f.Seek(offset, io.SeekStart); err != nil {
-		return FileChecksums{}, redactError(err)
-	}
-	checksums := FileChecksums{
-		Offset:    offset,
-		Algorithm: hashAlgorithm,
-		BlockSize: blockSize,
-	}
 	b := make([]byte, blockSize) // TODO: Pool this?
-	r := io.Reader(f)
-	if length >= 0 {
-		r = io.LimitReader(f, length)
-	}
-	for {
-		switch n, err := io.ReadFull(r, b); {
+	next = func() (BlockChecksum, error) {
+		switch n, err := io.ReadFull(f, b); {
 		case err != nil && err != io.EOF && err != io.ErrUnexpectedEOF:
-			return checksums, redactError(err)
+			return BlockChecksum{}, redactError(err)
 		case n == 0:
-			return checksums, nil
+			return BlockChecksum{}, io.EOF
 		default:
-			checksums.Checksums = append(checksums.Checksums, hash(b[:n]))
-			checksums.Length += int64(n)
+			return BlockChecksum{hash(b[:n]), hashAlgorithm, int64(n)}, nil
 		}
 	}
+	close = f.Close
+	return next, close, nil
 }
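As a usage note, a caller of the new signature is expected to keep calling next until it reports io.EOF and to invoke close when done. The helper below is a hedged, in-package sketch of that loop (collectChecksums is illustrative and not part of this patch), assuming the BlockChecksum type and the io import already present in resume.go.

// collectChecksums drains a (next, close) pair, as returned by the new
// HashPartialFile, into a slice with one BlockChecksum per block.
func collectChecksums(next func() (BlockChecksum, error), close func() error) ([]BlockChecksum, error) {
	defer close() // callers of HashPartialFile must call close
	var cs []BlockChecksum
	for {
		c, err := next()
		if err == io.EOF {
			return cs, nil // stream complete
		}
		if err != nil {
			return cs, err
		}
		cs = append(cs, c)
	}
}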
 
 // ResumeReader reads and discards the leading content of r
 // that matches the content based on the checksums that exist.
 // It returns the number of bytes consumed,
 // and returns an [io.Reader] representing the remaining content.
-func ResumeReader(r io.Reader, hashFile func(offset, length int64) (FileChecksums, error)) (int64, io.Reader, error) {
-	if hashFile == nil {
+func ResumeReader(r io.Reader, hashNext func() (BlockChecksum, error)) (int64, io.Reader, error) {
+	if hashNext == nil {
 		return 0, r, nil
 	}
-	// Ask for checksums of a particular content length,
-	// where the amount of memory needed to represent the checksums themselves
-	// is exactly equal to the blockSize.
-	numBlocks := blockSize / sha256.Size
-	hashLength := blockSize * numBlocks
 	var offset int64
 	b := make([]byte, 0, blockSize)
 	for {
-		// Request a list of checksums for the partial file starting at offset.
-		checksums, err := hashFile(offset, hashLength)
-		if len(checksums.Checksums) == 0 || err != nil {
+		// Obtain the next block checksum from the remote peer.
+		cs, err := hashNext()
+		switch {
+		case err == io.EOF:
+			return offset, io.MultiReader(bytes.NewReader(b), r), nil
+		case err != nil:
 			return offset, io.MultiReader(bytes.NewReader(b), r), err
-		} else if checksums.BlockSize != blockSize || checksums.Algorithm != hashAlgorithm {
+		case cs.Algorithm != hashAlgorithm || cs.Size < 0 || cs.Size > blockSize:
 			return offset, io.MultiReader(bytes.NewReader(b), r), fmt.Errorf("invalid block size or hashing algorithm")
 		}
-		// Read from r, comparing each block with the provided checksums.
-		for _, want := range checksums.Checksums {
-			// Read a block from r.
-			n, err := io.ReadFull(r, b[:blockSize])
-			b = b[:n]
-			if err == io.EOF || err == io.ErrUnexpectedEOF {
-				err = nil
-			}
-			if len(b) == 0 || err != nil {
-				// This should not occur in practice.
-				// It implies that an error occurred reading r,
-				// or that the partial file on the remote side is fully complete.
-				return offset, io.MultiReader(bytes.NewReader(b), r), err
-			}
-			// Compare the local and remote block checksums.
-			// If it mismatches, then resume from this point.
-			got := hash(b)
-			if got != want {
-				return offset, io.MultiReader(bytes.NewReader(b), r), nil
-			}
-			offset += int64(len(b))
-			b = b[:0]
-		}
-		// We hashed the remainder of the partial file, so stop.
-		if checksums.Length < hashLength {
-			return offset, io.MultiReader(bytes.NewReader(b), r), nil
-		}
+		// Read the contents of the next block.
+		n, err := io.ReadFull(r, b[:blockSize])
+		b = b[:n]
+		if err == io.EOF || err == io.ErrUnexpectedEOF {
+			err = nil
+		}
+		if len(b) == 0 || err != nil {
+			// This should not occur in practice.
+			// It implies that an error occurred reading r,
+			// or that the partial file on the remote side is fully complete.
+			return offset, io.MultiReader(bytes.NewReader(b), r), err
+		}
+		// Compare the local and remote block checksums.
+		// If it mismatches, then resume from this point.
+		if cs.Checksum != hash(b) {
+			return offset, io.MultiReader(bytes.NewReader(b), r), nil
+		}
+		offset += int64(len(b))
+		b = b[:0]
 	}
 }
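To show how the two pieces are intended to meet, here is a hedged, in-package sketch: sendResumable is hypothetical and not part of this patch, and in the real transfer path the hashNext callback would be backed by a request to the peer rather than a local Manager, which this sketch does not model.

// sendResumable is a hypothetical illustration of wiring HashPartialFile
// into ResumeReader: it skips the prefix of content that the receiver
// already holds and returns the resume offset plus the remaining content.
func sendResumable(m *Manager, id ClientID, baseName string, content io.Reader) (int64, io.Reader, error) {
	next, close, err := m.HashPartialFile(id, baseName)
	if err != nil {
		return 0, content, err
	}
	defer close()
	return ResumeReader(content, next)
}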