535 lines
14 KiB
Go
535 lines
14 KiB
Go
// Copyright 2013 Richard Lehane. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package mscfb
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"io"
|
|
"os"
|
|
"time"
|
|
"unicode"
|
|
"unicode/utf16"
|
|
|
|
"github.com/richardlehane/msoleps/types"
|
|
)
|
|
|
|
// Values stored in the objectType byte of a directory entry.
const (
	// unknown marks an unallocated entry (typically a zeroed directory entry).
	unknown uint8 = 0x0
	// storage marks a storage object, i.e. a directory.
	storage uint8 = 0x1
	// stream marks a stream object, i.e. a file.
	stream uint8 = 0x2
	// rootStorage marks the root entry.
	rootStorage uint8 = 0x5
)
|
|
|
|
// Values stored in the color byte of a directory entry.
const (
	// red is the RED color flag.
	red uint8 = 0x0
	// black is the BLACK color flag.
	black uint8 = 0x1
)
|
|
|
|
// lenDirEntry is the size in bytes of one serialized directory entry (128):
// 64 (name) + 4*4 (name length, type and color, plus the three sibling/child
// IDs) + 16 (CLSID) + 4 (state bits) + 8*2 (create and modify times) +
// 4 (starting sector) + 8 (stream size).
const lenDirEntry int = 64 + 4*4 + 16 + 4 + 8*2 + 4 + 8
|
|
|
|
type directoryEntryFields struct {
|
|
rawName [32]uint16 //64 bytes, unicode string encoded in UTF-16. If root, "Root Entry\0" w
|
|
nameLength uint16 //2 bytes
|
|
objectType uint8 //1 byte Must be one of the types specified above
|
|
color uint8 //1 byte Must be 0x00 RED or 0x01 BLACK
|
|
leftSibID uint32 //4 bytes, Dir? Stream ID of left sibling, if none set to NOSTREAM
|
|
rightSibID uint32 //4 bytes, Dir? Stream ID of right sibling, if none set to NOSTREAM
|
|
childID uint32 //4 bytes, Dir? Stream ID of child object, if none set to NOSTREAM
|
|
clsid types.Guid // Contains an object class GUID (must be set to zeroes for stream object)
|
|
stateBits [4]byte // user-defined flags for storage object
|
|
create types.FileTime // Windows FILETIME structure
|
|
modify types.FileTime // Windows FILETIME structure
|
|
startingSectorLoc uint32 // if a stream object, first sector location. If root, first sector of ministream
|
|
streamSize [8]byte // if a stream, size of user-defined data. If root, size of ministream
|
|
}
|
|
|
|
func makeDirEntry(b []byte) *directoryEntryFields {
|
|
d := &directoryEntryFields{}
|
|
for i := range d.rawName {
|
|
d.rawName[i] = binary.LittleEndian.Uint16(b[i*2 : i*2+2])
|
|
}
|
|
d.nameLength = binary.LittleEndian.Uint16(b[64:66])
|
|
d.objectType = uint8(b[66])
|
|
d.color = uint8(b[67])
|
|
d.leftSibID = binary.LittleEndian.Uint32(b[68:72])
|
|
d.rightSibID = binary.LittleEndian.Uint32(b[72:76])
|
|
d.childID = binary.LittleEndian.Uint32(b[76:80])
|
|
d.clsid = types.MustGuid(b[80:96])
|
|
copy(d.stateBits[:], b[96:100])
|
|
d.create = types.MustFileTime(b[100:108])
|
|
d.modify = types.MustFileTime(b[108:116])
|
|
d.startingSectorLoc = binary.LittleEndian.Uint32(b[116:120])
|
|
copy(d.streamSize[:], b[120:128])
|
|
return d
|
|
}
|
|
|
|
func (r *Reader) setDirEntries() error {
|
|
c := 20
|
|
if r.header.numDirectorySectors > 0 {
|
|
c = int(r.header.numDirectorySectors)
|
|
}
|
|
de := make([]*File, 0, c)
|
|
cycles := make(map[uint32]bool)
|
|
num := int(r.sectorSize / 128)
|
|
sn := r.header.directorySectorLoc
|
|
for sn != endOfChain {
|
|
buf, err := r.readAt(fileOffset(r.sectorSize, sn), int(r.sectorSize))
|
|
if err != nil {
|
|
return Error{ErrRead, "directory entries read error (" + err.Error() + ")", fileOffset(r.sectorSize, sn)}
|
|
}
|
|
for i := 0; i < num; i++ {
|
|
f := &File{r: r}
|
|
f.directoryEntryFields = makeDirEntry(buf[i*128:])
|
|
fixFile(r.header.majorVersion, f)
|
|
f.curSector = f.startingSectorLoc
|
|
de = append(de, f)
|
|
}
|
|
nsn, err := r.findNext(sn, false)
|
|
if err != nil {
|
|
return Error{ErrRead, "directory entries error finding sector (" + err.Error() + ")", int64(nsn)}
|
|
}
|
|
if nsn <= sn {
|
|
if nsn == sn || cycles[nsn] {
|
|
return Error{ErrRead, "directory entries sector cycle", int64(nsn)}
|
|
}
|
|
cycles[nsn] = true
|
|
}
|
|
sn = nsn
|
|
}
|
|
r.direntries = de
|
|
return nil
|
|
}
|
|
|
|
func fixFile(v uint16, f *File) {
|
|
fixName(f)
|
|
if f.objectType != stream {
|
|
return
|
|
}
|
|
// if the MSCFB major version is 4, then this can be a uint64 otherwise is a uint32 and the least signficant bits can contain junk
|
|
if v > 3 {
|
|
f.Size = int64(binary.LittleEndian.Uint64(f.streamSize[:]))
|
|
} else {
|
|
f.Size = int64(binary.LittleEndian.Uint32(f.streamSize[:4]))
|
|
}
|
|
}
|
|
|
|
func fixName(f *File) {
|
|
// From the spec:
|
|
// "The length [name] MUST be a multiple of 2, and include the terminating null character in the count.
|
|
// This length MUST NOT exceed 64, the maximum size of the Directory Entry Name field."
|
|
if f.nameLength < 4 || f.nameLength > 64 {
|
|
return
|
|
}
|
|
nlen := int(f.nameLength/2 - 1)
|
|
f.Initial = f.rawName[0]
|
|
var slen int
|
|
if !unicode.IsPrint(rune(f.Initial)) {
|
|
slen = 1
|
|
}
|
|
f.Name = string(utf16.Decode(f.rawName[slen:nlen]))
|
|
}
|
|
|
|
func (r *Reader) traverse() error {
|
|
r.File = make([]*File, 0, len(r.direntries))
|
|
var (
|
|
recurse func(int, []string)
|
|
err error
|
|
counter int
|
|
)
|
|
recurse = func(i int, path []string) {
|
|
// prevent cycles, number of recurse calls can't exceed number of directory entries
|
|
counter++
|
|
if counter > len(r.direntries) {
|
|
err = Error{ErrTraverse, "traversal counter overflow", int64(i)}
|
|
return
|
|
}
|
|
if i < 0 || i >= len(r.direntries) {
|
|
err = Error{ErrTraverse, "illegal traversal index", int64(i)}
|
|
return
|
|
}
|
|
file := r.direntries[i]
|
|
if file.leftSibID != noStream {
|
|
recurse(int(file.leftSibID), path)
|
|
}
|
|
r.File = append(r.File, file)
|
|
file.Path = path
|
|
if file.childID != noStream {
|
|
if i > 0 {
|
|
recurse(int(file.childID), append(path, file.Name))
|
|
} else {
|
|
recurse(int(file.childID), path)
|
|
}
|
|
}
|
|
if file.rightSibID != noStream {
|
|
recurse(int(file.rightSibID), path)
|
|
}
|
|
return
|
|
}
|
|
recurse(0, []string{})
|
|
return err
|
|
}
|
|
|
|
// File represents a MSCFB directory entry
|
|
type File struct {
|
|
Name string // stream or directory name
|
|
Initial uint16 // the first character in the name (identifies special streams such as MSOLEPS property sets)
|
|
Path []string // file path
|
|
Size int64 // size of stream
|
|
i int64 // bytes read
|
|
curSector uint32 // next sector for Read | Write
|
|
rem int64 // offset in current sector remaining previous Read | Write
|
|
*directoryEntryFields
|
|
r *Reader
|
|
}
|
|
|
|
type fileInfo struct{ *File }
|
|
|
|
func (fi fileInfo) Name() string { return fi.File.Name }
|
|
func (fi fileInfo) Size() int64 {
|
|
if fi.objectType != stream {
|
|
return 0
|
|
}
|
|
return fi.File.Size
|
|
}
|
|
func (fi fileInfo) IsDir() bool { return fi.mode().IsDir() }
|
|
func (fi fileInfo) ModTime() time.Time { return fi.Modified() }
|
|
func (fi fileInfo) Mode() os.FileMode { return fi.File.mode() }
|
|
func (fi fileInfo) Sys() interface{} { return nil }
|
|
|
|
func (f *File) mode() os.FileMode {
|
|
if f.objectType != stream {
|
|
return os.ModeDir | 0777
|
|
}
|
|
return 0666
|
|
}
|
|
|
|
// FileInfo for this directory entry. Useful for IsDir() (whether a directory entry is a stream (file) or a storage object (dir))
|
|
func (f *File) FileInfo() os.FileInfo {
|
|
return fileInfo{f}
|
|
}
|
|
|
|
// ID returns this directory entry's CLSID field
|
|
func (f *File) ID() string {
|
|
return f.clsid.String()
|
|
}
|
|
|
|
// Created returns this directory entry's created field
|
|
func (f *File) Created() time.Time {
|
|
return f.create.Time()
|
|
}
|
|
|
|
// Created returns this directory entry's modified field
|
|
func (f *File) Modified() time.Time {
|
|
return f.modify.Time()
|
|
}
|
|
|
|
// Read this directory entry
|
|
// Returns 0, io.EOF if no stream is available (i.e. for a storage object)
|
|
func (f *File) Read(b []byte) (int, error) {
|
|
if f.Size < 1 || f.i >= f.Size {
|
|
return 0, io.EOF
|
|
}
|
|
sz := len(b)
|
|
if int64(sz) > f.Size-f.i {
|
|
sz = int(f.Size - f.i)
|
|
}
|
|
// get sectors and lengths for reads
|
|
str, err := f.stream(sz)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
// now read
|
|
var idx, i int
|
|
for _, v := range str {
|
|
jdx := idx + int(v[1])
|
|
if jdx < idx || jdx > sz {
|
|
return 0, Error{ErrRead, "bad read length", int64(jdx)}
|
|
}
|
|
j, err := f.r.ra.ReadAt(b[idx:jdx], v[0])
|
|
i = i + j
|
|
if err != nil {
|
|
f.i += int64(i)
|
|
return i, Error{ErrRead, "underlying reader fail (" + err.Error() + ")", int64(idx)}
|
|
}
|
|
idx = jdx
|
|
}
|
|
f.i += int64(i)
|
|
if i != sz {
|
|
err = Error{ErrRead, "bytes read do not match expected read size", int64(i)}
|
|
} else if i < len(b) {
|
|
err = io.EOF
|
|
}
|
|
return i, err
|
|
}
|
|
|
|
// Write to this directory entry
|
|
// Depends on the io.ReaderAt supplied to mscfb.New() being a WriterAt too
|
|
// Returns 0, io.EOF if no stream is available (i.e. for a storage object)
|
|
func (f *File) Write(b []byte) (int, error) {
|
|
if f.Size < 1 || f.i >= f.Size {
|
|
return 0, io.EOF
|
|
}
|
|
if f.r.wa == nil {
|
|
wa, ok := f.r.ra.(io.WriterAt)
|
|
if !ok {
|
|
return 0, Error{ErrWrite, "mscfb.New must be given ReaderAt convertible to a io.WriterAt in order to write", 0}
|
|
}
|
|
f.r.wa = wa
|
|
}
|
|
sz := len(b)
|
|
if int64(sz) > f.Size-f.i {
|
|
sz = int(f.Size - f.i)
|
|
}
|
|
// get sectors and lengths for writes
|
|
str, err := f.stream(sz)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
// now read
|
|
var idx, i int
|
|
for _, v := range str {
|
|
jdx := idx + int(v[1])
|
|
if jdx < idx || jdx > sz {
|
|
return 0, Error{ErrWrite, "bad write length", int64(jdx)}
|
|
}
|
|
j, err := f.r.wa.WriteAt(b[idx:jdx], v[0])
|
|
i = i + j
|
|
if err != nil {
|
|
f.i += int64(i)
|
|
return i, Error{ErrWrite, "underlying writer fail (" + err.Error() + ")", int64(idx)}
|
|
}
|
|
idx = jdx
|
|
}
|
|
f.i += int64(i)
|
|
if i != sz {
|
|
err = Error{ErrWrite, "bytes written do not match expected write size", int64(i)}
|
|
} else if i < len(b) {
|
|
err = io.EOF
|
|
}
|
|
return i, err
|
|
}
|
|
|
|
// ReadAt reads p bytes at offset off from start of file. Does not affect seek place for other reads/writes.
|
|
func (f *File) ReadAt(p []byte, off int64) (n int, err error) {
|
|
// memorize place
|
|
mi, mrem, mcur := f.i, f.rem, f.curSector
|
|
_, err = f.Seek(off, 0)
|
|
if err == nil {
|
|
n, err = f.Read(p)
|
|
}
|
|
f.i, f.rem, f.curSector = mi, mrem, mcur
|
|
return n, err
|
|
}
|
|
|
|
// WriteAt reads p bytes at offset off from start of file. Does not affect seek place for other reads/writes.
|
|
func (f *File) WriteAt(p []byte, off int64) (n int, err error) {
|
|
// memorize place
|
|
mi, mrem, mcur := f.i, f.rem, f.curSector
|
|
_, err = f.Seek(off, 0)
|
|
if err == nil {
|
|
n, err = f.Write(p)
|
|
}
|
|
f.i, f.rem, f.curSector = mi, mrem, mcur
|
|
return n, err
|
|
}
|
|
|
|
// Seek sets the offset for the next Read or Write to offset, interpreted according to whence: 0 means relative to the
|
|
// start of the file, 1 means relative to the current offset, and 2 means relative to the end. Seek returns the new
|
|
// offset relative to the start of the file and an error, if any.
|
|
func (f *File) Seek(offset int64, whence int) (int64, error) {
|
|
var abs int64
|
|
switch whence {
|
|
default:
|
|
return 0, Error{ErrSeek, "invalid whence", int64(whence)}
|
|
case 0:
|
|
abs = offset
|
|
case 1:
|
|
abs = f.i + offset
|
|
case 2:
|
|
abs = f.Size - offset
|
|
}
|
|
switch {
|
|
case abs < 0:
|
|
return f.i, Error{ErrSeek, "can't seek before start of File", abs}
|
|
case abs >= f.Size:
|
|
return f.i, Error{ErrSeek, "can't seek past File length", abs}
|
|
case abs == f.i:
|
|
return abs, nil
|
|
case abs > f.i:
|
|
t := f.i
|
|
f.i = abs
|
|
return f.i, f.seek(abs - t)
|
|
}
|
|
if f.rem >= f.i-abs {
|
|
f.rem = f.rem - (f.i - abs)
|
|
f.i = abs
|
|
return f.i, nil
|
|
}
|
|
f.rem = 0
|
|
f.curSector = f.startingSectorLoc
|
|
f.i = abs
|
|
return f.i, f.seek(abs)
|
|
}
|
|
|
|
func (f *File) seek(sz int64) error {
|
|
// calculate ministream and sector size
|
|
var mini bool
|
|
var ss int64
|
|
if f.Size < miniStreamCutoffSize {
|
|
mini = true
|
|
ss = 64
|
|
} else {
|
|
ss = int64(f.r.sectorSize)
|
|
}
|
|
|
|
var j int64
|
|
var err error
|
|
// if we have a remainder in the current sector, use it first
|
|
if f.rem > 0 {
|
|
if ss-f.rem <= sz {
|
|
f.curSector, err = f.r.findNext(f.curSector, mini)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
j += ss - f.rem
|
|
f.rem = 0
|
|
if j == sz {
|
|
return nil
|
|
}
|
|
} else {
|
|
f.rem += sz
|
|
return nil
|
|
}
|
|
if f.curSector == endOfChain {
|
|
return Error{ErrRead, "unexpected early end of chain", int64(f.curSector)}
|
|
}
|
|
}
|
|
|
|
for {
|
|
// check if we are at the last sector
|
|
if sz-j < ss {
|
|
f.rem = sz - j
|
|
return nil
|
|
} else {
|
|
j += ss
|
|
f.curSector, err = f.r.findNext(f.curSector, mini)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// we might be at the last sector if there is no remainder, if so can return
|
|
if j == sz {
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// return offsets and lengths for read or write
|
|
func (f *File) stream(sz int) ([][2]int64, error) {
|
|
// calculate ministream, cap for sector slice, and sector size
|
|
var mini bool
|
|
var l int
|
|
var ss int64
|
|
if f.Size < miniStreamCutoffSize {
|
|
mini = true
|
|
l = sz/64 + 2
|
|
ss = 64
|
|
} else {
|
|
l = sz/int(f.r.sectorSize) + 2
|
|
ss = int64(f.r.sectorSize)
|
|
}
|
|
|
|
sectors := make([][2]int64, 0, l)
|
|
var i, j int
|
|
|
|
// if we have a remainder from a previous read, use it first
|
|
if f.rem > 0 {
|
|
offset, err := f.r.getOffset(f.curSector, mini)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if ss-f.rem >= int64(sz) {
|
|
sectors = append(sectors, [2]int64{offset + f.rem, int64(sz)})
|
|
} else {
|
|
sectors = append(sectors, [2]int64{offset + f.rem, ss - f.rem})
|
|
}
|
|
if ss-f.rem <= int64(sz) {
|
|
f.curSector, err = f.r.findNext(f.curSector, mini)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
j += int(ss - f.rem)
|
|
f.rem = 0
|
|
} else {
|
|
f.rem += int64(sz)
|
|
}
|
|
if sectors[0][1] == int64(sz) {
|
|
return sectors, nil
|
|
}
|
|
if f.curSector == endOfChain {
|
|
return nil, Error{ErrRead, "unexpected early end of chain", int64(f.curSector)}
|
|
}
|
|
i++
|
|
}
|
|
|
|
for {
|
|
// emergency brake!
|
|
if i >= cap(sectors) {
|
|
return nil, Error{ErrRead, "index overruns sector length", int64(i)}
|
|
}
|
|
// grab the next offset
|
|
offset, err := f.r.getOffset(f.curSector, mini)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// check if we are at the last sector
|
|
if sz-j < int(ss) {
|
|
sectors = append(sectors, [2]int64{offset, int64(sz - j)})
|
|
f.rem = int64(sz - j)
|
|
return compressChain(sectors), nil
|
|
} else {
|
|
sectors = append(sectors, [2]int64{offset, ss})
|
|
j += int(ss)
|
|
f.curSector, err = f.r.findNext(f.curSector, mini)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// we might be at the last sector if there is no remainder, if so can return
|
|
if j == sz {
|
|
return compressChain(sectors), nil
|
|
}
|
|
}
|
|
i++
|
|
}
|
|
}
|
|
|
|
// compressChain merges runs of contiguous [offset, length] pairs into single
// pairs: whenever one pair ends exactly where the next begins, the two are
// combined. The merge is done in place in a single pass and the shortened
// slice, which shares locs' backing array, is returned. Slices with fewer
// than two pairs are returned unchanged.
func compressChain(locs [][2]int64) [][2]int64 {
	if len(locs) < 2 {
		return locs
	}
	// last indexes the pair currently being extended; everything up to and
	// including it is final output.
	last := 0
	for _, loc := range locs[1:] {
		if locs[last][0]+locs[last][1] == loc[0] {
			locs[last][1] += loc[1]
			continue
		}
		last++
		locs[last] = loc
	}
	return locs[:last+1]
}
|