service/vendor/github.com/richardlehane/mscfb/file.go

535 lines
14 KiB
Go
Raw Normal View History

2023-12-21 22:17:40 +08:00
// Copyright 2013 Richard Lehane. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mscfb
import (
"encoding/binary"
"io"
"os"
"time"
"unicode"
"unicode/utf16"
"github.com/richardlehane/msoleps/types"
)
// Object types: the values stored in a directory entry's objectType field.
const (
	unknown uint8 = 0x0 // unallocated - typically zeroed dir entries
	storage uint8 = 0x1 // storage object, i.e. a directory
	stream uint8 = 0x2 // stream object, i.e. a file
	rootStorage uint8 = 0x5 // the root storage object
)
// Color flags: the values stored in a directory entry's color field.
const (
	red uint8 = 0x0
	black uint8 = 0x1
)
// lenDirEntry is the size in bytes of one on-disk directory entry (128):
// name (64) + name length, object type, color and the three sibling/child IDs (4*4) +
// CLSID (16) + state bits (4) + created and modified times (8*2) +
// starting sector location (4) + stream size (8).
const lenDirEntry int = 64 + 4*4 + 16 + 4 + 8*2 + 4 + 8
// directoryEntryFields holds the raw fields decoded from a single 128-byte
// directory entry. makeDirEntry populates it from the on-disk bytes.
type directoryEntryFields struct {
	rawName [32]uint16 // 64 bytes, unicode string encoded in UTF-16. If root, "Root Entry\0" (the original note was cut off here; presumably followed by padding)
	nameLength uint16 // 2 bytes, length of the name in bytes
	objectType uint8 // 1 byte, must be one of the object types declared above
	color uint8 // 1 byte, must be 0x00 (red) or 0x01 (black)
	leftSibID uint32 // 4 bytes, stream ID of the left sibling; noStream if there is none
	rightSibID uint32 // 4 bytes, stream ID of the right sibling; noStream if there is none
	childID uint32 // 4 bytes, stream ID of the child object; noStream if there is none
	clsid types.Guid // 16 bytes, object class GUID (must be set to zeroes for a stream object)
	stateBits [4]byte // 4 bytes, user-defined flags for a storage object
	create types.FileTime // 8 bytes, Windows FILETIME structure
	modify types.FileTime // 8 bytes, Windows FILETIME structure
	startingSectorLoc uint32 // 4 bytes; if a stream object, first sector location. If root, first sector of ministream
	streamSize [8]byte // 8 bytes; if a stream, size of user-defined data. If root, size of ministream
}
// makeDirEntry decodes one directory entry from the first 128 bytes of b.
// All multi-byte integers are little-endian. b must hold at least 128 bytes;
// a shorter slice causes an out-of-range panic.
func makeDirEntry(b []byte) *directoryEntryFields {
	le := binary.LittleEndian
	entry := new(directoryEntryFields)

	// Bytes 0-63: the name as 32 UTF-16 code units.
	for idx := 0; idx < len(entry.rawName); idx++ {
		start := idx * 2
		entry.rawName[idx] = le.Uint16(b[start : start+2])
	}

	// Bytes 64-79: name length, type, color and the tree links.
	entry.nameLength = le.Uint16(b[64:66])
	entry.objectType = b[66]
	entry.color = b[67]
	entry.leftSibID = le.Uint32(b[68:72])
	entry.rightSibID = le.Uint32(b[72:76])
	entry.childID = le.Uint32(b[76:80])

	// Bytes 80-115: class ID, state bits and timestamps.
	entry.clsid = types.MustGuid(b[80:96])
	copy(entry.stateBits[:], b[96:100])
	entry.create = types.MustFileTime(b[100:108])
	entry.modify = types.MustFileTime(b[108:116])

	// Bytes 116-127: first sector and raw (undecoded) stream size.
	entry.startingSectorLoc = le.Uint32(b[116:120])
	copy(entry.streamSize[:], b[120:128])
	return entry
}
// setDirEntries reads every sector of the directory chain, starting at
// header.directorySectorLoc, and stores the parsed entries in r.direntries.
// Each sector contributes sectorSize/128 entries. It returns an ErrRead Error
// if a sector cannot be read, the next sector cannot be found, or the chain
// loops back on itself.
func (r *Reader) setDirEntries() error {
	// Capacity hint: the header's directory sector count when present, else 20.
	capHint := 20
	if r.header.numDirectorySectors > 0 {
		capHint = int(r.header.numDirectorySectors)
	}
	entries := make([]*File, 0, capHint)
	seen := make(map[uint32]bool)
	perSector := int(r.sectorSize / 128)

	cur := r.header.directorySectorLoc
	for cur != endOfChain {
		buf, err := r.readAt(fileOffset(r.sectorSize, cur), int(r.sectorSize))
		if err != nil {
			return Error{ErrRead, "directory entries read error (" + err.Error() + ")", fileOffset(r.sectorSize, cur)}
		}
		for off, n := 0, 0; n < perSector; off, n = off+128, n+1 {
			f := &File{r: r, directoryEntryFields: makeDirEntry(buf[off:])}
			fixFile(r.header.majorVersion, f)
			f.curSector = f.startingSectorLoc
			entries = append(entries, f)
		}
		next, err := r.findNext(cur, false)
		if err != nil {
			return Error{ErrRead, "directory entries error finding sector (" + err.Error() + ")", int64(next)}
		}
		// Only backward (or self) links can close a cycle, so only those are tracked.
		switch {
		case next > cur:
			// forward link: nothing to record
		case next == cur, seen[next]:
			return Error{ErrRead, "directory entries sector cycle", int64(next)}
		default:
			seen[next] = true
		}
		cur = next
	}
	r.direntries = entries
	return nil
}
// fixFile completes a freshly parsed directory entry: it decodes the name and,
// for stream objects only, derives Size from the raw streamSize bytes.
// v is the compound file's major version.
func fixFile(v uint16, f *File) {
	fixName(f)
	if f.objectType == stream {
		if v <= 3 {
			// Before major version 4 the size is a uint32: only the first four
			// little-endian bytes (the low 32 bits) are used, because the upper
			// four bytes can contain junk.
			f.Size = int64(binary.LittleEndian.Uint32(f.streamSize[:4]))
			return
		}
		// From major version 4 onwards the full 8 bytes form a uint64.
		f.Size = int64(binary.LittleEndian.Uint64(f.streamSize[:]))
	}
}
// fixName sets f.Initial and f.Name from the raw UTF-16 name.
//
// From the spec:
// "The length [name] MUST be a multiple of 2, and include the terminating null character in the count.
// This length MUST NOT exceed 64, the maximum size of the Directory Entry Name field."
//
// Entries whose nameLength is below 4 or above 64 are left untouched. A
// non-printable first code unit is kept in Initial but dropped from Name.
func fixName(f *File) {
	if f.nameLength < 4 || f.nameLength > 64 {
		return
	}
	f.Initial = f.rawName[0]

	// Skip the leading code unit when it is not printable.
	first := 0
	if !unicode.IsPrint(rune(f.Initial)) {
		first = 1
	}
	// nameLength is in bytes and counts the terminating null: halve it to get
	// code units, then drop the terminator.
	last := int(f.nameLength/2 - 1)
	f.Name = string(utf16.Decode(f.rawName[first:last]))
}
// traverse walks the directory entry tree starting at entry 0 and fills r.File
// with the entries in traversal order: left sibling subtree, the entry itself,
// its children, then the right sibling subtree. Each entry's Path is set to the
// names of the storages enclosing it; entry 0's own name is never included.
//
// It returns an ErrTraverse Error if an ID points outside r.direntries or if
// the number of visits exceeds the number of directory entries (which guards
// against cycles in the sibling/child links).
func (r *Reader) traverse() error {
	r.File = make([]*File, 0, len(r.direntries))
	var (
		recurse func(int, []string)
		err     error
		counter int
	)
	recurse = func(i int, path []string) {
		// prevent cycles, number of recurse calls can't exceed number of directory entries
		counter++
		if counter > len(r.direntries) {
			err = Error{ErrTraverse, "traversal counter overflow", int64(i)}
			return
		}
		if i < 0 || i >= len(r.direntries) {
			err = Error{ErrTraverse, "illegal traversal index", int64(i)}
			return
		}
		file := r.direntries[i]
		if file.leftSibID != noStream {
			recurse(int(file.leftSibID), path)
		}
		r.File = append(r.File, file)
		file.Path = path
		if file.childID != noStream {
			childPath := path
			if i > 0 {
				// Clip the capacity so append always allocates a fresh backing
				// array. A plain append(path, name) may write into spare
				// capacity shared with a sibling storage's child path and so
				// silently rewrite the Path of entries already visited.
				childPath = append(path[:len(path):len(path)], file.Name)
			}
			recurse(int(file.childID), childPath)
		}
		if file.rightSibID != noStream {
			recurse(int(file.rightSibID), path)
		}
	}
	recurse(0, []string{})
	return err
}
// File represents a MSCFB directory entry. It carries the decoded name, path
// and size alongside the read/write cursor state used by Read, Write and Seek.
type File struct {
	Name string // stream or directory name
	Initial uint16 // the first character in the name (identifies special streams such as MSOLEPS property sets)
	Path []string // file path
	Size int64 // size of stream
	i int64 // current offset within the stream; advanced by Read and Write, set by Seek
	curSector uint32 // next sector for Read | Write
	rem int64 // offset within the current sector already consumed by a previous Read | Write | Seek
	*directoryEntryFields
	r *Reader
}
// fileInfo wraps a *File so that it can be returned as an os.FileInfo.
type fileInfo struct{ *File }

// Name returns the directory entry's name.
func (fi fileInfo) Name() string {
	return fi.File.Name
}

// Size returns the stream size, or 0 for any entry that is not a stream object.
func (fi fileInfo) Size() int64 {
	if fi.objectType == stream {
		return fi.File.Size
	}
	return 0
}

// IsDir reports whether the entry's mode has the directory bit set.
func (fi fileInfo) IsDir() bool {
	m := fi.mode()
	return m.IsDir()
}

// ModTime returns the entry's modified time.
func (fi fileInfo) ModTime() time.Time {
	return fi.Modified()
}

// Mode returns the entry's file mode.
func (fi fileInfo) Mode() os.FileMode {
	return fi.File.mode()
}

// Sys always returns nil.
func (fi fileInfo) Sys() interface{} {
	return nil
}
// mode reports 0666 for stream objects and os.ModeDir|0777 for every other
// object type.
func (f *File) mode() os.FileMode {
	if f.objectType == stream {
		return 0666
	}
	return os.ModeDir | 0777
}
// FileInfo returns an os.FileInfo for this directory entry. Useful for IsDir()
// (whether a directory entry is a stream (file) or a storage object (dir)).
func (f *File) FileInfo() os.FileInfo {
	return fileInfo{f}
}
// ID returns this directory entry's CLSID field in its string form.
func (f *File) ID() string {
	return f.clsid.String()
}
// Created returns this directory entry's created field as a time.Time.
func (f *File) Created() time.Time {
	return f.create.Time()
}
// Modified returns this directory entry's modified field as a time.Time.
func (f *File) Modified() time.Time {
	return f.modify.Time()
}
// Read reads up to len(b) bytes of this directory entry's stream into b,
// starting at the current offset, and advances the offset by the bytes read.
// It returns 0, io.EOF if no stream is available (i.e. for a storage object)
// or the offset is already at the end. io.EOF also accompanies a successful
// read that filled less than len(b) because the stream ended.
func (f *File) Read(b []byte) (int, error) {
	if f.Size < 1 || f.i >= f.Size {
		return 0, io.EOF
	}
	// Never ask for more than what is left in the stream.
	want := len(b)
	if left := f.Size - f.i; int64(want) > left {
		want = int(left)
	}
	// Resolve the (offset, length) runs in the underlying reader.
	runs, err := f.stream(want)
	if err != nil {
		return 0, err
	}
	// Fill b run by run.
	var pos, total int
	for _, run := range runs {
		end := pos + int(run[1])
		if end < pos || end > want {
			return 0, Error{ErrRead, "bad read length", int64(end)}
		}
		n, rerr := f.r.ra.ReadAt(b[pos:end], run[0])
		total += n
		if rerr != nil {
			f.i += int64(total)
			return total, Error{ErrRead, "underlying reader fail (" + rerr.Error() + ")", int64(pos)}
		}
		pos = end
	}
	f.i += int64(total)
	switch {
	case total != want:
		return total, Error{ErrRead, "bytes read do not match expected read size", int64(total)}
	case total < len(b):
		return total, io.EOF
	}
	return total, nil
}
// Write overwrites up to len(b) bytes of this directory entry's stream with b,
// starting at the current offset, and advances the offset by the bytes written.
// The stream cannot grow: writes are clipped at Size.
// Depends on the io.ReaderAt supplied to mscfb.New() being an io.WriterAt too.
// It returns 0, io.EOF if no stream is available (i.e. for a storage object)
// or the offset is already at the end. io.EOF also accompanies a successful
// write that consumed less than len(b) because the stream ended.
func (f *File) Write(b []byte) (int, error) {
	if f.Size < 1 || f.i >= f.Size {
		return 0, io.EOF
	}
	// Lazily resolve, and cache on the Reader, the writer side of the source.
	if f.r.wa == nil {
		w, isWriter := f.r.ra.(io.WriterAt)
		if !isWriter {
			return 0, Error{ErrWrite, "mscfb.New must be given ReaderAt convertible to a io.WriterAt in order to write", 0}
		}
		f.r.wa = w
	}
	// Never write past the end of the stream.
	want := len(b)
	if left := f.Size - f.i; int64(want) > left {
		want = int(left)
	}
	// Resolve the (offset, length) runs in the underlying writer.
	runs, err := f.stream(want)
	if err != nil {
		return 0, err
	}
	// Write b out run by run.
	var pos, total int
	for _, run := range runs {
		end := pos + int(run[1])
		if end < pos || end > want {
			return 0, Error{ErrWrite, "bad write length", int64(end)}
		}
		n, werr := f.r.wa.WriteAt(b[pos:end], run[0])
		total += n
		if werr != nil {
			f.i += int64(total)
			return total, Error{ErrWrite, "underlying writer fail (" + werr.Error() + ")", int64(pos)}
		}
		pos = end
	}
	f.i += int64(total)
	switch {
	case total != want:
		return total, Error{ErrWrite, "bytes written do not match expected write size", int64(total)}
	case total < len(b):
		return total, io.EOF
	}
	return total, nil
}
// ReadAt reads up to len(p) bytes starting at offset off from the start of the
// file. The cursor state (offset, sector and in-sector remainder) is saved
// beforehand and restored afterwards, so other reads/writes are unaffected.
// A failed Seek is returned as the error with n == 0.
func (f *File) ReadAt(p []byte, off int64) (n int, err error) {
	savedPos, savedRem, savedSector := f.i, f.rem, f.curSector
	if _, err = f.Seek(off, 0); err == nil {
		n, err = f.Read(p)
	}
	// Put the cursor back where it was.
	f.i, f.rem, f.curSector = savedPos, savedRem, savedSector
	return n, err
}
// WriteAt writes up to len(p) bytes starting at offset off from the start of
// the file. The cursor state (offset, sector and in-sector remainder) is saved
// beforehand and restored afterwards, so other reads/writes are unaffected.
// A failed Seek is returned as the error with n == 0.
func (f *File) WriteAt(p []byte, off int64) (n int, err error) {
	savedPos, savedRem, savedSector := f.i, f.rem, f.curSector
	if _, err = f.Seek(off, 0); err == nil {
		n, err = f.Write(p)
	}
	// Put the cursor back where it was.
	f.i, f.rem, f.curSector = savedPos, savedRem, savedSector
	return n, err
}
// Seek sets the offset for the next Read or Write to offset, interpreted according to whence: 0 means relative to the
// start of the file, 1 means relative to the current offset, and 2 means relative to the end. Seek returns the new
// offset relative to the start of the file and an error, if any.
//
// For whence 2 the target is computed as Size - offset, so a positive offset moves backwards from the end.
// Targets below 0 or at/after Size are rejected with an ErrSeek Error and the current offset is returned unchanged.
func (f *File) Seek(offset int64, whence int) (int64, error) {
	var abs int64
	switch whence {
	default:
		return 0, Error{ErrSeek, "invalid whence", int64(whence)}
	case 0:
		abs = offset
	case 1:
		abs = f.i + offset
	case 2:
		// NOTE(review): io.Seeker defines this as Size + offset (negative offsets
		// move backwards); this implementation subtracts instead. Left as is
		// because existing callers may depend on it - confirm before changing.
		abs = f.Size - offset
	}
	switch {
	case abs < 0:
		return f.i, Error{ErrSeek, "can't seek before start of File", abs}
	case abs >= f.Size:
		// Note that seeking to exactly Size (the end of the stream) is also refused.
		return f.i, Error{ErrSeek, "can't seek past File length", abs}
	case abs == f.i:
		// already there
		return abs, nil
	case abs > f.i:
		// Forward seek: walk the sector chain by the distance still to cover.
		t := f.i
		f.i = abs
		return f.i, f.seek(abs - t)
	}
	// Backward seek. If the target still lies within the consumed part of the
	// current sector, only the in-sector offset needs adjusting.
	if f.rem >= f.i-abs {
		f.rem = f.rem - (f.i - abs)
		f.i = abs
		return f.i, nil
	}
	// Otherwise rewind to the first sector and walk forward from the start.
	f.rem = 0
	f.curSector = f.startingSectorLoc
	f.i = abs
	return f.i, f.seek(abs)
}
// seek advances the sector cursor (curSector and rem) forward by sz bytes from
// its current position. It does not touch f.i; Seek sets that before calling.
// It returns any error from following the sector chain, or an ErrRead Error if
// the chain ends right after the current sector while bytes remain to be skipped.
func (f *File) seek(sz int64) error {
	// calculate ministream and sector size
	var mini bool
	var ss int64
	if f.Size < miniStreamCutoffSize {
		// streams below the cutoff are addressed in 64-byte sectors
		mini = true
		ss = 64
	} else {
		ss = int64(f.r.sectorSize)
	}
	// j counts the bytes skipped so far
	var j int64
	var err error
	// if we have a remainder in the current sector, use it first
	if f.rem > 0 {
		if ss-f.rem <= sz {
			// the rest of the current sector is consumed entirely: move on to the next one
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return err
			}
			j += ss - f.rem
			f.rem = 0
			if j == sz {
				return nil
			}
		} else {
			// the target lies inside the current sector
			f.rem += sz
			return nil
		}
		if f.curSector == endOfChain {
			return Error{ErrRead, "unexpected early end of chain", int64(f.curSector)}
		}
	}
	for {
		// check if we are at the last sector
		if sz-j < ss {
			// less than a full sector left: record the in-sector offset and stop
			f.rem = sz - j
			return nil
		} else {
			// skip one whole sector
			j += ss
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return err
			}
			// we might be at the last sector if there is no remainder, if so can return
			if j == sz {
				return nil
			}
		}
	}
}
// stream returns the offsets and lengths for a read or write of sz bytes at the
// current position. Each element is {offset in the underlying reader, length in
// bytes}; the lengths add up to sz. As a side effect it advances curSector and
// rem past the sz bytes. Runs gathered by the main loop are merged with
// compressChain when contiguous. It returns an error if a sector offset or the
// next sector cannot be resolved, if the chain ends early, or if more sectors
// are needed than sz allows for.
func (f *File) stream(sz int) ([][2]int64, error) {
	// calculate ministream, cap for sector slice, and sector size
	var mini bool
	var l int
	var ss int64
	if f.Size < miniStreamCutoffSize {
		// streams below the cutoff are addressed in 64-byte sectors
		mini = true
		l = sz/64 + 2
		ss = 64
	} else {
		l = sz/int(f.r.sectorSize) + 2
		ss = int64(f.r.sectorSize)
	}
	sectors := make([][2]int64, 0, l)
	// i counts the runs collected so far, j the bytes covered so far
	var i, j int
	// if we have a remainder from a previous read, use it first
	if f.rem > 0 {
		offset, err := f.r.getOffset(f.curSector, mini)
		if err != nil {
			return nil, err
		}
		// the first run starts rem bytes into the current sector and is capped at sz
		if ss-f.rem >= int64(sz) {
			sectors = append(sectors, [2]int64{offset + f.rem, int64(sz)})
		} else {
			sectors = append(sectors, [2]int64{offset + f.rem, ss - f.rem})
		}
		if ss-f.rem <= int64(sz) {
			// the current sector is used up: step to the next one
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return nil, err
			}
			j += int(ss - f.rem)
			f.rem = 0
		} else {
			// the request ends inside the current sector
			f.rem += int64(sz)
		}
		// the first run already covers the whole request
		if sectors[0][1] == int64(sz) {
			return sectors, nil
		}
		if f.curSector == endOfChain {
			return nil, Error{ErrRead, "unexpected early end of chain", int64(f.curSector)}
		}
		i++
	}
	for {
		// emergency brake! never collect more runs than the capacity computed from sz
		if i >= cap(sectors) {
			return nil, Error{ErrRead, "index overruns sector length", int64(i)}
		}
		// grab the next offset
		offset, err := f.r.getOffset(f.curSector, mini)
		if err != nil {
			return nil, err
		}
		// check if we are at the last sector
		if sz-j < int(ss) {
			// partial final sector: remember how far into it this request reaches
			sectors = append(sectors, [2]int64{offset, int64(sz - j)})
			f.rem = int64(sz - j)
			return compressChain(sectors), nil
		} else {
			// take the whole sector and move on
			sectors = append(sectors, [2]int64{offset, ss})
			j += int(ss)
			f.curSector, err = f.r.findNext(f.curSector, mini)
			if err != nil {
				return nil, err
			}
			// we might be at the last sector if there is no remainder, if so can return
			if j == sz {
				return compressChain(sectors), nil
			}
		}
		i++
	}
}
// compressChain merges adjacent {offset, length} runs that are contiguous, i.e.
// where one run ends exactly where the next begins, so that each merged run can
// be served by a single read or write. Runs are only merged with their direct
// neighbours; the order is preserved.
//
// The compaction is done in place in a single pass: locs is modified and the
// returned slice shares its backing array. Slices with fewer than two runs are
// returned unchanged.
func compressChain(locs [][2]int64) [][2]int64 {
	if len(locs) < 2 {
		return locs
	}
	// last is the index of the run currently being extended. It never runs
	// ahead of the read position, so writing to locs[last] cannot clobber a
	// run that has not been examined yet.
	last := 0
	for _, next := range locs[1:] {
		if locs[last][0]+locs[last][1] == next[0] {
			// next starts where the current run ends: absorb it
			locs[last][1] += next[1]
			continue
		}
		last++
		locs[last] = next
	}
	return locs[:last+1]
}