/* Copyright(C) 2007. E. Rouault. */
/* VDI images support. Wrapper around VirtualBox OSE code */
/* FIXME : endianness issues and support for non Linux hosts */

/*
 * Copyright (C) 2006-2007 innotek GmbH
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License as published by the Free Software Foundation,
 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
 * distribution. VirtualBox OSE is distributed in the hope that it will
 * be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * If you received this file as part of a commercial VirtualBox
 * distribution, then only the terms of your commercial VirtualBox
 * license agreement apply instead of the previous paragraph.
 */

#include <assert.h>
#include "qemu-common.h"
#include "block_int.h"

/*
 * Minimal stand-ins for the VirtualBox assertion and logging macros.
 * The "Msg" variants take a parenthesised printf() argument list, e.g.
 * AssertMsg(cond, ("value=%d\n", v)), and only print; they never abort.
 */
#define AssertFailed() assert(0)
/* Returns `ret` from the enclosing function when `cond` is false. */
#define AssertReturn(cond, ret) do { if (!(cond)) return ret; } while(0)
/* Prints `y` when `x` is false. */
#define AssertMsg(x, y) do { if (!(x)) printf y; } while(0)
#define Assert(x) assert(x)
/* Unconditionally prints `x`. */
#define AssertMsgFailed(x) printf x
/* Prints `x` when `rc` is a failure status.  Wrapped in do/while(0) so the
 * macro acts as one statement: a bare `if` here would capture the `else` of
 * a surrounding if/else written by the caller. */
#define AssertMsgRC(rc, x) do { if (!RT_SUCCESS(rc)) printf x; } while(0)
/* Release logging goes straight to stdout. */
#define LogRel(x) printf x
/* Debug and flow logging are compiled out. */
#define Log(x)
#define LogFlow(x)

/* NOTE(review): defined but not used in the visible part of this file;
 * presumably consumed by the imported VirtualBox file code - confirm. */
#define RT_DONT_CONVERT_FILENAMES

/* Linkage decorators of the original code base.  In this wrapper all three
 * simply make the decorated function file-local (static). */
#define DECLINLINE(x) static x
#define VBOXDDU_DECL(x) static x
#define RTDECL(x) static x

/* A status code is a success when it is >= VINF_SUCCESS, so the positive
 * informational codes (VINF_*) count as success; negative VERR_* codes are
 * failures. */
#define RT_SUCCESS(rc)   ( (int)(rc) >= VINF_SUCCESS )
#define RT_FAILURE(rc)      ( (!RT_SUCCESS(rc)) )

/* Older spellings, kept as aliases. */
#define VBOX_SUCCESS(rc)    RT_SUCCESS(rc)
#define VBOX_FAILURE(rc)    RT_FAILURE(rc)


/** @def RT_ALIGN_T
 * Align macro: rounds @a u up to the next multiple of @a uAlignment.
 * @param   u           Value to align.
 * @param   uAlignment  The alignment. Power of two!
 * @param   type        Integer type to use while aligning.
 * @remark  This macro is the preferred alignment macro, it doesn't have any of the pitfalls RT_ALIGN has.
 * @remark  @a uAlignment is expanded twice, so it must not have side effects.
 */
#define RT_ALIGN_T(u, uAlignment, type) ( ((type)(u) + ((uAlignment) - 1)) & ~(type)((uAlignment) - 1) )

/** @def RT_ALIGN_32
 * Align macro for a 32-bit value (RT_ALIGN_T with uint32_t arithmetic).
 * @param   u32         Value to align.
 * @param   uAlignment  The alignment. Power of two!
 */
#define RT_ALIGN_32(u32, uAlignment)            RT_ALIGN_T(u32, uAlignment, uint32_t)


/**
 * UUID data type.
 *
 * A union: every member below is a different view onto the same storage.
 * The array views are all 16 bytes long.
 */
typedef union RTUUID
{
    /** 8-bit view. */
    uint8_t     au8[16];
    /** 16-bit view. */
    uint16_t    au16[8];
    /** 32-bit view. */
    uint32_t    au32[4];
    /** 64-bit view. */
    uint64_t    au64[2];
    /** The way the UUID is declared by the ext2 guys.
     * Field-wise view; this is the view RTUuidCompare() orders by. */
    struct
    {
        uint32_t    u32TimeLow;
        uint16_t    u16TimeMid;
        uint16_t    u16TimeHiAndVersion;
        uint16_t    u16ClockSeq;
        uint8_t     au8Node[6];
    } Gen;
    /** @deprecated */
    unsigned char aUuid[16];
} RTUUID;
/** Pointer to a UUID. */
typedef RTUUID *PRTUUID;
/** Pointer to a read-only UUID. */
typedef const RTUUID *PCRTUUID;

/** @name VDI image types
 * Values start at 1, so 0 is not a valid image type.
 * @{ */
typedef enum VDIIMAGETYPE
{
    /** Normal dynamically growing base image file. */
    VDI_IMAGE_TYPE_NORMAL = 1,
    /** Preallocated base image file of a fixed size. */
    VDI_IMAGE_TYPE_FIXED,
    /** Dynamically growing image file for undo/commit changes support. */
    VDI_IMAGE_TYPE_UNDO,
    /** Dynamically growing image file for differencing support. */
    VDI_IMAGE_TYPE_DIFF,

    /** First valid image type value. */
    VDI_IMAGE_TYPE_FIRST  = VDI_IMAGE_TYPE_NORMAL,
    /** Last valid image type value. */
    VDI_IMAGE_TYPE_LAST   = VDI_IMAGE_TYPE_DIFF
} VDIIMAGETYPE;
/** Pointer to VDI image type. */
typedef VDIIMAGETYPE *PVDIIMAGETYPE;
/** @} */

/**
 * BIOS translation mode.
 *
 * The v1 image header stores one of these values in its u32Translation
 * field. Values start at 1.
 */
typedef enum PDMBIOSTRANSLATION
{
    /** No translation. */
    PDMBIOSTRANSLATION_NONE = 1,
    /** LBA translation. */
    PDMBIOSTRANSLATION_LBA,
    /** Automatic select mode. */
    PDMBIOSTRANSLATION_AUTO
} PDMBIOSTRANSLATION;

/** Pointer to BIOS translation mode. */
typedef PDMBIOSTRANSLATION *PPDMBIOSTRANSLATION;

/** Unsigned integer. */
typedef uint32_t        RTUINT;
/** File handle. */
typedef RTUINT                                      RTFILE;
/** Pointer to file handle. */
typedef RTFILE                                     *PRTFILE;
/** Nil file handle (all bits set). */
#define NIL_RTFILE                                  (~(RTFILE)0)

/* Local boolean substitute.
 * NOTE(review): this typedef will clash if <stdbool.h> is included before
 * this point, because `bool` is then a macro - confirm no header does so. */
typedef unsigned char bool;
#define false 0
#define true 1

/** Size of the path buffers used in this file (e.g. VDIIMAGEDESC::szFilename). */
#define RTPATH_MAX   (4096 + 4)      /* (PATH_MAX + 1) on linux w/ some alignment */
/*******************************************************************************
*   Constants And Macros, Structures and Typedefs                              *
*******************************************************************************/

/** Image info text; informational only, it is not interpreted.
 *  Must be less than 64 bytes in length, including the trailing 0
 *  (VDIPREHEADER::szFileInfo is a 64-byte array).
 */
#define VDI_IMAGE_FILE_INFO   "<<< innotek VirtualBox Disk Image >>>\n"

/** The sector size.
 * Currently only 512-byte sectors are supported.
 */
#define VDI_GEOMETRY_SECTOR_SIZE    (512)
/** Shift count matching VDI_GEOMETRY_SECTOR_SIZE: 512 = 2^9. */
#define VDI_GEOMETRY_SECTOR_SHIFT   (9)

/**
 * Harddisk geometry.
 *
 * Embedded in both image header versions (VDIHEADER0 and VDIHEADER1).
 * Declared under pack(1); the four 32-bit fields occupy 16 bytes.
 */
#pragma pack(1)
typedef struct VDIDISKGEOMETRY
{
    /** Cylinders. */
    uint32_t    cCylinders;
    /** Heads. */
    uint32_t    cHeads;
    /** Sectors per track. */
    uint32_t    cSectors;
    /** Sector size. (bytes per sector) */
    uint32_t    cbSector;
} VDIDISKGEOMETRY, *PVDIDISKGEOMETRY;
#pragma pack()

/** Image signature. */
#define VDI_IMAGE_SIGNATURE   (0xbeda107f)

/**
 * Pre-Header to be stored in image file - used for version control.
 *
 * Declared under pack(1): 64 bytes of text followed by two 32-bit fields.
 */
#pragma pack(1)
typedef struct VDIPREHEADER
{
    /** Just text info about image type, for eyes only. */
    char            szFileInfo[64];
    /** The image signature (VDI_IMAGE_SIGNATURE). */
    uint32_t        u32Signature;
    /** The image version (VDI_IMAGE_VERSION): major in the upper 16 bits,
     * minor in the lower 16 bits. */
    uint32_t        u32Version;
} VDIPREHEADER, *PVDIPREHEADER;
#pragma pack()

/**
 * Size of szComment field of HDD image header.
 */
#define VDI_IMAGE_COMMENT_SIZE    256

/**
 * Header to be stored in image file, VDI_IMAGE_VERSION_MAJOR = 0.
 * Prepended by VDIPREHEADER.
 *
 * Unlike VDIHEADER1 this layout has no offset fields; the accessors
 * getImageBlocksOffset() and getImageDataOffset() compute the offsets from
 * the structure sizes and cBlocks instead. Declared under pack(1).
 */
#pragma pack(1)
typedef struct VDIHEADER0
{
    /** The image type (VDI_IMAGE_TYPE_*). */
    uint32_t        u32Type;
    /** Image flags (VDI_IMAGE_FLAGS_*). */
    uint32_t        fFlags;
    /** Image comment. (UTF-8) */
    char            szComment[VDI_IMAGE_COMMENT_SIZE];
    /** Image geometry. */
    VDIDISKGEOMETRY Geometry;
    /** Size of disk (in bytes). */
    uint64_t        cbDisk;
    /** Block size. (For instance VDI_IMAGE_BLOCK_SIZE.) */
    uint32_t        cbBlock;
    /** Number of blocks. */
    uint32_t        cBlocks;
    /** Number of allocated blocks. */
    uint32_t        cBlocksAllocated;
    /** UUID of image. */
    RTUUID          uuidCreate;
    /** UUID of image's last modification. */
    RTUUID          uuidModify;
    /** Only for secondary images - UUID of primary image. */
    RTUUID          uuidLinkage;
} VDIHEADER0, *PVDIHEADER0;
#pragma pack()

/**
 * Header to be stored in image file, VDI_IMAGE_VERSION_MAJOR = 1.
 * Prepended by VDIPREHEADER.
 *
 * Compared with VDIHEADER0 it adds cbHeader, explicit offBlocks/offData
 * offsets, u32Translation, cbBlockExtra and uuidParentModify.
 * Declared under pack(1).
 */
#pragma pack(1)
typedef struct VDIHEADER1
{
    /** Size of this structure in bytes. */
    uint32_t        cbHeader;
    /** The image type (VDI_IMAGE_TYPE_*). */
    uint32_t        u32Type;
    /** Image flags (VDI_IMAGE_FLAGS_*). */
    uint32_t        fFlags;
    /** Image comment. (UTF-8) */
    char            szComment[VDI_IMAGE_COMMENT_SIZE];
    /** Offset of Blocks array from the beginning of image file.
     * Should be sector-aligned for HDD access optimization. */
    uint32_t        offBlocks;
    /** Offset of image data from the beginning of image file.
     * Should be sector-aligned for HDD access optimization. */
    uint32_t        offData;
    /** Image geometry. */
    VDIDISKGEOMETRY Geometry;
    /** BIOS HDD translation mode, see PDMBIOSTRANSLATION. */
    uint32_t        u32Translation;
    /** Size of disk (in bytes). */
    uint64_t        cbDisk;
    /** Block size. (For instance VDI_IMAGE_BLOCK_SIZE.) Should be a power of 2! */
    uint32_t        cbBlock;
    /** Size of additional service information of every data block.
     * Prepended before block data. May be 0.
     * Should be a power of 2 and sector-aligned for optimization reasons. */
    uint32_t        cbBlockExtra;
    /** Number of blocks. */
    uint32_t        cBlocks;
    /** Number of allocated blocks. */
    uint32_t        cBlocksAllocated;
    /** UUID of image. */
    RTUUID          uuidCreate;
    /** UUID of image's last modification. */
    RTUUID          uuidModify;
    /** Only for secondary images - UUID of previous image. */
    RTUUID          uuidLinkage;
    /** Only for secondary images - UUID of previous image's last modification. */
    RTUUID          uuidParentModify;
} VDIHEADER1, *PVDIHEADER1;
#pragma pack()

/**
 * Header structure for all versions.
 *
 * The major part of uVersion (see GET_MAJOR_HEADER_VERSION) tells which
 * union member is valid: 0 selects u.v0, 1 selects u.v1. The getImage*()
 * accessors hide this choice from the rest of the code.
 */
typedef struct VDIHEADER
{
    /** Combined version: major in the upper 16 bits, minor in the lower 16. */
    unsigned        uVersion;
    union
    {
        VDIHEADER0    v0;
        VDIHEADER1    v1;
    } u;
} VDIHEADER, *PVDIHEADER;

/** Block 'pointer'. */
typedef uint32_t    VDIIMAGEBLOCKPOINTER;
/** Pointer to a block 'pointer'. */
typedef VDIIMAGEBLOCKPOINTER *PVDIIMAGEBLOCKPOINTER;


/**
 * Default image block size, may be changed by setBlockSize/getBlockSize.
 *
 * Note: for speed reasons block size should be a power of 2 !
 */
#define _1M                     0x00100000
#define VDI_IMAGE_DEFAULT_BLOCK_SIZE            _1M

/**
 * Mask with only bit number x set (x in 0..31).
 *
 * The constant is unsigned so that BIT(31) is well defined; shifting a
 * signed 1 into the sign bit is undefined behavior, and the negative result
 * would sign-extend when shifted right or widened.
 */
#define BIT(x) (1U << (x))
/**
 * fModified bit flags.
 */
#define VDI_IMAGE_MODIFIED_FLAG                 BIT(0)
#define VDI_IMAGE_MODIFIED_FIRST                BIT(1)
#define VDI_IMAGE_MODIFIED_DISABLE_UUID_UPDATE  BIT(2)

/**
 * Image structure
 *
 * In-memory descriptor of one opened image file. Descriptors are linked
 * through pPrev/pNext into a chain of parent and child images.
 */
typedef struct VDIIMAGEDESC
{
    /** Link to parent image descriptor, if any. */
    struct VDIIMAGEDESC    *pPrev;
    /** Link to child image descriptor, if any. */
    struct VDIIMAGEDESC    *pNext;
    /** File handle. */
    RTFILE                  File;
    /** True if the image is operating in readonly mode. */
    bool                    fReadOnly;
    /** Image open flags, VDI_OPEN_FLAGS_*. */
    unsigned                fOpen;
    /** Image pre-header. */
    VDIPREHEADER            PreHeader;
    /** Image header. */
    VDIHEADER               Header;
    /** Pointer to a block array. */
    PVDIIMAGEBLOCKPOINTER   paBlocks;
    /** fFlags copy from image header, for speed optimization. */
    unsigned                fFlags;
    /** Start offset of block array in image file, here for speed optimization. */
    unsigned                offStartBlocks;
    /** Start offset of data in image file, here for speed optimization. */
    unsigned                offStartData;
    /** Block mask for getting the offset into a block from a byte hdd offset. */
    unsigned                uBlockMask;
    /** Block shift value for converting byte hdd offset into paBlock index. */
    unsigned                uShiftOffset2Index;
    /** Block shift value for converting block index into offset in image. */
    unsigned                uShiftIndex2Offset;
    /** Offset of data from the beginning of block. */
    unsigned                offStartBlockData;
    /** Image is modified flags (VDI_IMAGE_MODIFIED*). */
    unsigned                fModified;
    /** Container filename. (UTF-8)
     * @todo Make this variable length to save a bunch of bytes. (low prio) */
    char                    szFilename[RTPATH_MAX];
} VDIIMAGEDESC, *PVDIIMAGEDESC;

/**
 * Default work buffer size, may be changed by setBufferSize() method.
 *
 * For best speed performance it must be equal to image block size;
 * the default is therefore the default image block size.
 */
#define VDIDISK_DEFAULT_BUFFER_SIZE   (VDI_IMAGE_DEFAULT_BLOCK_SIZE)

/** VDIDISK Signature (value expected in VDIDISK::u32Signature). */
#define VDIDISK_SIGNATURE (0xbedafeda)

/**
 * VBox HDD Container main structure, private part.
 *
 * Describes one virtual disk as a chain of opened images running from
 * pBase to pLast.
 */
struct VDIDISK
{
    /** Structure signature (VDIDISK_SIGNATURE). */
    uint32_t        u32Signature;

    /** Number of opened images. */
    unsigned        cImages;

    /** Base image. */
    PVDIIMAGEDESC   pBase;

    /** Last opened image in the chain.
     * The same as pBase if only one image is used or the last opened diff image. */
    PVDIIMAGEDESC   pLast;

    /** Default block size for newly created images. */
    unsigned        cbBlock;

    /** Working buffer size, allocated only while committing data,
     * copying block from primary image to secondary and saving previously
     * zero block. Buffer deallocated after operation complete.
     * @remark  For best performance buffer size must be equal to image's
     *          block size, however it may be decreased for memory saving.
     */
    unsigned        cbBuf;

    /** Flag whether zero writes should be handled normally or optimized
     * away if possible. */
    bool            fHonorZeroWrites;

    /* The two members below belong to the original VirtualBox driver model
     * and are disabled in this wrapper. */
    /** The media interface. */
    //PDMIMEDIA       IMedia;
    /** Pointer to the driver instance. */
    //PPDMDRVINS      pDrvIns;
};
typedef struct VDIDISK VDIDISK;
/** Pointer to a VDIDISK. */
typedef VDIDISK *PVDIDISK;


/**
 * Block marked as free is not allocated in image file, read from this
 * block may return any random data.
 */
#define VDI_IMAGE_BLOCK_FREE   ((VDIIMAGEBLOCKPOINTER)~0)

/**
 * Block marked as zero is not allocated in image file, read from this
 * block returns zeroes.
 */
#define VDI_IMAGE_BLOCK_ZERO   ((VDIIMAGEBLOCKPOINTER)~1)

/**
 * Block 'pointer' >= VDI_IMAGE_BLOCK_UNALLOCATED indicates block is not
 * allocated in image file.
 */
#define VDI_IMAGE_BLOCK_UNALLOCATED   (VDI_IMAGE_BLOCK_ZERO)
/**
 * True when block pointer @a bp refers to an allocated block.
 * The argument is parenthesised so that expressions built from operators
 * with lower precedence than '<' (for instance `x & mask`) are compared as
 * a whole.
 */
#define IS_VDI_IMAGE_BLOCK_ALLOCATED(bp)   ((bp) < VDI_IMAGE_BLOCK_UNALLOCATED)

/** Major/minor version of a header, taken from its uVersion member. */
#define GET_MAJOR_HEADER_VERSION(ph) (VDI_GET_VERSION_MAJOR((ph)->uVersion))
#define GET_MINOR_HEADER_VERSION(ph) (VDI_GET_VERSION_MINOR((ph)->uVersion))

/** Get major version from combined version. */
#define VDI_GET_VERSION_MAJOR(uVer)    ((uVer) >> 16)
/** Get minor version from combined version. */
#define VDI_GET_VERSION_MINOR(uVer)    ((uVer) & 0xffff)


/* WARNING: This implementation ASSUMES little endian. */



/** @name Misc. Status Codes
 * Failures are negative (VERR_*); VINF_SUCCESS is zero.
 * @{
 */
/** Success. */
#define VINF_SUCCESS                        0

/** General failure - DON'T USE THIS!!!
 * (aka SUPDRV_ERR_GENERAL_FAILURE) */
#define VERR_GENERAL_FAILURE                (-1)
/** Invalid parameter.
 * (aka SUPDRV_ERR_INVALID_PARAM) */
#define VERR_INVALID_PARAMETER              (-2)
/** Invalid magic or cookie.
 * (aka SUPDRV_ERR_INVALID_MAGIC) */
#define VERR_INVALID_MAGIC                  (-3)
/** Invalid loader handle.
 * (aka SUPDRV_ERR_INVALID_HANDLE) */
#define VERR_INVALID_HANDLE                 (-4)
/** Failed to lock the address range.
 * (aka SUPDRV_ERR_INVALID_HANDLE)
 * NOTE(review): the "aka" name repeats the one given for VERR_INVALID_HANDLE;
 * possibly a copy-paste slip - verify against the original sources. */
#define VERR_LOCK_FAILED                    (-5)
/** Invalid memory pointer.
 * (aka SUPDRV_ERR_INVALID_POINTER) */
#define VERR_INVALID_POINTER                (-6)
/** Failed to patch the IDT.
 * (aka SUPDRV_ERR_IDT_FAILED) */
#define VERR_IDT_FAILED                     (-7)
/** Memory allocation failed.
 * (aka SUPDRV_ERR_NO_MEMORY) */
#define VERR_NO_MEMORY                      (-8)
/** Already loaded.
 * (aka SUPDRV_ERR_ALREADY_LOADED) */
#define VERR_ALREADY_LOADED                 (-9)
/** Permission denied.
 * (aka SUPDRV_ERR_PERMISSION_DENIED) */
#define VERR_PERMISSION_DENIED              (-10)
/** Version mismatch.
 * (aka SUPDRV_ERR_VERSION_MISMATCH) */
#define VERR_VERSION_MISMATCH               (-11)
/** The request function is not implemented. */
#define VERR_NOT_IMPLEMENTED                (-12)

/** Not supported. */
#define VERR_NOT_SUPPORTED                  (-37)
/** @} */

/** @name VBox HDD Container (VDI) Status Codes
 * All are negative failure codes except VINF_VDI_BLOCK_FREE, which is
 * positive and therefore counts as success for RT_SUCCESS().
 * @{
 */
/** Invalid image file header. */
#define VERR_VDI_INVALID_HEADER                     (-3200)
/** Invalid image file header: invalid signature. */
#define VERR_VDI_INVALID_SIGNATURE                  (-3201)
/** Invalid image file header: invalid version. */
#define VERR_VDI_UNSUPPORTED_VERSION                (-3202)
/** Invalid image type. */
#define VERR_VDI_INVALID_TYPE                       (-3203)
/** Invalid image flags. */
#define VERR_VDI_INVALID_FLAGS                      (-3204)
/** Operation can't be done in current HDD container state. */
#define VERR_VDI_INVALID_STATE                      (-3205)
/** Differencing image can't be used with current base image. */
#define VERR_VDI_WRONG_DIFF_IMAGE                   (-3206)
/** Two or more images of one HDD have different versions. */
#define VERR_VDI_IMAGES_VERSION_MISMATCH            (-3207)
/** Differencing and parent images can't be used together due to UUID. */
#define VERR_VDI_IMAGES_UUID_MISMATCH               (-3208)
/** No differencing images to commit. */
#define VERR_VDI_NO_DIFF_IMAGES                     (-3209)
/** Virtual HDD is not opened. */
#define VERR_VDI_NOT_OPENED                         (-3210)
/** Requested image is not opened. */
#define VERR_VDI_IMAGE_NOT_FOUND                    (-3211)
/** Image is read-only. */
#define VERR_VDI_IMAGE_READ_ONLY                    (-3212)
/** Comment string is too long. */
#define VERR_VDI_COMMENT_TOO_LONG                   (-3213)
/** Geometry hasn't been set. */
#define VERR_VDI_GEOMETRY_NOT_SET                   (-3214)
/** No data for this block in image (informational, positive). */
#define VINF_VDI_BLOCK_FREE                         3215
/** Configuration value not found. */
#define VERR_VDI_VALUE_NOT_FOUND                    (-3216)
/** @} */

/** @name Common File/Disk/Pipe/etc Status Codes
 * @{
 */
/** Unresolved (unknown) file i/o error. */
#define VERR_FILE_IO_ERROR                  (-100)
/** File/Device open failed. */
#define VERR_OPEN_FAILED                    (-101)
/** File not found. */
#define VERR_FILE_NOT_FOUND                 (-102)
/** Path not found. */
#define VERR_PATH_NOT_FOUND                 (-103)
/** Invalid (malformed) file/path name. */
#define VERR_INVALID_NAME                   (-104)
/** File/Device already exists. */
#define VERR_ALREADY_EXISTS                 (-105)
/** Too many open files. */
#define VERR_TOO_MANY_OPEN_FILES            (-106)
/** Seek error. */
#define VERR_SEEK                           (-107)
/** Seek below file start. */
#define VERR_NEGATIVE_SEEK                  (-108)
/** Trying to seek on device. */
#define VERR_SEEK_ON_DEVICE                 (-109)
/** Reached the end of the file (failure variant, negative). */
#define VERR_EOF                            (-110)
/** Reached the end of the file (informational variant, positive). */
#define VINF_EOF                            110
/** Generic file read error. */
#define VERR_READ_ERROR                     (-111)
/** Generic file write error. */
#define VERR_WRITE_ERROR                    (-112)
/** Write protect error. */
#define VERR_WRITE_PROTECT                  (-113)
/** Sharing violation, file is being used by another process. */
#define VERR_SHARING_VIOLATION              (-114)
/** Unable to lock a region of a file. */
#define VERR_FILE_LOCK_FAILED               (-115)
/** File access error, another process has locked a portion of the file. */
#define VERR_FILE_LOCK_VIOLATION            (-116)
/** File or directory can't be created. */
#define VERR_CANT_CREATE                    (-117)
/** Directory can't be deleted. */
#define VERR_CANT_DELETE_DIRECTORY          (-118)
/** Can't move file to another disk. */
#define VERR_NOT_SAME_DEVICE                (-119)
/** The filename or extension is too long. */
#define VERR_FILENAME_TOO_LONG              (-120)
/** Media not present in drive. */
#define VERR_MEDIA_NOT_PRESENT              (-121)
/** The type of media was not recognized. Not formatted? */
#define VERR_MEDIA_NOT_RECOGNIZED           (-122)
/** Can't unlock - region was not locked. */
#define VERR_FILE_NOT_LOCKED                (-123)
/** Unrecoverable error: lock was lost. */
#define VERR_FILE_LOCK_LOST                 (-124)
/** Can't delete directory with files. */
#define VERR_DIR_NOT_EMPTY                  (-125)
/** A directory operation was attempted on a non-directory object. */
#define VERR_NOT_A_DIRECTORY                (-126)
/** A non-directory operation was attempted on a directory object. */
#define VERR_IS_A_DIRECTORY                 (-127)
/** Tried to grow a file beyond the limit imposed by the process or the filesystem. */
#define VERR_FILE_TOO_BIG                   (-128)
/** @} */


/** Current VDI image major version. */
#define VDI_IMAGE_VERSION_MAJOR     (0x0001)
/** Current VDI image minor version. */
#define VDI_IMAGE_VERSION_MINOR     (0x0001)
/** Current VDI image version: major in the upper 16 bits, minor in the lower 16. */
#define VDI_IMAGE_VERSION           ((VDI_IMAGE_VERSION_MAJOR << 16) | VDI_IMAGE_VERSION_MINOR)

/* The two macros below repeat, token for token, definitions that already
 * appear earlier in this file. */
/** Get VDI major version from combined version. */
#define VDI_GET_VERSION_MAJOR(uVer)    ((uVer) >> 16)
/** Get VDI minor version from combined version. */
#define VDI_GET_VERSION_MINOR(uVer)    ((uVer) & 0xffff)


/** @name VBox HDD container image flags
 * Generic container flags (VD_ prefix). Note that the zero-expand bit here
 * (0x0100) differs from the VDI-specific VDI_IMAGE_FLAGS_ZERO_EXPAND (0x01).
 * @{
 */
/** No flags. */
#define VD_IMAGE_FLAGS_NONE                 (0)
/** VMDK: Split image into 2GB extents. */
#define VD_VMDK_IMAGE_FLAGS_SPLIT_2G        (0x0001)
/** VMDK: Raw disk image (giving access to a number of host partitions). */
#define VD_VMDK_IMAGE_FLAGS_RAWDISK         (0x0002)
/** VDI: Fill new blocks with zeroes while expanding image file. Only valid
 * for newly created images, never set for opened existing images. */
#define VD_VDI_IMAGE_FLAGS_ZERO_EXPAND      (0x0100)

/** Mask of valid image flags for VMDK. */
#define VD_VMDK_IMAGE_FLAGS_MASK            (VD_IMAGE_FLAGS_NONE | VD_VMDK_IMAGE_FLAGS_SPLIT_2G | VD_VMDK_IMAGE_FLAGS_RAWDISK)

/** Mask of valid image flags for VDI. */
#define VD_VDI_IMAGE_FLAGS_MASK             (VD_IMAGE_FLAGS_NONE | VD_VDI_IMAGE_FLAGS_ZERO_EXPAND)

/** Default image flags. */
#define VD_IMAGE_FLAGS_DEFAULT              (VD_IMAGE_FLAGS_NONE)
/** @} */

/** @name VDI image flags
 * Values for the fFlags field of the image headers.
 * @{  */
/** No flags. */
#define VDI_IMAGE_FLAGS_NONE          (0x00)
/** Fill new blocks with zeroes while expanding image file. */
#define VDI_IMAGE_FLAGS_ZERO_EXPAND   (0x01)

/** Mask of valid image flags. */
#define VDI_IMAGE_FLAGS_MASK          (VDI_IMAGE_FLAGS_NONE | VDI_IMAGE_FLAGS_ZERO_EXPAND)

/** Default image flags. */
#define VDI_IMAGE_FLAGS_DEFAULT       (VDI_IMAGE_FLAGS_NONE)
/** @} */

/** @name VDI image open mode flags
 * @{
 */
/** Try to open image in read/write exclusive access mode if possible, or in read-only otherwise. */
#define VDI_OPEN_FLAGS_NORMAL     (0)
/** Open image in read-only mode with sharing access with others. */
#define VDI_OPEN_FLAGS_READONLY   (1)
/** Mask of valid flags (evaluates to 1, since NORMAL is 0). */
#define VDI_OPEN_FLAGS_MASK (VDI_OPEN_FLAGS_NORMAL | VDI_OPEN_FLAGS_READONLY)
/** @}*/

/** @name Open flags
 * The flag word is split into fields: access (bits 0-1), sharing mode
 * (bits 4-6), action (bits 8-9) and miscellaneous options (bits 12-15).
 * @{ */
/** Open the file with read access. */
#define RTFILE_O_READ               0x00000001
/** Open the file with write access. */
#define RTFILE_O_WRITE              0x00000002
/** Open the file with read & write access. */
#define RTFILE_O_READWRITE          0x00000003
/** The file access mask.
 * @remark The value 0 is invalid. */
#define RTFILE_O_ACCESS_MASK        0x00000003



/** Sharing mode: deny none (the default mode). */
#define RTFILE_O_DENY_NONE          0x00000000
/** Sharing mode: deny read. */
#define RTFILE_O_DENY_READ          0x00000010
/** Sharing mode: deny write. */
#define RTFILE_O_DENY_WRITE         0x00000020
/** Sharing mode: deny read and write. */
#define RTFILE_O_DENY_READWRITE     0x00000030
/** Sharing mode: deny all. */
#define RTFILE_O_DENY_ALL           RTFILE_O_DENY_READWRITE
/** Sharing mode: do NOT deny delete (NT).
 * @remark  This might not be implemented on all platforms,
 *          and will be defaulted & ignored on those.
 */
#define RTFILE_O_DENY_NOT_DELETE    0x00000040
/** Sharing mode mask. */
#define RTFILE_O_DENY_MASK          0x00000070

/** Action: Open an existing file (the default action). */
#define RTFILE_O_OPEN               0x00000000
/** Action: Create a new file or open an existing one. */
#define RTFILE_O_OPEN_CREATE        0x00000100
/** Action: Create a new file. */
#define RTFILE_O_CREATE             0x00000200
/** Action: Create a new file or replace an existing one. */
#define RTFILE_O_CREATE_REPLACE     0x00000300
/** Action mask. */
#define RTFILE_O_ACTION_MASK        0x00000300

/** Truncate the file.
 * @remark  This will not truncate files opened for read-only.
 * @remark  The truncation doesn't have to be atomic, so anyone
 *          else opening the file may be racing us. The caller is
 *          responsible for not causing this race. */
#define RTFILE_O_TRUNCATE           0x00001000
/** Make the handle inheritable on RTProcessCreate(/exec). */
#define RTFILE_O_INHERIT            0x00002000
/** Open file in non-blocking mode - non-portable.
 * @remark  This flag may not be supported on all platforms, in which
 *          case it's considered an invalid parameter.
 */
#define RTFILE_O_NON_BLOCK          0x00004000
/** Write through directly to disk. Workaround to avoid iSCSI
 * initiator deadlocks on Windows hosts.
 * @remark  This might not be implemented on all platforms,
 *          and will be ignored on those.
 */
#define RTFILE_O_WRITE_THROUGH      0x00008000

/** Mask of all valid flags.
 * @remark  This doesn't validate the access mode properly.
 */
#define RTFILE_O_VALID_MASK         0x0000F333
/** @} */



/** Default file permissions for newly created files.
 * Uses the S_IRUSR / S_IWUSR macros when they are defined, otherwise the
 * literal octal mode 00600. */
#if defined(S_IRUSR) && defined(S_IWUSR)
# define RT_FILE_PERMISSION  (S_IRUSR | S_IWUSR)
#else
# define RT_FILE_PERMISSION  (00600)
#endif


/** @name Lock flags (bit masks).
 * Bit 0 selects read (0) or write (1) access; bit 1 selects immediate (0)
 * or waiting (1) behaviour. The two "0x00" values are the defaults.
 * @{ */
/** Read access, can be shared with others. */
#define RTFILE_LOCK_READ            0x00
/** Write access, one at a time. */
#define RTFILE_LOCK_WRITE           0x01
/** Don't wait for other locks to be released. */
#define RTFILE_LOCK_IMMEDIATELY     0x00
/** Wait till conflicting locks have been released. */
#define RTFILE_LOCK_WAIT            0x02
/** Valid flags mask */
#define RTFILE_LOCK_MASK            0x03
/** @} */

/** @name Seek flags.
 * @{ */
/** Seek from the start of the file. */
#define RTFILE_SEEK_BEGIN     0x00
/** Seek from the current file position. */
#define RTFILE_SEEK_CURRENT   0x01
/** Seek from the end of the file. */
#define RTFILE_SEEK_END       0x02
/** @internal Lowest valid seek method. */
#define RTFILE_SEEK_FIRST     RTFILE_SEEK_BEGIN
/** @internal Highest valid seek method. */
#define RTFILE_SEEK_LAST      RTFILE_SEEK_END
/** @} */


/**
 * Allocates a zero-filled memory block.
 *
 * @returns Pointer to the zeroed block, to be released with free(), or NULL
 *          when size is negative or the allocation fails.
 * @param   size    Number of bytes to allocate.
 */
static void* RTMemAllocZ(int size)
{
  /* A negative size would turn into a huge size_t request. */
  if (size < 0)
    return NULL;

  /* calloc() zero-fills and reports failure as NULL, so nothing is ever
   * written through a NULL pointer. */
  return calloc(1, (size_t)size);
}
/* Temporary allocations use the same allocator as ordinary ones. */
#define RTMemTmpAllocZ RTMemAllocZ
#define RTMemTmpAlloc malloc
#define RTMemTmpFree free

/* Plain malloc/free; unlike RTMemAllocZ the memory is not zero-filled. */
#define RTMemAlloc malloc
#define RTMemFree free
/* Only checks for NULL; it cannot detect dangling or wild pointers. */
#define VALID_PTR(x) ((x) !=  NULL)



/**
 * Fills a buffer with "random" bytes.
 *
 * Despite its name this stand-in is fully deterministic: the byte at
 * offset i is set to the value i (converted to char).
 *
 * @param   ptr     Buffer to fill.
 * @param   len     Number of bytes to write; nothing is written if <= 0.
 */
static void RTRandBytes(void* ptr, int len)
{
  char *pchOut = (char *)ptr;
  int off = 0;

  while (off < len)
  {
    pchOut[off] = off;
    off++;
  }
}

/**
 * Generates a new UUID value.
 *
 * The UUID is filled by RTRandBytes(); afterwards the top two bits of
 * u16ClockSeq are forced to binary 10 and the top nibble of
 * u16TimeHiAndVersion is forced to 4.
 *
 * @returns VINF_SUCCESS, or VERR_INVALID_PARAMETER if pUuid is NULL.
 * @param   pUuid           Where to store generated uuid.
 */
RTDECL(int)  RTUuidCreate(PRTUUID pUuid)
{
    uint16_t u16Seq;
    uint16_t u16Hi;

    /* Reject a missing output buffer. */
    AssertReturn(pUuid, VERR_INVALID_PARAMETER);

    RTRandBytes(pUuid, sizeof(RTUUID));

    /* Keep the low 14 bits of the clock sequence, set the top two to 10b. */
    u16Seq = pUuid->Gen.u16ClockSeq & 0x3fff;
    pUuid->Gen.u16ClockSeq = u16Seq | 0x8000;

    /* Keep the low 12 bits, set the top nibble to 4. */
    u16Hi = pUuid->Gen.u16TimeHiAndVersion & 0x0fff;
    pUuid->Gen.u16TimeHiAndVersion = u16Hi | 0x4000;

    return VINF_SUCCESS;
}


/**
 * Makes a null UUID value (all 128 bits zero).
 *
 * @returns VINF_SUCCESS, or VERR_INVALID_PARAMETER if pUuid is NULL.
 * @param   pUuid           Where to store generated null uuid.
 */
RTDECL(int)  RTUuidClear(PRTUUID pUuid)
{
    unsigned iHalf;

    AssertReturn(pUuid, VERR_INVALID_PARAMETER);

    /* Zero both 64-bit halves. */
    for (iHalf = 0; iHalf < 2; iHalf++)
        pUuid->au64[iHalf] = 0;

    return VINF_SUCCESS;
}

/**
 * Checks if UUID is null.
 *
 * @returns 1 if both 64-bit halves are zero, 0 otherwise. For a NULL
 *          pointer VERR_INVALID_PARAMETER is returned, which is non-zero
 *          and so also reads as "true".
 * @param   pUuid           uuid to check.
 */
RTDECL(int)  RTUuidIsNull(PCRTUUID pUuid)
{
    AssertReturn(pUuid, VERR_INVALID_PARAMETER);

    if (pUuid->au64[0] != 0)
        return 0;
    if (pUuid->au64[1] != 0)
        return 0;
    return 1;
}


/**
 * Compares two UUID values.
 *
 * The fields of the Gen view are compared in declaration order: time-low,
 * time-mid, time-hi-and-version, clock sequence, then the six node bytes.
 * A NULL pointer is treated like a null UUID.
 *
 * @returns 0 if equal, -1 if the first is smaller, 1 if it is larger.
 * @param   pUuid1          First value to compare.
 * @param   pUuid2          Second value to compare.
 */
int  RTUuidCompare(PCRTUUID pUuid1, PCRTUUID pUuid2)
{
    unsigned iNode;

    /* Same object; this also covers two NULL pointers. */
    if (pUuid1 == pUuid2)
        return 0;

    /* Exactly one operand may still be NULL here. */
    if (!pUuid1)
    {
        if (RTUuidIsNull(pUuid2))
            return 0;
        return -1;
    }
    if (!pUuid2)
    {
        if (RTUuidIsNull(pUuid1))
            return 0;
        return 1;
    }

    /* Scalar fields, most significant first. */
    if (pUuid1->Gen.u32TimeLow != pUuid2->Gen.u32TimeLow)
        return (pUuid1->Gen.u32TimeLow < pUuid2->Gen.u32TimeLow) ? -1 : 1;
    if (pUuid1->Gen.u16TimeMid != pUuid2->Gen.u16TimeMid)
        return (pUuid1->Gen.u16TimeMid < pUuid2->Gen.u16TimeMid) ? -1 : 1;
    if (pUuid1->Gen.u16TimeHiAndVersion != pUuid2->Gen.u16TimeHiAndVersion)
        return (pUuid1->Gen.u16TimeHiAndVersion < pUuid2->Gen.u16TimeHiAndVersion) ? -1 : 1;
    if (pUuid1->Gen.u16ClockSeq != pUuid2->Gen.u16ClockSeq)
        return (pUuid1->Gen.u16ClockSeq < pUuid2->Gen.u16ClockSeq) ? -1 : 1;

    /* Node bytes; the first differing byte decides. */
    for (iNode = 0; iNode < sizeof(pUuid1->Gen.au8Node); iNode++)
    {
        if (pUuid1->Gen.au8Node[iNode] != pUuid2->Gen.au8Node[iNode])
            return (pUuid1->Gen.au8Node[iNode] < pUuid2->Gen.au8Node[iNode]) ? -1 : 1;
    }

    return 0;
}

/*******************************************************************************
*   Internal Functions for header access                                       *
*******************************************************************************/
/** Returns the image type stored in the header for major version 0 or 1.
 *  Any other major version hits AssertFailed(); if that does not abort,
 *  (VDIIMAGETYPE)0 is returned. */
DECLINLINE(VDIIMAGETYPE) getImageType(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return (VDIIMAGETYPE)ph->u.v0.u32Type;
    if (uMajor == 1)
        return (VDIIMAGETYPE)ph->u.v1.u32Type;

    AssertFailed();
    return (VDIIMAGETYPE)0;
}

/** Returns the fFlags field of the header for major version 0 or 1.
 *  Any other major version hits AssertFailed(); the fallback value is 0. */
DECLINLINE(unsigned) getImageFlags(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return ph->u.v0.fFlags;
    if (uMajor == 1)
        return ph->u.v1.fFlags;

    AssertFailed();
    return 0;
}

/** Returns a pointer to the comment buffer inside the header (not a copy)
 *  for major version 0 or 1. Any other major version hits AssertFailed();
 *  the fallback value is NULL. */
DECLINLINE(char *) getImageComment(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return &ph->u.v0.szComment[0];
    if (uMajor == 1)
        return &ph->u.v1.szComment[0];

    AssertFailed();
    return NULL;
}

/** Returns the file offset of the block array.
 *  Version 0 has no offset field, so the array is taken to follow the
 *  pre-header and header directly; version 1 stores the offset in offBlocks.
 *  Any other major version hits AssertFailed(); the fallback value is 0. */
DECLINLINE(unsigned) getImageBlocksOffset(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return (sizeof(VDIPREHEADER) + sizeof(VDIHEADER0));
    if (uMajor == 1)
        return ph->u.v1.offBlocks;

    AssertFailed();
    return 0;
}

/** Returns the file offset of the first data block.
 *  Version 0 computes it as pre-header + header + block array
 *  (cBlocks entries); version 1 stores it in offData.
 *  Any other major version hits AssertFailed(); the fallback value is 0. */
DECLINLINE(unsigned) getImageDataOffset(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return sizeof(VDIPREHEADER) + sizeof(VDIHEADER0) +
               (ph->u.v0.cBlocks * sizeof(VDIIMAGEBLOCKPOINTER));
    if (uMajor == 1)
        return ph->u.v1.offData;

    AssertFailed();
    return 0;
}

/** Returns a pointer to the geometry embedded in the header (not a copy)
 *  for major version 0 or 1. Any other major version hits AssertFailed();
 *  the fallback value is NULL. */
DECLINLINE(PVDIDISKGEOMETRY) getImageGeometry(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return &ph->u.v0.Geometry;
    if (uMajor == 1)
        return &ph->u.v1.Geometry;

    AssertFailed();
    return NULL;
}

/** Returns the BIOS translation mode.
 *  Version 0 headers have no such field and always report
 *  PDMBIOSTRANSLATION_AUTO; version 1 returns u32Translation.
 *  Any other major version hits AssertFailed(); the fallback value is
 *  PDMBIOSTRANSLATION_NONE. */
DECLINLINE(PDMBIOSTRANSLATION) getImageTranslation(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return PDMBIOSTRANSLATION_AUTO;
    if (uMajor == 1)
        return (PDMBIOSTRANSLATION)ph->u.v1.u32Translation;

    AssertFailed();
    return PDMBIOSTRANSLATION_NONE;
}

/** Returns the disk size in bytes (cbDisk) for major version 0 or 1.
 *  Any other major version hits AssertFailed(); the fallback value is 0. */
DECLINLINE(uint64_t) getImageDiskSize(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return ph->u.v0.cbDisk;
    if (uMajor == 1)
        return ph->u.v1.cbDisk;

    AssertFailed();
    return 0;
}

/** Returns the block size (cbBlock) for major version 0 or 1.
 *  Any other major version hits AssertFailed(); the fallback value is 0. */
DECLINLINE(unsigned) getImageBlockSize(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return ph->u.v0.cbBlock;
    if (uMajor == 1)
        return ph->u.v1.cbBlock;

    AssertFailed();
    return 0;
}

/** Returns the size of the per-block extra data.
 *  Version 0 headers have no such field and always report 0; version 1
 *  returns cbBlockExtra. Any other major version hits AssertFailed(); the
 *  fallback value is 0. */
DECLINLINE(unsigned) getImageExtraBlockSize(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return 0;
    if (uMajor == 1)
        return ph->u.v1.cbBlockExtra;

    AssertFailed();
    return 0;
}

/** Returns the total number of blocks (cBlocks) for major version 0 or 1.
 *  Any other major version hits AssertFailed(); the fallback value is 0. */
DECLINLINE(unsigned) getImageBlocks(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return ph->u.v0.cBlocks;
    if (uMajor == 1)
        return ph->u.v1.cBlocks;

    AssertFailed();
    return 0;
}

/** Returns the number of allocated blocks (cBlocksAllocated) for major
 *  version 0 or 1. Any other major version hits AssertFailed(); the
 *  fallback value is 0. */
DECLINLINE(unsigned) getImageBlocksAllocated(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return ph->u.v0.cBlocksAllocated;
    if (uMajor == 1)
        return ph->u.v1.cBlocksAllocated;

    AssertFailed();
    return 0;
}

/** Stores cBlocks in the cBlocksAllocated field of the header for major
 *  version 0 or 1. Any other major version hits AssertFailed() and nothing
 *  is written. */
DECLINLINE(void) setImageBlocksAllocated(PVDIHEADER ph, unsigned cBlocks)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
    {
        ph->u.v0.cBlocksAllocated = cBlocks;
        return;
    }
    if (uMajor == 1)
    {
        ph->u.v1.cBlocksAllocated = cBlocks;
        return;
    }

    AssertFailed();
}

/** Returns a pointer to the uuidCreate field inside the header for major
 *  version 0 or 1. Any other major version hits AssertFailed(); the
 *  fallback value is NULL. */
DECLINLINE(PRTUUID) getImageCreationUUID(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return &ph->u.v0.uuidCreate;
    if (uMajor == 1)
        return &ph->u.v1.uuidCreate;

    AssertFailed();
    return NULL;
}

/** Returns a pointer to the uuidModify field inside the header for major
 *  version 0 or 1. Any other major version hits AssertFailed(); the
 *  fallback value is NULL. */
DECLINLINE(PRTUUID) getImageModificationUUID(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return &ph->u.v0.uuidModify;
    if (uMajor == 1)
        return &ph->u.v1.uuidModify;

    AssertFailed();
    return NULL;
}

/** Returns a pointer to the uuidLinkage field inside the header for major
 *  version 0 or 1. Any other major version hits AssertFailed(); the
 *  fallback value is NULL. */
DECLINLINE(PRTUUID) getImageParentUUID(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 0)
        return &ph->u.v0.uuidLinkage;
    if (uMajor == 1)
        return &ph->u.v1.uuidLinkage;

    AssertFailed();
    return NULL;
}

/** Returns a pointer to the uuidParentModify field inside the header.
 *  Only version 1 headers have this field; every other major version,
 *  including 0, hits AssertFailed() and the fallback value is NULL. */
DECLINLINE(PRTUUID) getImageParentModificationUUID(PVDIHEADER ph)
{
    const unsigned uMajor = GET_MAJOR_HEADER_VERSION(ph);

    if (uMajor == 1)
        return &ph->u.v1.uuidParentModify;

    AssertFailed();
    return NULL;
}


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/

/* Forward declarations of the file-local helpers. The comments below only
 * group them by name and signature; see each definition for the actual
 * contract. */

/* Declared through VBOXDDU_DECL, which makes them static in this file. */
VBOXDDU_DECL(void) vdiInitVDIDisk(PVDIDISK pDisk);
VBOXDDU_DECL(void) vdiFlushImage(PVDIIMAGEDESC pImage);

/* Progress callback type accepted by vdiCreateImage().
 * NOTE(review): the meaning of the three parameters is not visible here. */
typedef int (*PFNVMPROGRESS)(void*, unsigned int, void*);

/* Pre-header and header helpers. */
static unsigned getPowerOfTwo(unsigned uNumber);
static void vdiInitPreHeader(PVDIPREHEADER pPreHdr);
static int  vdiValidatePreHeader(PVDIPREHEADER pPreHdr);
static void vdiInitHeader(PVDIHEADER pHeader, VDIIMAGETYPE enmType, uint32_t fFlags,
                          const char *pszComment, uint64_t cbDisk, uint32_t cbBlock,
                          uint32_t cbBlockExtra);
static int  vdiValidateHeader(PVDIHEADER pHeader);
/* Image creation, opening, bookkeeping and closing. */
static int  vdiCreateImage(const char *pszFilename, VDIIMAGETYPE enmType, unsigned fFlags,
                          uint64_t cbSize, const char *pszComment, PVDIIMAGEDESC pParent,
                          PFNVMPROGRESS pfnProgress, void *pvUser);
static void vdiInitImageDesc(PVDIIMAGEDESC pImage);
static void vdiSetupImageDesc(PVDIIMAGEDESC pImage);
static int  vdiOpenImage(PVDIIMAGEDESC *ppImage, const char *pszFilename, unsigned fOpen,
                        PVDIIMAGEDESC pParent);
static int  vdiUpdateHeader(PVDIIMAGEDESC pImage);
static int  vdiUpdateBlockInfo(PVDIIMAGEDESC pImage, unsigned uBlock);
static void vdiSetModifiedFlag(PVDIIMAGEDESC pImage);
static void vdiResetModifiedFlag(PVDIIMAGEDESC pImage);
static void vdiCloseImage(PVDIIMAGEDESC pImage);
/* Block-level read/write helpers. */
static int  vdiReadInBlock(PVDIIMAGEDESC pImage, unsigned uBlock, unsigned offRead,
                           unsigned cbToRead, void *pvBuf);
static int  vdiFillBlockByZeroes(PVDIDISK pDisk, PVDIIMAGEDESC pImage, unsigned uBlock);
static int  vdiWriteInBlock(PVDIDISK pDisk, PVDIIMAGEDESC pImage, unsigned uBlock,
                            unsigned offWrite, unsigned cbToWrite, const void *pvBuf);
static int  vdiCopyBlock(PVDIDISK pDisk, PVDIIMAGEDESC pImage, unsigned uBlock);

/* Image chain maintenance on a VDIDISK. */
static void vdiAddImageToList(PVDIDISK pDisk, PVDIIMAGEDESC pImage);
static void vdiRemoveImageFromList(PVDIDISK pDisk, PVDIIMAGEDESC pImage);



/**
 * internal: base-2 exponent of an exact power of two.
 *
 * Returns 0 when uNumber is zero or is not a power of two. Note that 1
 * (2^0) also yields 0, so callers cannot tell it apart from the error case.
 */
static unsigned getPowerOfTwo(unsigned uNumber)
{
    unsigned cShift;

    if (!uNumber)
        return 0;

    /* Strip the trailing zero bits, counting how many were removed. */
    for (cShift = 0; !(uNumber & 1); cShift++)
        uNumber >>= 1;

    /* Exactly one set bit may remain for a genuine power of two. */
    if (uNumber != 1)
        return 0;
    return cShift;
}

/**
 * internal: init HDD preheader.
 *
 * Fills in the signature, the current image version and the file-info text.
 * @param   pPreHdr     Pre-header to initialize; all three fields are overwritten.
 */
static void vdiInitPreHeader(PVDIPREHEADER pPreHdr)
{
    pPreHdr->u32Signature = VDI_IMAGE_SIGNATURE;
    pPreHdr->u32Version = VDI_IMAGE_VERSION;
    memset(pPreHdr->szFileInfo, 0, sizeof(pPreHdr->szFileInfo));
    /* strncat() appends up to n characters and THEN a terminating NUL, so the
     * bound has to leave one byte of room to stay inside the array. */
    strncat(pPreHdr->szFileInfo, VDI_IMAGE_FILE_INFO, sizeof(pPreHdr->szFileInfo) - 1);
}

/**
 * internal: check HDD preheader.
 *
 * Accepts the current image version and the legacy value 0x00000002.
 * @returns VINF_SUCCESS, VERR_VDI_INVALID_SIGNATURE or VERR_VDI_UNSUPPORTED_VERSION.
 */
static int vdiValidatePreHeader(PVDIPREHEADER pPreHdr)
{
    if (pPreHdr->u32Signature != VDI_IMAGE_SIGNATURE)
        return VERR_VDI_INVALID_SIGNATURE;

    /* Either the current version or the old one is fine. */
    if (   pPreHdr->u32Version == VDI_IMAGE_VERSION
        || pPreHdr->u32Version == 0x00000002)
        return VINF_SUCCESS;

    return VERR_VDI_UNSUPPORTED_VERSION;
}

/**
 * internal: init HDD header. Always use latest header version.
 *
 * @param   pHeader      Assumes it was initially initialized to all zeros.
 * @param   enmType      Image type, stored as a 32-bit value.
 * @param   fFlags       Image flags, stored verbatim.
 * @param   pszComment   Optional comment (may be NULL). Text that does not fit
 *                       into the comment field is truncated.
 * @param   cbDisk       Virtual disk size in bytes.
 * @param   cbBlock      Block size in bytes; used as a divisor, so it must not be 0.
 * @param   cbBlockExtra Per-block extra data size, stored verbatim.
 */
static void vdiInitHeader(PVDIHEADER pHeader, VDIIMAGETYPE enmType, uint32_t fFlags,
                          const char *pszComment, uint64_t cbDisk, uint32_t cbBlock,
                          uint32_t cbBlockExtra)
{
    pHeader->uVersion = VDI_IMAGE_VERSION;
    pHeader->u.v1.cbHeader = sizeof(VDIHEADER1);
    pHeader->u.v1.u32Type = (uint32_t)enmType;
    pHeader->u.v1.fFlags = fFlags;
#ifdef VBOX_STRICT
    char achZero[VDI_IMAGE_COMMENT_SIZE] = {0};
    Assert(!memcmp(pHeader->u.v1.szComment, achZero, VDI_IMAGE_COMMENT_SIZE));
#endif
    pHeader->u.v1.szComment[0] = '\0';
    if (pszComment)
    {
        /* strlen() yields size_t; cast so the argument matches the %d conversion. */
        AssertMsg(strlen(pszComment) < sizeof(pHeader->u.v1.szComment),
                  ("HDD Comment is too long, cb=%d\n", (int)strlen(pszComment)));
        /* strncat() writes a NUL after the n copied characters, hence the -1:
         * an over-long comment is truncated instead of overrunning the field. */
        strncat(pHeader->u.v1.szComment, pszComment, sizeof(pHeader->u.v1.szComment) - 1);
    }

    /* Mark the geometry not-calculated. */
    pHeader->u.v1.Geometry.cCylinders = 0;
    pHeader->u.v1.Geometry.cHeads = 0;
    pHeader->u.v1.Geometry.cSectors = 0;
    pHeader->u.v1.Geometry.cbSector = VDI_GEOMETRY_SECTOR_SIZE;
    pHeader->u.v1.u32Translation = PDMBIOSTRANSLATION_AUTO;

    /* Block count is the disk size divided by the block size, rounded up. */
    pHeader->u.v1.cbDisk = cbDisk;
    pHeader->u.v1.cbBlock = cbBlock;
    pHeader->u.v1.cBlocks = (uint32_t)(cbDisk / cbBlock);
    if (cbDisk % cbBlock)
        pHeader->u.v1.cBlocks++;
    pHeader->u.v1.cbBlockExtra = cbBlockExtra;
    pHeader->u.v1.cBlocksAllocated = 0;

    /* Init offsets: block array follows the headers, data follows the block
     * array; both are aligned to the sector size. */
    pHeader->u.v1.offBlocks = RT_ALIGN_32(sizeof(VDIPREHEADER) + sizeof(VDIHEADER1), VDI_GEOMETRY_SECTOR_SIZE);
    pHeader->u.v1.offData = RT_ALIGN_32(pHeader->u.v1.offBlocks + (pHeader->u.v1.cBlocks * sizeof(VDIIMAGEBLOCKPOINTER)), VDI_GEOMETRY_SECTOR_SIZE);

    /* Init uuids: only the creation UUID is generated here, the rest are cleared. */
    RTUuidCreate(&pHeader->u.v1.uuidCreate);
    RTUuidClear(&pHeader->u.v1.uuidModify);
    RTUuidClear(&pHeader->u.v1.uuidLinkage);
    RTUuidClear(&pHeader->u.v1.uuidParentModify);
}

/**
 * internal: check HDD header.
 *
 * Runs the version-specific checks first (these return on the first failure),
 * then the checks shared by all versions (these log every failure and report
 * a single combined result at the end).
 *
 * @returns VINF_SUCCESS, VERR_VDI_INVALID_HEADER or VERR_VDI_UNSUPPORTED_VERSION.
 * @param   pHeader     Header to validate; uVersion selects the v0/v1 layout.
 *
 * NOTE(review): several LogRel calls pass sizeof-based (size_t) expressions to
 * %ld and the disk size to %lld; whether these match depends on the target's
 * type widths and on the getter return types - confirm.
 */
static int vdiValidateHeader(PVDIHEADER pHeader)
{
    /* Check version-dependent header parameters. */
    switch (GET_MAJOR_HEADER_VERSION(pHeader))
    {
        case 0:
        {
            /* Old header version. */
            break;
        }
        case 1:
        {
            /* Current header version. */

            /* The stored header size must cover at least the v1 structure. */
            if (pHeader->u.v1.cbHeader < sizeof(VDIHEADER1))
            {
                LogRel(("VDI: v1 header size wrong (%d < %ld)\n",
                       pHeader->u.v1.cbHeader, sizeof(VDIHEADER1)));
                return VERR_VDI_INVALID_HEADER;
            }

            /* The block array may not start inside the pre-header/header area. */
            if (getImageBlocksOffset(pHeader) < (sizeof(VDIPREHEADER) + sizeof(VDIHEADER1)))
            {
                LogRel(("VDI: v1 blocks offset wrong (%d < %ld)\n",
                       getImageBlocksOffset(pHeader), sizeof(VDIPREHEADER) + sizeof(VDIHEADER1)));
                return VERR_VDI_INVALID_HEADER;
            }

            /* The data area may not start before the end of the block array. */
            if (getImageDataOffset(pHeader) < (getImageBlocksOffset(pHeader) + getImageBlocks(pHeader) * sizeof(VDIIMAGEBLOCKPOINTER)))
            {
                LogRel(("VDI: v1 image data offset wrong (%d < %ld)\n",
                       getImageDataOffset(pHeader), getImageBlocksOffset(pHeader) + getImageBlocks(pHeader) * sizeof(VDIIMAGEBLOCKPOINTER)));
                return VERR_VDI_INVALID_HEADER;
            }

            /* Undo/diff images must carry non-null parent linkage UUIDs. */
            if (    getImageType(pHeader) == VDI_IMAGE_TYPE_UNDO
                ||  getImageType(pHeader) == VDI_IMAGE_TYPE_DIFF)
            {
                if (RTUuidIsNull(getImageParentUUID(pHeader)))
                {
                    LogRel(("VDI: v1 uuid of parent is 0)\n"));
                    return VERR_VDI_INVALID_HEADER;
                }
                if (RTUuidIsNull(getImageParentModificationUUID(pHeader)))
                {
                    LogRel(("VDI: v1 uuid of parent modification is 0\n"));
                    return VERR_VDI_INVALID_HEADER;
                }
            }

            break;
        }
        default:
            /* Unsupported. */
            return VERR_VDI_UNSUPPORTED_VERSION;
    }

    /* Check common header parameters. */

    /* Accumulates failures so that every problem gets logged, not just the first. */
    bool fFailed = false;

    if (    getImageType(pHeader) < VDI_IMAGE_TYPE_FIRST
        ||  getImageType(pHeader) > VDI_IMAGE_TYPE_LAST)
    {
        LogRel(("VDI: bad image type %d\n", getImageType(pHeader)));
        fFailed = true;
    }

    /* No flag bits outside the known mask may be set. */
    if (getImageFlags(pHeader) & ~VDI_IMAGE_FLAGS_MASK)
    {
        LogRel(("VDI: bad image flags %08x\n", getImageFlags(pHeader)));
        fFailed = true;
    }

    if ((getImageGeometry(pHeader))->cbSector != VDI_GEOMETRY_SECTOR_SIZE)
    {
        LogRel(("VDI: wrong sector size (%d != %d)\n",
               (getImageGeometry(pHeader))->cbSector, VDI_GEOMETRY_SECTOR_SIZE));
        fFailed = true;
    }

    /* Sizes must be non-zero. getPowerOfTwo() returns 0 for non-powers of two
     * and also for 1, so a block size of 1 is rejected here as well. */
    if (    getImageDiskSize(pHeader) == 0
        ||  getImageBlockSize(pHeader) == 0
        ||  getImageBlocks(pHeader) == 0
        ||  getPowerOfTwo(getImageBlockSize(pHeader)) == 0)
    {
        LogRel(("VDI: wrong size (%lld, %d, %d, %d)\n",
              getImageDiskSize(pHeader), getImageBlockSize(pHeader),
              getImageBlocks(pHeader), getPowerOfTwo(getImageBlockSize(pHeader))));
        fFailed = true;
    }

    if (getImageBlocksAllocated(pHeader) > getImageBlocks(pHeader))
    {
        LogRel(("VDI: too many blocks allocated (%d > %d)\n"
                "     blocksize=%d disksize=%lld\n",
              getImageBlocksAllocated(pHeader), getImageBlocks(pHeader),
              getImageBlockSize(pHeader), getImageDiskSize(pHeader)));
        fFailed = true;
    }

    /* A non-zero extra block size must itself pass the getPowerOfTwo() test. */
    if (    getImageExtraBlockSize(pHeader) != 0
        &&  getPowerOfTwo(getImageExtraBlockSize(pHeader)) == 0)
    {
        LogRel(("VDI: wrong extra size (%d, %d)\n",
               getImageExtraBlockSize(pHeader), getPowerOfTwo(getImageExtraBlockSize(pHeader))));
        fFailed = true;
    }

    /* The blocks together must be able to hold the advertised disk size. */
    if ((uint64_t)getImageBlockSize(pHeader) * getImageBlocks(pHeader) < getImageDiskSize(pHeader))
    {
        LogRel(("VDI: wrong disk size (%d, %d, %lld)\n",
               getImageBlockSize(pHeader), getImageBlocks(pHeader), getImageDiskSize(pHeader)));
        fFailed = true;
    }

    if (RTUuidIsNull(getImageCreationUUID(pHeader)))
    {
        LogRel(("VDI: uuid of creator is 0\n"));
        fFailed = true;
    }

    if (RTUuidIsNull(getImageModificationUUID(pHeader)))
    {
        LogRel(("VDI: uuid of modificator is 0\n"));
        fFailed = true;
    }

    return fFailed ? VERR_VDI_INVALID_HEADER : VINF_SUCCESS;
}

/**
 * internal: put a VDIIMAGEDESC into its "nothing attached" state:
 * no file handle, no block array and no list neighbours.
 */
static void vdiInitImageDesc(PVDIIMAGEDESC pImage)
{
    /* Resources owned by the descriptor. */
    pImage->File     = NIL_RTFILE;
    pImage->paBlocks = NULL;

    /* Linkage into the image list. */
    pImage->pNext = NULL;
    pImage->pPrev = NULL;
}

/**
 * internal: derive the cached working parameters of a VDIIMAGEDESC
 * (flags, offsets, block mask and shift counts) from its image header.
 */
static void vdiSetupImageDesc(PVDIIMAGEDESC pImage)
{
    PVDIHEADER pHdr = &pImage->Header;

    pImage->fFlags         = getImageFlags(pHdr);
    pImage->offStartBlocks = getImageBlocksOffset(pHdr);
    pImage->offStartData   = getImageDataOffset(pHdr);
    pImage->uBlockMask     = getImageBlockSize(pHdr) - 1;

    /* Both shifts start out as log2(block size)... */
    pImage->uShiftOffset2Index = getPowerOfTwo(getImageBlockSize(pHdr));
    pImage->uShiftIndex2Offset = pImage->uShiftOffset2Index;

    /* ...and the index-to-offset shift grows by log2(extra size) when the
     * header declares per-block extra data. */
    pImage->offStartBlockData = getImageExtraBlockSize(pHdr);
    if (pImage->offStartBlockData != 0)
        pImage->uShiftIndex2Offset += getPowerOfTwo(pImage->offStartBlockData);
}


/**
 * Open or create a file via open() and hand back the descriptor as an RTFILE.
 *
 * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER for an invalid
 *          pointer or access mode, VERR_GENERAL_FAILURE when open() fails.
 * @param   pFile        Receives the handle. Set to NIL_RTFILE once the pointer
 *                       itself has been validated, and only overwritten on success.
 * @param   pszFilename  Path passed to open().
 * @param   fOpen        RTFILE_O_* flags: action, access mode and the optional
 *                       inherit / non-block / write-through bits. Sharing
 *                       (deny) bits are not acted upon here.
 *
 * The descriptor is NOT marked close-on-exec: the fcntl(FD_CLOEXEC) step of the
 * code this was derived from is disabled. With that step gone, the old success
 * condition rejected every request carrying RTFILE_O_INHERIT (closing a
 * descriptor that had just been opened successfully), so a successful open()
 * is now always reported as success.
 */
static int  RTFileOpen(PRTFILE pFile, const char *pszFilename, unsigned fOpen)
{
    Log(("%s:%d", __FILE__, __LINE__));
    /*
     * Validate input.
     */
    if (!VALID_PTR(pFile))
    {
        AssertMsgFailed(("Invalid pFile %p\n", pFile));
        return VERR_INVALID_PARAMETER;
    }
    *pFile = NIL_RTFILE;
    if (!VALID_PTR(pszFilename))
    {
        AssertMsgFailed(("Invalid pszFilename %p\n", pszFilename));
        return VERR_INVALID_PARAMETER;
    }
    Log(("%s:%d", __FILE__, __LINE__));

    /*
     * Calculate open mode flags. Each flag is only used where the host
     * headers define it.
     */
    int fOpenMode = 0;
#ifdef O_BINARY
    fOpenMode |= O_BINARY;              /* (pc) */
#endif
#ifdef O_LARGEFILE
    fOpenMode |= O_LARGEFILE;           /* (linux) */
#endif
#ifdef O_NOINHERIT
    if (!(fOpen & RTFILE_O_INHERIT))
        fOpenMode |= O_NOINHERIT;
#endif
#ifdef O_NONBLOCK
    if (fOpen & RTFILE_O_NON_BLOCK)
        fOpenMode |= O_NONBLOCK;
#endif
#ifdef O_SYNC
    if (fOpen & RTFILE_O_WRITE_THROUGH)
        fOpenMode |= O_SYNC;
#endif
    Log(("%s:%d", __FILE__, __LINE__));

    /* create/truncate file; unknown action values add no flags. */
    switch (fOpen & RTFILE_O_ACTION_MASK)
    {
        case RTFILE_O_OPEN:             break;
        case RTFILE_O_OPEN_CREATE:      fOpenMode |= O_CREAT; break;
        case RTFILE_O_CREATE:           fOpenMode |= O_CREAT | O_EXCL; break;
        case RTFILE_O_CREATE_REPLACE:   fOpenMode |= O_CREAT | O_TRUNC; break; /** @todo replacing needs fixing, this is *not* a 1:1 mapping! */
    }
    if (fOpen & RTFILE_O_TRUNCATE)
        fOpenMode |= O_TRUNC;

    switch (fOpen & RTFILE_O_ACCESS_MASK)
    {
        case RTFILE_O_READ:             fOpenMode |= O_RDONLY; break;
        case RTFILE_O_WRITE:            fOpenMode |= O_WRONLY; break;
        case RTFILE_O_READWRITE:        fOpenMode |= O_RDWR; break;
        default:
            AssertMsgFailed(("RTFileOpen received an invalid RW value, fOpen=%#x\n", fOpen));
            return VERR_INVALID_PARAMETER;
    }

    /** @todo sharing! */

    /*
     * Open/create the file.
     */
#ifdef RT_DONT_CONVERT_FILENAMES
    int fh = open(pszFilename, fOpenMode, RT_FILE_PERMISSION);
    Log(("%s:%d", __FILE__, __LINE__));
#else
    char *pszNativeFilename;
    int rc = rtPathToNative(&pszNativeFilename, pszFilename);
    if (RT_FAILURE(rc))
    {
        Log(("%s:%d", __FILE__, __LINE__));
        return (rc);
    }

    int fh = open(pszNativeFilename, fOpenMode, RT_FILE_PERMISSION);
    Log(("%s:%d %s", __FILE__, __LINE__, pszNativeFilename));
    rtPathFreeNative(pszNativeFilename);
#endif
    if (fh < 0)
    {
        Log(("%s:%d", __FILE__, __LINE__));
        return VERR_GENERAL_FAILURE;
    }

    *pFile = (RTFILE)fh;
    Assert((int)*pFile == fh);
    LogFlow(("RTFileOpen(%p:{%RTfile}, %p:{%s}, %#x): returns %Rrc\n",
             pFile, *pFile, pszFilename, pszFilename, fOpen, VINF_SUCCESS));
    return VINF_SUCCESS;
}

/**
 * Set the size of an open file to cbSize bytes.
 *
 * @returns VINF_SUCCESS, VERR_NOT_SUPPORTED when cbSize needs more than 32
 *          bits but off_t is narrower than 64 bits, or VERR_GENERAL_FAILURE
 *          when the underlying call fails.
 */
int  RTFileSetSize(RTFILE File, uint64_t cbSize)
{
    int iResult;

    /* Refuse sizes that would be truncated by the cast to off_t. */
    if (sizeof(off_t) < sizeof(cbSize) && (cbSize >> 32) != 0)
    {
        AssertMsgFailed(("64-bit filesize not supported! cbSize=%lld\n", cbSize));
        return VERR_NOT_SUPPORTED;
    }

#if defined(_MSC_VER) || (defined(__OS2__) && (!defined(__INNOTEK_LIBC__) || __INNOTEK_LIBC__ < 0x006))
    iResult = chsize((int)File, (off_t)cbSize);
#else
    /* Growing a file with ftruncate() is a non-standard feature of FreeBSD,
     * Linux, and OS/2 LIBC v0.6 and higher (SuS doesn't define ftruncate()
     * with a size bigger than the file). */
    iResult = ftruncate((int)File, (off_t)cbSize);
#endif

    return iResult == 0 ? VINF_SUCCESS : VERR_GENERAL_FAILURE;
}

/**
 * Write a buffer to a file at its current position.
 *
 * @returns VINF_SUCCESS or VERR_GENERAL_FAILURE.
 * @param   File        File handle (used as a plain descriptor).
 * @param   pvBuf       Data to write.
 * @param   cbToWrite   Number of bytes to write; 0 succeeds without any I/O.
 * @param   pcbWritten  When non-NULL, receives the byte count of the single
 *                      write() issued (partial writes are the caller's job).
 *                      When NULL, writing continues until everything is out.
 */
int  RTFileWrite(RTFILE File, const void *pvBuf, unsigned cbToWrite, unsigned *pcbWritten)
{
    const char *pbSrc = (const char *)pvBuf;
    ssize_t     cbDone;

    if (cbToWrite == 0)
        return VINF_SUCCESS;

    /* First attempt. */
    cbDone = write((int)File, pvBuf, cbToWrite);
    if (cbDone < 0)
        return VERR_GENERAL_FAILURE;

    /* Caller copes with a short write on its own. */
    if (pcbWritten)
    {
        *pcbWritten = cbDone;
        return VINF_SUCCESS;
    }

    /* Caller wants it all: keep going until the buffer is drained. */
    while (cbDone < (ssize_t)cbToWrite)
    {
        ssize_t cbChunk = write((int)File, pbSrc + cbDone, cbToWrite - cbDone);
        if (cbChunk <= 0)
            return VERR_GENERAL_FAILURE;
        cbDone += cbChunk;
    }
    return VINF_SUCCESS;
}

/**
 * Reposition the file offset with lseek().
 *
 * @returns VINF_SUCCESS, VERR_INVALID_PARAMETER for a method above
 *          RTFILE_SEEK_END, or VERR_GENERAL_FAILURE when lseek() fails.
 * @param   File        File handle (used as a plain descriptor).
 * @param   offSeek     Offset, cast to off_t without a range check (the
 *                      range check from the code this derives from is disabled).
 * @param   uMethod     Index into the whence table below; presumably
 *                      RTFILE_SEEK_BEGIN/CURRENT/END are 0/1/2 - TODO confirm.
 * @param   poffActual  Optional; receives the resulting absolute offset.
 */
int  RTFileSeek(RTFILE File, int64_t offSeek, unsigned uMethod, uint64_t *poffActual)
{
    static const unsigned s_aWhence[] = { SEEK_SET, SEEK_CUR, SEEK_END };
    off_t offNew;

    /* Validate input. */
    if (uMethod > RTFILE_SEEK_END)
    {
        AssertMsgFailed(("Invalid uMethod=%d\n", uMethod));
        return VERR_INVALID_PARAMETER;
    }

    offNew = lseek((int)File, (off_t)offSeek, s_aWhence[uMethod]);
    if (offNew == ~0)
        return VERR_GENERAL_FAILURE;

    if (poffActual)
        *poffActual = (uint64_t)offNew;
    return VINF_SUCCESS;
}

/* Smaller of two values. Function-like macro: the selected argument is
 * evaluated twice, so only pass expressions without side effects. */
#define RT_MIN(a,b) (((a)<(b)) ? (a) : (b))

/**
 * Close a file handle via close().
 * @returns VINF_SUCCESS, or VERR_GENERAL_FAILURE when close() reports an error.
 */
int  RTFileClose(RTFILE File)
{
    return close((int)File) == 0 ? VINF_SUCCESS : VERR_GENERAL_FAILURE;
}


/**
 * Remove a file via unlink(). The name is passed through as-is; no native
 * path conversion and no errno translation is performed.
 * @returns VINF_SUCCESS, or VERR_GENERAL_FAILURE when unlink() fails.
 */
int  RTFileDelete(const char *pszFilename)
{
    return unlink(pszFilename) == 0 ? VINF_SUCCESS : VERR_GENERAL_FAILURE;
}
/**
 * internal: create image.
 *
 * Builds the pre-header, a v1 header and the block array in a temporary image
 * descriptor, creates the file, sizes it, writes the metadata and - for fixed
 * images with VDI_IMAGE_FLAGS_ZERO_EXPAND - fills the data area with zeroes.
 * The file is closed again before returning; on failure it is deleted.
 *
 * @returns VINF_SUCCESS, VERR_VDI_COMMENT_TOO_LONG, VERR_VDI_UNSUPPORTED_VERSION,
 *          VERR_NO_MEMORY, or the status of the failing file operation or
 *          progress callback.
 * @param   pszFilename  Name of the file to create (must not exist: RTFILE_O_CREATE).
 * @param   enmType      Image type.
 * @param   fFlags       Image flags; replaced by the parent's flags for undo/diff images.
 * @param   cbSize       Disk size; replaced by the parent's size for undo/diff images.
 * @param   pszComment   Optional comment, shorter than VDI_IMAGE_COMMENT_SIZE.
 * @param   pParent      Parent image, required for undo/diff images.
 * @param   pfnProgress  Optional progress callback.
 * @param   pvUser       Opaque argument handed to pfnProgress.
 *
 * NOTE(review): pParent is only checked by Assert(); if asserts are compiled
 * out, a NULL parent for an undo/diff image is dereferenced.
 * NOTE(review): the result of RTFileClose() is ignored, so an error reported
 * at close time does not reach the caller.
 */
static int vdiCreateImage(const char *pszFilename, VDIIMAGETYPE enmType, unsigned fFlags,
                          uint64_t cbSize, const char *pszComment, PVDIIMAGEDESC pParent,
                          PFNVMPROGRESS pfnProgress, void *pvUser)
{
    /* Check args. */
    Assert(pszFilename);
    Assert(enmType >= VDI_IMAGE_TYPE_FIRST && enmType <= VDI_IMAGE_TYPE_LAST);
    Assert(!(fFlags & ~VDI_IMAGE_FLAGS_MASK));
    Assert(cbSize);

    /* Special check for comment length. */
    if (    pszComment
        &&  strlen(pszComment) >= VDI_IMAGE_COMMENT_SIZE)
    {
        Log(("vdiCreateImage: pszComment is too long, cb=%d\n", strlen(pszComment)));
        return VERR_VDI_COMMENT_TOO_LONG;
    }

    if (    enmType == VDI_IMAGE_TYPE_UNDO
        ||  enmType == VDI_IMAGE_TYPE_DIFF)
    {
        Assert(pParent);
        /* The upper 16 bits of the pre-header version carry the major version. */
        if ((pParent->PreHeader.u32Version >> 16) != VDI_IMAGE_VERSION_MAJOR)
        {
            /* Invalid parent image version. */
            Log(("vdiCreateImage: unsupported parent version=%08X\n", pParent->PreHeader.u32Version));
            return VERR_VDI_UNSUPPORTED_VERSION;
        }

        /* get image params from the parent image. */
        fFlags = getImageFlags(&pParent->Header);
        cbSize = getImageDiskSize(&pParent->Header);
    }

    /* Temporary descriptor; it is freed again before this function returns. */
    PVDIIMAGEDESC pImage = (PVDIIMAGEDESC)RTMemAllocZ(sizeof(VDIIMAGEDESC));
    if (!pImage)
        return VERR_NO_MEMORY;
    vdiInitImageDesc(pImage);

    vdiInitPreHeader(&pImage->PreHeader);
    vdiInitHeader(&pImage->Header, enmType, fFlags, pszComment, cbSize, VDI_IMAGE_DEFAULT_BLOCK_SIZE, 0);

    if (    enmType == VDI_IMAGE_TYPE_UNDO
        ||  enmType == VDI_IMAGE_TYPE_DIFF)
    {
        /* Set up linkage information. */
        pImage->Header.u.v1.uuidLinkage = *getImageCreationUUID(&pParent->Header);
        pImage->Header.u.v1.uuidParentModify = *getImageModificationUUID(&pParent->Header);
    }

    pImage->paBlocks = (PVDIIMAGEBLOCKPOINTER)RTMemAlloc(sizeof(VDIIMAGEBLOCKPOINTER) * getImageBlocks(&pImage->Header));
    if (!pImage->paBlocks)
    {
        RTMemFree(pImage);
        return VERR_NO_MEMORY;
    }

    if (enmType != VDI_IMAGE_TYPE_FIXED)
    {
        /* for growing images mark all blocks in paBlocks as free. */
        unsigned i;
        for (i = 0; i < pImage->Header.u.v1.cBlocks; i++)
            pImage->paBlocks[i] = VDI_IMAGE_BLOCK_FREE;
    }
    else
    {
        /* for fixed images mark all blocks in paBlocks as allocated */
        unsigned i;
        for (i = 0; i < pImage->Header.u.v1.cBlocks; i++)
            pImage->paBlocks[i] = i;
        pImage->Header.u.v1.cBlocksAllocated = pImage->Header.u.v1.cBlocks;
    }

    /* Setup image parameters. */
    vdiSetupImageDesc(pImage);

    /* create file */
    int rc = RTFileOpen(&pImage->File,
                        pszFilename,
                        RTFILE_O_READWRITE | RTFILE_O_CREATE | RTFILE_O_DENY_ALL);
    if (VBOX_SUCCESS(rc))
    {
#if 0
        /* Lock image exclusively to close any wrong access by VDI API calls. */
        uint64_t cbLock = pImage->offStartData
                        + ((uint64_t)getImageBlocks(&pImage->Header) << pImage->uShiftIndex2Offset);
        rc = RTFileLock(pImage->File,
                        RTFILE_LOCK_WRITE | RTFILE_LOCK_IMMEDIATELY, 0, cbLock);
        if (VBOX_FAILURE(rc))
        {
            cbLock = 0;    /* Not locked. */
            goto l_create_failed;
        }
#endif

        if (enmType == VDI_IMAGE_TYPE_FIXED)
        {
            /*
             * Allocate & commit whole file if fixed image, it must be more
             * effective than expanding file by write operations.
             */
            rc = RTFileSetSize(pImage->File,
                               pImage->offStartData
                             + ((uint64_t)getImageBlocks(&pImage->Header) << pImage->uShiftIndex2Offset));
        }
        else
        {
            /* Set file size to hold header and blocks array. */
            rc = RTFileSetSize(pImage->File, pImage->offStartData);
        }
        if (VBOX_FAILURE(rc))
            goto l_create_failed;

        /* Generate image last-modify uuid */
        RTUuidCreate(getImageModificationUUID(&pImage->Header));

        /* Write pre-header. */
        rc = RTFileWrite(pImage->File, &pImage->PreHeader, sizeof(pImage->PreHeader), NULL);
        if (VBOX_FAILURE(rc))
            goto l_create_failed;

        /* Write header (no seek in between: it goes right behind the pre-header). */
        rc = RTFileWrite(pImage->File, &pImage->Header.u.v1, sizeof(pImage->Header.u.v1), NULL);
        if (VBOX_FAILURE(rc))
            goto l_create_failed;

        /* Write blocks array. */
        rc = RTFileSeek(pImage->File, pImage->offStartBlocks, RTFILE_SEEK_BEGIN, NULL);
        if (VBOX_FAILURE(rc))
            goto l_create_failed;
        rc = RTFileWrite(pImage->File,
                         pImage->paBlocks,
                         getImageBlocks(&pImage->Header) * sizeof(VDIIMAGEBLOCKPOINTER),
                         NULL);
        if (VBOX_FAILURE(rc))
            goto l_create_failed;

        if (    (enmType == VDI_IMAGE_TYPE_FIXED)
            &&  (fFlags & VDI_IMAGE_FLAGS_ZERO_EXPAND))
        {
            /* Fill image with zeroes. */

            rc = RTFileSeek(pImage->File, pImage->offStartData, RTFILE_SEEK_BEGIN, NULL);
            if (VBOX_FAILURE(rc))
                goto l_create_failed;

            /* alloc tmp zero-filled buffer */
            void *pvBuf = RTMemTmpAllocZ(VDIDISK_DEFAULT_BUFFER_SIZE);
            if (pvBuf)
            {
                uint64_t cbFill = (uint64_t)getImageBlocks(&pImage->Header) << pImage->uShiftIndex2Offset;
                uint64_t cbDisk = cbFill;

                /* do loop to fill all image. */
                while (cbFill > 0)
                {
                    unsigned to_fill = (unsigned)RT_MIN(cbFill, VDIDISK_DEFAULT_BUFFER_SIZE);

                    rc = RTFileWrite(pImage->File, pvBuf, to_fill, NULL);
                    if (VBOX_FAILURE(rc))
                        break;

                    cbFill -= to_fill;

                    /* Report the percentage written so far; a failure status
                     * from the callback aborts the fill. */
                    if (pfnProgress)
                    {
                        rc = pfnProgress(NULL /* WARNING! pVM=NULL  */,
                                         (unsigned)(((cbDisk - cbFill) * 100) / cbDisk),
                                         pvUser);
                        if (VBOX_FAILURE(rc))
                            break;
                    }
                }
                RTMemTmpFree(pvBuf);
            }
            else
            {
                /* alloc error */
                rc = VERR_NO_MEMORY;
            }
        }

    /* Reached on success as well as on failure: rc tells which. */
    l_create_failed:

#if 0
        if (cbLock)
            RTFileUnlock(pImage->File, 0, cbLock);
#endif
        RTFileClose(pImage->File);

        /* Delete image file if error occured while creating */
        if (VBOX_FAILURE(rc))
            RTFileDelete(pszFilename);
    }

    RTMemFree(pImage->paBlocks);
    RTMemFree(pImage);

    /* Final 100% notification; its return value is not looked at. */
    if (    VBOX_SUCCESS(rc)
        &&  pfnProgress)
        pfnProgress(NULL /* WARNING! pVM=NULL  */, 100, pvUser);

    Log(("vdiCreateImage: done, filename=\"%s\", rc=%d\n", pszFilename, rc));

    return rc;
}

/**
 * Read from a file at its current position.
 *
 * @returns VINF_SUCCESS, VERR_EOF when a complete read was requested but the
 *          file ended early, or VERR_GENERAL_FAILURE on a read() error.
 * @param   File      File handle (used as a plain descriptor).
 * @param   pvBuf     Destination buffer.
 * @param   cbToRead  Number of bytes wanted; 0 succeeds without any I/O.
 * @param   pcbRead   When non-NULL, receives the byte count of the single
 *                    read() issued (short reads are the caller's job).
 *                    When NULL, reading continues until cbToRead bytes arrived.
 */
int  RTFileRead(RTFILE File, void *pvBuf, unsigned cbToRead, unsigned *pcbRead)
{
    char   *pbDst = (char *)pvBuf;
    ssize_t cbDone;

    if (cbToRead == 0)
        return VINF_SUCCESS;

    /* First attempt. */
    cbDone = read((int)File, pvBuf, cbToRead);
    if (cbDone < 0)
        return VERR_GENERAL_FAILURE;

    /* Caller copes with a short read on its own. */
    if (pcbRead)
    {
        *pcbRead = cbDone;
        return VINF_SUCCESS;
    }

    /* Caller wants it all: keep going until the buffer is full. */
    while (cbDone < (ssize_t)cbToRead)
    {
        ssize_t cbChunk = read((int)File, pbDst + cbDone, cbToRead - cbDone);
        if (cbChunk == 0)
            return VERR_EOF;
        if (cbChunk < 0)
            return VERR_GENERAL_FAILURE;
        cbDone += cbChunk;
    }
    return VINF_SUCCESS;
}


/**
 * Open an image.
 *
 * Allocates an image descriptor, opens the file (falling back to read-only
 * when a read/write open fails), reads and validates the pre-header and the
 * header, optionally checks the image against its parent, and loads the
 * block array.
 *
 * @returns VINF_SUCCESS, VERR_FILENAME_TOO_LONG, VERR_NO_MEMORY, one of the
 *          VERR_VDI_* validation codes, or the status of the failing file
 *          operation.
 * @param   ppImage      Receives the new descriptor on success; not written on failure.
 * @param   pszFilename  Image file name; must fit into the descriptor's szFilename.
 * @param   fOpen        VDI_OPEN_FLAGS_* flags.
 * @param   pParent      Optional parent image; when given, the opened image must
 *                       be an undo/diff image matching it.
 *
 * NOTE(review): with a parent, getImageParentModificationUUID() is called on
 * the opened header; that getter only handles major version 1 and asserts /
 * returns NULL otherwise, while major version 0 passes the earlier checks.
 * @internal
 */
static int vdiOpenImage(PVDIIMAGEDESC *ppImage, const char *pszFilename,
                        unsigned fOpen, PVDIIMAGEDESC pParent)
{
    /*
     * Validate input.
     */
    Assert(ppImage);
    Assert(pszFilename);
    Assert(!(fOpen & ~VDI_OPEN_FLAGS_MASK));

    /* pImage is only used inside sizeof here; it is not dereferenced yet. */
    PVDIIMAGEDESC   pImage;
    size_t          cchFilename = strlen(pszFilename);
    if (cchFilename >= sizeof(pImage->szFilename))
    {
        /* NOTE(review): cchFilename is a size_t but is printed with %d. */
        AssertMsgFailed(("filename=\"%s\" is too long (%d bytes)!\n", pszFilename, cchFilename));
        return VERR_FILENAME_TOO_LONG;
    }

    pImage = (PVDIIMAGEDESC)RTMemAllocZ(sizeof(VDIIMAGEDESC));
    if (!pImage)
        return VERR_NO_MEMORY;
    vdiInitImageDesc(pImage);

    /* No terminator is copied; presumably RTMemAllocZ() returns zeroed memory,
     * which together with the length check above terminates the string -
     * TODO confirm. */
    memcpy(pImage->szFilename, pszFilename, cchFilename);
    pImage->fOpen = fOpen;

    /*
     * Open the image.
     */
    int rc = RTFileOpen(&pImage->File,
                        pImage->szFilename,
                        fOpen & VDI_OPEN_FLAGS_READONLY
                        ? RTFILE_O_READ      | RTFILE_O_OPEN | RTFILE_O_DENY_NONE
                        : RTFILE_O_READWRITE | RTFILE_O_OPEN | RTFILE_O_DENY_NONE);
    if (VBOX_FAILURE(rc))
    {
        if (!(fOpen & VDI_OPEN_FLAGS_READONLY))
        {
            /* Try to open image for reading only. */
            rc = RTFileOpen(&pImage->File,
                            pImage->szFilename,
                            RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE);
            if (VBOX_SUCCESS(rc))
                pImage->fOpen |= VDI_OPEN_FLAGS_READONLY;
        }
        if (VBOX_FAILURE(rc))
        {
            /* No file was opened, so only the descriptor has to be released. */
            RTMemFree(pImage);
			Log(("%s:%d", __FILE__, __LINE__));
            return rc;
        }
    }
    /* Set up current image r/w state. */
    pImage->fReadOnly = !!(pImage->fOpen & VDI_OPEN_FLAGS_READONLY);

    /*
     * Set initial file lock for reading header only.
     * Length of lock doesn't matter, it just must include image header.
     */
#if 0
    uint64_t cbLock = _1M;
    rc = RTFileLock(pImage->File, RTFILE_LOCK_READ | RTFILE_LOCK_IMMEDIATELY, 0, cbLock);
    if (VBOX_FAILURE(rc))
    {
        cbLock = 0;
        goto l_open_failed;
    }
#endif
    
    /* Read pre-header. */
    rc = RTFileRead(pImage->File, &pImage->PreHeader, sizeof(pImage->PreHeader), NULL);
    if (VBOX_FAILURE(rc))
        goto l_open_failed;
    rc = vdiValidatePreHeader(&pImage->PreHeader);
    if (VBOX_FAILURE(rc))
        goto l_open_failed;

    /* Read header; the pre-header version decides which layout follows. */
    pImage->Header.uVersion = pImage->PreHeader.u32Version;
    switch (GET_MAJOR_HEADER_VERSION(&pImage->Header))
    {
        case 0:
            rc = RTFileRead(pImage->File, &pImage->Header.u.v0, sizeof(pImage->Header.u.v0), NULL);
            break;
        case 1:
            rc = RTFileRead(pImage->File, &pImage->Header.u.v1, sizeof(pImage->Header.u.v1), NULL);
            break;
        default:
            rc = VERR_VDI_UNSUPPORTED_VERSION;
            break;
    }
    if (VBOX_FAILURE(rc))
        goto l_open_failed;

    rc = vdiValidateHeader(&pImage->Header);
    if (VBOX_FAILURE(rc))
        goto l_open_failed;

    /* Check diff image correctness. */
    if (pParent)
    {
        if (pImage->PreHeader.u32Version != pParent->PreHeader.u32Version)
        {
            rc = VERR_VDI_IMAGES_VERSION_MISMATCH;
            goto l_open_failed;
        }

        if (    getImageType(&pImage->Header) != VDI_IMAGE_TYPE_UNDO
            &&  getImageType(&pImage->Header) != VDI_IMAGE_TYPE_DIFF)
        {
            rc = VERR_VDI_WRONG_DIFF_IMAGE;
            goto l_open_failed;
        }

        /* Disk size and block layout must be identical to the parent's. */
        if (    getImageDiskSize(&pImage->Header) != getImageDiskSize(&pParent->Header)
            ||  getImageBlockSize(&pImage->Header) != getImageBlockSize(&pParent->Header)
            ||  getImageBlocks(&pImage->Header) != getImageBlocks(&pParent->Header)
            ||  getImageExtraBlockSize(&pImage->Header) != getImageExtraBlockSize(&pParent->Header))
        {
            rc = VERR_VDI_WRONG_DIFF_IMAGE;
            goto l_open_failed;
        }

        /* Check linkage data; a non-zero RTUuidCompare() result is treated as a mismatch. */
        if (    RTUuidCompare(getImageParentUUID(&pImage->Header),
                              getImageCreationUUID(&pParent->Header))
            ||  RTUuidCompare(getImageParentModificationUUID(&pImage->Header),
                              getImageModificationUUID(&pParent->Header)))
        {
            rc = VERR_VDI_IMAGES_UUID_MISMATCH;
            goto l_open_failed;
        }
    }

    /* Setup image parameters by header. */
    vdiSetupImageDesc(pImage);

    /* reset modified flag into first-modified state. */
    pImage->fModified = VDI_IMAGE_MODIFIED_FIRST;

#if 0
    /* Image is validated, set working file lock on it. */
    rc = RTFileUnlock(pImage->File, 0, cbLock);
    AssertRC(rc);
    cbLock = pImage->offStartData
           + ((uint64_t)getImageBlocks(&pImage->Header) << pImage->uShiftIndex2Offset);
    rc = RTFileLock(pImage->File,
                    (pImage->fReadOnly) ?
                        RTFILE_LOCK_READ | RTFILE_LOCK_IMMEDIATELY :
                        RTFILE_LOCK_WRITE | RTFILE_LOCK_IMMEDIATELY,
                    0,
                    cbLock);
    if (    VBOX_FAILURE(rc)
        &&  !pImage->fReadOnly)
    {
        /* Failed to lock image for writing, try read-only lock. */
        rc = RTFileLock(pImage->File,
                        RTFILE_LOCK_READ | RTFILE_LOCK_IMMEDIATELY, 0, cbLock);
        if (VBOX_SUCCESS(rc))
            pImage->fReadOnly = true;
    }
    if (VBOX_FAILURE(rc))
    {
        cbLock = 0;    /* Not locked. */
        goto l_open_failed;
    }
#endif
    
    /* Allocate memory for blocks array. */
    pImage->paBlocks = (PVDIIMAGEBLOCKPOINTER)RTMemAlloc(sizeof(VDIIMAGEBLOCKPOINTER) * getImageBlocks(&pImage->Header));
    if (!pImage->paBlocks)
    {
        rc = VERR_NO_MEMORY;
        goto l_open_failed;
    }

    /* Read blocks array. */
    rc = RTFileSeek(pImage->File, pImage->offStartBlocks, RTFILE_SEEK_BEGIN, NULL);
    if (VBOX_FAILURE(rc))
        goto l_open_failed;
    rc = RTFileRead(pImage->File, pImage->paBlocks,
                    getImageBlocks(&pImage->Header) * sizeof(VDIIMAGEBLOCKPOINTER), NULL);
    if (VBOX_FAILURE(rc))
        goto l_open_failed;

    /* all done: ownership of the descriptor passes to the caller. */
    *ppImage = pImage;
    return VINF_SUCCESS;

l_open_failed:
    /* Clean up: block array (if any), file handle, then the descriptor itself. */
    if (pImage->paBlocks)
        RTMemFree(pImage->paBlocks);
#if 0
    if (cbLock)
        RTFileUnlock(pImage->File, 0, cbLock);
#endif
    RTFileClose(pImage->File);
    RTMemFree(pImage);
    Log(("vdiOpenImage: failed, filename=\"%s\", rc=%d\n", pszFilename, rc));
    return rc;
}

/**
 * internal: write the in-memory header (v0 or v1 layout) back to the file,
 * directly behind the pre-header.
 *
 * @returns Status of the seek/write, or VERR_VDI_UNSUPPORTED_VERSION for any
 *          other major version. Every failure is reported via AssertMsgRC.
 */
static int vdiUpdateHeader(PVDIIMAGEDESC pImage)
{
    const void *pvHdr = NULL;
    unsigned    cbHdr = 0;

    /* The header lives right after the pre-header. */
    int rc = RTFileSeek(pImage->File, sizeof(VDIPREHEADER), RTFILE_SEEK_BEGIN, NULL);
    if (VBOX_SUCCESS(rc))
    {
        /* Pick the header variant that matches the image version. */
        switch (GET_MAJOR_HEADER_VERSION(&pImage->Header))
        {
            case 0:
                pvHdr = &pImage->Header.u.v0;
                cbHdr = sizeof(pImage->Header.u.v0);
                break;
            case 1:
                pvHdr = &pImage->Header.u.v1;
                cbHdr = sizeof(pImage->Header.u.v1);
                break;
            default:
                rc = VERR_VDI_UNSUPPORTED_VERSION;
                break;
        }

        if (pvHdr)
            rc = RTFileWrite(pImage->File, pvHdr, cbHdr, NULL);
    }
    AssertMsgRC(rc, ("vdiUpdateHeader failed, filename=\"%s\" rc=%d\n", pImage->szFilename, rc));
    return rc;
}

/**
 * internal: persist the header and the block pointer of a single block.
 *
 * The header is saved first; if that fails its status is returned and the
 * block pointer is not touched. Otherwise only the array entry belonging to
 * uBlock is rewritten in the file.
 */
static int vdiUpdateBlockInfo(PVDIIMAGEDESC pImage, unsigned uBlock)
{
    /* The header goes first. */
    int rc = vdiUpdateHeader(pImage);
    if (VBOX_FAILURE(rc))
        return rc;

    /* Position on the one entry that changed and rewrite just that entry. */
    rc = RTFileSeek(pImage->File,
                    pImage->offStartBlocks + uBlock * sizeof(VDIIMAGEBLOCKPOINTER),
                    RTFILE_SEEK_BEGIN, NULL);
    if (!VBOX_FAILURE(rc))
        rc = RTFileWrite(pImage->File, &pImage->paBlocks[uBlock],
                         sizeof(VDIIMAGEBLOCKPOINTER), NULL);

    AssertMsgRC(rc, ("vdiUpdateBlockInfo failed to update block=%u, filename=\"%s\", rc=%d\n",
                     uBlock, pImage->szFilename, rc));
    return rc;
}

/**
 * internal: mark image as modified, if this is the first change - update image header
 * on disk with a new uuidModify value.
 *
 * The header is only saved when VDI_IMAGE_MODIFIED_DISABLE_UUID_UPDATE is not
 * set in fModified. The flag is tested with a bitwise AND: OR-ing the flag
 * into the value before negating it never lets the branch run for a non-zero
 * flag, which would skip the header update on every first modification.
 */
static void vdiSetModifiedFlag(PVDIIMAGEDESC pImage)
{
    pImage->fModified |= VDI_IMAGE_MODIFIED_FLAG;
    if (pImage->fModified & VDI_IMAGE_MODIFIED_FIRST)
    {
        /* Only the very first modification triggers the header update. */
        pImage->fModified &= ~VDI_IMAGE_MODIFIED_FIRST;

        /* first modify - generate uuidModify and save to file. */
        vdiResetModifiedFlag(pImage);

        if (!(pImage->fModified & VDI_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
        {
            /* save header to file,
             * note: no rc checking.
             */
            vdiUpdateHeader(pImage);
        }
    }
}

/**
 * internal: generate new uuidModify if the image was changed.
 *
 * Does nothing unless VDI_IMAGE_MODIFIED_FLAG is set. When it is set, a new
 * modification UUID is created unless VDI_IMAGE_MODIFIED_DISABLE_UUID_UPDATE
 * is set as well, and the modified flag is cleared afterwards. The disable
 * flag is tested with a bitwise AND; an OR would make the test false for any
 * non-zero flag and the UUID would never be regenerated.
 */
static void vdiResetModifiedFlag(PVDIIMAGEDESC pImage)
{
    if (pImage->fModified & VDI_IMAGE_MODIFIED_FLAG)
    {
        /* generate new last-modified uuid */
        if (!(pImage->fModified & VDI_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
            RTUuidCreate(getImageModificationUUID(&pImage->Header));

        pImage->fModified &= ~VDI_IMAGE_MODIFIED_FLAG;
    }
}

/**
 * Flush a file to disk using fsync().
 *
 * @returns VINF_SUCCESS when fsync() reports success, VERR_GENERAL_FAILURE
 *          otherwise (errno is not translated).
 * @param   File            File handle; it is cast to int and handed to fsync().
 */
int  RTFileFlush(RTFILE File)
{
    return fsync((int)File) ? VERR_GENERAL_FAILURE : VINF_SUCCESS;
}


/**
 * Flush the image file to disk.
 *
 * Read-only images are left untouched. For writable images the modification
 * UUID is refreshed if needed, the header is saved (a failure is only
 * printed) and the file is flushed.
 */
void vdiFlushImage(PVDIIMAGEDESC pImage)
{
    int rc;

    /* Nothing to save for a read-only image. */
    if (pImage->fReadOnly)
        return;

    /* Refresh the last-modified uuid when the image was changed. */
    vdiResetModifiedFlag(pImage);

    /* Write the header back, then push everything to disk. */
    rc = vdiUpdateHeader(pImage);
    AssertMsgRC(rc, ("vdiUpdateHeader() failed, filename=\"%s\", rc=%d\n",
                     pImage->szFilename, rc));
    RTFileFlush(pImage->File);
}

/**
 * internal: close image file.
 *
 * Flushes the image, closes the file and releases the block array and the
 * image descriptor itself; pImage must not be used afterwards. The file
 * unlock step is compiled out.
 */
static void vdiCloseImage(PVDIIMAGEDESC pImage)
{
    /* The descriptor and its file handle must be valid. */
    Assert(pImage);
    Assert(pImage->File != NIL_RTFILE);

    /* Save pending state before the handle goes away. */
    vdiFlushImage(pImage);
#if 0
    RTFileUnlock(pImage->File,
                 0,
                 pImage->offStartData
               + ((uint64_t)getImageBlocks(&pImage->Header) << pImage->uShiftIndex2Offset));
#endif
    RTFileClose(pImage->File);

    /* Release the block array first, then the descriptor that owns it. */
    RTMemFree(pImage->paBlocks);
    RTMemFree(pImage);
}

/**
 * internal: read data from inside a single image block.
 *
 * A block that is not allocated in the image file reads as zeroes; an
 * allocated block is read from its position in the file.
 *
 * note: uBlock must be valid, the read data must not cross the block bounds.
 */
static int vdiReadInBlock(PVDIIMAGEDESC pImage, unsigned uBlock, unsigned offRead,
                          unsigned cbToRead, void *pvBuf)
{
    /* Free and zero blocks have no backing data: hand out zeroes. */
    if (!IS_VDI_IMAGE_BLOCK_ALLOCATED(pImage->paBlocks[uBlock]))
    {
        memset(pvBuf, 0, cbToRead);
        return VINF_SUCCESS;
    }

    /* The block is present in the image file: locate it and read from there. */
    uint64_t offFile = ((uint64_t)pImage->paBlocks[uBlock] << pImage->uShiftIndex2Offset)
                     + (pImage->offStartData + pImage->offStartBlockData + offRead);
    int rc = RTFileSeek(pImage->File, offFile, RTFILE_SEEK_BEGIN, NULL);
    if (VBOX_SUCCESS(rc))
        rc = RTFileRead(pImage->File, pvBuf, cbToRead, NULL);
    if (VBOX_FAILURE(rc))
        Log(("vdiReadInBlock: rc=%d filename=\"%s\" uBlock=%u offRead=%u cbToRead=%u u64Offset=%llu\n",
             rc, pImage->szFilename, uBlock, offRead, cbToRead, offFile));
    return rc;
}

/**
 * Read data from virtual HDD.
 *
 * The request is split at block boundaries. When more than one image is open,
 * each block is taken from the newest image in the chain whose block entry is
 * not VDI_IMAGE_BLOCK_FREE; if every image has it free, the last image is used.
 *
 * @returns VBox status code.
 * @returns VERR_INVALID_PARAMETER if cbToRead is 0 or the range does not lie
 *          entirely inside the disk.
 * @param   pDisk           Pointer to VDI HDD container.
 * @param   offStart        Offset of first reading byte from start of disk.
 * @param   pvBuf           Pointer to buffer for reading data.
 * @param   cbToRead        Number of bytes to read.
 */
VBOXDDU_DECL(int) VDIDiskRead(PVDIDISK pDisk, uint64_t offStart, void *pvBuf, unsigned cbToRead)
{
    /* sanity check */
    Assert(pDisk);
    AssertMsg(pDisk->u32Signature == VDIDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature));

    PVDIIMAGEDESC pImage = pDisk->pLast;
    Assert(pImage);

    /* Check params. The range test is written without "offStart + cbToRead"
     * because that sum can wrap around in 64 bits and let a request far
     * beyond the end of the disk pass the check. */
    uint64_t cbDisk = getImageDiskSize(&pImage->Header);
    if (    cbToRead == 0
        ||  offStart > cbDisk
        ||  cbToRead > cbDisk - offStart)
    {
        AssertMsgFailed(("offStart=%llu cbToRead=%u\n", (unsigned long long)offStart, cbToRead));
        return VERR_INVALID_PARAMETER;
    }

    /* Calculate starting block number and offset inside it. */
    unsigned uBlock = (unsigned)(offStart >> pImage->uShiftOffset2Index);
    unsigned offRead = (unsigned)offStart & pImage->uBlockMask;

    /* Save block size here for speed optimization. */
    unsigned cbBlock = getImageBlockSize(&pImage->Header);

    /* loop through blocks */
    int rc;
    for (;;)
    {
        /* Never read across the end of the current block. */
        unsigned to_read;
        if ((offRead + cbToRead) <= cbBlock)
            to_read = cbToRead;
        else
            to_read = cbBlock - offRead;

        if (pDisk->cImages > 1)
        {
            /* Differencing images are used, handle them. */
            pImage = pDisk->pLast;

            /* Search for image with allocated block. */
            while (pImage->paBlocks[uBlock] == VDI_IMAGE_BLOCK_FREE)
            {
                pImage = pImage->pPrev;
                if (!pImage)
                {
                    /* Block is not allocated in all images of chain. */
                    pImage = pDisk->pLast;
                    break;
                }
            }
        }

        rc = vdiReadInBlock(pImage, uBlock, offRead, to_read, pvBuf);

        cbToRead -= to_read;
        if (    cbToRead == 0
            ||  VBOX_FAILURE(rc))
            break;

        /* goto next block */
        uBlock++;
        offRead = 0;
        pvBuf = (char *)pvBuf + to_read;
    }

    return rc;
}

/**
 * internal: overwrite a whole block in the image file with zeroes.
 *
 * The block is written in chunks because the temporary zero buffer may be
 * smaller than a block.
 *
 * note: block id must be valid, block must be already allocated in file.
 * note: if pDisk is NULL, the default buffer size is used
 */
static int vdiFillBlockByZeroes(PVDIDISK pDisk, PVDIIMAGEDESC pImage, unsigned uBlock)
{
    /* Position the file on the first data byte of the block. */
    uint64_t offBlock = ((uint64_t)pImage->paBlocks[uBlock] << pImage->uShiftIndex2Offset)
                      + (pImage->offStartData + pImage->offStartBlockData);
    int rc = RTFileSeek(pImage->File, offBlock, RTFILE_SEEK_BEGIN, NULL);
    if (VBOX_FAILURE(rc))
    {
        Log(("vdiFillBlockByZeroes: seek rc=%d filename=\"%s\" uBlock=%u u64Offset=%llu\n",
             rc, pImage->szFilename, uBlock, offBlock));
        return rc;
    }

    /* Temporary buffer that is zero-filled on allocation. */
    void *pvZeroes = RTMemTmpAllocZ(pDisk ? pDisk->cbBuf : VDIDISK_DEFAULT_BUFFER_SIZE);
    if (!pvZeroes)
        return VERR_NO_MEMORY;

    /* Write chunk after chunk until the whole block is covered. */
    unsigned cbLeft;
    unsigned cbChunk;
    for (cbLeft = getImageBlockSize(&pImage->Header); cbLeft > 0; cbLeft -= cbChunk)
    {
        cbChunk = RT_MIN(cbLeft, pDisk ? pDisk->cbBuf : VDIDISK_DEFAULT_BUFFER_SIZE);
        rc = RTFileWrite(pImage->File, pvZeroes, cbChunk, NULL);
        if (VBOX_FAILURE(rc))
        {
            Log(("vdiFillBlockByZeroes: write rc=%d filename=\"%s\" uBlock=%u u64Offset=%llu cbFill=%u to_fill=%u\n",
                 rc, pImage->szFilename, uBlock, offBlock, cbLeft, cbChunk));
            break;
        }
    }

    RTMemTmpFree(pvZeroes);
    return rc;
}

/* Kept unchanged in case other code in this file refers to them. */
typedef unsigned long            RTCCUINTREG;

#define RT_INLINE_ASM_GNU_STYLE 1

/**
 * Finds the first set bit in a bitmap.
 *
 * Bits are numbered byte by byte from the start of the bitmap, least
 * significant bit of each byte first. This is the same numbering that a scan
 * over little-endian 32-bit words produces.
 *
 * The implementation is plain C: it does not depend on x86 inline assembly
 * and reads the bitmap through a volatile pointer, so the compiler knows
 * that the memory is accessed.
 *
 * @returns Index of the first set bit.
 * @returns -1 if no set bit was found, or if cBits is 0.
 * @param   pvBitmap    Pointer to the bitmap.
 * @param   cBits       The number of bits in the bitmap. Multiple of 32; other
 *                      values are rounded up to the next multiple of 32, so the
 *                      bitmap must be readable up to that size.
 */
int ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
{
    const volatile unsigned char *pbBitmap = (const volatile unsigned char *)pvBitmap;
    /* Bytes to scan after rounding cBits up to whole 32-bit words; computed
     * in 64 bits so that the rounding itself cannot wrap around. */
    uint64_t cbScan = (((uint64_t)cBits + 31) >> 5) * 4;
    uint64_t offByte;

    for (offByte = 0; offByte < cbScan; offByte++)
    {
        unsigned char bCur = pbBitmap[offByte];
        if (bCur)
        {
            /* Found a non-zero byte: locate its lowest set bit. */
            uint32_t iBit = (uint32_t)offByte * 8;
            while (!(bCur & 1))
            {
                bCur >>= 1;
                iBit++;
            }
            return (int)iBit;
        }
    }
    return -1;
}


/**
 * internal: write data inside image block.
 *
 * If the block is not yet allocated in the image file, it is either kept
 * unallocated and marked as a zero block (when the data is all zeroes and
 * zero writes need not be honored) or a new block is appended to the file
 * and the block pointer and header are saved before the data is written.
 *
 * note: uBlock must be valid, written data must not overlap block bounds.
 *
 * @returns VBox status code; VERR_WRITE_PROTECT for a read-only image.
 * @param   pDisk       HDD container; may be NULL, in which case zero writes
 *                      are not honored and the default buffer size is used
 *                      for zero filling.
 * @param   pImage      Image to write to.
 * @param   uBlock      Block index.
 * @param   offWrite    Offset of the write inside the block.
 * @param   cbToWrite   Number of bytes to write.
 * @param   pvBuf       Data to write.
 */
static int vdiWriteInBlock(PVDIDISK pDisk, PVDIIMAGEDESC pImage, unsigned uBlock, unsigned offWrite, unsigned cbToWrite, const void *pvBuf)
{
    int rc;

    /* Check if we can write into file. */
    if (pImage->fReadOnly)
    {
        Log(("vdiWriteInBlock: failed, image \"%s\" is read-only!\n", pImage->szFilename));
        return VERR_WRITE_PROTECT;
    }

    /* This could be optimized a little (not setting it when writing zeroes
     * to a zeroed block). Won't buy us much, because it's very unlikely
     * that only such zero data block writes occur while the VDI is opened. */
    vdiSetModifiedFlag(pImage);

    if (!IS_VDI_IMAGE_BLOCK_ALLOCATED(pImage->paBlocks[uBlock]))
    {
        if (!pDisk || !pDisk->fHonorZeroWrites)
        {
            /* If the destination block is unallocated at this point, it's either
             * a zero block or a block which hasn't been used so far (which also
             * means that it's a zero block. Don't need to write anything to this
             * block if the data consists of just zeroes. */
            Assert(cbToWrite % 4 == 0);
            if (ASMBitFirstSet((volatile void *)pvBuf, cbToWrite * 8) == -1)
            {
                pImage->paBlocks[uBlock] = VDI_IMAGE_BLOCK_ZERO;
                return VINF_SUCCESS;
            }
        }

        /* need to allocate a new block in image file */

        /* Remember whether the block was an explicit zero block. This must be
         * sampled before the entry is replaced by the new block index below;
         * testing the entry afterwards compares the fresh index against the
         * zero marker and so never detects a former zero block. */
        int fWasZeroBlock = pImage->paBlocks[uBlock] == VDI_IMAGE_BLOCK_ZERO;

        /* expand file by one block */
        uint64_t u64Size = (((uint64_t)(getImageBlocksAllocated(&pImage->Header) + 1)) << pImage->uShiftIndex2Offset)
                         + pImage->offStartData;
        rc = RTFileSetSize(pImage->File, u64Size);
        if (VBOX_FAILURE(rc))
        {
            Log(("vdiWriteInBlock: set size rc=%d filename=\"%s\" uBlock=%u u64Size=%llu\n",
                 rc, pImage->szFilename, uBlock, u64Size));
            return rc;
        }

        /* The new block is appended, so its index is the old allocation count. */
        unsigned cBlocksAllocated = getImageBlocksAllocated(&pImage->Header);
        pImage->paBlocks[uBlock] = cBlocksAllocated;
        setImageBlocksAllocated(&pImage->Header, cBlocksAllocated + 1);

        if (    (pImage->fFlags & VDI_IMAGE_FLAGS_ZERO_EXPAND)
            ||  fWasZeroBlock)
        {
            /* Fill newly allocated block by zeroes, unless the write covers
             * the complete block anyway. */

            if (offWrite || cbToWrite != getImageBlockSize(&pImage->Header))
            {
                rc = vdiFillBlockByZeroes(pDisk, pImage, uBlock);
                if (VBOX_FAILURE(rc))
                    return rc;
            }
        }

        rc = vdiUpdateBlockInfo(pImage, uBlock);
        if (VBOX_FAILURE(rc))
            return rc;
    }

    /* Now block present in image file, write data inside it. */
    uint64_t u64Offset = ((uint64_t)pImage->paBlocks[uBlock] << pImage->uShiftIndex2Offset)
                       + (pImage->offStartData + pImage->offStartBlockData + offWrite);
    rc = RTFileSeek(pImage->File, u64Offset, RTFILE_SEEK_BEGIN, NULL);
    if (VBOX_SUCCESS(rc))
    {
        rc = RTFileWrite(pImage->File, pvBuf, cbToWrite, NULL);
        if (VBOX_FAILURE(rc))
            Log(("vdiWriteInBlock: write rc=%d filename=\"%s\" uBlock=%u offWrite=%u u64Offset=%llu cbToWrite=%u\n",
                 rc, pImage->szFilename, uBlock, offWrite, u64Offset, cbToWrite));
    }
    else
        Log(("vdiWriteInBlock: seek rc=%d filename=\"%s\" uBlock=%u offWrite=%u u64Offset=%llu\n",
             rc, pImage->szFilename, uBlock, offWrite, u64Offset));

    return rc;
}

/**
 * internal: copy one data block from a (parent) image into the last image.
 *
 * A source block marked as zero block is not copied; the target block is
 * marked as zero block instead. Everything else is transferred through a
 * temporary buffer of pDisk->cbBuf bytes, chunk by chunk.
 */
static int vdiCopyBlock(PVDIDISK pDisk, PVDIIMAGEDESC pImage, unsigned uBlock)
{
    Assert(pImage != pDisk->pLast);

    /* Zero source block: mark the destination as zero block too. */
    if (pImage->paBlocks[uBlock] == VDI_IMAGE_BLOCK_ZERO)
    {
        pDisk->pLast->paBlocks[uBlock] = VDI_IMAGE_BLOCK_ZERO;
        return VINF_SUCCESS;
    }

    /* Transfer buffer. */
    void *pvChunk = RTMemTmpAlloc(pDisk->cbBuf);
    if (!pvChunk)
        return VERR_NO_MEMORY;

    int rc = VINF_SUCCESS;
    unsigned cbLeft = getImageBlockSize(&pImage->Header);
    unsigned offInBlock;
    unsigned cbChunk;

    /* The buffer may be smaller than a block, so go chunk by chunk. */
    for (offInBlock = 0; cbLeft > 0; cbLeft -= cbChunk, offInBlock += cbChunk)
    {
        cbChunk = RT_MIN(cbLeft, pDisk->cbBuf);

        rc = vdiReadInBlock(pImage, uBlock, offInBlock, cbChunk, pvChunk);
        if (VBOX_SUCCESS(rc))
            rc = vdiWriteInBlock(pDisk, pDisk->pLast, uBlock, offInBlock, cbChunk, pvChunk);
        if (VBOX_FAILURE(rc))
            break;
    }

    RTMemTmpFree(pvChunk);
    return rc;
}

/**
 * Write data to virtual HDD.
 *
 * The request is split at block boundaries. When more than one image is open
 * and the block is only present in a parent image, the parent's block is
 * first copied into the last image; the data is always written to the last
 * image.
 *
 * @returns VBox status code.
 * @returns VERR_INVALID_PARAMETER if cbToWrite is 0 or the range does not lie
 *          entirely inside the disk.
 * @param   pDisk           Pointer to VDI HDD container.
 * @param   offStart        Offset of first writing byte from start of HDD.
 * @param   pvBuf           Pointer to buffer of writing data.
 * @param   cbToWrite       Number of bytes to write.
 */
VBOXDDU_DECL(int) VDIDiskWrite(PVDIDISK pDisk, uint64_t offStart, const void *pvBuf, unsigned cbToWrite)
{
    /* sanity check */
    Assert(pDisk);
    AssertMsg(pDisk->u32Signature == VDIDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature));

    PVDIIMAGEDESC pImage = pDisk->pLast;
    Assert(pImage);

    /* Check params. The range test is written without "offStart + cbToWrite"
     * because that sum can wrap around in 64 bits and let a request far
     * beyond the end of the disk pass the check. */
    uint64_t cbDisk = getImageDiskSize(&pImage->Header);
    if (    cbToWrite == 0
        ||  offStart > cbDisk
        ||  cbToWrite > cbDisk - offStart)
    {
        /* "%llu" replaces "%Ld": the L length modifier is not valid for
         * integer conversions. */
        AssertMsgFailed(("offStart=%llu cbToWrite=%u, imageDisk=%llu\n",
                         (unsigned long long)offStart, cbToWrite, (unsigned long long)cbDisk));
        return VERR_INVALID_PARAMETER;
    }

    /* Calculate starting block number and offset inside it. */
    unsigned uBlock   = (unsigned)(offStart >> pImage->uShiftOffset2Index);
    unsigned offWrite = (unsigned)offStart   & pImage->uBlockMask;
    unsigned cbBlock  = getImageBlockSize(&pImage->Header);

    /* loop through blocks */
    int rc;
    for (;;)
    {
        /* Never write across the end of the current block. */
        unsigned to_write;
        if (offWrite + cbToWrite <= cbBlock)
            to_write = cbToWrite;
        else
            to_write = cbBlock - offWrite;

        /* All callers write less than a VDI block right now (assuming
         * default VDI block size). So not worth optimizing for the case
         * where a full block is overwritten (no copying required).
         * Checking whether a block is all zeroes after the write is too
         * expensive (would require reading the rest of the block). */

        if (pDisk->cImages > 1)
        {
            /* Differencing images are used, handle them. */

            /* Search for image with allocated block. pImage is the last
             * image at this point on every iteration. */
            while (pImage->paBlocks[uBlock] == VDI_IMAGE_BLOCK_FREE)
            {
                pImage = pImage->pPrev;
                if (!pImage)
                {
                    /* Block is not allocated in all images of chain. */
                    pImage = pDisk->pLast;
                    break;
                }
            }

            if (pImage != pDisk->pLast)
            {
                /* One of parent image has a block data, copy it into last image. */
                rc = vdiCopyBlock(pDisk, pImage, uBlock);
                if (VBOX_FAILURE(rc))
                    break;
                pImage = pDisk->pLast;
            }
        }

        /* Actually write the data into block. */
        rc = vdiWriteInBlock(pDisk, pImage, uBlock, offWrite, to_write, pvBuf);

        cbToWrite -= to_write;
        if (    cbToWrite == 0
            || VBOX_FAILURE(rc))
            break;

        /* goto next block */
        uBlock++;
        offWrite = 0;
        pvBuf = (char *)pvBuf + to_write;
    }

    return rc;
}

/**
 * Creates a new base image file.
 *
 * The arguments are validated first: the file name must be non-empty, the
 * type must be one of the two base image types and the size must be at least
 * VDI_IMAGE_DEFAULT_BLOCK_SIZE. The actual work is done by vdiCreateImage().
 *
 * @returns VBox status code.
 * @returns VERR_INVALID_PARAMETER if an argument is rejected.
 * @param   pszFilename     Name of the creating image file.
 * @param   enmType         Image type, only base image types are acceptable.
 * @param   cbSize          Image size in bytes.
 * @param   pszComment      Pointer to image comment. NULL is ok.
 * @param   pfnProgress     Progress callback. Optional.
 * @param   pvUser          User argument for the progress callback.
 */
VBOXDDU_DECL(int) VDICreateBaseImage(const char *pszFilename, VDIIMAGETYPE enmType, uint64_t cbSize,
                                     const char *pszComment, PFNVMPROGRESS pfnProgress, void *pvUser)
{
    int rc;

    LogFlow(("VDICreateBaseImage:\n"));

    /* Reject missing or empty names, non-base types and too small disks. */
    if (   !pszFilename
        || *pszFilename == '\0'
        || !(enmType == VDI_IMAGE_TYPE_NORMAL || enmType == VDI_IMAGE_TYPE_FIXED)
        || cbSize < VDI_IMAGE_DEFAULT_BLOCK_SIZE)
    {
        AssertMsgFailed(("Invalid arguments: pszFilename=%p enmType=%x cbSize=%llu\n",
                         pszFilename, enmType, cbSize));
        return VERR_INVALID_PARAMETER;
    }

    /* A base image has no parent. */
    rc = vdiCreateImage(pszFilename, enmType, VDI_IMAGE_FLAGS_DEFAULT, cbSize, pszComment, NULL,
                        pfnProgress, pvUser);
    LogFlow(("VDICreateBaseImage: returns %d for filename=\"%s\"\n", rc, pszFilename));
    return rc;
}


/**
 * Creates a differencing dynamically growing image file for specified parent image.
 *
 * The parent is opened read-only, its disk size is used for the new image,
 * and it is closed again once the new image has been created.
 *
 * @returns VBox status code.
 * @returns VERR_INVALID_PARAMETER if a file name is NULL or empty.
 * @param   pszFilename     Name of the creating differencing image file.
 * @param   pszParent       Name of the parent image file. May be base or diff image type.
 * @param   pszComment      Pointer to image comment. NULL is ok.
 * @param   pfnProgress     Progress callback. Optional.
 * @param   pvUser          User argument for the progress callback.
 */
VBOXDDU_DECL(int) VDICreateDifferenceImage(const char *pszFilename, const char *pszParent,
                                           const char *pszComment, PFNVMPROGRESS pfnProgress,
                                           void *pvUser)
{
    PVDIIMAGEDESC pParent;
    int rc;

    LogFlow(("VDICreateDifferenceImage:\n"));

    /* Both file names are mandatory and must not be empty. */
    if (   !pszFilename || *pszFilename == '\0'
        || !pszParent   || *pszParent == '\0')
    {
        AssertMsgFailed(("Invalid arguments: pszFilename=%p pszParent=%p\n",
                         pszFilename, pszParent));
        return VERR_INVALID_PARAMETER;
    }

    /* Without a readable parent there is nothing to derive from. */
    rc = vdiOpenImage(&pParent, pszParent, VDI_OPEN_FLAGS_READONLY, NULL);
    if (VBOX_FAILURE(rc))
    {
        LogFlow(("VDICreateDifferenceImage: returns %d for filename=\"%s\"\n", rc, pszFilename));
        return rc;
    }

    /* The new image takes over the disk size of its parent. */
    rc = vdiCreateImage(pszFilename, VDI_IMAGE_TYPE_DIFF, VDI_IMAGE_FLAGS_DEFAULT,
                        getImageDiskSize(&pParent->Header), pszComment, pParent,
                        pfnProgress, pvUser);
    vdiCloseImage(pParent);

    LogFlow(("VDICreateDifferenceImage: returns %d for filename=\"%s\"\n", rc, pszFilename));
    return rc;
}

/**
 * Initialize the VDIDISK structure.
 *
 * Sets the signature, an empty image list, the default block and buffer
 * sizes, and turns off honoring of zero writes.
 */
void vdiInitVDIDisk(PVDIDISK pDisk)
{
    Assert(pDisk);

    /* Identification. */
    pDisk->u32Signature = VDIDISK_SIGNATURE;

    /* No image is opened yet. */
    pDisk->pBase   = NULL;
    pDisk->pLast   = NULL;
    pDisk->cImages = 0;

    /* Defaults. */
    pDisk->cbBuf   = VDIDISK_DEFAULT_BUFFER_SIZE;
    pDisk->cbBlock = VDI_IMAGE_DEFAULT_BLOCK_SIZE;
    pDisk->fHonorZeroWrites = false;
}

/**
 * internal: append an image descriptor to the end of the container's image list.
 *
 * The first image added becomes both base and last image; every further
 * image is linked behind the current last one. The image count is increased.
 */
static void vdiAddImageToList(PVDIDISK pDisk, PVDIIMAGEDESC pImage)
{
    /* The new image always ends the list. */
    pImage->pNext = NULL;

    if (!pDisk->pBase)
    {
        /* Empty list: the image is the base as well. */
        Assert(pDisk->cImages == 0);
        pImage->pPrev = NULL;
        pDisk->pBase  = pImage;
    }
    else
    {
        /* Link behind the current last image. */
        Assert(pDisk->cImages > 0);
        pImage->pPrev       = pDisk->pLast;
        pDisk->pLast->pNext = pImage;
    }

    pDisk->pLast = pImage;
    pDisk->cImages++;
}

/**
 * internal: unlink an image descriptor from the container's image list.
 *
 * Neighbours are linked to each other, base/last pointers of the container
 * are adjusted when the image was at either end, the image's own links are
 * cleared and the image count is decreased. The image itself is not freed.
 */
static void vdiRemoveImageFromList(PVDIDISK pDisk, PVDIIMAGEDESC pImage)
{
    PVDIIMAGEDESC pBefore = pImage->pPrev;
    PVDIIMAGEDESC pAfter  = pImage->pNext;

    Assert(pDisk->cImages > 0);

    /* Fix the forward link pointing at the image. */
    if (pBefore)
        pBefore->pNext = pAfter;
    else
        pDisk->pBase = pAfter;

    /* Fix the backward link pointing at the image. */
    if (pAfter)
        pAfter->pPrev = pBefore;
    else
        pDisk->pLast = pBefore;

    /* The image no longer belongs to any list. */
    pImage->pNext = NULL;
    pImage->pPrev = NULL;

    pDisk->cImages--;
}

/**
 * Allocates and initializes VDI HDD container.
 *
 * @returns Pointer to the newly created HDD container; no image file is
 *          opened in it yet.
 * @returns NULL if the allocation failed.
 */
VBOXDDU_DECL(PVDIDISK) VDIDiskCreate(void)
{
    PVDIDISK pDisk = (PVDIDISK)RTMemAllocZ(sizeof(VDIDISK));

    /* Only a successfully allocated container can be initialized. */
    if (pDisk != NULL)
        vdiInitVDIDisk(pDisk);

    LogFlow(("VDIDiskCreate: returns pDisk=%X\n", pDisk));
    return pDisk;
}

/**
 * Closes all opened image files in HDD container.
 *
 * Images are removed from the list and closed one by one, starting with the
 * last image and walking towards the base image.
 *
 * @param   pDisk           Pointer to VDI HDD container.
 */
VBOXDDU_DECL(void) VDIDiskCloseAllImages(PVDIDISK pDisk)
{
    PVDIIMAGEDESC pImage;
    PVDIIMAGEDESC pPrev;

    LogFlow(("VDIDiskCloseAllImages:\n"));
    /* sanity check */
    Assert(pDisk);
    AssertMsg(pDisk->u32Signature == VDIDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature));

    for (pImage = pDisk->pLast; pImage; pImage = pPrev)
    {
        /* Fetch the predecessor first: closing frees the descriptor. */
        pPrev = pImage->pPrev;
        vdiRemoveImageFromList(pDisk, pImage);
        vdiCloseImage(pImage);
    }
    Assert(pDisk->pLast == NULL);
}

/**
 * Destroys VDI HDD container. If container has opened image files they will be closed.
 *
 * The container memory is freed; pDisk must not be used afterwards.
 *
 * @param   pDisk           Pointer to VDI HDD container.
 */
VBOXDDU_DECL(void) VDIDiskDestroy(PVDIDISK pDisk)
{
    LogFlow(("VDIDiskDestroy: pDisk=%X\n", pDisk));
    /* sanity check */
    Assert(pDisk);
    AssertMsg(pDisk->u32Signature == VDIDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature));

    if (!pDisk)
        return;

    /* Close whatever is still open, then release the container. */
    VDIDiskCloseAllImages(pDisk);
    RTMemFree(pDisk);
}

/**
 * Get disk size of VDI HDD container.
 *
 * The size is taken from the header of the base image.
 *
 * @returns Virtual disk size in bytes.
 * @returns 0 if no VDI image is opened in the HDD container.
 */
VBOXDDU_DECL(uint64_t) VDIDiskGetSize(PVDIDISK pDisk)
{
    /* sanity check */
    Assert(pDisk);
    AssertMsg(pDisk->u32Signature == VDIDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature));

    /* Without a base image there is no size to report. */
    if (!pDisk->pBase)
    {
        AssertMsgFailed(("No one disk image is opened!\n"));
        return 0;
    }

    LogFlow(("VDIDiskGetSize: returns %llu\n", getImageDiskSize(&pDisk->pBase->Header)));
    return getImageDiskSize(&pDisk->pBase->Header);
}

/**
 * Get virtual disk geometry stored in image file.
 *
 * The geometry is read from the header of the base image. The output
 * parameters are only written when all three stored values are greater
 * than zero.
 *
 * @returns VBox status code.
 * @returns VERR_VDI_NOT_OPENED if no VDI image is opened in the HDD container.
 * @returns VERR_VDI_GEOMETRY_NOT_SET if no geometry has been set.
 * @param   pDisk           Pointer to VDI HDD container.
 * @param   pcCylinders     Where to store the number of cylinders. NULL is ok.
 * @param   pcHeads         Where to store the number of heads. NULL is ok.
 * @param   pcSectors       Where to store the number of sectors. NULL is ok.
 */
VBOXDDU_DECL(int) VDIDiskGetGeometry(PVDIDISK pDisk, unsigned *pcCylinders, unsigned *pcHeads, unsigned *pcSectors)
{
    /* sanity check */
    Assert(pDisk);
    AssertMsg(pDisk->u32Signature == VDIDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature));

    /* The geometry is stored in the base image. */
    if (!pDisk->pBase)
    {
        AssertMsgFailed(("No one disk image is opened!\n"));
        return VERR_VDI_NOT_OPENED;
    }

    PVDIDISKGEOMETRY pGeometry = getImageGeometry(&pDisk->pBase->Header);
    LogFlow(("VDIDiskGetGeometry: C/H/S = %u/%u/%u\n",
             pGeometry->cCylinders, pGeometry->cHeads, pGeometry->cSectors));

    int rc;
    if (!(   pGeometry->cCylinders > 0
          && pGeometry->cHeads > 0
          && pGeometry->cSectors > 0))
        rc = VERR_VDI_GEOMETRY_NOT_SET;
    else
    {
        /* Hand out only the values the caller asked for. */
        rc = VINF_SUCCESS;
        if (pcCylinders)
            *pcCylinders = pGeometry->cCylinders;
        if (pcHeads)
            *pcHeads = pGeometry->cHeads;
        if (pcSectors)
            *pcSectors = pGeometry->cSectors;
    }

    LogFlow(("VDIDiskGetGeometry: returns %d\n", rc));
    return rc;
}

/**
 * Get virtual disk translation mode stored in image file.
 *
 * The mode is read from the header of the base image.
 *
 * @returns VBox status code.
 * @returns VERR_VDI_NOT_OPENED if no VDI image is opened in the HDD container.
 * @param   pDisk           Pointer to VDI HDD container.
 * @param   penmTranslation Where to store the translation mode (see pdm.h).
 */
VBOXDDU_DECL(int) VDIDiskGetTranslation(PVDIDISK pDisk, PPDMBIOSTRANSLATION penmTranslation)
{
    /* sanity check */
    Assert(pDisk);
    AssertMsg(pDisk->u32Signature == VDIDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature));
    Assert(penmTranslation);

    /* Without a base image there is no header to read from. */
    if (!pDisk->pBase)
    {
        AssertMsgFailed(("No one disk image is opened!\n"));
        return VERR_VDI_NOT_OPENED;
    }

    *penmTranslation = getImageTranslation(&pDisk->pBase->Header);
    LogFlow(("VDIDiskGetTranslation: translation=%d\n", *penmTranslation));
    return VINF_SUCCESS;
}


/**
 * Opens an image file.
 *
 * The first opened image file in a HDD container must have a base image type,
 * others (next opened images) must be a differencing or undo images.
 * Linkage is checked for differencing image to be in consistence with the previously opened image.
 * When a next differencing image is opened and the last image was opened in read/write access
 * mode, then the last image is reopened in read-only with deny write sharing mode. This allows
 * other processes to use images in read-only mode too.
 *
 * Note that the image can be opened in read-only mode if a read/write open is not possible.
 * Use VDIDiskIsReadOnly to check open mode.
 *
 * Local extension: if the first image opened is not a base image, the text
 * stored in its comment field is taken as the file name of its parent. The
 * parent is opened read-only first (recursively), then the requested image
 * is opened again on top of it. If the parent cannot be opened the call
 * fails with VERR_VDI_INVALID_TYPE.
 *
 * @returns VBox status code.
 * @param   pDisk           Pointer to VDI HDD container.
 * @param   pszFilename     Name of the image file to open.
 * @param   fOpen           Image file open mode, see VDI_OPEN_FLAGS_* constants.
 */
VBOXDDU_DECL(int) VDIDiskOpenImage(PVDIDISK pDisk, const char *pszFilename, unsigned fOpen)
{
    PVDIIMAGEDESC pImage;
    int rc;

    /* sanity check */
    Assert(pDisk);
    AssertMsg(pDisk->u32Signature == VDIDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature));

    /* A non-empty file name and only known open flags are accepted. */
    if (   !pszFilename
        || *pszFilename == '\0'
        || (fOpen & ~VDI_OPEN_FLAGS_MASK) != 0)
    {
        AssertMsgFailed(("Invalid arguments: pszFilename=%p fOpen=%x\n", pszFilename, fOpen));
        return VERR_INVALID_PARAMETER;
    }
    LogFlow(("VDIDiskOpenImage: pszFilename=\"%s\" fOpen=%X\n", pszFilename, fOpen));

    rc = vdiOpenImage(&pImage, pszFilename, fOpen, pDisk->pLast);
    if (VBOX_FAILURE(rc))
    {
        LogFlow(("VDIDiskOpenImage: returns %d\n", rc));
        return rc;
    }

    /* The first image of a container has to be a base image. */
    if (   pDisk->pLast == NULL
        && !(   getImageType(&pImage->Header) == VDI_IMAGE_TYPE_NORMAL
             || getImageType(&pImage->Header) == VDI_IMAGE_TYPE_FIXED))
    {
        /* E. Rouault : add this hack to handle base images */
        rc = VDIDiskOpenImage(pDisk, getImageComment(&pImage->Header), VDI_OPEN_FLAGS_READONLY);
        if (VBOX_SUCCESS(rc))
        {
            /* The parent is in place now; open the image again on top of it. */
            vdiCloseImage(pImage);
            return VDIDiskOpenImage(pDisk, pszFilename, fOpen);
        }
        rc = VERR_VDI_INVALID_TYPE;
    }

    /* Keep the image on success, drop it otherwise. */
    if (VBOX_FAILURE(rc))
        vdiCloseImage(pImage);
    else
        vdiAddImageToList(pDisk, pImage);

    LogFlow(("VDIDiskOpenImage: returns %d\n", rc));
    return rc;
}

/**
 * Probe callback: decide whether a buffer looks like the start of a VDI image.
 *
 * @returns 2 if the pre-header validates and the major header version is
 *          0 or 1.
 * @returns 0 otherwise, including when the buffer is too small to hold a
 *          complete pre-header.
 * @param   buf         Start of the file contents.
 * @param   buf_size    Number of valid bytes in buf.
 * @param   filename    Unused.
 */
static int VDI_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    VDIPREHEADER* preheader = (VDIPREHEADER*)buf;
    VDIHEADER fakeHeader;

    /* A short buffer simply is not a VDI image. This must not be an
     * assert(): a short probe buffer is ordinary input and must not abort
     * the process. The explicit sign check keeps a negative size from being
     * converted to a huge unsigned value in the comparison. */
    if (buf_size < 0 || (size_t)buf_size < sizeof(VDIPREHEADER))
        return 0;

    if (!RT_SUCCESS(vdiValidatePreHeader(preheader)))
        return 0;

    /* Only the version field is needed to extract the major version. */
    fakeHeader.uVersion = preheader->u32Version;
    switch (GET_MAJOR_HEADER_VERSION(&fakeHeader))
    {
        case 0:
        case 1:
            return 2;

        default:
            return 0;
    }
}


/* Per-device state of the VDI block driver. */
typedef struct BDRVVDIState {
    PVDIDISK vdiDisk; /* HDD container: created in VDI_open, destroyed in VDI_close */
} BDRVVDIState;

/**
 * Open callback: create a VDI HDD container for the image file and fill in
 * the geometry, translation mode and size of the block device.
 *
 * Failures are reported to the caller instead of being handled by assert():
 * an assert aborts the whole process on an unreadable image, and compiles to
 * nothing when NDEBUG is defined.
 *
 * @returns 0 on success, -1 if the container cannot be allocated or the
 *          image cannot be opened.
 * @param   bs          Block device state; bs->opaque is the BDRVVDIState.
 * @param   filename    Name of the image file.
 * @param   flags       Open flags; read/write access is requested when
 *                      (flags & BDRV_O_ACCESS) == O_RDWR.
 */
static int VDI_open(BlockDriverState *bs, const char *filename, int flags)
{
    int rc;
    BDRVVDIState *s = bs->opaque;
    PDMBIOSTRANSLATION translation;
    int fReadWrite = (flags & BDRV_O_ACCESS) == O_RDWR;

    s->vdiDisk = VDIDiskCreate();
    if (!s->vdiDisk)
        return -1;

    rc = VDIDiskOpenImage(s->vdiDisk, filename,
                          fReadWrite ? VDI_OPEN_FLAGS_NORMAL : VDI_OPEN_FLAGS_READONLY);
    if (RT_FAILURE(rc))
    {
        /* Release the container together with any image already opened in it. */
        VDIDiskDestroy(s->vdiDisk);
        s->vdiDisk = NULL;
        return -1;
    }

    if (!fReadWrite)
    {
        bs->read_only = 1;
    }

    /* When no geometry is stored in the image, the fields of bs keep their
     * current values. */
    VDIDiskGetGeometry(s->vdiDisk, &bs->cyls, &bs->heads, &bs->secs);

    /* Map the stored translation mode; anything unknown, or a failure to
     * read it, ends up as automatic translation. */
    rc = VDIDiskGetTranslation(s->vdiDisk, &translation);
    if (RT_FAILURE(rc))
        bs->translation = BIOS_ATA_TRANSLATION_AUTO;
    else if (translation == PDMBIOSTRANSLATION_NONE)
        bs->translation = BIOS_ATA_TRANSLATION_NONE;
    else if (translation == PDMBIOSTRANSLATION_LBA)
        bs->translation = BIOS_ATA_TRANSLATION_LBA;
    else
        bs->translation = BIOS_ATA_TRANSLATION_AUTO;

    bs->total_sectors = VDIDiskGetSize(s->vdiDisk) / 512;

    return 0;
}

/**
 * Read sectors from the VDI disk.
 *
 * @param bs          Block device whose opaque state holds the VDI disk.
 * @param sector_num  First sector to read (512-byte units).
 * @param buf         Destination buffer for nb_sectors * 512 bytes.
 * @param nb_sectors  Number of 512-byte sectors to read.
 * @return 0 on success, -1 if VDIDiskRead() reports a failure.
 */
static int VDI_read(BlockDriverState *bs, int64_t sector_num, 
                    uint8_t *buf, int nb_sectors)
{
    BDRVVDIState *s = bs->opaque;
    int rc;

    rc = VDIDiskRead(s->vdiDisk, sector_num * 512, buf, nb_sectors * 512);

    /*
     * Propagate I/O errors to the caller.  The former assert() aborted the
     * process on a read error and, with NDEBUG, reported success instead.
     */
    if (RT_FAILURE(rc))
        return -1;

    return 0;
}

/**
 * Write sectors to the VDI disk.
 *
 * @param bs          Block device whose opaque state holds the VDI disk.
 * @param sector_num  First sector to write (512-byte units).
 * @param buf         Source buffer holding nb_sectors * 512 bytes.
 * @param nb_sectors  Number of 512-byte sectors to write.
 * @return 0 on success, -1 if VDIDiskWrite() reports a failure.
 */
static int VDI_write(BlockDriverState *bs, int64_t sector_num, 
                     const uint8_t *buf, int nb_sectors)
{
    BDRVVDIState *s = bs->opaque;
    int rc;

    rc = VDIDiskWrite(s->vdiDisk, sector_num * 512, buf, nb_sectors * 512);

    /*
     * Propagate I/O errors to the caller.  The former assert() aborted the
     * process on a write error and, with NDEBUG, reported success instead.
     */
    if (RT_FAILURE(rc))
        return -1;

    return 0;
}

/**
 * Report the size of the opened disk.
 *
 * @param bs  Block device whose opaque state holds the VDI disk.
 * @return The value of VDIDiskGetSize() for that disk (VDI_open divides the
 *         same value by 512 to obtain the sector count).
 */
static int64_t VDI_getlength(BlockDriverState *bs)
{
    BDRVVDIState *state = bs->opaque;

    return VDIDiskGetSize(state->vdiDisk);
}


/**
 * Create a new VDI image.
 *
 * Recognised options are BLOCK_OPT_SIZE (image size in bytes, rounded down
 * to a multiple of 512) and BLOCK_OPT_BACKING_FILE; any other option is
 * ignored.  With a backing file a difference image is created, otherwise a
 * base image of type VDI_IMAGE_TYPE_NORMAL.
 *
 * @param filename  Path of the image to create.
 * @param options   Option list terminated by an entry with a NULL name;
 *                  may be NULL.
 * @return 0 on success, -1 if the VDI layer fails to create the image.
 */
static int VDI_create(const char *filename, QEMUOptionParameter *options)
{
    int64_t total_size = 0;         /* requested size in 512-byte sectors */
    const char *backing_file = NULL;
    int rc;

    /* Read out options */
    while (options && options->name)
    {
        if (!strcmp(options->name, BLOCK_OPT_SIZE))
            total_size = options->value.n / 512;
        else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE))
            backing_file = options->value.s;
        options++;
    }

    /*
     * The creation calls must not live inside assert(): with NDEBUG the
     * whole expression is compiled out and no image would be created, while
     * without it a failure would abort instead of being reported.
     */
    if (backing_file)
    {
        /*
         * backing_file is passed as both the second and third argument,
         * as in the original.
         * NOTE(review): the third argument is presumably a description
         * string - confirm against VDICreateDifferenceImage().
         */
        rc = VDICreateDifferenceImage(filename, backing_file, backing_file,
                                      NULL, NULL);
    }
    else
    {
        rc = VDICreateBaseImage(filename, VDI_IMAGE_TYPE_NORMAL,
                                total_size * 512, NULL, NULL, NULL);
    }

    return RT_SUCCESS(rc) ? 0 : -1;
}

/**
 * Close the block device by destroying its VDI disk.
 *
 * @param bs  Block device whose opaque state holds the VDI disk.
 */
static void VDI_close(BlockDriverState *bs)
{
    BDRVVDIState *state = bs->opaque;

    VDIDiskDestroy(state->vdiDisk);
}

/* Driver description for the "vdi" format: wires the VDI_* wrappers above
 * into the block layer's callback table. */
static BlockDriver bdrv_vdi = {
    .format_name = "vdi",
    .instance_size = sizeof(BDRVVDIState), /* size of the per-device opaque state */
    .bdrv_probe = VDI_probe,
    .bdrv_open = VDI_open,
    .bdrv_read = VDI_read,
    .bdrv_write = VDI_write,
    .bdrv_close = VDI_close,
    .bdrv_create = VDI_create,
    .bdrv_getlength = VDI_getlength,
};


/* Register the "vdi" driver description with the block layer. */
static void bdrv_vdi_init(void)
{
    bdrv_register(&bdrv_vdi);
}

/* Hand the init hook to block_init; presumably this arranges for it to run
 * during block-layer start-up - confirm against the block_init definition. */
block_init(bdrv_vdi_init);

