# Copyright 2010 Google Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import base64
import binascii
import os
import re
import StringIO
from boto.exception import BotoClientError
from boto.s3.key import Key as S3Key
from boto.s3.keyfile import KeyFile

class Key(S3Key):
    """
    Represents a key (object) in a GS bucket.

    :ivar bucket: The parent :class:`boto.gs.bucket.Bucket`.
    :ivar name: The name of this Key object.
    :ivar metadata: A dictionary containing user metadata that you
        wish to store with the object or that has been retrieved from
        an existing object.
    :ivar cache_control: The value of the `Cache-Control` HTTP header.
    :ivar content_type: The value of the `Content-Type` HTTP header.
    :ivar content_encoding: The value of the `Content-Encoding` HTTP header.
    :ivar content_disposition: The value of the `Content-Disposition` HTTP
        header.
    :ivar content_language: The value of the `Content-Language` HTTP header.
    :ivar etag: The `etag` associated with this object.
    :ivar last_modified: The string timestamp representing the last
        time this object was modified in GS.
    :ivar owner: The ID of the owner of this object.
    :ivar storage_class: The storage class of the object.  Currently, one of:
        STANDARD | DURABLE_REDUCED_AVAILABILITY.
    :ivar md5: The MD5 hash of the contents of the object.
    :ivar size: The size, in bytes, of the object.
    :ivar generation: The generation number of the object.
    :ivar meta_generation: The generation number of the object metadata.
    :ivar encrypted: Whether the object is encrypted while at rest on
        the server.
    """
    generation = None
    meta_generation = None

    def __repr__(self):
        if self.generation and self.meta_generation:
            ver_str = '#%s.%s' % (self.generation, self.meta_generation)
        else:
            ver_str = ''
        if self.bucket:
            return '<Key: %s,%s%s>' % (self.bucket.name, self.name, ver_str)
        else:
            return '<Key: None,%s%s>' % (self.name, ver_str)

    def endElement(self, name, value, connection):
        if name == 'Key':
            self.name = value
        elif name == 'ETag':
            self.etag = value
        elif name == 'IsLatest':
            if value == 'true':
                self.is_latest = True
            else:
                self.is_latest = False
        elif name == 'LastModified':
            self.last_modified = value
        elif name == 'Size':
            self.size = int(value)
        elif name == 'StorageClass':
            self.storage_class = value
        elif name == 'Owner':
            pass
        elif name == 'VersionId':
            self.version_id = value
        elif name == 'Generation':
            self.generation = value
        elif name == 'MetaGeneration':
            self.meta_generation = value
        else:
            setattr(self, name, value)

    def handle_version_headers(self, resp, force=False):
        self.meta_generation = resp.getheader('x-goog-metageneration', None)
        self.generation = resp.getheader('x-goog-generation', None)

    def get_file(self, fp, headers=None, cb=None, num_cb=10,
                 torrent=False, version_id=None, override_num_retries=None,
                 response_headers=None):
        query_args = None
        if self.generation:
            query_args = ['generation=%s' % self.generation]
        self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
                                override_num_retries=override_num_retries,
                                response_headers=response_headers,
                                query_args=query_args)

    def delete(self):
        return self.bucket.delete_key(self.name, version_id=self.version_id,
                                      generation=self.generation)

    def add_email_grant(self, permission, email_address):
        """
        Convenience method that provides a quick way to add an email grant to a
        key. This method retrieves the current ACL, creates a new grant based on
        the parameters passed in, adds that grant to the ACL and then PUT's the
        new ACL back to GS.

        :type permission: string
        :param permission: The permission being granted. Should be one of:
            READ|FULL_CONTROL
            See http://code.google.com/apis/storage/docs/developer-guide.html#authorization
            for more details on permissions.

        :type email_address: string
        :param email_address: The email address associated with the Google
                              account to which you are granting the permission.
        """
        acl = self.get_acl()
        acl.add_email_grant(permission, email_address)
        self.set_acl(acl)

    def add_user_grant(self, permission, user_id):
        """
        Convenience method that provides a quick way to add a canonical user
        grant to a key. This method retrieves the current ACL, creates a new
        grant based on the parameters passed in, adds that grant to the ACL and
        then PUT's the new ACL back to GS.

        :type permission: string
        :param permission: The permission being granted. Should be one of:
            READ|FULL_CONTROL
            See http://code.google.com/apis/storage/docs/developer-guide.html#authorization
            for more details on permissions.

        :type user_id: string
        :param user_id: The canonical user id associated with the GS account to
             which you are granting the permission.
        """
        acl = self.get_acl()
        acl.add_user_grant(permission, user_id)
        self.set_acl(acl)

    def add_group_email_grant(self, permission, email_address, headers=None):
        """
        Convenience method that provides a quick way to add an email group
        grant to a key. This method retrieves the current ACL, creates a new
        grant based on the parameters passed in, adds that grant to the ACL and
        then PUT's the new ACL back to GS.

        :type permission: string
        :param permission: The permission being granted. Should be one of:
            READ|FULL_CONTROL
            See http://code.google.com/apis/storage/docs/developer-guide.html#authorization
            for more details on permissions.

        :type email_address: string
        :param email_address: The email address associated with the Google
            Group to which you are granting the permission.
        """
        acl = self.get_acl(headers=headers)
        acl.add_group_email_grant(permission, email_address)
        self.set_acl(acl, headers=headers)

    def add_group_grant(self, permission, group_id):
        """
        Convenience method that provides a quick way to add a canonical group
        grant to a key. This method retrieves the current ACL, creates a new
        grant based on the parameters passed in, adds that grant to the ACL and
        then PUT's the new ACL back to GS.

        :type permission: string
        :param permission: The permission being granted. Should be one of:
            READ|FULL_CONTROL
            See http://code.google.com/apis/storage/docs/developer-guide.html#authorization
            for more details on permissions.

        :type group_id: string
        :param group_id: The canonical group id associated with the Google
            Groups account you are granting the permission to.
        """
        acl = self.get_acl()
        acl.add_group_grant(permission, group_id)
        self.set_acl(acl)

    def set_contents_from_file(self, fp, headers=None, replace=True,
                               cb=None, num_cb=10, policy=None, md5=None,
                               res_upload_handler=None, size=None, rewind=False,
                               if_generation=None):
        """
        Store an object in GS using the name of the Key object as the
        key in GS and the contents of the file pointed to by 'fp' as the
        contents.

        :type fp: file
        :param fp: the file whose contents are to be uploaded

        :type headers: dict
        :param headers: additional HTTP headers to be sent with the PUT request.

        :type replace: bool
        :param replace: If this parameter is False, the method will first check
            to see if an object exists in the bucket with the same key. If it
            does, it won't overwrite it. The default value is True which will
            overwrite the object.

        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the upload. The callback should accept two integer
            parameters, the first representing the number of bytes that have
            been successfully transmitted to GS and the second representing the
            total number of bytes that need to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
            parameter, this parameter determines the granularity of the callback
            by defining the maximum number of times the callback will be called
            during the file transfer.

        :type policy: :class:`boto.gs.acl.CannedACLStrings`
        :param policy: A canned ACL policy that will be applied to the new key
            in GS.

        :type md5: A tuple containing the hexdigest version of the MD5 checksum
            of the file as the first element and the Base64-encoded version of
            the plain checksum as the second element. This is the same format
            returned by the compute_md5 method.
        :param md5: If you need to compute the MD5 for any reason prior to
            upload, it's silly to have to do it twice so this param, if present,
            will be used as the MD5 values of the file. Otherwise, the checksum
            will be computed.

        :type res_upload_handler: ResumableUploadHandler
        :param res_upload_handler: If provided, this handler will perform the
            upload.

        :type size: int
        :param size: (optional) The Maximum number of bytes to read from
            the file pointer (fp). This is useful when uploading
            a file in multiple parts where you are splitting the
            file up into different ranges to be uploaded. If not
            specified, the default behaviour is to read all bytes
            from the file pointer. Less bytes may be available.
            Notes:

                1. The "size" parameter currently cannot be used when
                   a resumable upload handler is given but is still
                   useful for uploading part of a file as implemented
                   by the parent class.
                2. At present Google Cloud Storage does not support
                   multipart uploads.

        :type rewind: bool
        :param rewind: (optional) If True, the file pointer (fp) will be 
                       rewound to the start before any bytes are read from
                       it. The default behaviour is False which reads from
                       the current position of the file pointer (fp).

        :type if_generation: int
        :param if_generation: (optional) If set to a generation number, the
            object will only be written to if its current generation number is
            this value. If set to the value 0, the object will only be written
            if it doesn't already exist.

        :rtype: int
        :return: The number of bytes written to the key.

        TODO: At some point we should refactor the Bucket and Key classes,
        to move functionality common to all providers into a parent class,
        and provider-specific functionality into subclasses (rather than
        just overriding/sharing code the way it currently works).
        """
        provider = self.bucket.connection.provider
        if res_upload_handler and size:
            # could use size instead of file_length if provided but...
            raise BotoClientError('"size" param not supported for resumable uploads.')
        headers = headers or {}
        if policy:
            headers[provider.acl_header] = policy

        if rewind:
            # caller requests reading from beginning of fp.
            fp.seek(0, os.SEEK_SET)
        else:
            # The following seek/tell/seek logic is intended
            # to detect applications using the older interface to
            # set_contents_from_file(), which automatically rewound the
            # file each time the Key was reused. This changed with commit
            # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
            # split into multiple parts and uploaded in parallel, and at
            # the time of that commit this check was added because otherwise
            # older programs would get a success status and upload an empty
            # object. Unfortuantely, it's very inefficient for fp's implemented
            # by KeyFile (used, for example, by gsutil when copying between
            # providers). So, we skip the check for the KeyFile case.
            # TODO: At some point consider removing this seek/tell/seek
            # logic, after enough time has passed that it's unlikely any
            # programs remain that assume the older auto-rewind interface.
            if not isinstance(fp, KeyFile):
                spos = fp.tell()
                fp.seek(0, os.SEEK_END)
                if fp.tell() == spos:
                    fp.seek(0, os.SEEK_SET)
                    if fp.tell() != spos:
                        # Raise an exception as this is likely a programming
                        # error whereby there is data before the fp but nothing
                        # after it.
                        fp.seek(spos)
                        raise AttributeError('fp is at EOF. Use rewind option '
                                             'or seek() to data start.')
                # seek back to the correct position.
                fp.seek(spos)

        if hasattr(fp, 'name'):
            self.path = fp.name
        if self.bucket != None:
            if isinstance(fp, KeyFile):
                # Avoid EOF seek for KeyFile case as it's very inefficient.
                key = fp.getkey()
                size = key.size - fp.tell()
                self.size = size
                # At present both GCS and S3 use MD5 for the etag for
                # non-multipart-uploaded objects. If the etag is 32 hex
                # chars use it as an MD5, to avoid having to read the file
                # twice while transferring.
                if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
                    etag = key.etag.strip('"')
                    md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
            if size:
                self.size = size
            else:
                # If md5 is provided, still need to size so
                # calculate based on bytes to end of content
                spos = fp.tell()
                fp.seek(0, os.SEEK_END)
                self.size = fp.tell() - spos
                fp.seek(spos)
                size = self.size

            if md5 == None:
                md5 = self.compute_md5(fp, size)
            self.md5 = md5[0]
            self.base64md5 = md5[1]

            if self.name == None:
                self.name = self.md5

            if not replace:
                if self.bucket.lookup(self.name):
                    return

            if if_generation is not None:
                headers['x-goog-if-generation-match'] = str(if_generation)

            if res_upload_handler:
                res_upload_handler.send_file(self, fp, headers, cb, num_cb)
            else:
                # Not a resumable transfer so use basic send_file mechanism.
                self.send_file(fp, headers, cb, num_cb, size=size)

    def set_contents_from_filename(self, filename, headers=None, replace=True,
                                   cb=None, num_cb=10, policy=None, md5=None,
                                   reduced_redundancy=None,
                                   res_upload_handler=None,
                                   if_generation=None):
        """
        Store an object in GS using the name of the Key object as the
        key in GS and the contents of the file named by 'filename'.
        See set_contents_from_file method for details about the
        parameters.

        :type filename: string
        :param filename: The name of the file that you want to put onto GS

        :type headers: dict
        :param headers: Additional headers to pass along with the request to GS.

        :type replace: bool
        :param replace: If True, replaces the contents of the file if it
            already exists.

        :type cb: function
        :param cb: (optional) a callback function that will be called to report
            progress on the download. The callback should accept two integer
            parameters, the first representing the number of bytes that have
            been successfully transmitted from GS and the second representing
            the total number of bytes that need to be transmitted.

        :type cb: int
        :param num_cb: (optional) If a callback is specified with the cb
            parameter this parameter determines the granularity of the callback
            by defining the maximum number of times the callback will be called
            during the file transfer.

        :type policy: :class:`boto.gs.acl.CannedACLStrings`
        :param policy: A canned ACL policy that will be applied to the new key
            in GS.

        :type md5: A tuple containing the hexdigest version of the MD5 checksum
            of the file as the first element and the Base64-encoded version of
            the plain checksum as the second element. This is the same format
            returned by the compute_md5 method.
        :param md5: If you need to compute the MD5 for any reason prior to
            upload, it's silly to have to do it twice so this param, if present,
            will be used as the MD5 values of the file. Otherwise, the checksum
            will be computed.

        :type res_upload_handler: ResumableUploadHandler
        :param res_upload_handler: If provided, this handler will perform the
            upload.

        :type if_generation: int
        :param if_generation: (optional) If set to a generation number, the
            object will only be written to if its current generation number is
            this value. If set to the value 0, the object will only be written
            if it doesn't already exist.
        """
        # Clear out any previously computed md5 hashes, since we are setting the content.
        self.md5 = None
        self.base64md5 = None

        fp = open(filename, 'rb')
        self.set_contents_from_file(fp, headers, replace, cb, num_cb,
                                    policy, md5, res_upload_handler,
                                    if_generation=if_generation)
        fp.close()

    def set_contents_from_string(self, s, headers=None, replace=True,
                                 cb=None, num_cb=10, policy=None, md5=None,
                                 if_generation=None):
        """
        Store an object in S3 using the name of the Key object as the
        key in S3 and the string 's' as the contents.
        See set_contents_from_file method for details about the
        parameters.

        :type headers: dict
        :param headers: Additional headers to pass along with the
                        request to AWS.

        :type replace: bool
        :param replace: If True, replaces the contents of the file if
                        it already exists.

        :type cb: function
        :param cb: a callback function that will be called to report
                   progress on the upload.  The callback should accept
                   two integer parameters, the first representing the
                   number of bytes that have been successfully
                   transmitted to S3 and the second representing the
                   size of the to be transmitted object.

        :type cb: int
        :param num_cb: (optional) If a callback is specified with
                       the cb parameter this parameter determines the
                       granularity of the callback by defining
                       the maximum number of times the callback will
                       be called during the file transfer.

        :type policy: :class:`boto.s3.acl.CannedACLStrings`
        :param policy: A canned ACL policy that will be applied to the
                       new key in S3.

        :type md5: A tuple containing the hexdigest version of the MD5
                   checksum of the file as the first element and the
                   Base64-encoded version of the plain checksum as the
                   second element.  This is the same format returned by
                   the compute_md5 method.
        :param md5: If you need to compute the MD5 for any reason prior
                    to upload, it's silly to have to do it twice so this
                    param, if present, will be used as the MD5 values
                    of the file.  Otherwise, the checksum will be computed.

        :type if_generation: int
        :param if_generation: (optional) If set to a generation number, the
            object will only be written to if its current generation number is
            this value. If set to the value 0, the object will only be written
            if it doesn't already exist.
        """

        # Clear out any previously computed md5 hashes, since we are setting the content.
        self.md5 = None
        self.base64md5 = None

        if isinstance(s, unicode):
            s = s.encode("utf-8")
        fp = StringIO.StringIO(s)
        r = self.set_contents_from_file(fp, headers, replace, cb, num_cb,
                                        policy, md5,
                                        if_generation=if_generation)
        fp.close()
        return r

    def set_contents_from_stream(self, *args, **kwargs):
        """
        Store an object using the name of the Key object as the key in
        cloud and the contents of the data stream pointed to by 'fp' as
        the contents.

        The stream object is not seekable and total size is not known.
        This has the implication that we can't specify the
        Content-Size and Content-MD5 in the header. So for huge
        uploads, the delay in calculating MD5 is avoided but with a
        penalty of inability to verify the integrity of the uploaded
        data.

        :type fp: file
        :param fp: the file whose contents are to be uploaded

        :type headers: dict
        :param headers: additional HTTP headers to be sent with the
            PUT request.

        :type replace: bool
        :param replace: If this parameter is False, the method will first check
            to see if an object exists in the bucket with the same key. If it
            does, it won't overwrite it. The default value is True which will
            overwrite the object.

        :type cb: function
        :param cb: a callback function that will be called to report
            progress on the upload. The callback should accept two integer
            parameters, the first representing the number of bytes that have
            been successfully transmitted to GS and the second representing the
            total number of bytes that need to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the
            cb parameter, this parameter determines the granularity of
            the callback by defining the maximum number of times the
            callback will be called during the file transfer.

        :type policy: :class:`boto.gs.acl.CannedACLStrings`
        :param policy: A canned ACL policy that will be applied to the new key
            in GS.

        :type reduced_redundancy: bool
        :param reduced_redundancy: If True, this will set the storage
            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
            Redundancy Storage (RRS) feature of S3, provides lower
            redundancy at lower storage cost.

        :type size: int
        :param size: (optional) The Maximum number of bytes to read from
            the file pointer (fp). This is useful when uploading a
            file in multiple parts where you are splitting the file up
            into different ranges to be uploaded. If not specified,
            the default behaviour is to read all bytes from the file
            pointer. Less bytes may be available.

        :type if_generation: int
        :param if_generation: (optional) If set to a generation number, the
            object will only be written to if its current generation number is
            this value. If set to the value 0, the object will only be written
            if it doesn't already exist.
        """
        if_generation = kwargs.pop('if_generation', None)
        if if_generation is not None:
            headers = kwargs.get('headers', {})
            headers['x-goog-if-generation-match'] = str(if_generation)
            kwargs['headers'] = headers
        super(Key, self).set_contents_from_stream(*args, **kwargs)

    def set_acl(self, acl_or_str, headers=None, generation=None,
                 if_generation=None, if_metageneration=None):
        """Sets the ACL for this object.

        :type acl_or_str: string or :class:`boto.gs.acl.ACL`
        :param acl_or_str: A canned ACL string (see
            :data:`~.gs.acl.CannedACLStrings`) or an ACL object.

        :type headers: dict
        :param headers: Additional headers to set during the request.

        :type generation: int
        :param generation: If specified, sets the ACL for a specific generation
            of a versioned object. If not specified, the current version is
            modified.

        :type if_generation: int
        :param if_generation: (optional) If set to a generation number, the acl
            will only be updated if its current generation number is this value.

        :type if_metageneration: int
        :param if_metageneration: (optional) If set to a metageneration number,
            the acl will only be updated if its current metageneration number is
            this value.
        """
        if self.bucket != None:
            self.bucket.set_acl(acl_or_str, self.name, headers=headers,
                                generation=generation,
                                if_generation=if_generation,
                                if_metageneration=if_metageneration)

    def get_acl(self, headers=None, generation=None):
        """Returns the ACL of this object.

        :param dict headers: Additional headers to set during the request.

        :param int generation: If specified, gets the ACL for a specific
            generation of a versioned object. If not specified, the current
            version is returned.

        :rtype: :class:`.gs.acl.ACL`
        """
        if self.bucket != None:
            return self.bucket.get_acl(self.name, headers=headers,
                                       generation=generation)

    def get_xml_acl(self, headers=None, generation=None):
        """Returns the ACL string of this object.

        :param dict headers: Additional headers to set during the request.

        :param int generation: If specified, gets the ACL for a specific
            generation of a versioned object. If not specified, the current
            version is returned.

        :rtype: str
        """
        if self.bucket != None:
            return self.bucket.get_xml_acl(self.name, headers=headers,
                                           generation=generation)

    def set_xml_acl(self, acl_str, headers=None, generation=None,
                     if_generation=None, if_metageneration=None):
        """Sets this objects's ACL to an XML string.

        :type acl_str: string
        :param acl_str: A string containing the ACL XML.

        :type headers: dict
        :param headers: Additional headers to set during the request.

        :type generation: int
        :param generation: If specified, sets the ACL for a specific generation
            of a versioned object. If not specified, the current version is
            modified.

        :type if_generation: int
        :param if_generation: (optional) If set to a generation number, the acl
            will only be updated if its current generation number is this value.

        :type if_metageneration: int
        :param if_metageneration: (optional) If set to a metageneration number,
            the acl will only be updated if its current metageneration number is
            this value.
        """
        if self.bucket != None:
            return self.bucket.set_xml_acl(acl_str, self.name, headers=headers,
                                           generation=generation,
                                           if_generation=if_generation,
                                           if_metageneration=if_metageneration)

    def set_canned_acl(self, acl_str, headers=None, generation=None,
                       if_generation=None, if_metageneration=None):
        """Sets this objects's ACL using a predefined (canned) value.

        :type acl_str: string
        :param acl_str: A canned ACL string. See
            :data:`~.gs.acl.CannedACLStrings`.

        :type headers: dict
        :param headers: Additional headers to set during the request.

        :type generation: int
        :param generation: If specified, sets the ACL for a specific generation
            of a versioned object. If not specified, the current version is
            modified.

        :type if_generation: int
        :param if_generation: (optional) If set to a generation number, the acl
            will only be updated if its current generation number is this value.

        :type if_metageneration: int
        :param if_metageneration: (optional) If set to a metageneration number,
            the acl will only be updated if its current metageneration number is
            this value.
        """
        if self.bucket != None:
            return self.bucket.set_canned_acl(
                acl_str,
                self.name,
                headers=headers,
                generation=generation,
                if_generation=if_generation,
                if_metageneration=if_metageneration
            )
