## This file is part of MLPY.
## Input data module.

## This code is written by Davide Albanese, <albanese@fbk.eu>.
## (C) 2008 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Names exported by ``from <this module> import *``: the file readers and
# writers first, then the scaling/centering helpers.
__all__ = [
    "data_fromfile",
    "data_fromfile_wl",
    "data_tofile",
    "data_tofile_wl",
    "data_normalize",
    "data_standardize",
    "standardize",
    "center",
    "standardize_from",
    "center_from",
]

from numpy import *
import csv
import warnings

def deprecation(message):
    """
    Emit a DeprecationWarning carrying *message*.

    Input

      * *message* - warning text [string]

    The warning is raised with ``stacklevel=3``: frame 1 is this helper,
    frame 2 is the deprecated function that called it, and frame 3 is the
    code that called the deprecated function. Attributing the warning to
    that last frame lets the warning filters show the offending call site
    instead of a line inside this module.
    """

    warnings.warn(message, DeprecationWarning, stacklevel=3)


def data_fromfile(file, ytype=int):
    """
    Read data file in the form::

      x11 [TAB] x12 [TAB] ... x1n [TAB] y1
      x21 [TAB] x22 [TAB] ... x2n [TAB] y2
       .         .        .    .        .
       .         .         .   .        .
       .         .          .  .        .
      xm1 [TAB] xm2 [TAB] ... xmn [TAB] ym
    
    where xij are float and yi are of type 'ytype'
    (int or float).

    The number of columns is taken from the first line; every value in
    the file is then read and arranged in rows of that many columns.
    A trailing separator at the end of a line is tolerated.
    
    Input
    
      * *file*  - data file name
      * *ytype* - datatype for labels (int or float)
      
    Output
    
      * *x* - data    [2D numpy array float]
      * *y* - classes [1D numpy array int or float]

    Raises

      * ValueError - if the first line holds no values or the number of
        values in the file is not a multiple of the column count

    Example:

    >>> from numpy import *
    >>> from mlpy import *
    >>> x, y = data_fromfile('data_example.dat')
    >>> x
    array([[ 1.1,  2. ,  5.3,  3.1],
    ...    [ 3.7,  1.4,  2.3,  4.5],
    ...    [ 1.4,  5.4,  3.1,  1.4]])
    >>> y
    array([ 1, -1,  1])
    """

    # The context manager closes the handle even if reading fails.
    with open(file) as f:
        firstline = f.readline()

    # Split on any whitespace rather than on "\t" only: a trailing tab
    # (or the line terminator) must not be counted as an extra column.
    cols = len(firstline.split())
    if cols == 0:
        raise ValueError("'%s' is not a valid data file" % file)

    try:
        data = fromfile(file=file, sep="\t").reshape((-1, cols))
    except ValueError:
        raise ValueError("'%s' is not a valid data file" % file)

    # The last column holds the labels, everything before it the samples.
    x = delete(data, -1, axis=1)
    y = data[:, -1].astype(ytype)

    return (x, y)


def data_fromfile_wl(file):
    """
    Read data file (without labels) in the form::

      x11 [TAB] x12 [TAB] ... x1n [TAB]
      x21 [TAB] x22 [TAB] ... x2n [TAB]
       .         .        .    .
       .         .         .   .       
       .         .          .  .       
      xm1 [TAB] xm2 [TAB] ... xmn [TAB]
    
    where xij are float.

    The number of columns is taken from the first line; every value in
    the file is then read and arranged in rows of that many columns.
    The trailing separator at the end of a line is optional.
    
    Input
    
      * *file* - data file name
      
    Output
    
      * *x* - data    [2D numpy array float]

    Raises

      * ValueError - if the first line holds no values or the number of
        values in the file is not a multiple of the column count

    Example:

    >>> from numpy import *
    >>> from mlpy import *
    >>> x = data_fromfile_wl('data_example.dat')
    >>> x
    array([[ 1.1,  2. ,  5.3,  3.1],
    ...    [ 3.7,  1.4,  2.3,  4.5],
    ...    [ 1.4,  5.4,  3.1,  1.4]])
    """

    # The context manager closes the handle even if reading fails.
    with open(file) as f:
        firstline = f.readline()

    # Split on any whitespace rather than on "\t" only: a trailing tab
    # (or the line terminator) must not be counted as an extra column.
    cols = len(firstline.split())
    if cols == 0:
        raise ValueError("'%s' is not a valid data file" % file)

    try:
        data = fromfile(file=file, sep="\t").reshape((-1, cols))
    except ValueError:
        raise ValueError("'%s' is not a valid data file" % file)

    return data
   

def data_tofile(file, x, y, sep="\t"):
    """
    Write data file in the form::

      x11 [sep] x12 [sep] ... x1n [sep] y1
      x21 [sep] x22 [sep] ... x2n [sep] y2
       .         .        .    .        .
       .         .         .   .        .
       .         .          .  .        .
      xm1 [sep] xm2 [sep] ... xmn [sep] ym
    
    where xij are float and yi are the labels.

    The labels are appended to x as an extra column before writing, so
    they are stored with the common dtype of x and y (e.g. an integer
    label 1 is written as ``1.0`` when x is float). Lines end with a
    single newline character.
    
    Input
    
      * *file* - data file name
      * *x*    - data    [2D numpy array float]
      * *y*    - classes [1D numpy array integer]
      * *sep*  - separator
    """

    import sys

    rows = append(x, asarray(y).reshape(-1, 1), axis=1)

    # The csv module needs a text stream with newline translation turned
    # off on Python 3 and a binary stream on Python 2.
    if sys.version_info[0] >= 3:
        out = open(file, "w", newline="")
    else:
        out = open(file, "wb")

    # Close (and therefore flush) the file deterministically.
    with out:
        writer = csv.writer(out, delimiter=sep, lineterminator='\n')
        writer.writerows(rows)


def data_tofile_wl(file, x, sep="\t"):
    """
    Write data file (without labels) in the form::

      x11 [sep] x12 [sep] ... x1n
      x21 [sep] x22 [sep] ... x2n
       .         .        .    .
       .         .         .   .
       .         .          .  .
      xm1 [sep] xm2 [sep] ... xmn
    
    where xij are float. One line is written per row of x, with the
    values joined by *sep* and terminated by a single newline character.
    
    Input
    
      * *file* - data file name
      * *x*    - data    [2D numpy array float]
      * *sep*  - separator
    """

    import sys

    # The csv module needs a text stream with newline translation turned
    # off on Python 3 and a binary stream on Python 2.
    if sys.version_info[0] >= 3:
        out = open(file, "w", newline="")
    else:
        out = open(file, "wb")

    # Close (and therefore flush) the file deterministically.
    with out:
        writer = csv.writer(out, delimiter=sep, lineterminator='\n')
        writer.writerows(x)


def data_normalize(x):
    """
    Normalize numpy array (2D) x by rows: each row is shifted to mean 0
    and divided by its sample standard deviation (n - 1 denominator).

    Deprecated in mlpy 2.3; a DeprecationWarning is emitted on every call.

    Input
    
      * *x* - data [2D numpy array, at least two columns]

    Output
    
      * normalized data [2D numpy array float]

    Example:

    >>> from numpy import *
    >>> from mlpy import *
    >>> x = array([[ 1.1,  2. ,  5.3,  3.1],
    ...            [ 3.7,  1.4,  2.3,  4.5],
    ...            [ 1.4,  5.4,  3.1,  1.4]])
    >>> data_normalize(x)
    array([[-0.9797065 , -0.48295391,  1.33847226,  0.12418815],
    ...    [ 0.52197912, -1.13395464, -0.48598056,  1.09795608],
    ...    [-0.75217354,  1.35919078,  0.1451563 , -0.75217354]])
    """

    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn("deprecated in mlpy 2.3", DeprecationWarning, stacklevel=2)

    x = asarray(x)
    ncols = x.shape[1]

    row_mean = x.mean(axis=1)
    # x.std() is the population deviation; rescale it to the sample one.
    row_std = x.std(axis=1) * sqrt(ncols / (ncols - 1.0))

    # Computing into a fresh array (instead of filling empty_like(x))
    # keeps the result floating point even when x holds integers.
    return (x - row_mean.reshape(-1, 1)) / row_std.reshape(-1, 1)
   

def data_standardize(x, p = None):
    """
    Standardize numpy array (2D) x by columns and optionally
    standardize p using mean and std of x.

    Each column of x is shifted to mean 0 and divided by its sample
    standard deviation (n - 1 denominator). When p is given, the same
    column means and deviations (those of x) are applied to p.

    Deprecated in mlpy 2.3 in favour of standardize() and
    standardize_from(); a DeprecationWarning is emitted on every call.

    Input
    
      * *x* - data          [2D numpy array, at least two rows]
      * *p* - optional data [2D numpy array, same number of columns as x]

    Output
    
      * standardized x if p is None, otherwise the tuple
        (standardized x, standardized p)

    Example:

    >>> from numpy import *
    >>> from mlpy import *
    >>> x = array([[ 1.1,  2. ,  5.3,  3.1],
    ...            [ 3.7,  1.4,  2.3,  4.5],
    ...            [ 1.4,  5.4,  3.1,  1.4]])
    >>> data_standardize(x)
    array([[-0.67958381, -0.43266792,  1.1157668 ,  0.06441566],
    ...    [ 1.1482623 , -0.71081158, -0.81536804,  0.96623494],
    ...    [-0.46867849,  1.1434795 , -0.30039875, -1.0306506 ]])
    """

    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn("deprecated in mlpy 2.3. Use mlpy.standardize() and "
                  "mlpy.standardize_from() instead",
                  DeprecationWarning, stacklevel=2)

    x = asarray(x)
    nrows = x.shape[0]

    col_mean = x.mean(axis=0)
    # x.std() is the population deviation; rescale it to the sample one.
    col_std = x.std(axis=0) * sqrt(nrows / (nrows - 1.0))

    # Broadcasting applies the per-column statistics to every row and
    # yields a float result even when the input holds integers.
    ret_x = (x - col_mean) / col_std

    # Identity test: "p == None" on an array compares element by element
    # and cannot be used as a condition.
    if p is None:
        return ret_x

    ret_p = (asarray(p) - col_mean) / col_std
    return (ret_x, ret_p)
    
    
def standardize(x):
    """ Standardize x.
    
    x is standardized to have mean 0 and unit length by columns:
    each column is centered on its mean and divided by its (population)
    standard deviation times the square root of the number of rows.

    Input

      * *x* - data [2D numpy array float]

    Output

      * the tuple (standardized x, column means, column standard
        deviations)
    """

    m = x.mean(axis=0)
    s = x.std(axis=0)

    # sqrt comes from the module-level "from numpy import *"; the name
    # "np" is not bound in this module.
    return (x - m) / (s * sqrt(x.shape[0])), m, s


def center(y):
    """ Center y to have mean 0.

    Input

      * *y* - data [numpy array]

    Output

      * the tuple (centered y, mean of y), where the mean is taken
        over all elements of y
    """

    # mean comes from the module-level "from numpy import *"; the name
    # "np" is not bound in this module.
    m = mean(y)

    return y - m, m


def standardize_from(x, mean, std):
    """Standardize x using external mean and standard deviation.

    x is centered on *mean* and divided by *std* times the square root
    of the number of rows of x itself.

    Input

      * *x*    - data [2D numpy array float]
      * *mean* - mean(s) to subtract
      * *std*  - standard deviation(s) to divide by

    Output

      * standardized x
    """

    # sqrt comes from the module-level "from numpy import *"; the name
    # "np" is not bound in this module.
    return (x - mean) / (std * sqrt(x.shape[0]))


def center_from(y, mean):
    """Shift y by an externally supplied mean.

    Input

      * *y*    - data to center
      * *mean* - value subtracted from y

    Output

      * y - mean
    """

    shifted = y - mean
    return shifted
