博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
机器学习归一化(附Python实现源码)
阅读量:4170 次
发布时间:2019-05-26

本文共 3179 字,大约阅读时间需要 10 分钟。

# -*- coding: utf-8 -*-
"""Min-max, mean and z-score normalization of a one-dimensional data set.

Each technique is implemented in pure Python, and min-max / z-score also
with numpy; min-max is additionally available through scikit-learn.  All
list-returning functions round every result to 3 decimal places.
"""
import inspect
import math

import numpy as np


def _scale(data_list, offset, scale):
    """Return ``round((x - offset) / scale, 3)`` for every x, as plain floats.

    A zero ``scale`` means the input is constant; every value then maps to
    0.0 instead of dividing by zero (the same result scikit-learn's
    ``minmax_scale`` produces for a constant feature).
    """
    if scale == 0:
        return [0.0] * len(data_list)
    # Convert once so the arithmetic is done on built-in floats even when
    # the statistics were computed by numpy.
    offset = float(offset)
    scale = float(scale)
    return [round((float(x) - offset) / scale, 3) for x in data_list]


def max_min_normalization(data_list):
    """Rescale the data to [0, 1] with min-max normalization.

    x_new = (x - min) / (max - min)

    :param data_list: sized, re-iterable collection of numbers
    :return: list of floats rounded to 3 decimals; ``[]`` for empty input,
        all 0.0 when every value is identical
    """
    if len(data_list) == 0:
        return []
    # Compute the extremes once instead of on every loop iteration.
    lowest = min(data_list)
    highest = max(data_list)
    return _scale(data_list, lowest, highest - lowest)


def mean_normalization(data_list):
    """Center the data on its mean and divide by its range.

    The result is not necessarily inside [0, 1].

    x_new = (x - mean) / (max - min)

    :param data_list: sized, re-iterable collection of numbers
    :return: list of floats rounded to 3 decimals; ``[]`` for empty input,
        all 0.0 when every value is identical
    """
    if len(data_list) == 0:
        return []
    # Start the sum from 0.0 so integer input cannot trigger truncating
    # integer division of the mean on Python 2.
    mean = sum(data_list, 0.0) / len(data_list)
    return _scale(data_list, mean, max(data_list) - min(data_list))


def zscores_normalization(data_list):
    """Standardize the data with z-scores, using the population std.

    x_new = (x - mean) / std

    :param data_list: sized, re-iterable collection of numbers
    :return: list of floats rounded to 3 decimals; ``[]`` for empty input,
        all 0.0 when every value is identical
    """
    count = len(data_list)
    if count == 0:
        return []
    mean = sum(data_list, 0.0) / count
    variance = sum((float(x) - mean) ** 2 for x in data_list) / count
    return _scale(data_list, mean, math.sqrt(variance))


def max_min_normalization_using_numpy(data_list):
    """Min-max normalization with the extremes computed by numpy.

    :param data_list: sized collection of numbers
    :return: list of floats rounded to 3 decimals; ``[]`` for empty input,
        all 0.0 when every value is identical
    """
    if len(data_list) == 0:
        return []
    # Local names deliberately avoid shadowing the built-in max()/min().
    highest = np.max(data_list)
    lowest = np.min(data_list)
    return _scale(data_list, lowest, highest - lowest)


def zscores_normalization_using_numpy(data_list):
    """Z-score standardization using numpy's mean and (population) std.

    :param data_list: sized collection of numbers
    :return: list of floats rounded to 3 decimals; ``[]`` for empty input,
        all 0.0 when every value is identical
    """
    if len(data_list) == 0:
        return []
    return _scale(data_list, np.mean(data_list), np.std(data_list))


def normalize_data_using_sk(data_list):
    """Min-max normalization using scikit-learn's ``minmax_scale``.

    :param data_list: collection of numbers
    :return: 1-D numpy array of floats rounded to 3 decimals
    """
    # Imported here because this is the only function that needs
    # scikit-learn; the other helpers stay usable without it installed.
    from sklearn import preprocessing

    # minmax_scale works on 2-D input: treat the data as a single row and
    # scale along that row.
    data_array = np.asarray(data_list, 'float').reshape(1, -1)
    new_data = preprocessing.minmax_scale(data_array, axis=1)
    return np.round(new_data, 3)[0, :]


if __name__ == '__main__':
    data_list = np.random.randint(1, 20, 10)
    data = globals().copy()
    # Run every public function of this module on the same random sample.
    # Sorting makes the output order deterministic; names starting with an
    # underscore are internal helpers with a different signature.
    for key in sorted(data):
        if inspect.isfunction(data[key]) and not key.startswith('_'):
            res = data[key](data_list)
            print('%s:\n%s' % (key, res))

运行结果:

zscores_normalization_using_numpy:

[-1.528, 1.382, -0.255, 1.564, -0.073, 0.291, 0.837, -1.346, -0.8, -0.073]
max_min_normalization:
[0.0, 0.941, 0.412, 1.0, 0.471, 0.588, 0.765, 0.059, 0.235, 0.471]
normalize_data_using_sk:
[0.    0.941 0.412 1.    0.471 0.588 0.765 0.059 0.235 0.471]
max_min_normalization_using_numpy:
[0.0, 0.941, 0.412, 1.0, 0.471, 0.588, 0.765, 0.059, 0.235, 0.471]
mean_normalization:
[-0.471, 0.471, -0.059, 0.529, 0.0, 0.118, 0.294, -0.412, -0.235, 0.0]
zscores_normalization:
[-1.528, 1.382, -0.255, 1.564, -0.073, 0.291, 0.837, -1.346, -0.8, -0.073]

你可能感兴趣的文章
深入理解Mysql索引底层数据结构与算法(二)
查看>>
IDEA自动去掉无用的import
查看>>
js数字转换成汉字
查看>>
MySQL不同存储引擎底层真正存储结构
查看>>
MySQL存储引擎底层常见面试题
查看>>
MySQL Explain执行计划详解
查看>>
索引最佳实践具体实例
查看>>
临时关闭MySQL缓存
查看>>
HBase学习和使用
查看>>
LSTM
查看>>
牛客网 数字游戏
查看>>
逆波兰表达式
查看>>
逆波兰表达式
查看>>
K-means中K值的选取
查看>>
kmeans优化算法
查看>>
牛客网 构造队列
查看>>
牛客网 跳石板
查看>>
牛客网 最大的奇约数
查看>>
python大坑:AttributeError: 'module' object has no attribute 'Workbook'
查看>>
python 协程
查看>>