本文共 3179 字,大约阅读时间需要 10 分钟。
# -*- coding: utf-8 -*-
"""Several equivalent ways to normalize / standardize a 1-D sequence of numbers.

Each public function takes a flat sequence of numbers (list, tuple or 1-D
NumPy array) and returns the rescaled values rounded to 3 decimal places.

Shared conventions:

* An empty input yields an empty result.
* When the values have no spread (max == min, or a standard deviation of 0)
  every output is 0.0 instead of a division-by-zero error or NaN.  This is
  meant to mirror how scikit-learn's scalers treat a zero range.
"""
import inspect
import math

import numpy as np

try:
    from sklearn import preprocessing
except ImportError:
    # scikit-learn is only needed by normalize_data_using_sk(); the other
    # normalizers must stay usable without it.
    preprocessing = None


def _to_floats(data_list):
    """Return the items of *data_list* as a list of Python floats."""
    return [float(item) for item in data_list]


def _scale(values, center, spread):
    """Return ``(v - center) / spread`` rounded to 3 places for each value.

    A zero *spread* maps every value to 0.0 rather than dividing by zero.
    """
    if spread == 0:
        return [0.0] * len(values)
    return [round((value - center) / spread, 3) for value in values]


def _scale_array(values, center, spread):
    """Vectorized counterpart of :func:`_scale` for a NumPy array."""
    if spread == 0:
        return [0.0] * values.size
    return np.round((values - center) / spread, 3).tolist()


def max_min_normalization(data_list):
    """Rescale the data into [0, 1] with min-max normalization.

    x_new = (x - min) / (max - min)

    :param data_list: flat sequence of numbers
    :return: list of floats rounded to 3 decimal places
    """
    values = _to_floats(data_list)
    if not values:
        return []
    # min/max are computed once here instead of once per element.
    lowest = min(values)
    return _scale(values, lowest, max(values) - lowest)


def mean_normalization(data_list):
    """Center the data on its mean and divide by its range.

    The result is not necessarily inside [0, 1].

    x_new = (x - mean) / (max - min)

    :param data_list: flat sequence of numbers
    :return: list of floats rounded to 3 decimal places
    """
    values = _to_floats(data_list)
    if not values:
        return []
    # Working on floats keeps the mean exact on Python 2, where int / int
    # would truncate.
    mean = sum(values) / len(values)
    return _scale(values, mean, max(values) - min(values))


def zscores_normalization(data_list):
    """Standardize the data with the z-score method.

    x_new = (x - mean) / std, using the population standard deviation
    (divide by n, not n - 1).

    :param data_list: flat sequence of numbers
    :return: list of floats rounded to 3 decimal places
    """
    values = _to_floats(data_list)
    if not values:
        return []
    mean = sum(values) / len(values)
    variance = sum((value - mean) ** 2 for value in values) / len(values)
    return _scale(values, mean, math.sqrt(variance))


def max_min_normalization_using_numpy(data_list):
    """Min-max normalization computed with NumPy.

    :param data_list: flat sequence of numbers
    :return: list of floats rounded to 3 decimal places
    """
    values = np.asarray(data_list, dtype=float)
    if values.size == 0:
        return []
    lowest = np.min(values)
    return _scale_array(values, lowest, np.max(values) - lowest)


def zscores_normalization_using_numpy(data_list):
    """Z-score standardization using NumPy's mean and standard deviation.

    :param data_list: flat sequence of numbers
    :return: list of floats rounded to 3 decimal places
    """
    values = np.asarray(data_list, dtype=float)
    if values.size == 0:
        return []
    return _scale_array(values, np.mean(values), np.std(values))


def normalize_data_using_sk(data_list):
    """Min-max normalization using scikit-learn's ``minmax_scale``.

    :param data_list: flat sequence of numbers
    :return: 1-D NumPy array of floats rounded to 3 decimal places
    :raises ImportError: if scikit-learn is not installed
    """
    values = np.asarray(data_list, dtype=float)
    if values.size == 0:
        return values.reshape(-1)
    if preprocessing is None:
        raise ImportError(
            'scikit-learn is required for normalize_data_using_sk')
    # The data is laid out as a single row and scaled along that row
    # (axis=1), so the whole sequence is treated as one feature vector.
    data_array = values.reshape(1, -1)
    new_data = preprocessing.minmax_scale(data_array, axis=1)
    return np.round(new_data, 3)[0, :]


def _run_demo():
    """Run every public normalizer of this module on random sample data."""
    data_list = np.random.randint(1, 20, 10)
    # sorted() snapshots the namespace and gives a stable output order;
    # underscore-prefixed helpers are not normalizers and are skipped.
    for name, candidate in sorted(globals().items()):
        if name.startswith('_') or not inspect.isfunction(candidate):
            continue
        if candidate.__module__ != __name__:
            continue
        res = candidate(data_list)
        print('%s:\n%s' % (name, res))


if __name__ == '__main__':
    _run_demo()
运行结果:
zscores_normalization_using_numpy:
[-1.528, 1.382, -0.255, 1.564, -0.073, 0.291, 0.837, -1.346, -0.8, -0.073]
max_min_normalization:
[0.0, 0.941, 0.412, 1.0, 0.471, 0.588, 0.765, 0.059, 0.235, 0.471]
normalize_data_using_sk:
[0. 0.941 0.412 1. 0.471 0.588 0.765 0.059 0.235 0.471]
max_min_normalization_using_numpy:
[0.0, 0.941, 0.412, 1.0, 0.471, 0.588, 0.765, 0.059, 0.235, 0.471]
mean_normalization:
[-0.471, 0.471, -0.059, 0.529, 0.0, 0.118, 0.294, -0.412, -0.235, 0.0]
zscores_normalization:
[-1.528, 1.382, -0.255, 1.564, -0.073, 0.291, 0.837, -1.346, -0.8, -0.073]