uisrnn.evals
Utils for model evaluation.
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utils for model evaluation."""

from scipy import optimize
import numpy as np


def get_list_inverse_index(unique_ids):
    """Get value to position index from a list of unique ids.

    Args:
      unique_ids: A list of unique integers or strings.

    Returns:
      result: a dict from value to position. If a value occurs more than
        once, its last position is the one recorded.

    Raises:
      TypeError: If unique_ids is not a list.
    """
    if not isinstance(unique_ids, list):
        raise TypeError('unique_ids must be a list')
    return {unique_id: i for i, unique_id in enumerate(unique_ids)}


def compute_sequence_match_accuracy(sequence1, sequence2):
    """Compute the accuracy between two sequences by finding optimal matching.

    The labels of the two sequences live in independent namespaces: a
    co-occurrence count matrix is built between the distinct labels of each
    sequence, and the one-to-one label assignment that maximizes the number
    of agreeing positions is found with the linear sum assignment solver.

    Args:
      sequence1: A list of integers or strings (any hashable labels).
      sequence2: A list of integers or strings (any hashable labels).

    Returns:
      accuracy: sequence matching accuracy as a number in [0.0, 1.0]

    Raises:
      TypeError: If sequence1 or sequence2 is not a list.
      ValueError: If sequence1 and sequence2 do not have the same length,
        or if they are empty.
    """
    if not isinstance(sequence1, list) or not isinstance(sequence2, list):
        raise TypeError('sequence1 and sequence2 must be lists')
    if not sequence1 or len(sequence1) != len(sequence2):
        raise ValueError(
            'sequence1 and sequence2 must have the same non-zero length')
    # Map every distinct label to a row / column position. The labels are
    # deliberately not sorted: the optimal matching score does not depend on
    # row/column order, and sorting would require the labels to be mutually
    # orderable (e.g. it fails for a list mixing integers and strings).
    inverse_index1 = {
        label: i for i, label in enumerate(dict.fromkeys(sequence1))}
    inverse_index2 = {
        label: i for i, label in enumerate(dict.fromkeys(sequence2))}
    # count_matrix[i, j] is the number of positions where label i of
    # sequence1 co-occurs with label j of sequence2.
    count_matrix = np.zeros((len(inverse_index1), len(inverse_index2)))
    for item1, item2 in zip(sequence1, sequence2):
        count_matrix[inverse_index1[item1], inverse_index2[item2]] += 1.0
    # The solver minimizes cost, so negate the counts to maximize matches.
    row_index, col_index = optimize.linear_sum_assignment(-count_matrix)
    optimal_match_count = count_matrix[row_index, col_index].sum()
    accuracy = optimal_match_count / len(sequence1)
    return accuracy
def
get_list_inverse_index(unique_ids):
def get_list_inverse_index(unique_ids):
    """Get value to position index from a list of unique ids.

    Args:
      unique_ids: A list of unique integers or strings.

    Returns:
      result: a dict from value to position. If a value occurs more than
        once, its last position is the one recorded.

    Raises:
      TypeError: If unique_ids is not a list.
    """
    if not isinstance(unique_ids, list):
        raise TypeError('unique_ids must be a list')
    return {unique_id: i for i, unique_id in enumerate(unique_ids)}
Get value to position index from a list of unique ids.
Args: unique_ids: A list of unique integers or strings.
Returns: result: a dict from value to position
Raises: TypeError: If unique_ids is not a list.
def
compute_sequence_match_accuracy(sequence1, sequence2):
def compute_sequence_match_accuracy(sequence1, sequence2):
    """Compute the accuracy between two sequences by finding optimal matching.

    The labels of the two sequences live in independent namespaces: a
    co-occurrence count matrix is built between the distinct labels of each
    sequence, and the one-to-one label assignment that maximizes the number
    of agreeing positions is found with the linear sum assignment solver.

    Args:
      sequence1: A list of integers or strings (any hashable labels).
      sequence2: A list of integers or strings (any hashable labels).

    Returns:
      accuracy: sequence matching accuracy as a number in [0.0, 1.0]

    Raises:
      TypeError: If sequence1 or sequence2 is not a list.
      ValueError: If sequence1 and sequence2 do not have the same length,
        or if they are empty.
    """
    if not isinstance(sequence1, list) or not isinstance(sequence2, list):
        raise TypeError('sequence1 and sequence2 must be lists')
    if not sequence1 or len(sequence1) != len(sequence2):
        raise ValueError(
            'sequence1 and sequence2 must have the same non-zero length')
    # Map every distinct label to a row / column position. The labels are
    # deliberately not sorted: the optimal matching score does not depend on
    # row/column order, and sorting would require the labels to be mutually
    # orderable (e.g. it fails for a list mixing integers and strings).
    inverse_index1 = {
        label: i for i, label in enumerate(dict.fromkeys(sequence1))}
    inverse_index2 = {
        label: i for i, label in enumerate(dict.fromkeys(sequence2))}
    # count_matrix[i, j] is the number of positions where label i of
    # sequence1 co-occurs with label j of sequence2.
    count_matrix = np.zeros((len(inverse_index1), len(inverse_index2)))
    for item1, item2 in zip(sequence1, sequence2):
        count_matrix[inverse_index1[item1], inverse_index2[item2]] += 1.0
    # The solver minimizes cost, so negate the counts to maximize matches.
    row_index, col_index = optimize.linear_sum_assignment(-count_matrix)
    optimal_match_count = count_matrix[row_index, col_index].sum()
    accuracy = optimal_match_count / len(sequence1)
    return accuracy
Compute the accuracy between two sequences by finding optimal matching.
Args: sequence1: A list of integers or strings. sequence2: A list of integers or strings.
Returns: accuracy: sequence matching accuracy as a number in [0.0, 1.0]
Raises: TypeError: If sequence1 or sequence2 is not a list. ValueError: If sequence1 and sequence2 do not have the same length, or if they are empty.