# (Uses str.find.)
def get_indices(text, substr):
    """Return a list containing the indices where str substr appears in
    str text.

    Overlapping instances of substr are counted.  For example,

        get_indices("abcabcabc", "bc") --> [1, 4, 7]
        get_indices("bbbbbbb", "bb") --> [0, 1, 2, 3, 4, 5]

    An empty substr is reported at every position from 0 through
    len(text) inclusive, because that is what str.find reports.
    """
    # Accumulate the indices of occurrences here.
    indices = []

    # The position of the next occurrence of substr within text.
    index = text.find(substr, 0)

    # Keep looking until there are no more occurrences (find returns -1).
    while index != -1:
        # Record this occurrence.
        indices.append(index)

        # Resume the search one character further on (not len(substr)
        # further on) so that overlapping occurrences are also found.
        index = text.find(substr, index + 1)

    return indices


# (Does not use str.find.)
def basic_stringmatch(text, pattern):
    """Find all instances of the string pattern in the string text.

    Return a list containing the starting indices of all matches.

    This version iterates through text one character at a time, checking
    if the pattern matches starting at that character.  It checks the
    entire pattern at each character, even after a mismatch has been seen.

    If pattern is longer than text the result is an empty list; an empty
    pattern matches at every position from 0 through len(text).
    """
    matches = []
    pattern_length = len(pattern)

    # Only start positions that leave room for the whole pattern are
    # tried; the range is empty when the pattern is longer than the text.
    for index in range(len(text) - pattern_length + 1):
        # Check if the pattern matches starting at this character.
        match = True
        for match_index in range(pattern_length):
            # Deliberately no break here: this variant always compares
            # the full pattern.
            if text[index + match_index] != pattern[match_index]:
                match = False

        if match:
            matches.append(index)

    return matches


def early_exit_stringmatch(text, pattern):
    """Find all instances of the string pattern in the string text.

    Return a list containing the starting indices of all matches.

    This version iterates through text one character at a time, checking
    if the pattern matches starting at that character.  It stops checking
    the pattern when a mismatch is detected.

    If pattern is longer than text the result is an empty list; an empty
    pattern matches at every position from 0 through len(text).
    """
    matches = []
    pattern_length = len(pattern)

    # Only start positions that leave room for the whole pattern are tried.
    for index in range(len(text) - pattern_length + 1):
        # Count how many leading characters of the pattern agree with the
        # text at this position, stopping at the first disagreement.
        match_index = 0
        while (match_index < pattern_length
               and text[index + match_index] == pattern[match_index]):
            match_index += 1

        # The pattern matches if the previous loop iterated through
        # the entire pattern.
        if match_index == pattern_length:
            matches.append(index)

    return matches


def main():
    """Run every matcher on the sample inputs and print result vs. expected.

    Each output line reads "<function> <result> sb <expected>"; "sb" is
    presumably shorthand for "should be".
    """
    funs = (get_indices, basic_stringmatch, early_exit_stringmatch)

    # (text, pattern, expected list of match indices)
    cases = (
        ("abcabcabc", "bc", [1, 4, 7]),
        ("bbbbbbb", "bb", [0, 1, 2, 3, 4, 5]),
        ("bbbbbbb", "aa", []),
        ("abc", "abcde", []),
        ("", "", [0]),
        ("abcabcabc", "ab", [0, 3, 6]),
        ("CTACAATATATCGTATCATATCC", "ATATC", [7, 17]),
        ("ATATAT", "AT", [0, 2, 4]),
    )

    for f in funs:
        for text, pattern, expected in cases:
            # A single pre-formatted argument keeps the output identical
            # whether print is a statement (Python 2) or a function
            # (Python 3).
            print("%s %s sb %s" % (f, f(text, pattern), expected))


if __name__ == '__main__':
    main()