Lecture 4: Advanced Collection Operations

1. Sorting Data Collections

Sorting Lists

# Sort a list in place (modifies the original list; list.sort() returns None)
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
numbers.sort()  # [1, 1, 2, 3, 4, 5, 6, 9]

# Sort in descending order
numbers.sort(reverse=True)  # [9, 6, 5, 4, 3, 2, 1, 1]

# Get a new sorted list (original remains unchanged)
numbers = [3, 1, 4, 1, 5]
sorted_numbers = sorted(numbers)  # [1, 1, 3, 4, 5]

# Sort with a custom key
words = ["banana", "apple", "cherry"]
# Sorting is stable: "banana" and "cherry" both have length 6, so they keep
# their original relative order.
words.sort(key=len)  # ["apple", "banana", "cherry"]

# Sort by multiple criteria (length, then alphabetically)
words = ["fig", "apple", "banana", "cherry"]
# Tuples compare element by element, so length decides first and the word
# itself breaks ties.
words.sort(key=lambda x: (len(x), x))  # ['fig', 'apple', 'banana', 'cherry']

Sorting Dictionaries

# Dictionaries preserve insertion order (Python 3.7+), so building a new dict
# from sorted (key, value) pairs yields a "sorted" dictionary.

# Sort dictionary by keys
student_scores = {'Alice': 85, 'Bob': 75, 'Charlie': 92}
sorted_by_name = dict(sorted(student_scores.items()))
# {'Alice': 85, 'Bob': 75, 'Charlie': 92}

# Sort dictionary by values (ascending); x is a (key, value) pair
sorted_by_score = dict(sorted(student_scores.items(), key=lambda x: x[1]))
# {'Bob': 75, 'Alice': 85, 'Charlie': 92}

# Sort by value descending
sorted_by_score_desc = dict(
    sorted(student_scores.items(), key=lambda x: x[1], reverse=True)
)
# {'Charlie': 92, 'Alice': 85, 'Bob': 75}

Sorting with itemgetter and attrgetter

from operator import itemgetter, attrgetter

# Using itemgetter for dictionaries
students = [
    {'name': 'Alice', 'grade': 'A', 'age': 20},
    {'name': 'Bob', 'grade': 'B', 'age': 22},
    {'name': 'Charlie', 'grade': 'A', 'age': 19},
]

# Sort by grade, then by age.
# itemgetter('grade', 'age') returns the tuple (d['grade'], d['age']).
students_sorted = sorted(students, key=itemgetter('grade', 'age'))
# Order: Charlie (A, 19), Alice (A, 20), Bob (B, 22)


# Using attrgetter for objects
class Student:
    """Simple record holding a student's name, grade and age."""

    def __init__(self, name, grade, age):
        self.name = name
        self.grade = grade
        self.age = age

    def __repr__(self):
        # A readable repr makes the sorted results easy to inspect when printed.
        return f"Student({self.name!r}, {self.grade!r}, {self.age!r})"


student_objects = [
    Student('Alice', 'A', 20),
    Student('Bob', 'B', 22),
    Student('Charlie', 'A', 19),
]

# Sort by grade, then by age.
# attrgetter('grade', 'age') returns the tuple (obj.grade, obj.age).
student_objects_sorted = sorted(student_objects, key=attrgetter('grade', 'age'))
# Order: Charlie (A, 19), Alice (A, 20), Bob (B, 22)

2. Typecasting Collections

Between List, Tuple, and Set

# List to tuple
my_list = [1, 2, 3, 2]
my_tuple = tuple(my_list)  # (1, 2, 3, 2)

# Tuple to list
my_list_again = list(my_tuple)  # [1, 2, 3, 2]

# List to set (removes duplicates)
my_set = set(my_list)  # {1, 2, 3}

# Set to list (order not guaranteed)
my_list_from_set = list(my_set)  # [1, 2, 3] (order may vary)

# Remove duplicates while keeping first-seen order: dict keys are unique and
# preserve insertion order (Python 3.7+).
unique_in_order = list(dict.fromkeys(my_list))  # [1, 2, 3]

Dictionary Conversions

# List of tuples to dictionary
pairs = [('a', 1), ('b', 2), ('c', 3)]
my_dict = dict(pairs)  # {'a': 1, 'b': 2, 'c': 3}

# Dictionary to list of tuples (key-value pairs)
dict_items = list(my_dict.items())  # [('a', 1), ('b', 2), ('c', 3)]

# Dictionary keys/values to list
keys = list(my_dict.keys())  # ['a', 'b', 'c']
values = list(my_dict.values())  # [1, 2, 3]

# Two lists to dictionary
keys = ['a', 'b', 'c']
values = [1, 2, 3]
# zip() stops at the shorter input, so extra keys or values are silently dropped.
my_dict = dict(zip(keys, values))  # {'a': 1, 'b': 2, 'c': 3}

String to List/Tuple and Back

# String to list of characters
s = "hello"
char_list = list(s)  # ['h', 'e', 'l', 'l', 'o']

# List of characters to string
back_to_string = ''.join(char_list)  # 'hello'

# String to list of words
sentence = "Hello world from Python"
# split() with no argument splits on any run of whitespace
words = sentence.split()  # ['Hello', 'world', 'from', 'Python']

# List of words to string
back_to_sentence = ' '.join(words)  # 'Hello world from Python'

3. Applications of Collections

1. Data Processing

# Count word frequencies in a text
import string
from collections import Counter

text = """Python is an interpreted high-level programming language for
general-purpose programming. Created by Guido van Rossum and first released
in 1991, Python has a design philosophy that emphasizes code readability."""

# Split into lowercase words and strip surrounding punctuation; otherwise
# "programming." (with the trailing period) is counted separately from
# "programming". Hyphens inside words such as "high-level" are kept.
words = [word.strip(string.punctuation) for word in text.lower().split()]

# Drop tokens that consisted only of punctuation, then count frequencies
word_counts = Counter(word for word in words if word)

# Words with equal counts are listed in the order they were first seen.
print(word_counts.most_common(3))
# Output: [('python', 2), ('programming', 2), ('is', 1)]

2. Graph Representation

from collections import deque

# Using dictionary to represent a graph (adjacency list: vertex -> neighbors)
graph = {
    'A': ['B', 'C'],
    'B': ['A', 'D', 'E'],
    'C': ['A', 'F'],
    'D': ['B'],
    'E': ['B', 'F'],
    'F': ['C', 'E'],
}


# Breadth-First Search
def bfs(graph, start):
    """Print the vertices reachable from ``start`` in breadth-first order.

    Args:
        graph: Mapping from each vertex to an iterable of its neighbors.
        start: Vertex at which the traversal begins.

    Raises:
        KeyError: If a visited vertex has no entry in ``graph``.
    """
    visited = {start}
    # deque.popleft() is O(1); list.pop(0) shifts every remaining element.
    queue = deque([start])
    while queue:
        vertex = queue.popleft()
        print(vertex, end=' ')
        for neighbor in graph[vertex]:
            if neighbor not in visited:
                # Mark on enqueue so a vertex is never queued twice.
                visited.add(neighbor)
                queue.append(neighbor)


bfs(graph, 'A')  # A B C D E F

3. Matrix Operations

# Matrix as list of lists (each inner list is one row)
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]

# Transpose a matrix: column i of the input becomes row i of the output
transposed = [[row[i] for row in matrix] for i in range(len(matrix[0]))]
# [[1, 4, 7], [2, 5, 8], [3, 6, 9]]


# Matrix multiplication
def matrix_multiply(a, b):
    """Return the matrix product ``a @ b`` as a new list of lists.

    Args:
        a: Left matrix as a list of rows.
        b: Right matrix as a list of rows.

    Returns:
        A list of rows with one row per row of ``a`` and one column per
        column of ``b``.

    Raises:
        ValueError: If a row of ``a`` does not have as many entries as ``b``
            has rows.
    """
    # zip() would silently truncate mismatched operands and return a wrong
    # product, so reject incompatible shapes up front.
    if any(len(a_row) != len(b) for a_row in a):
        raise ValueError(
            "incompatible shapes: each row of a must have len(b) entries"
        )
    # Columns of b are the same for every row of a, so build them once.
    b_columns = list(zip(*b))
    return [
        [sum(x * y for x, y in zip(a_row, b_col)) for b_col in b_columns]
        for a_row in a
    ]


# Example usage
A = [[1, 2], [3, 4]]
B = [[5, 6], [7, 8]]
result = matrix_multiply(A, B)  # [[19, 22], [43, 50]]

4. Caching with Dictionary

from functools import lru_cache

# Memoization using dictionary: maps n -> fibonacci(n) for values already computed
cache = {}


def fibonacci(n):
    """Return the n-th Fibonacci number, memoized in the module-level ``cache``.

    Values of ``n`` less than or equal to 1 are returned unchanged, so
    ``fibonacci(0) == 0`` and ``fibonacci(1) == 1``.
    """
    if n in cache:
        return cache[n]
    if n <= 1:
        return n
    result = fibonacci(n - 1) + fibonacci(n - 2)
    # Store the result so each n is computed only once.
    cache[n] = result
    return result


# Using functools.lru_cache (built-in decorator)
@lru_cache(maxsize=None)  # maxsize=None: the cache has no size limit
def fib(n):
    """Return the n-th Fibonacci number; results are cached by ``lru_cache``."""
    if n <= 1:
        return n
    return fib(n - 1) + fib(n - 2)