diff options
author | Fudgerboy <91767657+Fudgerboy@users.noreply.github.com> | 2024-05-04 23:35:43 +0000 |
---|---|---|
committer | Fudgerboy <91767657+Fudgerboy@users.noreply.github.com> | 2024-05-04 23:35:43 +0000 |
commit | 8d8e15205dbdd76d4574085e5321de1070e67b33 (patch) | |
tree | a9ac6ae14fecbe568dc34adf38b0844f7ba8f642 /wk6/pset/dna/dna.py | |
parent | 5121d368936d3d1f2611636e9183c060be7f3bf5 (diff) |
Sat, May 4, 2024, 4:35 PM -07:00
Diffstat (limited to 'wk6/pset/dna/dna.py')
-rw-r--r-- | wk6/pset/dna/dna.py | 58 |
1 files changed, 58 insertions, 0 deletions
import csv
import sys


def main():
    """Placeholder entry point; every step listed below is still a TODO."""

    # TODO: Check for command-line usage

    # TODO: Read database file into a variable

    # TODO: Read DNA sequence file into a variable

    # TODO: Find longest match of each STR in DNA sequence

    # TODO: Check database for matching profiles

    return


def longest_match(sequence, subsequence):
    """Return the length of the longest run of subsequence in sequence.

    A "run" is a series of back-to-back, non-overlapping repeats of
    ``subsequence``. Every index of ``sequence`` is tried as a possible
    start of a run, and the largest repeat count found is returned.

    Args:
        sequence: The text to search.
        subsequence: The text whose consecutive repeats are counted.

    Returns:
        The highest number of consecutive repeats found, or 0 when
        ``subsequence`` never occurs or either argument is empty.
    """

    # An empty subsequence compares equal to every empty slice, so the scan
    # below would never stop; there is no meaningful run to report.
    if not subsequence:
        return 0

    # Initialize variables
    longest_run = 0
    subsequence_length = len(subsequence)

    # Check each character in sequence for most consecutive runs of subsequence
    for i in range(len(sequence)):

        # Initialize count of consecutive runs starting at index i
        count = 0
        start = i

        # Slide a window of subsequence_length forward one whole repeat at a
        # time for as long as it keeps matching. A slice that runs past the
        # end of sequence is simply shorter (or empty), so it cannot match a
        # non-empty subsequence and the loop ends without any bounds check.
        while sequence[start:start + subsequence_length] == subsequence:
            count += 1
            start += subsequence_length

        # Update most consecutive matches found
        longest_run = max(longest_run, count)

    # After checking for runs at each character in sequence, return longest run found
    return longest_run


# Run main() only when executed as a script, so that importing this module
# (for example, to reuse longest_match) has no side effects.
if __name__ == "__main__":
    main()