summary | refs | log | tree | commit | diff
path: root/wk6/pset/dna/dna.py
diff options
context:
space:
mode:
Diffstat (limited to 'wk6/pset/dna/dna.py')
-rw-r--r-- wk6/pset/dna/dna.py 58
1 files changed, 58 insertions, 0 deletions
diff --git a/wk6/pset/dna/dna.py b/wk6/pset/dna/dna.py
new file mode 100644
index 0000000..401e674
--- /dev/null
+++ b/wk6/pset/dna/dna.py
@@ -0,0 +1,58 @@
+import csv
+import sys
+
+
def main():
    """Program entry point; currently a stub that performs no work.

    The steps below are placeholders for logic that has not been written yet.
    """
    # TODO: Check for command-line usage
    # TODO: Read database file into a variable
    # TODO: Read DNA sequence file into a variable
    # TODO: Find longest match of each STR in DNA sequence
    # TODO: Check database for matching profiles
    return None
+
+
def longest_match(sequence, subsequence):
    """Return the length of the longest run of subsequence in sequence.

    A "run" is a series of back-to-back, non-overlapping copies of
    ``subsequence``. The result is the number of copies in the longest run.

    Args:
        sequence: The sliceable sequence (e.g. a string) to search.
        subsequence: The pattern whose consecutive repeats are counted.

    Returns:
        The largest number of consecutive repeats of ``subsequence`` found
        anywhere in ``sequence``. Returns 0 when there is no occurrence or
        when ``subsequence`` is empty.
    """
    subsequence_length = len(subsequence)

    # An empty pattern equals the empty slice at every position, so the scan
    # below would never terminate; treat its longest run as 0.
    if subsequence_length == 0:
        return 0

    longest_run = 0

    # A match cannot begin closer to the end than one full pattern length,
    # so later start positions need not be examined.
    last_start = len(sequence) - subsequence_length

    # Try every viable start position and count the consecutive repeats there
    for i in range(last_start + 1):
        count = 0
        start = i

        # Slide the window forward one pattern length per successful match
        # until the next window no longer equals the pattern.
        while sequence[start:start + subsequence_length] == subsequence:
            count += 1
            start += subsequence_length

        # Update most consecutive matches found
        longest_run = max(longest_run, count)

    # After checking for runs at each position in sequence, return longest run found
    return longest_run
+
+
# Run main() only when this file is executed as a script, so that importing
# the module (e.g. to reuse longest_match) has no side effects.
if __name__ == "__main__":
    main()