Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cloning refactor and data race fix #393

Merged
merged 16 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 53 additions & 42 deletions clone/clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ import (
"regexp"
"sort"
"strings"
"sync"

"github.com/TimothyStiles/poly/checks"
"github.com/TimothyStiles/poly/seqhash"
Expand Down Expand Up @@ -87,11 +86,23 @@ type Enzyme struct {
RecognitionSite string
}

// Eventually, we want to get the data for this map from ftp://ftp.neb.com/pub/rebase
var enzymeMap = map[string]Enzyme{
"BsaI": {"BsaI", regexp.MustCompile("GGTCTC"), regexp.MustCompile("GAGACC"), 1, 4, "GGTCTC"},
"BbsI": {"BbsI", regexp.MustCompile("GAAGAC"), regexp.MustCompile("GTCTTC"), 2, 4, "GAAGAC"},
"BtgZI": {"BtgZI", regexp.MustCompile("GCGATG"), regexp.MustCompile("CATCGC"), 10, 4, "GCGATG"},
// EnzymeManager manages a set of Enzymes. It allows for management of enzymes throughout the lifecycle of your
// program. EnzymeManager is not safe for concurrent use.
type EnzymeManager struct {
// eMap is the map of enzymes, keyed by enzyme name, that exist for the lifetime of the manager. Not safe for concurrent use.
eMap map[string]Enzyme
}

// NewEnzymeManager creates a new EnzymeManager given some enzymes.
//
// Each enzyme is registered under its Name. If several enzymes share a name,
// the one appearing last in the slice wins. A nil or empty slice yields a
// manager with an empty (non-nil) enzyme map.
func NewEnzymeManager(enzymes []Enzyme) EnzymeManager {
	// The final size is bounded by len(enzymes), so size the map up front to
	// avoid incremental growth while filling it.
	eMap := make(map[string]Enzyme, len(enzymes))
	// Index instead of ranging by value so each Enzyme is copied only once,
	// directly into the map.
	for i := range enzymes {
		eMap[enzymes[i].Name] = enzymes[i]
	}

	return EnzymeManager{
		eMap: eMap,
	}
}

/******************************************************************************
Expand All @@ -100,20 +111,14 @@ Base cloning functions begin here.

******************************************************************************/

// getBaseRestrictionEnzymes returns the package-level enzymeMap. The map
// itself is returned, not a copy.
func getBaseRestrictionEnzymes() map[string]Enzyme {
return enzymeMap
}

// CutWithEnzymeByName cuts a given sequence with an enzyme represented by the
// enzyme's name. It is a convenience wrapper around CutWithEnzyme that
// allows us to specify the enzyme by name.
func CutWithEnzymeByName(seq Part, directional bool, enzymeStr string) ([]Fragment, error) {
enzymeMap := getBaseRestrictionEnzymes()
if _, ok := enzymeMap[enzymeStr]; !ok {
return []Fragment{}, errors.New("Enzyme " + enzymeStr + " not found in enzymeMap")
func (em EnzymeManager) CutWithEnzymeByName(seq Part, directional bool, name string) ([]Fragment, error) {
if v, ok := em.eMap[name]; ok {
return CutWithEnzyme(seq, directional, v), nil
}
enzyme := enzymeMap[enzymeStr]
return CutWithEnzyme(seq, directional, enzyme), nil
return []Fragment{}, errors.New("Enzyme " + name + " not found")
}

// CutWithEnzyme cuts a given sequence with an enzyme represented by an Enzyme struct.
Expand Down Expand Up @@ -235,12 +240,17 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment {
return fragments
}

func recurseLigate(wg *sync.WaitGroup, constructs chan string, infiniteLoopingConstructs chan string, seedFragment Fragment, fragmentList []Fragment, usedFragments []Fragment) {
func recurseLigate(seedFragment Fragment, fragmentList []Fragment, usedFragments []Fragment, existingSeqhashes map[string]struct{}) (openConstructs []string, infiniteConstructs []string) {
// Recurse ligate simulates all possible ligations of a series of fragments. Each possible combination begins with a "seed" that fragments from the pool can be added to.
defer wg.Done()
// If the seed ligates to itself, we can call it done with a successful circularization!
if seedFragment.ForwardOverhang == seedFragment.ReverseOverhang {
constructs <- seedFragment.ForwardOverhang + seedFragment.Sequence
construct := seedFragment.ForwardOverhang + seedFragment.Sequence
seqhash, _ := seqhash.Hash(construct, "DNA", true, true)
if _, ok := existingSeqhashes[seqhash]; ok {
return nil, nil
}
existingSeqhashes[seqhash] = struct{}{}
return []string{construct}, nil
} else {
for _, newFragment := range fragmentList {
// If the seedFragment's reverse overhang ligates to a fragment's forward overhang, we can ligate those together and seed another ligation reaction
Expand All @@ -262,17 +272,26 @@ func recurseLigate(wg *sync.WaitGroup, constructs chan string, infiniteLoopingCo
// If the newFragment's reverse complement already exists in the used fragment list, we need to cancel the recursion.
for _, usedFragment := range usedFragments {
if usedFragment.Sequence == newFragment.Sequence {
infiniteLoopingConstructs <- usedFragment.ForwardOverhang + usedFragment.Sequence + usedFragment.ReverseOverhang
return
infiniteConstruct := usedFragment.ForwardOverhang + usedFragment.Sequence + usedFragment.ReverseOverhang
seqhash, _ := seqhash.Hash(infiniteConstruct, "DNA", false, true)
if _, ok := existingSeqhashes[seqhash]; ok {
return nil, nil
}
existingSeqhashes[seqhash] = struct{}{}
return nil, []string{infiniteConstruct}
}
}
wg.Add(1)
// If everything is clear, append fragment to usedFragments and recurse.
usedFragments = append(usedFragments, newFragment)
go recurseLigate(wg, constructs, infiniteLoopingConstructs, newSeed, fragmentList, usedFragments)
oc, ic := recurseLigate(newSeed, fragmentList, usedFragments, existingSeqhashes)

openConstructs = append(openConstructs, oc...)
infiniteConstructs = append(infiniteConstructs, ic...)
}
}
}

return openConstructs, infiniteConstructs
}

func getConstructs(c chan string, constructSequences chan []string, circular bool) {
Expand Down Expand Up @@ -303,26 +322,17 @@ func getConstructs(c chan string, constructSequences chan []string, circular boo
}

// CircularLigate simulates ligation of all possible fragment combinations into circular plasmids.
func CircularLigate(fragments []Fragment) ([]string, []string, error) {
var wg sync.WaitGroup
func CircularLigate(fragments []Fragment) ([]string, []string) {
var outputConstructs []string
var outputInfiniteLoopingConstructs []string
constructs := make(chan string)
infiniteLoopingConstructs := make(chan string) // sometimes we will get stuck in infinite loops. These are sequences with a recursion break
constructSequences := make(chan []string)
infiniteLoopingConstructSequences := make(chan []string)
existingSeqhashes := make(map[string]struct{})
for _, fragment := range fragments {
wg.Add(1)
go recurseLigate(&wg, constructs, infiniteLoopingConstructs, fragment, fragments, []Fragment{})
openConstructs, infiniteConstructs := recurseLigate(fragment, fragments, []Fragment{}, existingSeqhashes)

outputConstructs = append(outputConstructs, openConstructs...)
outputInfiniteLoopingConstructs = append(outputInfiniteLoopingConstructs, infiniteConstructs...)
}
go getConstructs(constructs, constructSequences, true)
go getConstructs(infiniteLoopingConstructs, infiniteLoopingConstructSequences, false)
wg.Wait()
close(constructs)
close(infiniteLoopingConstructs)
outputConstructs = <-constructSequences
outputInfiniteLoopingConstructs = <-infiniteLoopingConstructSequences
return outputConstructs, outputInfiniteLoopingConstructs, nil
return outputConstructs, outputInfiniteLoopingConstructs
}

/******************************************************************************
Expand All @@ -333,14 +343,15 @@ Specific cloning functions begin here.

// GoldenGate simulates a GoldenGate cloning reaction. As of right now, we only
// support BsaI, BbsI, BtgZI, and BsmBI.
func GoldenGate(sequences []Part, enzymeStr string) ([]string, []string, error) {
func (em *EnzymeManager) GoldenGate(sequences []Part, enzymeStr string) (openConstructs []string, infiniteLoops []string, err error) {
var fragments []Fragment
for _, sequence := range sequences {
newFragments, err := CutWithEnzymeByName(sequence, true, enzymeStr)
newFragments, err := em.CutWithEnzymeByName(sequence, true, enzymeStr)
if err != nil {
return []string{}, []string{}, err
}
fragments = append(fragments, newFragments...)
}
return CircularLigate(fragments)
oc, il := CircularLigate(fragments)
return oc, il, nil
}
Loading