Macaulay2 Engine
Loading...
Searching...
No Matches
SuffixTree.hpp
Go to the documentation of this file.
1#ifndef _suffix_tree_hpp_
2#define _suffix_tree_hpp_
3
45
46#include "NCAlgebras/Word.hpp" // for Word
47
48#include <iostream> // for ostream
49#include <map> // for map, operator!=, __map_iterator, map<>::iterator
50#include <tuple> // for tuple
51#include <utility> // for pair, make_pair
52#include <vector> // for vector, vector<>::iterator, operator<
53
54// used in return value for WordTable as well
55using Overlap = std::tuple<int,int,int,bool>;
56
57// data type of an arc/vertex label
58using Label = std::vector<int>;
59
77{
78public:
79 friend std::ostream& operator<<(std::ostream& o, const SuffixTreeNode& suffixTreeNode);
80 std::ostream& dump(std::ostream&, int depth, bool dumpChildren = true) const;
81
82 // the default constructor makes a root node
84 mParent(nullptr),
86 mSuffixLink(nullptr),
87 mIsFullPattern(false),
89 mArcLabel(Label {}),
90 mLabel(Label {})
91 {};
92
93 // this one makes nodes in the process of building the tree
96 bool isFullPattern) :
99 mSuffixLink(nullptr),
102 {
103 // copy the data in arcLabel to a label owned by the node
104 for (auto i = 0; i < arcLabel.size(); ++i)
105 {
106 mArcLabel.push_back(arcLabel.begin()[i]);
107 }
108
109 // build the label of the node from the label of the parent and the arc label
110 mLabel.insert(mLabel.begin(),parent->label().begin(),parent->label().end());
111 mLabel.insert(mLabel.end(),mArcLabel.begin(),mArcLabel.end());
112 parent->addChild(this);
113 }
114
116
119 Label& arcLabel() { return mArcLabel; }
120 Label& label() { return mLabel; }
121 int patternLeafCount() const { return mPatternLeafCount; }
122 bool isFullPattern() const { return mIsFullPattern; }
123 bool isLeaf() const { return (*(mLabel.end()-1) < 0); }
124
125 std::map<Label,SuffixTreeNode*>::iterator childrenBegin() { return mChildren.begin(); }
126 std::map<Label,SuffixTreeNode*>::iterator childrenEnd() { return mChildren.end(); }
127 size_t numChildren() const { return mChildren.size(); }
128
129 void setParent(SuffixTreeNode* newParent) { mParent = newParent; }
130 void setSuffixLink(SuffixTreeNode* newSuffixLink) { mSuffixLink = newSuffixLink; }
131
132 void removeChild(const Label& child) { mChildren.erase(child); }
133 void addChild(SuffixTreeNode* child) { mChildren.insert(std::make_pair(child->arcLabel(),child)); }
134
135 void dropFromArcLabel(int toDrop) { mArcLabel.erase(mArcLabel.begin(), mArcLabel.begin()+toDrop); }
136
137 void addToPatternLeafCount(bool doIncrement) { if (doIncrement) mPatternLeafCount++; }
138 void setPatternLeafCount(int newPatternLeafCount) { mPatternLeafCount = newPatternLeafCount; }
139
141 {
142 // this function should *only* be called on leaves
143 // since internal nodes may correspond to multiple patterns
144 // returns the 0 based index of the pattern corresponding to this leaf
145 return -*(mLabel.end()-1)-1;
146 }
147
149 {
150 auto search = mChildren.find(s);
151 if (search != mChildren.end())
152 return nullptr;
153 else
154 return search->second;
155 }
156
157private:
158 // parent of this node
160 // children of this node. keys are labels, values are nodes
161 std::map<Label,SuffixTreeNode*> mChildren;
162 // suffix link pointer
164
165 // does this node correspond to a full pattern (i.e. not a suffix of one)?
167 // counts the number of pattern leaves that are descendents of this node
169
170 // a label of a node can be any suffix of the word w . n where n is the
171 // (negative of) the index of the word w. We use negatives here so they don't
172 // collide with the nonnegative integers, which represent variables.
173 // warning: the negative index is 1-based, not 0-based.
175
176 // this could be inductively recomputed each time, but I think it is
177 // better just to store a copy of it in the data type
179};
180
181// return value types for internal functions for SuffixTree
184 Word>;
185
187 Word>;
188
189using InsertType = std::tuple<SuffixTreeNode*,
192
193using SubwordsType = std::tuple<SuffixTreeNode*,
194 Word,
196 bool>;
197
214// other than the monomial list, all functions should work on Word(s) not Label(s)
215// to avoid unnecessary copies
216
218{
219private:
220 //FRIEND_TEST(SuffixTree, suffixtree1);
221public:
222 friend std::ostream& operator<<(std::ostream& o, const SuffixTree& suffixTree);
223 friend void outputPatterns(std::ostream& o, const SuffixTree& suffixTree);
224 // these functions are also in WordTable; we would like to keep the
225 // basic interface of both classes the same
226
227 // need to create the root
228 SuffixTree();
229
230 // the SuffixTree owns all Labels and SuffixTreeNodes created within,
231 // as well as the monomials inserted (stored as labels).
232 ~SuffixTree();
233
234 size_t monomialCount() const { return mMonomials.size(); }
235
236 size_t insert(const Word& w);
237
238 auto insert(const Word& s, std::vector<Overlap>& newRightOverlaps) -> size_t;
239 //size_t insert(Word w, std::vector<Overlap>& newRightOverlaps);
240
241 const Word operator[](int index) const;
242
243 void clear() {
244 destroyChildren(mRoot); //also deletes mRoot
245 SuffixTreeNode* root = new SuffixTreeNode();
246 mRoot = root;
247 mMonomials.clear();
248 }
249
250
251 // lookup routines
252
253 // return all pairs (i,j), where
254 // the i-th word in the table is w (say)
255 // j is a position in word
256 // such that w appears in word starting at position j.
257 bool subwords(const Word& word,
258 std::vector<std::pair<int,int>>& output) const;
259
260 // sets 'output' to the first pair (i,j), where
261 // the i-th word in the table is w (say)
262 // j is a position in word
263 // such that w appears in word starting at position j.
264 // if such a match is found, output is set, and true is returned.
265 // if not, false is returned.
266 bool subword(const Word& word,
267 std::pair<int,int>& output) const;
268
269 // return all pairs (i,j), where
270 // the i-th word in the table is w (say)
271 // j is a position in w
272 // such that word appears in w starting at position j.
273 bool superwords(const Word& word,
274 std::vector<std::pair<int,int>>& output) const;
275
276
277 // this command returns true if word = alpha . v . beta for some v in the
278 // word table, where if v = wordTable[index1], then alpha is not empty
279 // and if v = wordTable[index2] then beta is not empty. Otherwise, it returns false.
280 auto isNontrivialSuperword(const Word& word, int index1, int index2) const -> bool;
281
282 // given 'word', find all left overlaps with elements of the table.
283 // A left overlap of 'alpha' and 'beta' is:
284 // a prefix of alpha is a suffix of beta.
285 // i.e. alpha = a.b
286 // beta = c.a (a,b,c are words)
287 // returned Triple for this overlap:
288 void leftOverlaps(std::vector<Overlap>& output) const;
289
290 // find (right) overlaps with most recent added word 'w'.
291 // Note: Not sure this is possible in this implementation
292 // void rightOverlaps(std::vector<Overlap>& newRightOverlaps) const;
293
294 // other public functions:
295 int numPatterns() const { return mMonomials.size(); }
296
297 // FM: the following are really just here so the tests will run, I would prefer
298 // it if they were private. Is there a way to make this work?
299
300private:
301public: // TODO: fix so we can test these private functions (right now, we just make them public)
302 // the following are internal functions needed for the SuffixTree data
303 // structure to work
304
305 // This function splits the arc from f to its parent by inserting a
306 // new internal node with arc label prefix, where prefix is a prefix
307 // of f->arcLabel(). A pointer to the new node is returned.
308 auto splitArc(SuffixTreeNode* f,
309 const Word& prefix) -> SuffixTreeNode*;
310
311 // s is a suffix not yet in the table. This function finds the
312 // locus of the longest prefix of s whose locus exists. The search
313 // starts at y, and moves down the tree according to the string s.
314 // The return value is the contracted locus x and a node f which is
315 // either a child of y sharing a prefix pre with s - y.label, or f
316 // is nullTreeNode if no such child exists.
318 const Word& s,
319 bool incrementLeafCount = false) const -> ContractedLocusType;
320
321 // For this function to work, there must be a path starting from x
322 // with beta as a prefix (See e.g. Lemma 1 in Amir, et.al.) This
323 // function finds the locus of the shortest word that has beta as a
324 // prefix. It returns this locus, together with the prefix that
325 // needs to be split (if necessary) if beta is empty, then simply
326 // return (x,beta) since x is the extended locus
328 const Word& beta) const -> ExtendedLocusType;
329
330 // Finds an arc from y to a child whose label shares a prefix with s
331 // return a std::pair of nullptrs if no match is found, i.e. the empty
332 // prefix is the only shared prefix with any child of y
334 const Word& s) const -> ExtendedLocusType;
335
336 // Return the longest shared prefix of s and t as a copy
337 // this doesn't really belong here...
338 auto sharedPrefix(const Word s, const Word t) const -> Word;
339
340 // Return all pattern leaves below v
341 auto patternLeaves(SuffixTreeNode* v, std::vector<int>& output) const -> void;
342 auto patternLeavesWorker(SuffixTreeNode* v) const -> std::vector<SuffixTreeNode*>;
343
344 // return all leaves below v
345 auto allLeaves(SuffixTreeNode* v, std::vector<SuffixTreeNode*>& output) const -> void;
346
347 // functions for insert algorithm
348
349 // these functions are for debugging purposes.
350 auto insert(Label& lab, std::vector<Overlap>& rightOverlaps) -> size_t;
351 auto insert(std::vector<Label>& labs, std::vector<Overlap>& rightOverlaps) -> size_t;
352
353 auto insert(std::vector<Word>& ss, std::vector<Overlap>& rightOverlaps) -> size_t;
355 const Word& s,
356 bool isFullPattern) -> InsertType;
359 const Word& beta,
360 const Word& s,
361 bool isFullPattern) -> InsertType;
363 const Word& s,
364 bool isFullPattern) -> InsertType;
365
366 // functions for subwords algorithm
367 // the output is a list of pairs (i,j) such that monomial i appears in position j
368 // of the word w.
369 //auto subword(const Word& w, std::pair<int,int>& output) const -> bool;
370 //auto subwords(const Word& w, std::vector<std::pair<int,int>>& output) const -> bool;
371 auto subwords(const Word& w, std::vector<std::pair<int,int>>& output,bool onlyFirst) const -> bool;
373 const Word& beta,
374 const Word& s) const -> SubwordsType;
376 const Word& s) const -> SubwordsType;
377
378 // the output is a list of pairs (i,j) such that the word w appears in monomial i in position j
379 auto superword(const Word& w, std::pair<int,int>& output) const -> bool;
380 //auto superwords(const Word& w, std::vector<std::pair<int,int>>& output) const -> bool;
381 auto superwords(const Word& w, std::vector<std::pair<int,int>>& output,bool onlyFirst) const -> bool;
382
383 // the output is a list of (i,j) giving the monomial and position of the overlap
384 // in the second argument
385 auto leftOverlaps(const Word& w,
386 std::vector<std::pair<int,int>>& output,
387 bool avoidLast = false) const -> void;
388
389private:
390public: // TODO: remove this public. These are private... Here for testing
391 // root node of the tree
393
394 // we will copy the monomials into the data structure. Even though
395 // this is of type Label, these monomials will not have the word number
396 // appended as a suffix when stored in this container.
398
399 void destroyChildren(SuffixTreeNode* node) const;
400
401};
402
403// some utility/debugging functions
404void outputPatterns(std::ostream& o, const SuffixTree& suffixTree);
405std::ostream& operator<<(std::ostream& o, const SuffixTree& suffixTree);
406std::ostream& operator<<(std::ostream& o, const SuffixTreeNode& suffixTreeNode);
407void outputLabel(std::ostream& o, const Label& vec);
408
409#endif
410
411// Local Variables:
412// compile-command: "make -C $M2BUILDDIR/Macaulay2/e "
413// indent-tabs-mode: nil
414// End:
std::vector< int > word
std::tuple< int, int, int, bool > Overlap
Word prefix(const Word vec, int lengthOfPrefix)
std::tuple< SuffixTreeNode *, SuffixTreeNode *, SuffixTreeNode * > InsertType
std::tuple< SuffixTreeNode *, SuffixTreeNode *, Word > ContractedLocusType
void outputLabel(std::ostream &o, const Label &vec)
Definition SuffixTree.cpp:6
std::tuple< SuffixTreeNode *, Word, SuffixTreeNode *, bool > SubwordsType
std::tuple< SuffixTreeNode *, Word > ExtendedLocusType
std::vector< int > Label
Word and WordWithData — non-owning views over the flat-int encoding of a non-commutative word.
auto subwordsStepC(SuffixTreeNode *x, const Word &beta, const Word &s) const -> SubwordsType
auto contractedLocus(SuffixTreeNode *y, const Word &s, bool incrementLeafCount=false) const -> ContractedLocusType
size_t monomialCount() const
auto insertStepD(SuffixTreeNode *y, const Word &s, bool isFullPattern) -> InsertType
auto extendedLocus(SuffixTreeNode *x, const Word &beta) const -> ExtendedLocusType
auto patternLeaves(SuffixTreeNode *v, std::vector< int > &output) const -> void
auto sharedPrefix(const Word s, const Word t) const -> Word
int numPatterns() const
void leftOverlaps(std::vector< Overlap > &output) const
size_t insert(const Word &w)
auto isNontrivialSuperword(const Word &word, int index1, int index2) const -> bool
void destroyChildren(SuffixTreeNode *node) const
bool superwords(const Word &word, std::vector< std::pair< int, int > > &output) const
auto findMatch(SuffixTreeNode *y, const Word &s) const -> ExtendedLocusType
bool subwords(const Word &word, std::vector< std::pair< int, int > > &output) const
friend std::ostream & operator<<(std::ostream &o, const SuffixTree &suffixTree)
auto insertWorker(SuffixTreeNode *v, const Word &s, bool isFullPattern) -> InsertType
auto insertStepC(SuffixTreeNode *v, SuffixTreeNode *x, const Word &beta, const Word &s, bool isFullPattern) -> InsertType
SuffixTreeNode * mRoot
bool subword(const Word &word, std::pair< int, int > &output) const
const Word operator[](int index) const
Here begin the functions to make the new class swappable with the old WordTable.
friend void outputPatterns(std::ostream &o, const SuffixTree &suffixTree)
auto patternLeavesWorker(SuffixTreeNode *v) const -> std::vector< SuffixTreeNode * >
std::vector< Label > mMonomials
auto allLeaves(SuffixTreeNode *v, std::vector< SuffixTreeNode * > &output) const -> void
auto splitArc(SuffixTreeNode *f, const Word &prefix) -> SuffixTreeNode *
auto superword(const Word &w, std::pair< int, int > &output) const -> bool
auto subwordsStepD(SuffixTreeNode *y, const Word &s) const -> SubwordsType
void setSuffixLink(SuffixTreeNode *newSuffixLink)
void setPatternLeafCount(int newPatternLeafCount)
void removeChild(const Label &child)
std::map< Label, SuffixTreeNode * >::iterator childrenEnd()
size_t numChildren() const
auto getChild(Label &s) -> SuffixTreeNode *
std::map< Label, SuffixTreeNode * >::iterator childrenBegin()
int patternLeafCount() const
void dropFromArcLabel(int toDrop)
SuffixTreeNode * parent()
SuffixTreeNode(SuffixTreeNode *parent, Word arcLabel, bool isFullPattern)
friend std::ostream & operator<<(std::ostream &o, const SuffixTreeNode &suffixTreeNode)
Label & arcLabel()
std::ostream & dump(std::ostream &, int depth, bool dumpChildren=true) const
bool isFullPattern() const
void addChild(SuffixTreeNode *child)
bool isLeaf() const
static SuffixTreeNode * buildRoot()
int getPatternNumber() const
SuffixTreeNode * suffixLink()
SuffixTreeNode * mParent
SuffixTreeNode * mSuffixLink
void setParent(SuffixTreeNode *newParent)
Label & label()
void addToPatternLeafCount(bool doIncrement)
std::map< Label, SuffixTreeNode * > mChildren
One node of a generalised suffix tree built over the inserted non-commutative monomial patterns.
Non-owning view of a non-commutative word: [begin, end) of int variable indices.
Definition Word.hpp:56
void size_t s
Definition m2-mem.cpp:271
void size_t
Definition m2-mem.h:114
STL namespace.
volatile int x