presage 0.9.1
forwardTokenizer.cpp
Go to the documentation of this file.
1
2/******************************************************
3 * Presage, an extensible predictive text entry system
4 * ---------------------------------------------------
5 *
6 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 **********(*)*/
23
24
25#include "forwardTokenizer.h"
26
28 const std::string blankspaces,
29 const std::string separators)
30 : Tokenizer(stream, blankspaces, separators) // all three arguments are passed on unchanged to the base class
31{
// NOTE(review): the first line of this constructor's signature (source
// line 27) is missing from this listing; consult the original file.
32 //std::cerr << "ForwardTokenizer::ForwardTokenizer()" << std::endl;
// A forward tokenizer starts with its offset at the beginning offset.
33 offset = offbeg;
34}
35
38
40{
// Counts tokens by resetting offset to offbeg and calling nextToken() until
// hasMoreTokens() reports false, then restores the offset held on entry.
// Returns the number of nextToken() calls made.
//
// NOTE(review): this listing skips source line 41; check the original file
// for a statement omitted here.
// NOTE(review): in the code visible here, nextToken() leaves offset
// untouched when stream.good() is false, in which case the loop below would
// not terminate. Confirm callers guarantee a good stream.
42
43 // remember the offset held on entry so it can be restored before returning
44 std::streamoff curroff = offset;
45
46 // rewind: counting always starts from the beginning offset
47 offset = offbeg;
48
49 int count = 0;
50 while (hasMoreTokens()) {
51 count++;
52 nextToken(); // returned text is discarded; the call is made to advance offset
53 }
54
55 // put offset back to its entry value (only the offset member is reset here)
56 offset = curroff;
57
58 return count;
59}
60
62{
63 // Pure position check; the stream itself is not touched. Input remains
64 // for as long as offset has not yet reached offend.
65 const bool inputRemains = (offset < offend);
66 return inputRemains;
67}
71
73{
// Extracts the next token starting at offset and advances offset past it.
//
// When the stream is good and offset < offend, the code first skips a run of
// blankspace/separator characters, then collects characters until the next
// blankspace/separator or until offset reaches offend. Each collected
// character is passed through tolower() when lowercaseMode() is true.
//
// Returns the collected token. The result is empty when offset >= offend,
// when no token characters are found, or when the stream is not good (a
// message is then written to std::cerr).
//
// NOTE(review): this listing skips source line 74; check the original file
// for a statement omitted here.
75
76 int current;
77 std::string str;
78
79 if (stream.good()) { // good() if bad,fail and eof bit are not set
// NOTE(review): this first peek reads at the stream's current get position;
// nothing visible here seeks to offset beforehand. Confirm the line missing
// from this listing takes care of positioning.
80 current = stream.peek();
81 if (offset < offend) {
82
// Skip leading blankspace/separator characters one at a time.
// NOTE(review): this loop is not bounded by offend; it stops only once
// peek() yields a value that is neither blankspace nor separator
// (presumably including EOF; verify against isBlankspace/isSeparator).
83 while (isBlankspace(current)
84 || isSeparator(current)) {
85 offset++;
86 stream.seekg(offset);
87 current = stream.peek();
88 }
89
// Collect token characters up to the next delimiter or offend.
90 while (!isBlankspace(current)
91 && !isSeparator(current)
92 && offset < offend) {
93
94 //std::cerr << "[DEBUG] read: "
95 // << static_cast<char>(current)
96 // << std::endl;
97
98 if( lowercaseMode() ) {
99 current = tolower( current );
100 }
101
102 str.push_back(current);
103
104 //std::cerr << "[DEBUG] pushed: "
105 // << static_cast<char>(current)
106 // << std::endl;
107
// consume the character: advance offset, reposition the stream, look ahead
108 offset++;
109 stream.seekg(offset);
110 current = stream.peek();
111 }
112 }
113
// Disabled alternative formulation of the two loops above (commented out).
114// do {
115// do {
116// current = stream.peek();
117// offset++;
118// stream.seekg(offset);
119//
120// //std::cerr << "[DEBUG] read: "
121// // << static_cast<char>(current)
122// // << std::endl;
123//
124// if ( !isBlankspace(current)
125// && !isSeparator(current)
126// && offset <= offend) {
127//
128// if( lowercaseMode() ) {
129// current = tolower( current );
130// }
131//
132// str.push_back(current);
133//
134// //std::cerr << "[DEBUG] pushed: "
135// // << static_cast<char>(current)
136// // << std::endl;
137// }
138// } while ( !isBlankspace(current)
139// && !isSeparator(current)
140// && offset < offend);
141// } while (str.empty() && (offset < offend));
142 } else {
// Stream unusable: report on std::cerr and fall through to return the
// still-empty string; offset is left unchanged on this path.
143 std::cerr << "stream is NOT good!" << std::endl;
144 }
145
146 //std::cerr << "[DEBUG] token: " << str << std::endl;
147
148 return str;
149}
150
152{
// Fraction of the input consumed so far: offset divided by offend, computed
// in double precision.
//
// A non-positive offend (for example an empty stream, where offend is 0)
// would make the division yield NaN or infinity. In that case there is
// nothing left to read, so report 1.0 instead. Results for offend > 0 are
// unchanged.
153 return (offend > 0) ? static_cast<double>(offset) / offend : 1.0;
154}
155
virtual bool hasMoreTokens() const
virtual int countTokens()
virtual std::string nextToken()
ForwardTokenizer(std::istream &stream, const std::string blankspaces, const std::string separators)
virtual double progress() const
std::istream & stream
Definition tokenizer.h:144
std::streamoff offend
Definition tokenizer.h:147
std::streamoff offbeg
Definition tokenizer.h:146
bool isSeparator(const int character) const
std::streamoff offset
Definition tokenizer.h:148
bool lowercaseMode() const
Definition tokenizer.cpp:86
bool isBlankspace(const int character) const
Definition tokenizer.cpp:91