ICU 76.1 76.1
brkiter.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4********************************************************************************
5* Copyright (C) 1997-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7********************************************************************************
8*
9* File brkiter.h
10*
11* Modification History:
12*
13* Date Name Description
14* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
15* 05/07/97 aliu Fixed DLL declaration.
16* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
17* 08/11/98 helena Sync-up JDK1.2.
18* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
19********************************************************************************
20*/
21
22#ifndef BRKITER_H
23#define BRKITER_H
24
25#include "unicode/utypes.h"
26
31
32#include "unicode/utypes.h"
33
34#if U_SHOW_CPLUSPLUS_API
35
36#if UCONFIG_NO_BREAK_ITERATION
37
38U_NAMESPACE_BEGIN
39
40/*
41 * Allow the declaration of APIs with pointers to BreakIterator
42 * even when break iteration is removed from the build.
43 */
44class BreakIterator;
45
46U_NAMESPACE_END
47
48#else
49
50#include "unicode/uobject.h"
51#include "unicode/unistr.h"
52#include "unicode/chariter.h"
53#include "unicode/locid.h"
54#include "unicode/ubrk.h"
55#include "unicode/strenum.h"
56#include "unicode/utext.h"
57#include "unicode/umisc.h"
58
59U_NAMESPACE_BEGIN
60
// Abstract interface for iterating over boundaries in text. Concrete
// iterators are obtained from the static create*Instance() factories below
// (word, line, character, sentence, title); the pure-virtual members are the
// navigation and text-binding operations each implementation must supply.
106class U_COMMON_API BreakIterator : public UObject {
107public:
// Destructor.
112 virtual ~BreakIterator();
113
// Return true if another object is semantically equal to this one.
127 virtual bool operator==(const BreakIterator&) const = 0;
128
// Returns the complement of the result of operator==; implemented by
// negating an explicit call to the member operator==.
135 bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
136
// Return a polymorphic copy of this object.
142 virtual BreakIterator* clone() const = 0;
143
// Return a polymorphic class ID for this object.
149 virtual UClassID getDynamicClassID() const override = 0;
150
// Return a CharacterIterator over the text being analyzed.
155 virtual CharacterIterator& getText() const = 0;
156
// Get a UText for the text being analyzed.
171 virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
172
// Change the text over which this operates.
184 virtual void setText(const UnicodeString &text) = 0;
185
// Reset the break iterator to operate over the text represented by the UText.
204 virtual void setText(UText *text, UErrorCode &status) = 0;
205
// Change the text over which this operates.
214 virtual void adoptText(CharacterIterator* it) = 0;
215
// DONE (-1) is returned by previous() and next() after all valid boundaries
// have been returned.
216 enum {
222 DONE = static_cast<int32_t>(-1)
223 };
224
// Sets the current iteration position to the beginning of the text,
// position zero.
230 virtual int32_t first() = 0;
231
// Set the iterator position to the index immediately beyond the last
// character in the text.
237 virtual int32_t last() = 0;
238
// Set the iterator position to the boundary preceding the current boundary.
245 virtual int32_t previous() = 0;
246
// Advance the iterator to the boundary following the current boundary.
253 virtual int32_t next() = 0;
254
// Return character index of the current iterator position within the text.
260 virtual int32_t current() const = 0;
261
// Advance the iterator to the first boundary following the specified offset.
270 virtual int32_t following(int32_t offset) = 0;
271
// Set the iterator position to the first boundary preceding the specified
// offset.
280 virtual int32_t preceding(int32_t offset) = 0;
281
// Return true if the specified position is a boundary position.
290 virtual UBool isBoundary(int32_t offset) = 0;
291
// Set the iterator position to the nth boundary from the current boundary.
301 virtual int32_t next(int32_t n) = 0;
302
// For RuleBasedBreakIterators, return the status tag from the break rule
// that determined the boundary. Not pure virtual: a base implementation
// exists outside this header.
316 virtual int32_t getRuleStatus() const;
317
// For RuleBasedBreakIterators, get the status (tag) values from the break
// rule(s) that determined the boundary. Not pure virtual: a base
// implementation exists outside this header.
346 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
347
// Create BreakIterator for word-breaks using the given locale.
367 static BreakIterator* U_EXPORT2
368 createWordInstance(const Locale& where, UErrorCode& status);
369
// Create BreakIterator for line-breaks using specified locale.
391 static BreakIterator* U_EXPORT2
392 createLineInstance(const Locale& where, UErrorCode& status);
393
// Create BreakIterator for character-breaks using specified locale.
413 static BreakIterator* U_EXPORT2
414 createCharacterInstance(const Locale& where, UErrorCode& status);
415
// Create BreakIterator for sentence-breaks using specified locale.
434 static BreakIterator* U_EXPORT2
435 createSentenceInstance(const Locale& where, UErrorCode& status);
436
437#ifndef U_HIDE_DEPRECATED_API
// Create BreakIterator for title-casing breaks using the specified locale.
// Only declared when deprecated API is not hidden.
460 static BreakIterator* U_EXPORT2
461 createTitleInstance(const Locale& where, UErrorCode& status);
462#endif /* U_HIDE_DEPRECATED_API */
463
// Get the set of Locales for which TextBoundaries are installed; the number
// of entries is reported through `count`.
473 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
474
// Get name of the object for the desired Locale, in the desired language.
484 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
485 const Locale& displayLocale,
486 UnicodeString& name);
487
// Get name of the object for the desired Locale, in the language of the
// default locale.
496 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
497 UnicodeString& name);
498
499#ifndef U_FORCE_HIDE_DEPRECATED_API
// Deprecated functionality. Kept as a pure virtual unless deprecated API is
// force-hidden.
519 virtual BreakIterator * createBufferClone(void *stackBuffer,
520 int32_t &BufferSize,
521 UErrorCode &status) = 0;
522#endif // U_FORCE_HIDE_DEPRECATED_API
523
524#ifndef U_HIDE_DEPRECATED_API
525
// Determine whether the BreakIterator was created in user memory by
// createBufferClone(). Defined inline after the class.
532 inline UBool isBufferClone();
533
534#endif /* U_HIDE_DEPRECATED_API */
535
536#if !UCONFIG_NO_SERVICE
// Register a new break iterator of the indicated kind, to use in the given
// locale. Returns the key to pass to unregister().
552 static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
553 const Locale& locale,
554 UBreakIteratorType kind,
555 UErrorCode& status);
556
// Unregister a previously-registered BreakIterator using the key returned
// from the register call.
569 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
570
// Return a StringEnumeration over the locales available at the time of the
// call.
577 static StringEnumeration* U_EXPORT2 getAvailableLocales();
578#endif
579
// Returns the locale for this break iterator.
585 Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
586
587#ifndef U_HIDE_INTERNAL_API
// Get the locale for this break iterator object, as a locale ID string.
// Internal API.
594 const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
595#endif /* U_HIDE_INTERNAL_API */
596
// Set the subject text string upon which the break iterator is operating
// without changing any other aspect of the iterator's state.
622 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
623
624 private:
// Private static construction helpers; implementations are not visible in
// this header. All take a locale plus a type/kind selector and report
// failure through `status`.
625 static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
626 static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
627 static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
628
// These classes are granted access to the private helpers above.
629 friend class ICUBreakIteratorFactory;
630 friend class ICUBreakIteratorService;
631
632protected:
633 // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
634 // or else the compiler will create a public ones.
// NOTE(review): the default constructor line is implied by the comment above
// but is not listed in this page's cross-reference index - confirm against
// the upstream header.
636 BreakIterator();
638 BreakIterator (const BreakIterator &other);
639#ifndef U_HIDE_INTERNAL_API
// Internal: construct with explicit valid and actual locales.
641 BreakIterator (const Locale& valid, const Locale &actual);
// Internal: assignment operator.
643 BreakIterator &operator = (const BreakIterator &other);
644#endif /* U_HIDE_INTERNAL_API */
645
646private:
647
// Locale ID buffers, each ULOC_FULLNAME_CAPACITY chars.
649 char actualLocale[ULOC_FULLNAME_CAPACITY];
650 char validLocale[ULOC_FULLNAME_CAPACITY];
651 char requestLocale[ULOC_FULLNAME_CAPACITY];
652};
653
654#ifndef U_HIDE_DEPRECATED_API
655
// Deprecated: reports whether this BreakIterator was created in user memory
// by createBufferClone(). This inline implementation always returns false.
656 inline UBool BreakIterator::isBufferClone()
657{
658 return false;
659}
660
661#endif /* U_HIDE_DEPRECATED_API */
662
663U_NAMESPACE_END
664
665#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
666
667#endif /* U_SHOW_CPLUSPLUS_API */
668
669#endif // BRKITER_H
670//eof
C++ API: Character Iterator.
virtual bool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
static StringEnumeration * getAvailableLocales()
Return a StringEnumeration over the locales available at the time of the call, including registered l...
virtual ~BreakIterator()
destructor
BreakIterator(const Locale &valid, const Locale &actual)
BreakIterator(const BreakIterator &other)
virtual int32_t previous()=0
Set the iterator position to the boundary preceding the current boundary.
virtual int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the boundary a...
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
virtual CharacterIterator & getText() const =0
Return a CharacterIterator over the text being analyzed.
static BreakIterator * createCharacterInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for character-breaks using specified locale Returns an instance of a BreakIterat...
virtual void setText(UText *text, UErrorCode &status)=0
Reset the break iterator to operate over the text represented by the UText.
virtual int32_t last()=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
virtual BreakIterator * clone() const =0
Return a polymorphic copy of this object.
virtual int32_t next()=0
Advance the iterator to the boundary following the current boundary.
static BreakIterator * createWordInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for word-breaks using the given locale.
virtual int32_t first()=0
Sets the current iteration position to the beginning of the text, position zero.
const char * getLocaleID(ULocDataLocaleType type, UErrorCode &status) const
Get the locale for this break iterator object.
static UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Returns the locale for this break iterator.
bool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition brkiter.h:135
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
static BreakIterator * createTitleInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for title-casing breaks using the specified locale Returns an instance of a Brea...
virtual int32_t next(int32_t n)=0
Set the iterator position to the nth boundary from the current boundary.
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
virtual int32_t current() const =0
Return character index of the current iterator position within the text.
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
static UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
virtual UClassID getDynamicClassID() const override=0
Return a polymorphic class ID for this object.
static BreakIterator * createSentenceInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for sentence-breaks using specified locale Returns an instance of a BreakIterato...
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
UBool isBufferClone()
Determine whether the BreakIterator was created in user memory by createBufferClone(),...
Definition brkiter.h:656
static UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered BreakIterator using the key returned from the register call.
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
static BreakIterator * createLineInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for line-breaks using specified locale.
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
@ DONE
DONE is returned by previous() and next() after all valid boundaries have been returned.
Definition brkiter.h:222
static URegistryKey registerInstance(BreakIterator *toAdopt, const Locale &locale, UBreakIteratorType kind, UErrorCode &status)
Register a new break iterator of the indicated kind, to use in the given locale.
static const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which TextBoundaries are installed.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the b...
Abstract class that defines an API for iteration on text objects.
Definition chariter.h:361
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:195
Base class for 'pure' C++ implementations of uenum api.
Definition strenum.h:61
UObject is the common ICU "boilerplate" class.
Definition uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:296
C++ API: Locale ID object.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C++ API: String Enumeration.
UText struct.
Definition utext.h:1328
C API: BreakIterator.
UBreakIteratorType
The possible types of text boundaries.
Definition ubrk.h:102
#define ULOC_FULLNAME_CAPACITY
Useful constant for the maximum size of the whole locale ID (including the terminating NULL and all k...
Definition uloc.h:264
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested,...
Definition uloc.h:338
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:247
C API: Miscellaneous definitions.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition umisc.h:57
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:96
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:430
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:315