libzypp  17.35.12
Utf8.h
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 ----------------------------------------------------------------------/
9 *
10 * This file contains private API, this might break at any time between releases.
11 * Strictly for internal use!
12 */
13 
14 #ifndef ZYPP_TUI_OUTPUT_UTF8_H_
15 #define ZYPP_TUI_OUTPUT_UTF8_H_
16 
17 #include <cstdlib>
18 #include <cstring>
19 #include <wchar.h>
20 
21 #include <iostream>
22 #include <string>
23 #include <utility>
24 
25 namespace ztui {
26 
28 namespace utf8
29 {
/**
 * Simple utf8 string.
 *
 * Thin wrapper around a std::string that holds UTF-8 encoded text.
 * \ref size and \ref substr operate on characters instead of bytes and
 * skip embedded ANSI SGR sequences (ESC '[' ... 'm').
 */
class string
{
public:
  typedef std::string::size_type size_type;
  static const size_type npos = std::string::npos;

public:
  string() {}
  string( const char * rhs ) : _str( rhs ) {}
  string( std::string rhs ) : _str(std::move( rhs )) {}

public:
  /** The underlying bytes as NUL terminated C string. */
  const char * c_str() const { return _str.c_str(); }
  /** The underlying byte string. */
  const std::string & str() const { return _str; }
  std::string & str() { return _str; }

public:
  /**
   * utf8 size
   *
   * If \c LANG starts with \c zh, \c ko or \c ja (tested once, on the first
   * call) the result is the sum of the \c wcwidth of all characters.
   * Otherwise it is the number of bytes which are not utf8 continuation
   * bytes. In both cases complete ANSI SGR sequences are not counted.
   */
  size_type size() const
  {
    // test for locales using dual width fonts:
    static const bool isCJK = []()->bool {
      const char * lang = ::getenv( "LANG" );
      return ( lang && ( !strncmp( lang, "zh", 2 )
                      || !strncmp( lang, "ko", 2 )
                      || !strncmp( lang, "ja", 2 ) ) );
    }();

    // columnWidth should actually be correct for ALL locales;
    // codepointCount is faster and hopefully accurate enough.
    return isCJK ? columnWidth() : codepointCount();
  }

  /** Same as \ref size. */
  size_type length() const
  { return size(); }

  /**
   * utf8 substring
   *
   * \param pos_r Index of the first character to include.
   * \param len_r Number of characters to include; \ref npos means
   *              'up to the end'.
   * Positions are translated to byte offsets by \ref upos. If \a pos_r can
   * not be translated the underlying std::string::substr is called with
   * \ref npos as position.
   */
  string substr( size_type pos_r = 0, size_type len_r = npos ) const
  {
    const size_type first = upos( pos_r );
    const size_type last  = upos( len_r, first );
    return string( _str.substr( first, ( last == npos ? npos : last-first ) ) );
  }

private:
  /** Test for continuation byte '10xxxxxx'. */
  bool isContinuationByte( char ch ) const
  { return( (ch & 0xC0) == 0x80 ); }

  /**
   * \ref size implementation based on \c mbrtowc / \c wcwidth.
   *
   * Counting stops at the first byte sequence \c mbrtowc can not convert.
   */
  size_type columnWidth() const
  {
    size_type len = 0;
    const char * s = _str.c_str();
    size_type slen = _str.size();
    // Private conversion state: the shared internal state used when passing
    // NULL is unspecified after a conversion error.
    mbstate_t state = mbstate_t();

    while ( slen > 0 )
    {
      if ( slen >= 2 && *s == '\033' && *(s+1) == '[' ) // skip ansi SGR
      {
        slen -= 2; s += 2;
        while ( slen > 0 && *s != 'm' )
        { --slen; ++s; }
        if ( slen > 0 )
        { --slen; ++s; }
        continue;
      }

      wchar_t wc = 0;
      const size_t bytes = mbrtowc( &wc, s, slen, &state );
      // mbrtowc returns an unsigned value: 0 for an embedded NUL, (size_t)-1
      // for an invalid and (size_t)-2 for an incomplete sequence. None of
      // them must be used to advance the pointer.
      if ( bytes == 0 || bytes == static_cast<size_t>(-1) || bytes == static_cast<size_t>(-2) )
        break;

      // wcwidth returns -1 for non-printable characters. Adding that to the
      // unsigned counter would wrap; count one instead, like the byte based
      // counting in codepointCount does.
      const int width = wcwidth( wc );
      len += ( width < 0 ? 1 : static_cast<size_type>(width) );
      slen -= bytes;
      s += bytes;
    }
    return len;
  }

  /**
   * \ref size implementation which simply does not count continuation
   * bytes '10xxxxxx' and the bytes of complete ANSI SGR sequences.
   */
  size_type codepointCount() const
  {
    size_type ret = _str.size();
    // 0: outside a sequence; 1: ESC seen; >=2: bytes of the sequence so far
    size_type ansi = 0;
    for ( char ch : _str )
    {
      if ( ansi == 1 )
      {
        if ( ch == '[' )
        {
          ansi = 2;
          continue;
        }
        // ESC not immediately followed by '[' does not start a sequence;
        // forget it and handle ch like any other byte.
        ansi = 0;
      }
      else if ( ansi >= 2 ) // not testing for in [0-9;m]
      {
        ++ansi;
        if ( ch == 'm' ) // SGR end
        { ret -= ansi; ansi = 0; }
        continue;
      }

      if ( isContinuationByte( ch ) )
        --ret;
      else if ( ch == '\033' )
        ansi = 1;
    }
    return ret;
  }

  /**
   * Return start of codepoint \a pos_r starting at position \a start_r.
   *
   * \param pos_r   Number of characters to skip.
   * \param start_r Byte offset to start counting at.
   * \return Byte offset into the underlying string, or \ref npos if
   *         \a pos_r is \ref npos, \a start_r is behind the end of the
   *         string, or the string ends before \a pos_r characters were
   *         skipped.
   */
  size_type upos( size_type pos_r, size_type start_r = 0 ) const
  {
    if ( pos_r == npos || start_r > _str.size() )
      return npos;

    size_type upos = start_r;
    for ( const char * chp = _str.c_str() + upos; *chp; ++chp, ++upos )
    {
      if ( ! isContinuationByte( *chp ) )
      {
        if ( pos_r )
          --pos_r;
        else
          return upos;

        // Leading SGR sequences belong to the character following them:
        // step over them here, the loop increment then consumes the
        // first byte of that character.
        while ( *chp == '\033' && *(chp+1) == '[' ) // skip any ansi SGR
        {
          chp += 2;
          upos += 2;
          while ( *chp && *chp != 'm' )
          { ++chp; ++upos; }
          if ( *chp )
          { ++chp; ++upos; }
          else
            break; // incomplete ansi SGR
        }
        if ( ! *chp )
          break; // do not step behind the terminating NUL
      }
    }
    return( pos_r ? npos : upos );
  }

private:
  std::string _str;
};
171 
173  inline string operator+( const string & lhs, const string & rhs )
174  { return string( lhs.str() + rhs.str() ); }
176  inline string operator+( const string & lhs, const std::string & rhs )
177  { return string( lhs.str() + rhs ); }
179  inline string operator+( const std::string & lhs, const string & rhs )
180  { return string( lhs + rhs.str() ); }
182  inline string operator+( const string & lhs, const char * rhs )
183  { return string( lhs.str() + rhs ); }
185  inline string operator+( const char * lhs, const string & rhs )
186  { return string( lhs + rhs.str() ); }
187 
189  inline std::ostream & operator<<( std::ostream & str, const string & obj )
190  { return str << obj.str(); }
191 
192 } // namespace utf8
194 
195 }
196 
197 #endif // ZYPP_TUI_OUTPUT_UTF8_H_
size_type length() const
Definition: Utf8.h:118
size_type size() const
utf8 size
Definition: Utf8.h:49
bool isContinuationByte(char ch) const
Test for continuation byte '10xxxxxx'.
Definition: Utf8.h:131
static const size_type npos
Definition: Utf8.h:35
String related utilities and Regular expression matching.
std::ostream & operator<<(std::ostream &str, const string &obj)
Definition: Utf8.h:189
Definition: Arch.h:363
string(std::string rhs)
Definition: Utf8.h:40
size_type upos(size_type pos_r, size_type start_r=0) const
Return start of codepoint pos_r starting at position start_r.
Definition: Utf8.h:135
string(const char *rhs)
Definition: Utf8.h:39
string substr(size_type pos_r=0, size_type len_r=npos) const
utf8 substring
Definition: Utf8.h:122
std::string::size_type size_type
Definition: Utf8.h:34
string operator+(const string &lhs, const std::string &rhs)
Definition: Utf8.h:176
std::string _str
Definition: Utf8.h:169
Simple utf8 string.
Definition: Utf8.h:31
const char * c_str() const
Definition: Utf8.h:43
string operator+(const string &lhs, const string &rhs)
Definition: Utf8.h:173
std::string & str()
Definition: Utf8.h:45
SolvableIdType size_type
Definition: PoolMember.h:126
const std::string & str() const
Definition: Utf8.h:44