On Wednesday, 14 July 2021 at 19:00:08 UTC, jfondren wrote:
On Wednesday, 14 July 2021 at 18:04:44 UTC, someone wrote:
On Wednesday, 14 July 2021 at 06:28:37 UTC, jfondren wrote:
alternate 1:
- pull tests out into a named enum template, like std.traits
- always static assert enum, rather than conditionally
asserting false
- always have the rest of the code
```d
enum isString(T) = is(T == string) || is(T == wstring) ||
is(T == dstring); // true only when T is exactly string, wstring or dstring
// very similar to std.traits.isSomeString
struct gudtUGC(T) {
static assert(isString!T, "error message"); // checked unconditionally, for every T the struct is instantiated with
// unconditional structure code
}
```
... is it me or this isn't triggering the assert either ?
This isn't a complete example. The same problem elsewhere in
your program can cause dmd to exit before it ever gets to the
static assert here, for the same reason that the static assert in
your original code was never reached.
Here's a complete example:
```d
enum isString(T) = is(T == string) || is(T == wstring) || is(T
== dstring); // true only when T is exactly string, wstring or dstring
struct gudtUGC(T) {
static assert(isString!T, "error message"); // evaluated when gudtUGC!T is instantiated
// unconditional structure code
}
unittest {
gudtUGC!int; // int is not one of the accepted types, so this instantiation trips the static assert
}
```
which fails with
```
example.d(4): Error: static assert: "error message"
example.d(9): instantiated from here: `gudtUGC!int`
```
If you have the static assert there but then still follow up
with static ifs that only conditionally produce an alias, then
you have the same problem still.
Please, go to the bottom of the unittest block and uncomment one
of those lines (DMD version here is DMD64 D Compiler v2.096.1):
```d
/// implementation-bugs [-] using foreach (with this structure)
/// always misses the last grapheme‐cluster … possible phobos bug
/// # 20483 @ unittest's testUTFcommon() last line
/// implementation-bugs [-] static assert not firing
/// implementation‐tasks [✓] for the time being input parameters
/// are declared const instead of in; eventually they will go back
/// to in once the related DIP has been settled once and for all;
/// but definitely not scope const
/// implementation‐tasks [✓] reconsider excessive cast usage as
/// suggested by Ali: bypassing compiler checks could be potentially
/// harmful … cast and integer promotion @
/// http://ddili.org/ders/d.en/cast.html
/// implementation‐tasks [-] reconsider making this whole UDT
/// zero‐based as suggested by ag0aep6g, who has a good point
/// implementation‐tasks‐possible [-] pad[L|R]
/// implementation‐tasks‐possible [-] replicate/repeat
/// implementation‐tasks‐possible [-] replace(string, string)
/// implementation‐tasks‐possible [-] translate(string, string) …
/// same‐size strings matching one‐to‐one
/// usage: array slicing can be used for usual things like:
/// left() right() substr() etc … mainly when grapheme‐clusters are
/// not expected at all
/// usage: array slicing needs a zero‐based first range argument
/// and a one‐past‐the‐end second one (which reads like a one‐based
/// index), which is somewhat counter‐intuitive
module fw.types.UniCode;
import std.algorithm : map, joiner;
import std.array : array;
import std.conv : to;
import std.range : walkLength, take, tail, drop, dropBack; ///
repeat, padLeft, padRight
import std.stdio;
import std.uni : Grapheme, byGrapheme;
/// within this file: gudtUGC
/// module constructors and destructors; every body is currently empty

/// executed only‐once per‐app:
shared static this() { }

/// executed only‐once per‐thread:
static this() { }

/// executed only‐once per‐thread:
static ~this() { }

/// executed only‐once per‐app:
shared static ~this() { }
alias stringUGC   = Grapheme;               /// one grapheme‐cluster (std.uni.Grapheme)
alias stringUGC08 = gudtUGC!(stringUTF08);
alias stringUGC16 = gudtUGC!(stringUTF16);
alias stringUGC32 = gudtUGC!(stringUTF32);
alias stringUTF08 = string;  /// same as immutable(char )[];
alias stringUTF16 = wstring; /// same as immutable(wchar)[];
alias stringUTF32 = dstring; /// same as immutable(dchar)[];

/// true only for the three UTF string types gudtUGC can be instantiated with
enum isTypeSupported(type) =
    is(type == stringUTF08) ||
    is(type == stringUTF16) ||
    is(type == stringUTF32)
;

/// mixin templateUGC!(stringUTF08, r"gudtUGC08"d);
/// mixin templateUGC!(stringUTF16, r"gudtUGC16"d);
/// mixin templateUGC!(stringUTF32, r"gudtUGC32"d);
/// template templateUGC (typeStringUTF, alias lstrStructureID) {
/// if these were possible there will be no need for stringUGC## aliases in main()

public struct gudtUGC(typeStringUTF) { /// UniCode grapheme‐cluster‐aware string manipulation (implemented for one‐based operations)

    static assert(
        isTypeSupported!(typeStringUTF),
        r"ooops … gudtUGC structure requires [string|wstring|dstring] ≠ ["d ~ typeStringUTF.stringof ~ r"]"d
    ); /// meaning: this will halt compilation if this UDT was instantiated with a type other than the ones intended
    /// note: the assert looks at the template argument only; it fires for eg gudtUGC!(int), but NOT for
    /// eg stringUGC32(1) … there the instantiation (dstring) is a supported one, so the assert passes and
    /// the compiler reports an ordinary constructor‐argument mismatch instead

    /// provides: public property size_t count
    /// provides: public void popFront() / bool empty() / typeStringUTF front() … input‐range primitives (foreach)
    /// provides: public size_t decode(typeStringUTF strSequence)
    /// provides: public typeStringUTF encode()
    /// provides: public gudtUGC!(typeStringUTF) take(size_t intStart, size_t intCount = 1)
    /// provides: public gudtUGC!(typeStringUTF) takeL(size_t intCount)
    /// provides: public gudtUGC!(typeStringUTF) takeR(size_t intCount)
    /// provides: public gudtUGC!(typeStringUTF) chopL(size_t intCount)
    /// provides: public gudtUGC!(typeStringUTF) chopR(size_t intCount)
    /// provides: public typeStringUTF takeasUTF(size_t intStart, size_t intCount = 1)
    /// provides: public typeStringUTF takeLasUTF(size_t intCount)
    /// provides: public typeStringUTF takeRasUTF(size_t intCount)
    /// provides: public typeStringUTF chopLasUTF(size_t intCount)
    /// provides: public typeStringUTF chopRasUTF(size_t intCount)
    /// provides: public typeStringUTF padLasUTF(size_t intCount, typeStringUTF strPadding = r" ") … pending: always null
    /// provides: public typeStringUTF padRasUTF(size_t intCount, typeStringUTF strPadding = r" ") … pending: always null

    /// usage; eg: stringUGC32("äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese"d).take(35, 3).take(1,2).take(1,1).encode(); /// 日
    /// usage; eg: stringUGC32("äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese"d).take(35).encode(); /// 日
    /// usage; eg: stringUGC32("äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese"d).takeasUTF(35); /// 日

    /// input‐range primitives; the cursor (pintSequenceCurrent) is one‐based, so the
    /// last valid position is pintSequenceCount itself and the range is exhausted only
    /// once the cursor has moved PAST it (or when nothing was decoded: cursor == 0)

    void popFront() { ++pintSequenceCurrent; }
    bool empty() { return pintSequenceCurrent < 1 || pintSequenceCurrent > pintSequenceCount; }
    typeStringUTF front() { return takeasUTF(pintSequenceCurrent); }

    private stringUGC[] pugcSequence;        /// the decoded grapheme‐clusters
    private size_t pintSequenceCount = 0;    /// how many grapheme‐clusters pugcSequence holds
    @property public size_t count() { return pintSequenceCount; }
    private size_t pintSequenceCurrent = 0;  /// one‐based range cursor; 0 while nothing is decoded

    this(
        const typeStringUTF lstrSequence
    ) {

        /// (1) given UTF‐encoded sequence

        this.decode(lstrSequence);

    }

    @safe public size_t decode( /// UniCode (UTF‐encoded → grapheme‐cluster) sequence
        const typeStringUTF lstrSequence
    ) {

        /// (1) given UTF‐encoded sequence
        /// returns: the number of grapheme‐clusters now held (0 for a null sequence)

        if (lstrSequence is null) {

            pugcSequence = null;
            pintSequenceCount = 0;
            pintSequenceCurrent = 0;

        } else {

            pugcSequence = lstrSequence.byGrapheme.array;
            pintSequenceCount = pugcSequence.length;
            pintSequenceCurrent = 1;

        }

        return pintSequenceCount;

    }

    @safe public typeStringUTF encode() { /// UniCode (grapheme‐cluster → UTF‐encoded) sequence

        /// returns: the whole sequence re‐encoded, or null when no grapheme‐cluster is held

        return pintSequenceCount >= 1 ? joinClusters(pugcSequence) : null;

    }

    @safe public gudtUGC!(typeStringUTF) take( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
        const size_t lintStart,
        const size_t lintCount = 1
    ) {

        /// (1) given start position >= 1
        /// (2) given count >= 1
        /// returns: the selected clusters as a new gudtUGC; an empty (nothing decoded) one when out of range

        /// a null result from takeasUTF() decodes to exactly the default‐initialised state

        return gudtUGC!(typeStringUTF)(takeasUTF(lintStart, lintCount));

    }

    @safe public gudtUGC!(typeStringUTF) takeL( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
        const size_t lintCount
    ) {

        /// (1) given count >= 1

        return gudtUGC!(typeStringUTF)(takeLasUTF(lintCount));

    }

    @safe public gudtUGC!(typeStringUTF) takeR( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
        const size_t lintCount
    ) {

        /// (1) given count >= 1

        return gudtUGC!(typeStringUTF)(takeRasUTF(lintCount));

    }

    @safe public gudtUGC!(typeStringUTF) chopL( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
        const size_t lintCount
    ) {

        /// (1) given count >= 1

        return gudtUGC!(typeStringUTF)(chopLasUTF(lintCount));

    }

    @safe public gudtUGC!(typeStringUTF) chopR( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
        const size_t lintCount
    ) {

        /// (1) given count >= 1

        return gudtUGC!(typeStringUTF)(chopRasUTF(lintCount));

    }

    @safe public typeStringUTF takeasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
        const size_t lintStart,
        const size_t lintCount = 1
    ) {

        /// (1) given start position >= 1
        /// (2) given count >= 1
        /// returns: the selected clusters re‐encoded, or null when the request is out of range

        /// eg#1: takeasUTF(1,3) → range#1=start-1=1-1=0 and range#2=range#1+count=0+3=3 → 0..3
        /// eg#1: takeasUTF(6,3) → range#1=start-1=6-1=5 and range#2=range#1+count=5+3=8 → 5..8
        /// eg#2: takeasUTF(01,1) → range#1=start-1=01-1=00 and range#2=range#1+count=00+1=01 → 00..01
        /// eg#2: takeasUTF(50,1) → range#1=start-1=50-1=49 and range#2=range#1+count=49+1=50 → 49..50

        typeStringUTF lstrSequence = null;

        if (lintStart >= 1 && lintCount >= 1 && lintStart <= pintSequenceCount) {

            immutable size_t lintRange1 = lintStart - 1;

            /// compare against the room left instead of computing start-1+count first:
            /// that sum can wrap around for huge counts and yield an inverted slice

            if (lintCount <= pintSequenceCount - lintRange1) {

                lstrSequence = joinClusters(pugcSequence[lintRange1 .. lintRange1 + lintCount]);

            }

        }

        return lstrSequence;

    }

    @safe public typeStringUTF takeLasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
        const size_t lintCount
    ) {

        /// (1) given count >= 1
        /// returns: the first lintCount clusters re‐encoded, or null when out of range

        return isCountWithin(lintCount) ? joinClusters(pugcSequence.take(lintCount)) : null;

    }

    @safe public typeStringUTF takeRasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
        const size_t lintCount
    ) {

        /// (1) given count >= 1
        /// returns: the last lintCount clusters re‐encoded, or null when out of range

        return isCountWithin(lintCount) ? joinClusters(pugcSequence.tail(lintCount)) : null;

    }

    @safe public typeStringUTF chopLasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
        const size_t lintCount
    ) {

        /// (1) given count >= 1
        /// returns: everything but the first lintCount clusters re‐encoded, or null when out of range

        return isCountWithin(lintCount) ? joinClusters(pugcSequence.drop(lintCount)) : null;

    }

    @safe public typeStringUTF chopRasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
        const size_t lintCount
    ) {

        /// (1) given count >= 1
        /// returns: everything but the last lintCount clusters re‐encoded, or null when out of range

        return isCountWithin(lintCount) ? joinClusters(pugcSequence.dropBack(lintCount)) : null;

    }

    @safe public typeStringUTF padLasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
        const size_t lintCount,
        const typeStringUTF lstrPadding = r" "
    ) {

        /// (1) given count >= 1
        /// [2] given padding (default is a single blank space)
        /// pending: not implemented yet, every call returns null

        return null;

    }

    @safe public typeStringUTF padRasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
        const size_t lintCount,
        const typeStringUTF lstrPadding = r" "
    ) {

        /// (1) given count >= 1
        /// [2] given padding (default is a single blank space)
        /// pending: not implemented yet, every call returns null

        return null;

    }

    /// helper: true when lintCount selects at least one and at most all held clusters

    @safe private bool isCountWithin(const size_t lintCount) {

        return lintCount >= 1 && lintCount <= pintSequenceCount;

    }

    /// helper: re‐encodes a run of grapheme‐clusters into one typeStringUTF
    /// (ref in the lambda: slice each cluster in place rather than a temporary copy)

    @safe private typeStringUTF joinClusters(typeRange)(typeRange lugcClusters) {

        return lugcClusters
            .map!((ref g) => g[])
            .joiner
            .to!(typeStringUTF)
            ;

    }

}
unittest {

    /// encoding‐independent checks: these must give the same answers whatever the
    /// UTF encoding is, which is the whole point of this UDT

    void testUTFcommon(
        typeStringUTF,
        typeStringUGC
    )(
        const typeStringUTF lstrSequence3
    ) {

        typeStringUGC lugcSequence3 = typeStringUGC(lstrSequence3);

        /// round‐trip

        assert(lugcSequence3.encode() == lstrSequence3);

        /// take() … chained, single cluster, several clusters

        assert(lugcSequence3.take(35, 3).take(1,2).take(1,1).encode() == r"日");

        assert(lugcSequence3.take(21).encode() == r"р");
        assert(lugcSequence3.take(27).encode() == r"й");
        assert(lugcSequence3.take(35).encode() == r"日");
        assert(lugcSequence3.take(37).encode() == r"語");

        assert(lugcSequence3.take(21, 7).encode() == r"русский");
        assert(lugcSequence3.take(35, 3).encode() == r"日本語");

        /// takeasUTF() … same positions, straight to UTF

        assert(lugcSequence3.takeasUTF(21) == r"р");
        assert(lugcSequence3.takeasUTF(27) == r"й");
        assert(lugcSequence3.takeasUTF(35) == r"日");
        assert(lugcSequence3.takeasUTF(37) == r"語");

        assert(lugcSequence3.takeasUTF(21, 7) == r"русский");
        assert(lugcSequence3.takeasUTF(35, 3) == r"日本語");

        /// takeL() / takeR() and their UTF twins

        assert(lugcSequence3.takeL(1).encode() == r"ä");
        assert(lugcSequence3.takeR(1).encode() == r"😎");
        assert(lugcSequence3.takeL(7).encode() == r"äëåčñœß");
        assert(lugcSequence3.takeR(16).encode() == r"日本語 = japanese 😎");

        assert(lugcSequence3.takeLasUTF(1) == r"ä");
        assert(lugcSequence3.takeRasUTF(1) == r"😎");
        assert(lugcSequence3.takeLasUTF(7) == r"äëåčñœß");
        assert(lugcSequence3.takeRasUTF(16) == r"日本語 = japanese 😎");

        /// chopL() / chopR() and their UTF twins

        assert(lugcSequence3.chopL(10).encode() == r"russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎");
        assert(lugcSequence3.chopR(21).encode() == r"äëåčñœß … russian = русский 🇷🇺");

        assert(lugcSequence3.chopLasUTF(10) == r"russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎");
        assert(lugcSequence3.chopRasUTF(21) == r"äëåčñœß … russian = русский 🇷🇺");

        /// rebuilding the sequence one grapheme‐cluster at a time (one‐based positions)

        typeStringUTF lstrSequence3reencoded;

        foreach (lintSequenceUGC; 1 .. lstrSequence3.byGrapheme.walkLength + 1) {

            lstrSequence3reencoded ~= lugcSequence3.takeasUTF(lintSequenceUGC);

        }

        assert(lstrSequence3reencoded == lstrSequence3);

        /// same rebuild, this time through the structure's own range primitives

        lstrSequence3reencoded = null;

        foreach (typeStringUTF lstrSequence3UGC; lugcSequence3) {

            lstrSequence3reencoded ~= lstrSequence3UGC;

        }

        //assert(lstrSequence3reencoded == lstrSequence3); /// ooops … always missing last grapheme‐cluster: possible bug # 20483
        /// NOTE(review): this foreach is driven by gudtUGC's empty()/front()/popFront(); if empty()
        /// already answers true when the one‐based cursor merely reaches the cluster count, the
        /// last cluster is never yielded … worth ruling that out before blaming issue 20483

    }

    /// per‐encoding size checks followed by the common checks; the three expected
    /// code‐unit counts are the only figures that differ between encodings

    void testUTFsizes(
        typeStringUTF,
        typeStringUGC
    )(
        const typeStringUTF lstrSequence1,
        const typeStringUTF lstrSequence2,
        const typeStringUTF lstrSequence3,
        const size_t lintUnits1,
        const size_t lintUnits2,
        const size_t lintUnits3
    ) {

        /// checking UGC sizes; ie grapheme‐cluster count

        assert(lstrSequence1.byGrapheme.walkLength == 50);
        assert(lstrSequence2.byGrapheme.walkLength == 50);
        assert(lstrSequence3.byGrapheme.walkLength == 50);

        /// checking UGA sizes; ie code‐point count

        assert(lstrSequence1.walkLength == 50);
        assert(lstrSequence2.walkLength == 50);
        assert(lstrSequence3.walkLength == 52);

        /// checking UTF sizes; ie code‐unit count

        assert(lstrSequence1.length == lintUnits1);
        assert(lstrSequence2.length == lintUnits2);
        assert(lstrSequence3.length == lintUnits3);

        /// checking for correct string manipulation

        testUTFcommon!(typeStringUTF, typeStringUGC)(lstrSequence3);

    }

    /// UTF‐08

    testUTFsizes!(stringUTF08, stringUGC08)(
        r"12345678901234567890123456789012345678901234567890"c,
        r"1234567890АВГДЕЗИЙКЛABCDEFGHIJabcdefghijQRSTUVWXYZ"c,
        r"äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎"c,
        50, 60, 91
    );

    /// UTF‐16

    testUTFsizes!(stringUTF16, stringUGC16)(
        r"12345678901234567890123456789012345678901234567890"w,
        r"1234567890АВГДЕЗИЙКЛABCDEFGHIJabcdefghijQRSTUVWXYZ"w,
        r"äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎"w,
        50, 50, 57
    );

    /// UTF‐32

    testUTFsizes!(stringUTF32, stringUGC32)(
        r"12345678901234567890123456789012345678901234567890"d,
        r"1234567890АВГДЕЗИЙКЛABCDEFGHIJabcdefghijQRSTUVWXYZ"d,
        r"äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎"d,
        50, 50, 52
    );

    /// uncomment one of these to see what the compiler reports
    /// NOTE(review): stringUGC32 is gudtUGC!(dstring), a supported instantiation, so the
    /// structure's static assert is satisfied here; what is wrong in both lines is the
    /// constructor argument (char / int where a dstring is required)

    //stringUGC32 lugcSequence3 = stringUGC32(cast(char) 'x');
    //stringUGC32 lugcSequence3 = stringUGC32(1);

}
```