On Wednesday, 14 July 2021 at 19:00:08 UTC, jfondren wrote:
On Wednesday, 14 July 2021 at 18:04:44 UTC, someone wrote:
On Wednesday, 14 July 2021 at 06:28:37 UTC, jfondren wrote:

alternate 1:
- pull tests out into a named enum template, like std.traits
- always static assert enum, rather than conditionally asserting false
- always have the rest of the code

```d
// isString!T is true only when T is exactly string, wstring or dstring.
enum isString(T) = is(T == string) || is(T == wstring) || is(T == dstring);
// very similar to std.traits.isSomeString

// Instantiating gudtUGC with any T for which isString!T is false stops
// compilation with "error message"; the assert is unconditional, so the
// rest of the struct body does not need to sit inside a static if.
struct gudtUGC(T) {
    static assert(isString!T, "error message");
    // unconditional structure code
}
```

... is it me or this isn't triggering the assert either ?

This isn't a complete example. The same problem elsewhere in your program can cause dmd to exit before it gets to the static assert here, for the same reason that the static assert in your original code was never reached.

Here's a complete example:

```d
enum isString(T) = is(T == string) || is(T == wstring) || is(T == dstring); // true only for string, wstring or dstring

struct gudtUGC(T) {
    static assert(isString!T, "error message"); // line 4 of the example: fails whenever isString!T is false
    // unconditional structure code
}

unittest {
    gudtUGC!int; // line 9 of the example: instantiating with int trips the static assert above
}
```

which fails with

```
example.d(4): Error: static assert:  "error message"
example.d(9):        instantiated from here: `gudtUGC!int`
```

If you have the static assert there but then still follow up with static ifs that only conditionally produce an alias, then you have the same problem still.

Please, go to the bottom of the unittest block and uncomment one of those lines (DMD version here is DMD64 D Compiler v2.096.1):
```d
/// implementation-bugs [-] using foreach (with this structure) always misses the last grapheme‐cluster … possible phobos bug # 20483 @ unittest's testUTFcommon() last line
/// implementation-bugs [-] static assert not firing

/// implementation‐tasks [✓] for the time being input parameters are declared const instead of in; eventually they'll be back to in once the related DIP is settled once and for all; but, definitely—not scope const
/// implementation‐tasks [✓] reconsider excessive cast usage as suggested by Ali: bypassing compiler checks could be potentially harmful … cast and integer promotion @ http://ddili.org/ders/d.en/cast.html
/// implementation‐tasks [-] reconsider making this whole UDT zero‐based as suggested by ag0aep6g—has a good point

/// implementation‐tasks‐possible [-] pad[L|R]
/// implementation‐tasks‐possible [-] replicate/repeat
/// implementation‐tasks‐possible [-] replace(string, string)
/// implementation‐tasks‐possible [-] translate(string, string) … same‐size strings matching one‐to‐one

/// usage: array slicing can be used for usual things like: left() right() substr() etc … mainly when grapheme‐clusters are not expected at all
/// usage: array slicing needs a zero‐based first range argument and a one‐based second one (ie: one‐past‐the‐end; which is somewhat counter‐intuitive)

module fw.types.UniCode;

import std.algorithm : map, joiner;
import std.array : array;
import std.conv : to;
import std.range : walkLength, take, tail, drop, dropBack; /// repeat, padLeft, padRight
import std.stdio;
import std.uni : Grapheme, byGrapheme;

/// within this file: gudtUGC



/// Module constructor with an empty body; per the note on this line it is executed only‐once per‐app.
/// NOTE(review): everything after the first `///` on the line below is comment text, so `static this() { }`, `static ~this() { }` and `shared static ~this() { }` are NOT declared here — presumably each sat on its own line before the listing was flattened; confirm and restore them if they are wanted.
shared static this() { } /// the following will be executed only‐once per‐app: static this() { } /// the following will be executed only‐once per‐thread: static ~this() { } /// the following will be executed only‐once per‐thread: shared static ~this() { } /// the following will be executed only‐once per‐app:



/// UTF‐encoded string types, named after their code‐unit width
alias stringUTF08 = string;  /// same as immutable(char )[];
alias stringUTF16 = wstring; /// same as immutable(wchar)[];
alias stringUTF32 = dstring; /// same as immutable(dchar)[];

/// a single grapheme‐cluster, followed by the grapheme‐cluster‐aware UDT instantiated once per UTF encoding
alias stringUGC = Grapheme;
alias stringUGC08 = gudtUGC!(stringUTF08);
alias stringUGC16 = gudtUGC!(stringUTF16);
alias stringUGC32 = gudtUGC!(stringUTF32);

/// isTypeSupported!(type) is true only when type is exactly one of the three UTF string aliases above
template isTypeSupported(type) {

   static if (is(type == stringUTF08) || is(type == stringUTF16)) {

      enum bool isTypeSupported = true;

   } else {

      enum bool isTypeSupported = is(type == stringUTF32);

   }

}

/// mixin templateUGC!(stringUTF08, r"gudtUGC08"d);
/// mixin templateUGC!(stringUTF16, r"gudtUGC16"d);
/// mixin templateUGC!(stringUTF32, r"gudtUGC32"d);
/// template templateUGC (typeStringUTF, alias lstrStructureID) { /// if these were possible there will be no need for stringUGC## aliases in main()

public struct gudtUGC(typeStringUTF) { /// UniCode grapheme‐cluster‐aware string manipulation (implemented for one‐based operations)

       static assert(
          isTypeSupported!(typeStringUTF),
          r"ooops … gudtUGC structure requires [string|wstring|dstring] ≠ ["d ~ typeStringUTF.stringof ~ r"]"d
          ); /// meaning: this will halt compilation if this UDT was instantiated with a type other than the ones intended

       /// provides: public property size_t count
       /// provides: empty() / front() / popFront() so the UDT can be walked with foreach; front() yields one grapheme‐cluster as typeStringUTF

       /// provides: public size_t decode(typeStringUTF strSequence)
       /// provides: public typeStringUTF encode()

       /// provides: public gudtUGC!(typeStringUTF) take(size_t intStart, size_t intCount = 1)
       /// provides: public gudtUGC!(typeStringUTF) takeL(size_t intCount)
       /// provides: public gudtUGC!(typeStringUTF) takeR(size_t intCount)
       /// provides: public gudtUGC!(typeStringUTF) chopL(size_t intCount)
       /// provides: public gudtUGC!(typeStringUTF) chopR(size_t intCount)

       /// provides: public typeStringUTF takeasUTF(size_t intStart, size_t intCount = 1)
       /// provides: public typeStringUTF takeLasUTF(size_t intCount)
       /// provides: public typeStringUTF takeRasUTF(size_t intCount)
       /// provides: public typeStringUTF chopLasUTF(size_t intCount)
       /// provides: public typeStringUTF chopRasUTF(size_t intCount)
       /// provides: public typeStringUTF padLasUTF(size_t intCount, typeStringUTF strPadding = r" ") … pending: always returns null
       /// provides: public typeStringUTF padRasUTF(size_t intCount, typeStringUTF strPadding = r" ") … pending: always returns null

       /// usage; eg: stringUGC32("äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese"d).take(35, 3).take(1,2).take(1,1).encode(); /// 日
       /// usage; eg: stringUGC32("äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese"d).take(35).encode(); /// 日
       /// usage; eg: stringUGC32("äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese"d).takeasUTF(35); /// 日

       /// advances the one‐based iteration cursor by one grapheme‐cluster
       void popFront() { ++pintSequenceCurrent; }

       /// true when there is nothing (left) to iterate: either nothing was decoded (cursor still 0) or the cursor moved past the last grapheme‐cluster
       /// the cursor is one‐based, so the last valid position equals the count; comparing with == here would end the iteration one grapheme‐cluster early
       /// and would never terminate for a non‐null empty string (cursor 1, count 0)
       bool empty() { return pintSequenceCurrent < 1 || pintSequenceCurrent > pintSequenceCount; }

       /// grapheme‐cluster at the cursor, UTF‐encoded
       typeStringUTF front() { return takeasUTF(pintSequenceCurrent); }

       private stringUGC[] pugcSequence;        /// decoded grapheme‐clusters
       private size_t pintSequenceCount = 0;    /// how many grapheme‐clusters pugcSequence holds
       private size_t pintSequenceCurrent = 0;  /// one‐based iteration cursor; decode() sets it to 1 for a non‐null sequence and to 0 otherwise

       /// number of grapheme‐clusters currently held
       @property public size_t count() { return pintSequenceCount; }

       this(
          const typeStringUTF lstrSequence
          ) {

          /// (1) given UTF‐encoded sequence

          this.decode(lstrSequence);

       }

       /// private helper: UTF‐encodes the given grapheme‐clusters back into a single typeStringUTF
       @safe private static typeStringUTF joinClusters(
          stringUGC[] lugcClusters
          ) {

          return lugcClusters
             .map!((ref g) => g[])
             .joiner
             .to!(typeStringUTF)
             ;

       }

       @safe public size_t decode( /// UniCode (UTF‐encoded → grapheme‐cluster) sequence
          const typeStringUTF lstrSequence
          ) {

          /// (1) given UTF‐encoded sequence
          /// returns: the number of grapheme‐clusters now held (0 for a null sequence)

          if (lstrSequence is null) { /// a null sequence resets the UDT to its initial state

             pugcSequence = null;
             pintSequenceCount = 0;
             pintSequenceCurrent = 0;

             return 0;

          }

          pugcSequence = lstrSequence.byGrapheme.array;
          pintSequenceCount = pugcSequence.walkLength;
          pintSequenceCurrent = 1; /// iteration restarts at the first grapheme‐cluster

          return pintSequenceCount;

       }

       @safe public typeStringUTF encode() { /// UniCode (grapheme‐cluster → UTF‐encoded) sequence

          /// returns: the whole sequence UTF‐encoded, or null when no grapheme‐cluster is held

          return pintSequenceCount >= 1 ? joinClusters(pugcSequence) : null;

       }

       @safe public gudtUGC!(typeStringUTF) take( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
          const size_t lintStart,
          const size_t lintCount = 1
          ) {

          /// (1) given start position >= 1
          /// (2) given count >= 1
          /// returns: the requested grapheme‐clusters as a new UDT; an empty UDT when the request does not fit
          /// note: takeasUTF() yields null for a request that does not fit, and constructing from null gives the same state as a default‐initialised UDT

          return gudtUGC!(typeStringUTF)(takeasUTF(lintStart, lintCount));

       }

       @safe public gudtUGC!(typeStringUTF) takeL( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
          const size_t lintCount
          ) {

          /// (1) given count >= 1
          /// returns: the first lintCount grapheme‐clusters as a new UDT; an empty UDT when the count does not fit

          return gudtUGC!(typeStringUTF)(takeLasUTF(lintCount));

       }

       @safe public gudtUGC!(typeStringUTF) takeR( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
          const size_t lintCount
          ) {

          /// (1) given count >= 1
          /// returns: the last lintCount grapheme‐clusters as a new UDT; an empty UDT when the count does not fit

          return gudtUGC!(typeStringUTF)(takeRasUTF(lintCount));

       }

       @safe public gudtUGC!(typeStringUTF) chopL( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
          const size_t lintCount
          ) {

          /// (1) given count >= 1
          /// returns: everything but the first lintCount grapheme‐clusters as a new UDT; an empty UDT when the count does not fit

          return gudtUGC!(typeStringUTF)(chopLasUTF(lintCount));

       }

       @safe public gudtUGC!(typeStringUTF) chopR( /// UniCode (grapheme‐cluster → grapheme‐cluster) sequence
          const size_t lintCount
          ) {

          /// (1) given count >= 1
          /// returns: everything but the last lintCount grapheme‐clusters as a new UDT; an empty UDT when the count does not fit

          return gudtUGC!(typeStringUTF)(chopRasUTF(lintCount));

       }

       @safe public typeStringUTF takeasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
          const size_t lintStart,
          const size_t lintCount = 1
          ) {

          /// (1) given start position >= 1
          /// (2) given count >= 1
          /// returns: the requested grapheme‐clusters UTF‐encoded; null when the request does not fit

          /// eg#1: takeasUTF(1,3) → range#1=start-1=1-1=0 and range#2=range#1+count=0+3=3 → 0..3
          /// eg#1: takeasUTF(6,3) → range#1=start-1=6-1=5 and range#2=range#1+count=5+3=8 → 5..8

          /// eg#2: takeasUTF(01,1) → range#1=start-1=01-1=00 and range#2=range#1+count=00+1=01 → 00..01
          /// eg#2: takeasUTF(50,1) → range#1=start-1=50-1=49 and range#2=range#1+count=49+1=50 → 49..50

          typeStringUTF lstrSequence = null;

          if (lintStart >= 1 && lintCount >= 1) {

             const size_t lintRange1 = lintStart - 1;

             /// the upper bound is checked as a subtraction so that a huge count cannot wrap range#1 + count around and slip past the check

             if (lintRange1 < pintSequenceCount && lintCount <= pintSequenceCount - lintRange1) {

                lstrSequence = joinClusters(pugcSequence[lintRange1..lintRange1 + lintCount]);

             }

          }

          return lstrSequence;

       }

       @safe public typeStringUTF takeLasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
          const size_t lintCount
          ) {

          /// (1) given count >= 1
          /// returns: the first lintCount grapheme‐clusters UTF‐encoded; null when the count does not fit

          typeStringUTF lstrSequence = null;

          if (lintCount >= 1 && lintCount <= pintSequenceCount) {

             lstrSequence = joinClusters(pugcSequence.take(lintCount));

          }

          return lstrSequence;

       }

       @safe public typeStringUTF takeRasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
          const size_t lintCount
          ) {

          /// (1) given count >= 1
          /// returns: the last lintCount grapheme‐clusters UTF‐encoded; null when the count does not fit

          typeStringUTF lstrSequence = null;

          if (lintCount >= 1 && lintCount <= pintSequenceCount) {

             lstrSequence = joinClusters(pugcSequence.tail(lintCount));

          }

          return lstrSequence;

       }

       @safe public typeStringUTF chopLasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
          const size_t lintCount
          ) {

          /// (1) given count >= 1
          /// returns: everything but the first lintCount grapheme‐clusters UTF‐encoded; null when the count does not fit

          typeStringUTF lstrSequence = null;

          if (lintCount >= 1 && lintCount <= pintSequenceCount) {

             lstrSequence = joinClusters(pugcSequence.drop(lintCount));

          }

          return lstrSequence;

       }

       @safe public typeStringUTF chopRasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
          const size_t lintCount
          ) {

          /// (1) given count >= 1
          /// returns: everything but the last lintCount grapheme‐clusters UTF‐encoded; null when the count does not fit

          typeStringUTF lstrSequence = null;

          if (lintCount >= 1 && lintCount <= pintSequenceCount) {

             lstrSequence = joinClusters(pugcSequence.dropBack(lintCount));

          }

          return lstrSequence;

       }

       @safe public typeStringUTF padLasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
          const size_t lintCount,
          const typeStringUTF lstrPadding = r" "
          ) {

          /// (1) given count >= 1
          /// [2] given padding (default is a single blank space)
          /// returns: null for the time being; the padding itself is still pending

          typeStringUTF lstrSequence = null;

          if (lintCount >= 1 && lintCount > pintSequenceCount) {

             lstrSequence = null; /// pending

          }

          return lstrSequence;

       }

       @safe public typeStringUTF padRasUTF( /// UniCode (grapheme‐cluster → UTF‐encoded) sequence
          const size_t lintCount,
          const typeStringUTF lstrPadding = r" "
          ) {

          /// (1) given count >= 1
          /// [2] given padding (default is a single blank space)
          /// returns: null for the time being; the padding itself is still pending

          typeStringUTF lstrSequence = null;

          if (lintCount >= 1 && lintCount > pintSequenceCount) {

             lstrSequence = null; /// pending

          }

          return lstrSequence;

       }

}

unittest {

       /// the following should be the same (regardless of the encoding being used) and is the whole point of this UDT being made:
       void testUTFcommon(typeStringUTF, typeStringUGC)(
          const typeStringUTF lstrSequence3
          ) {

          auto lugcText = typeStringUGC(lstrSequence3);

          /// a one‐based slice must come back identical through take().encode() and through takeasUTF()
          void expectSlice(const size_t lintStart, const size_t lintCount, const typeStringUTF lstrExpected) {

             assert(lugcText.take(lintStart, lintCount).encode() == lstrExpected);
             assert(lugcText.takeasUTF(lintStart, lintCount) == lstrExpected);

          }

          /// same idea for the left‐most and right‐most grapheme‐clusters
          void expectLeft(const size_t lintCount, const typeStringUTF lstrExpected) {

             assert(lugcText.takeL(lintCount).encode() == lstrExpected);
             assert(lugcText.takeLasUTF(lintCount) == lstrExpected);

          }

          void expectRight(const size_t lintCount, const typeStringUTF lstrExpected) {

             assert(lugcText.takeR(lintCount).encode() == lstrExpected);
             assert(lugcText.takeRasUTF(lintCount) == lstrExpected);

          }

          /// decoding followed by encoding gives the input back
          assert(lugcText.encode() == lstrSequence3);

          /// slices of slices keep being one‐based
          assert(lugcText.take(35, 3).take(1,2).take(1,1).encode() == r"日");

          expectSlice(21, 1, r"р");
          expectSlice(27, 1, r"й");
          expectSlice(35, 1, r"日");
          expectSlice(37, 1, r"語");
          expectSlice(21, 7, r"русский");
          expectSlice(35, 3, r"日本語");

          expectLeft(1, r"ä");
          expectRight(1, r"😎");
          expectLeft(7, r"äëåčñœß");
          expectRight(16, r"日本語 = japanese 😎");

          assert(lugcText.chopL(10).encode() == r"russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎");
          assert(lugcText.chopR(21).encode() == r"äëåčñœß … russian = русский 🇷🇺");

          assert(lugcText.chopLasUTF(10) == r"russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎");
          assert(lugcText.chopRasUTF(21) == r"äëåčñœß … russian = русский 🇷🇺");

          /// re‐assembling the sequence one grapheme‐cluster at a time (by one‐based position) gives the input back

          typeStringUTF lstrReencoded;

          const size_t lintClusters = lstrSequence3.byGrapheme.walkLength;

          foreach (lintPosition; 1 .. lintClusters + 1) {

             lstrReencoded ~= lugcText.takeasUTF(lintPosition);

          }

          assert(lstrReencoded == lstrSequence3);

          /// same re‐assembly, this time walking the UDT itself with foreach

          lstrReencoded = null;

          foreach (typeStringUTF lstrCluster; lugcText) {

             lstrReencoded ~= lstrCluster;

          }

          //assert(lstrReencoded == lstrSequence3); /// disabled: foreach over the UDT was observed to miss the last grapheme‐cluster (possible bug # 20483)

       }

       /// checks the three sizes of one sequence: grapheme‐clusters (UGC), code‐points (UGA) and code‐units (UTF)
       void checkSizes(typeStringUTF)(
          const typeStringUTF lstrSequence,
          const size_t lintSizeUGC,
          const size_t lintSizeUGA,
          const size_t lintSizeUTF
          ) {

          assert(lstrSequence.byGrapheme.walkLength == lintSizeUGC);
          assert(lstrSequence.walkLength == lintSizeUGA);
          assert(lstrSequence.length == lintSizeUTF);

       }

       void testUTF08(
          const stringUTF08 lstrSequence1,
          const stringUTF08 lstrSequence2,
          const stringUTF08 lstrSequence3
          ) {

          checkSizes!(stringUTF08)(lstrSequence1, 50, 50, 50);
          checkSizes!(stringUTF08)(lstrSequence2, 50, 50, 60);
          checkSizes!(stringUTF08)(lstrSequence3, 50, 52, 91);

          testUTFcommon!(stringUTF08, stringUGC08)(lstrSequence3); /// checking for correct string manipulation

       }

       void testUTF16(
          const stringUTF16 lstrSequence1,
          const stringUTF16 lstrSequence2,
          const stringUTF16 lstrSequence3
          ) {

          checkSizes!(stringUTF16)(lstrSequence1, 50, 50, 50);
          checkSizes!(stringUTF16)(lstrSequence2, 50, 50, 50);
          checkSizes!(stringUTF16)(lstrSequence3, 50, 52, 57);

          testUTFcommon!(stringUTF16, stringUGC16)(lstrSequence3); /// checking for correct string manipulation

       }

       void testUTF32(
          const stringUTF32 lstrSequence1,
          const stringUTF32 lstrSequence2,
          const stringUTF32 lstrSequence3
          ) {

          checkSizes!(stringUTF32)(lstrSequence1, 50, 50, 50);
          checkSizes!(stringUTF32)(lstrSequence2, 50, 50, 50);
          checkSizes!(stringUTF32)(lstrSequence3, 50, 52, 52);

          testUTFcommon!(stringUTF32, stringUGC32)(lstrSequence3); /// checking for correct string manipulation

       }

       /// the same three sequences, once per encoding: ASCII only, ASCII + cyrillic, and a mix with flags/emoji

       testUTF08(
          r"12345678901234567890123456789012345678901234567890"c,
          r"1234567890АВГДЕЗИЙКЛABCDEFGHIJabcdefghijQRSTUVWXYZ"c,
          r"äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎"c
          );

       testUTF16(
          r"12345678901234567890123456789012345678901234567890"w,
          r"1234567890АВГДЕЗИЙКЛABCDEFGHIJabcdefghijQRSTUVWXYZ"w,
          r"äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎"w
          );

       testUTF32(
          r"12345678901234567890123456789012345678901234567890"d,
          r"1234567890АВГДЕЗИЙКЛABCDEFGHIJabcdefghijQRSTUVWXYZ"d,
          r"äëåčñœß … russian = русский 🇷🇺 ≠ 🇯🇵 日本語 = japanese 😎"d
          );

       //stringUGC32 lugcSequence3 = stringUGC32(cast(char) 'x');
       //stringUGC32 lugcSequence3 = stringUGC32(1);

}
```

Reply via email to