```python
>>> tbl.shape
(29701056, 373)
>>> Counter([field.type for field in tbl.schema])
Counter({TimestampType(timestamp[us]): 1,
         DataType(double): 370,
         DictionaryType(dictionary<values=int64, indices=int32, ordered=0>): 1,
         DictionaryType(dictionary<values=string, indices=int32, ordered=0>): 1})
>>> df = tbl.to_pandas()
>>> np.asarray([df.iloc[:, col].nunique() for col in range(1, 371)])  # double columns
array([ 5503,  5762,  5672,  5156,  6385,  6576,  5173,  5055,   311,
          51,  1125,   961,  6202,  5862,  4919,  5358,  2939,  2927,
        3380,  3451,  2297,  1676,  2292,  2482,  2518,  2347,  6551,
        2651,  2693,  2966,  2972,  2892,  2942,  2441,  2540,  2550,
        2439,  3078,  3127,  2832,  3449,  2291,  3087,  3218,  1694,
        1656,  1952,  1784,  2879,     1,     1,   482,     1,     1,
         593,     1,     1,   140,     1,     1,  1413,     1,     1,
          24,   303,   324,  1314,  8816,  4986,   308,   318,   196,
         207,   189,   187,  1800,  1091,   764,   931,   608,   403,
        3520,  1284,   267,   597,  1262,   779,   318,   959,  2013,
        1158,  1161,   668,   716,   830,  1104,  1124,   515,   506,
         296,  2369, 11296,   295,   245,    15,  3192,  1001,  4281,
        1428,  1536,   761,   423,   417,  1735,  3631,   521,   627,
         920,  1148,  1792,  1842,  1723,  1878,  4419,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,     1,     1,     1,     1,
           1,     1,     1,     1,     1,  1719,     1,    95,  5294,
        6499,  1035,   277,     1, 16847,     1, 18471,     1,     1,
           1,     1,     1, 15995,     1,   645, 15382,   500,  9802,
        2201,     1,  2581,  1635,  2039,  2042,  1583,  2602, 36498,
        5988,  6027,  5080, 22053, 49031, 27200,  4003,  7591, 24653,
       25501, 15233,  7058,  3660,  5685, 17246,   737,   343, 24883,
       14109,  2685,     1, 22599,     1,     1,     1,     1,     1,
        1152,   975,  1030,   986,     1,  1518,  1553,  1433,  1470,
          28,  1248,  1547,  1624,  1555,  1601,  1602,     1,  1041,
         725,    60,  1145,   944,   231,   303,    19,   318,   300,
         314,   370,  1277,   118,   737,    29,    26,   500,   229,
         232,   238,   246,  1248,   198,   403,   403,   245,  1393,
         581,   402,   420,   488,   510,   480,   407,   813,   836,
         836,  1309,  1121,  2784,  2732,  1580,   425,   465,   459,
         448,   417,   375,   531,   561,  1021,   506,  1501,   501,
         987,  1632,   464,  2259,  1058,  1653,   421,   562,  1312,
         526,   937,   708,  1001,   911,   978,  1554,   387,   984,
        1261,  1261,  1110,  1681,  2281,  1055,   305,  4196,  1305,
         631])
```

[ Full content available at: https://github.com/apache/arrow/issues/2614 ]
This message was relayed via gitbox.apache.org for [email protected]

Reply via email to