diff --git a/uppsrc/Core/Fn.h b/uppsrc/Core/Fn.h index e08ddf46e..bf1d4d612 100644 --- a/uppsrc/Core/Fn.h +++ b/uppsrc/Core/Fn.h @@ -60,18 +60,6 @@ constexpr int findarg(const T& sel, const K& k, const L& ...args) //$-constexpr auto decode(const T& x, const T1& p0, const V1& v0, ...); -template -constexpr const V& decode(const T& sel, const V& def) -{ - return def; -} - -template -constexpr V decode(const T& sel, const K& k, const V& v, const L& ...args) -{ - return sel == k ? v : (V)decode(sel, args...); -} - template constexpr const char *decode_chr_(const T& sel, const char *def) { @@ -90,6 +78,18 @@ constexpr const char *decode(const T& sel, const K& k, const char *v, const L& . return decode_chr_(sel, k, v, args...); } +template +constexpr const V& decode(const T& sel, const V& def) +{ + return def; +} + +template +constexpr V decode(const T& sel, const K& k, const V& v, const L& ...args) +{ + return sel == k ? v : (V)decode(sel, args...); +} + //$-constexpr T get_i(int i, const T& p0, const T1& p1, ...); template diff --git a/uppsrc/Core/Mem.cpp b/uppsrc/Core/Mem.cpp index c084ffbd3..73b9ff55b 100644 --- a/uppsrc/Core/Mem.cpp +++ b/uppsrc/Core/Mem.cpp @@ -2,7 +2,7 @@ namespace Upp { -#ifdef CPU_X86 +#ifdef CPU_SIMD void memset8__(void *p, i16x8 data, size_t len) { diff --git a/uppsrc/Draw/SSE2.h b/uppsrc/Draw/SSE2.h index 997b895e4..215be5d7f 100644 --- a/uppsrc/Draw/SSE2.h +++ b/uppsrc/Draw/SSE2.h @@ -1,5 +1,3 @@ -#ifdef CPU_X86 - #ifdef CPU_SIMD force_inline @@ -86,7 +84,7 @@ void StoreRGBAF(RGBA *t, f32x4 s) force_inline f32x4 ClampRGBAF(f32x4 p) -{return p; +{ #ifdef PLATFORM_MACOS f32x4 alpha = Broadcast0(p); #else @@ -96,100 +94,4 @@ f32x4 ClampRGBAF(f32x4 p) return min(p, alpha); } -#else - -force_inline -__m128i LoadRGBA(const RGBA *s) -{ - return _mm_unpacklo_epi8(_mm_set_epi32(0, 0, 0, *(dword *)s), _mm_setzero_si128()); -} - -force_inline -__m128i LoadRGBA2(const RGBA& c) -{ - return _mm_unpacklo_epi8(_mm_set_epi32(0, 0, *(dword *)&c, *(dword *)&c), _mm_setzero_si128()); -} - -force_inline -__m128i LoadRGBA2(const RGBA *s0, const RGBA *s1) -{ - return _mm_unpacklo_epi8(_mm_set_epi32(0, 0, *(dword *)s1, *(dword *)s0), _mm_setzero_si128()); -} - -force_inline -__m128i LoadRGBA2(const RGBA *s) -{ - return _mm_unpacklo_epi8(_mm_set_epi32(0, 0, *(dword *)(s + 1), *(dword *)s), _mm_setzero_si128()); -} - -force_inline -__m128i LoadRGBAL(__m128i x) -{ - return _mm_unpacklo_epi8(x, _mm_setzero_si128()); -} - -force_inline -__m128i LoadRGBAH(__m128i x) -{ - return _mm_unpackhi_epi8(x, _mm_setzero_si128()); -} - -force_inline -void LoadRGBA4(const RGBA *s, __m128i& l, __m128i& h) -{ - __m128i t4 = _mm_loadu_si128((__m128i *)s); - l = LoadRGBAL(t4); - h = LoadRGBAH(t4); -} - -force_inline -__m128i PackRGBA(__m128i l, __m128i h) -{ - return _mm_packus_epi16(l, h); -} - -force_inline -void StoreRGBA(RGBA *rgba, __m128i x) -{ - _mm_store_ss((float *)rgba, _mm_castsi128_ps(PackRGBA(x, _mm_setzero_si128()))); -} - -force_inline -void StoreRGBA2(RGBA *rgba, __m128i x) -{ - _mm_storel_pd((double *)rgba, _mm_castsi128_pd(PackRGBA(x, _mm_setzero_si128()))); -} - -force_inline -void StoreRGBA4(RGBA *rgba, __m128i l, __m128i h) -{ - _mm_storeu_si128((__m128i *)rgba, PackRGBA(l, h)); -} - -force_inline -__m128 LoadRGBAF(const RGBA *s) -{ - return _mm_cvtepi32_ps(_mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_set_epi32(0, 0, 0, *(dword *)s), _mm_setzero_si128()), _mm_setzero_si128())); -} - -force_inline -void StoreRGBAF(RGBA *t, __m128 s) -{ - _mm_store_ss((float *)t, _mm_castsi128_ps( - _mm_packus_epi16( - _mm_packs_epi32(_mm_cvttps_epi32(s), _mm_setzero_si128()), - _mm_setzero_si128() - ))); -} - -force_inline -__m128 ClampRGBAF(__m128 p) -{ // TODO: MacOS - __m128 alpha = _mm_shuffle_ps(p, p, _MM_SHUFFLE(3, 3, 3, 3)); - alpha = _mm_min_ps(alpha, _mm_set1_ps(255.0)); - return _mm_min_ps(p, alpha); -} - #endif - -#endif \ No newline at end of file diff --git a/uppsrc/Painter/AlphaBlend.h b/uppsrc/Painter/AlphaBlend.h index 62cc2569b..f6d86dbff 100644 --- a/uppsrc/Painter/AlphaBlend.h +++ b/uppsrc/Painter/AlphaBlend.h @@ -7,8 +7,6 @@ void AlphaBlend(RGBA *t, const RGBA& c, int alpha); void AlphaBlend(RGBA *t, const RGBA *s, int alpha, int len); void AlphaBlend(RGBA *t, const RGBA& c, int alpha, int len); -#if defined(CPU_SSE2) && !defined(flagNOSIMD) - #ifdef CPU_SIMD force_inline @@ -148,146 +146,6 @@ void AlphaBlend(RGBA *t, const RGBA *s, int alpha, int len) #else -force_inline -__m128i BroadcastAlpha(__m128i x) -{ -#ifdef PLATFORM_MACOS - return _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, 0x00), 0x00); -#else - return _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, 0xff), 0xff); -#endif -} - -force_inline -__m128i Mul8(__m128i x, int alpha) -{ - return _mm_srli_epi16(_mm_mullo_epi16(_mm_set1_epi16(alpha), x), 8); // c.a * alpha >> 8; -} - -force_inline -__m128i MakeAlpha(__m128i x) -{ - x = BroadcastAlpha(x); -#ifdef PLATFORM_MACOS - x = _mm_srli_epi16(_mm_mullo_epi16(_mm_set_epi16(129, 129, 129, 128, 129, 129, 129, 128), x), 7); // a for alpha, 256*a/255 for color -#else - x = _mm_srli_epi16(_mm_mullo_epi16(_mm_set_epi16(128, 129, 129, 129, 128, 129, 129, 129), x), 7); // a for alpha, 256*a/255 for color -#endif - x = _mm_sub_epi16(_mm_set1_epi16(256), x); // 256 - a for alpha, 256 - 256*a/255 for color; - return x; -} - -force_inline -__m128i AlphaBlendSSE2(__m128i t, __m128i s, __m128i alpha) -{ - return _mm_adds_epi16(s, _mm_srli_epi16(_mm_mullo_epi16(t, alpha), 8)); // t = c + (t * alpha >> 8); -} - -force_inline -void AlphaBlend1(RGBA *t, __m128i s, __m128i alpha) -{ - StoreRGBA(t, AlphaBlendSSE2(LoadRGBA(t), s, alpha)); -} - -force_inline -void AlphaBlend2(RGBA *t, __m128i s, __m128i alpha) -{ - StoreRGBA2(t, AlphaBlendSSE2(LoadRGBA2(t), s, alpha)); -} - -force_inline -void AlphaBlend4(RGBA *t, __m128i sl, __m128i al, __m128i sh, __m128i ah) -{ - __m128i t4 = _mm_loadu_si128((__m128i *)t); - _mm_storeu_si128((__m128i *)t, - PackRGBA( - AlphaBlendSSE2(LoadRGBAL(t4), sl, al), - AlphaBlendSSE2(LoadRGBAH(t4), sh, ah))); -} - -force_inline -void AlphaBlend(RGBA *t, const RGBA& c) -{ - __m128i s = LoadRGBA(&c); - StoreRGBA(t, AlphaBlendSSE2(LoadRGBA(t), s, MakeAlpha(s))); -} - -force_inline -void AlphaBlend(RGBA *t, const RGBA& c, int alpha) -{ - __m128i s = Mul8(LoadRGBA(&c), alpha); - StoreRGBA(t, AlphaBlendSSE2(LoadRGBA(t), s, MakeAlpha(s))); -} - -force_inline -void AlphaBlend(RGBA *t, const RGBA& c, int alpha, int len) -{ - __m128i s = Mul8(LoadRGBA2(c), alpha); - __m128i a = MakeAlpha(s); - while(len >= 4) { - AlphaBlend4(t, s, a, s, a); - t += 4; - len -= 4; - } - if(len & 2) { - AlphaBlend2(t, s, a); - t += 2; - } - if(len & 1) - AlphaBlend1(t, s, a); -} - -force_inline -void AlphaBlend(RGBA *t, const RGBA *s, int alpha, int len) -{ - if(alpha == 256) { - while(len >= 4) { - __m128i m = _mm_loadu_si128((__m128i *)s); - __m128i s0 = LoadRGBAL(m); - __m128i s1 = LoadRGBAH(m); - AlphaBlend4(t, s0, MakeAlpha(s0), s1, MakeAlpha(s1)); - t += 4; - s += 4; - len -= 4; - } - if(len & 2) { - __m128i s0 = LoadRGBA2(s); - AlphaBlend2(t, s0, MakeAlpha(s0)); - t += 2; - s += 2; - } - if(len & 1) { - __m128i s0 = LoadRGBA(s); - AlphaBlend1(t, s0, MakeAlpha(s0)); - } - } - else { - while(len >= 4) { - __m128i m = _mm_loadu_si128((__m128i *)s); - __m128i s0 = Mul8(LoadRGBAL(m), alpha); - __m128i s1 = Mul8(LoadRGBAH(m), alpha); - AlphaBlend4(t, s0, MakeAlpha(s0), s1, MakeAlpha(s1)); - t += 4; - s += 4; - len -= 4; - } - if(len & 2) { - __m128i s0 = Mul8(LoadRGBA2(s), alpha); - AlphaBlend2(t, s0, MakeAlpha(s0)); - t += 2; - s += 2; - } - if(len & 1) { - __m128i s0 = Mul8(LoadRGBA(s), alpha); - AlphaBlend1(t, s0, MakeAlpha(s0)); - } - } -} - -#endif - -#else - force_inline void AlphaBlend__(RGBA& t, const RGBA& c) { diff --git a/uppsrc/Painter/Image.cpp b/uppsrc/Painter/Image.cpp index 2a45cf8b9..2f16edb74 100644 --- a/uppsrc/Painter/Image.cpp +++ b/uppsrc/Painter/Image.cpp @@ -178,182 +178,6 @@ struct PainterImageSpan : SpanSource, PainterImageSpanData { } }; -#if 0 -force_inline -int IntAndFraction(__m128 x, __m128& fraction) -{ - x = _mm_add_ps(x, _mm_set1_ps(8000)); // cvttps truncates toward 0, need to fix negatives - __m128i m = _mm_cvttps_epi32(x); - fraction = _mm_sub_ps(x, _mm_cvtepi32_ps(m)); - return _mm_cvtsi128_si32(m) - 8000; -} - -force_inline -int Int(__m128 x) -{ - return _mm_cvtsi128_si32(_mm_cvttps_epi32(_mm_add_ps(x, _mm_set1_ps(8000)))) - 8000; -} - -struct PainterImageSpanData { - int ax, ay, cx, cy, maxx, maxy; - byte style; - byte hstyle, vstyle; - bool fast; - bool fixed; - Image image; - Xform2D xform; - - PainterImageSpanData(dword flags, const Xform2D& m, const Image& img, bool co, bool imagecache) { - style = byte(flags & 15); - hstyle = byte(flags & 3); - vstyle = byte(flags & 12); - fast = flags & FILL_FAST; - image = img; - int nx = 1; - int ny = 1; - if(!fast) { - Pointf sc = m.GetScaleXY(); - if(sc.x >= 0.01 && sc.y >= 0.01) { - nx = (int)max(1.0, 1.0 / sc.x); - ny = (int)max(1.0, 1.0 / sc.y); - } - } - if(nx == 1 && ny == 1) - xform = Inverse(m); - else { - if(!fast) - image = (imagecache ? MinifyCached : Minify)(image, nx, ny, co); - xform = Inverse(m) * Xform2D::Scale(1.0 / nx, 1.0 / ny); - } - cx = image.GetWidth(); - cy = image.GetHeight(); - maxx = cx - 1; - maxy = cy - 1; - ax = 6000000 / cx * cx * 2; - ay = 6000000 / cy * cy * 2; - } - - PainterImageSpanData() {} -}; - - -struct PainterImageSpan : SpanSource, PainterImageSpanData { - PainterImageSpan(const PainterImageSpanData& f) - : PainterImageSpanData(f) {} - - const RGBA *Pixel(int x, int y) { return &image[y][x]; } - - const RGBA *GetPixel(int x, int y) { - if(hstyle == FILL_HPAD) - x = minmax(x, 0, maxx); - else - if(hstyle == FILL_HREFLECT) - x = (x + ax) / cx & 1 ? (ax - x - 1) % cx : (x + ax) % cx; - else - if(hstyle == FILL_HREPEAT) - x = (x + ax) % cx; - if(vstyle == FILL_VPAD) - y = minmax(y, 0, maxy); - else - if(vstyle == FILL_VREFLECT) - y = (y + ay) / cy & 1 ? (ay - y - 1) % cy : (y + ay) % cy; - else - if(vstyle == FILL_VREPEAT) - y = (y + ay) % cy; - static RGBA zero; - return fixed || (x >= 0 && x < cx && y >= 0 && y < cy) ? &image[y][x] : &zero; - } - - virtual void Get(RGBA *span, int x, int y, unsigned len) - { - PAINTER_TIMING("ImageSpan::Get"); - - Pointf p0 = xform.Transform(Pointf(x, y)); - Pointf dd = xform.Transform(Pointf(x + 1, y)) - p0; - - __m128 x0 = _mm_set_ps1((float)p0.x); - __m128 y0 = _mm_set_ps1((float)p0.y); - __m128 dx = _mm_set_ps1((float)dd.x); - __m128 dy = _mm_set_ps1((float)dd.y); - __m128 ii = _mm_setzero_ps(); - __m128 v1 = _mm_set_ps1(1); - __m128 ix, iy; - - auto GetIXY = [&] { - ix = _mm_add_ps(x0, _mm_mul_ps(ii, dx)); - iy = _mm_add_ps(y0, _mm_mul_ps(ii, dy)); - ii = _mm_add_ps(ii, v1); - }; - - fixed = hstyle && vstyle; - if(hstyle + vstyle == 0 && fast) { - while(len--) { - GetIXY(); - Point l(Int(ix), Int(iy)); - if(l.x > 0 && l.x < maxx && l.y > 0 && l.y < maxy) - *span = *Pixel(l.x, l.y); - else - if(style == 0 && (l.x < -1 || l.x > cx || l.y < -1 || l.y > cy)) - *span = RGBAZero(); - else - *span = *GetPixel(l.x, l.y); - ++span; - } - return; - } - while(len--) { - GetIXY(); - __m128 fx, fy; - Point l(IntAndFraction(ix, fx), IntAndFraction(iy, fy)); - if(hstyle == FILL_HREPEAT) - l.x = (l.x + ax) % cx; - if(vstyle == FILL_VREPEAT) - l.y = (l.y + ay) % cy; - if(style == 0 && (l.x < -1 || l.x > cx || l.y < -1 || l.y > cy)) - *span = RGBAZero(); - else - if(fast) { - if(l.x > 0 && l.x < maxx && l.y > 0 && l.y < maxy) - *span = *Pixel(l.x, l.y); - else - *span = *GetPixel(l.x, l.y); - } - else { - __m128 p00, p01, p10, p11; - if(l.x > 0 && l.x < maxx && l.y > 0 && l.y < maxy) { - p00 = LoadRGBAF(Pixel(l.x + 0, l.y + 0)); - p01 = LoadRGBAF(Pixel(l.x + 0, l.y + 1)); - p10 = LoadRGBAF(Pixel(l.x + 1, l.y + 0)); - p11 = LoadRGBAF(Pixel(l.x + 1, l.y + 1)); - } - else { - p00 = LoadRGBAF(GetPixel(l.x + 0, l.y + 0)); - p01 = LoadRGBAF(GetPixel(l.x + 0, l.y + 1)); - p10 = LoadRGBAF(GetPixel(l.x + 1, l.y + 0)); - p11 = LoadRGBAF(GetPixel(l.x + 1, l.y + 1)); - } - - p01 = _mm_mul_ps(p01, fy); - p11 = _mm_mul_ps(p11, fy); - p10 = _mm_mul_ps(p10, fx); - p11 = _mm_mul_ps(p11, fx); - - fx = _mm_sub_ps(v1, fx); - fy = _mm_sub_ps(v1, fy); - - p00 = _mm_mul_ps(p00, fy); - p10 = _mm_mul_ps(p10, fy); - p00 = _mm_mul_ps(p00, fx); - p01 = _mm_mul_ps(p01, fx); - - StoreRGBAF(span, _mm_add_ps(p00, _mm_add_ps(p01, _mm_add_ps(p10, p11)))); - } - ++span; - } - } -}; -#endif - void BufferPainter::RenderImage(double width, const Image& image, const Xform2D& transsrc, dword flags) { current = Null; diff --git a/uppsrc/RichText/Object.cpp b/uppsrc/RichText/Object.cpp index 4a980b304..9a6063715 100644 --- a/uppsrc/RichText/Object.cpp +++ b/uppsrc/RichText/Object.cpp @@ -247,7 +247,6 @@ bool RichObject::Set(const String& _type_name, const Value& _data, Size maxsiz bool RichObject::Read(const String& _type_name, const String& _data, Size sz, void *context) { - NewSerial(); type_name = _type_name; RichObjectType *t = Map().Get(type_name, NULL); if(t) { @@ -258,10 +257,12 @@ bool RichObject::Read(const String& _type_name, const String& _data, Size sz, pixel_size = type->GetPixelSize(data, context); AdjustPhysicalSize(); size = sz; + NewSerial(); return true; } data = _data; physical_size = pixel_size = size = sz; + NewSerial(); return false; }