Kaynağa Gözat

Use 32bit writes in copyMacroblock

Dominic Szablewski 12 yıl önce
ebeveyn
işleme
2f97245624
1 değiştirilmiş dosya ile 170 ekleme ve 48 silme
  1. 170 48
      jsmpg.js

+ 170 - 48
jsmpg.js Dosyayı Görüntüle

@@ -14,7 +14,6 @@
14 14
 // Inspired by "MPEG Decoder in Java ME" by Nokia:
15 15
 // http://www.developer.nokia.com/Community/Wiki/MPEG_decoder_in_Java_ME
16 16
 
17
-
18 17
 var requestAnimFrame = (function(){
19 18
 	return window.requestAnimationFrame ||
20 19
 		window.webkitRequestAnimationFrame ||
@@ -227,12 +226,23 @@ jsmpeg.prototype.decodeSequenceHeader = function() {
227 226
 	
228 227
 	// Allocate buffers and resize the canvas
229 228
 	this.currentY = new Uint8ClampedArray(this.codedSize);
229
+	this.currentY32 = new Uint32Array(this.currentY.buffer);
230
+
230 231
 	this.currentCr = new Uint8ClampedArray(this.codedSize >> 2);
232
+	this.currentCr32 = new Uint32Array(this.currentCr.buffer);
233
+
231 234
 	this.currentCb = new Uint8ClampedArray(this.codedSize >> 2);
235
+	this.currentCb32 = new Uint32Array(this.currentCb.buffer);
232 236
 	
237
+
233 238
 	this.forwardY = new Uint8ClampedArray(this.codedSize);
239
+	this.forwardY32 = new Uint32Array(this.forwardY.buffer);
240
+
234 241
 	this.forwardCr = new Uint8ClampedArray(this.codedSize >> 2);
242
+	this.forwardCr32 = new Uint32Array(this.forwardCr.buffer);
243
+
235 244
 	this.forwardCb = new Uint8ClampedArray(this.codedSize >> 2);
245
+	this.forwardCb32 = new Uint32Array(this.forwardCb.buffer);
236 246
 	
237 247
 	this.canvas.width = this.width;
238 248
 	this.canvas.height = this.height;
@@ -310,16 +320,25 @@ jsmpeg.prototype.decodePicture = function() {
310 320
 	if( this.pictureCodingType == PICTURE_TYPE_I || this.pictureCodingType == PICTURE_TYPE_P ) {
311 321
 		var 
312 322
 			tmpY = this.forwardY,
323
+			tmpY32 = this.forwardY32,
313 324
 			tmpCr = this.forwardCr,
314
-			tmpCb = this.forwardCb;
325
+			tmpCr32 = this.forwardCr32,
326
+			tmpCb = this.forwardCb,
327
+			tmpCb32 = this.forwardCb32;
315 328
 
316 329
 		this.forwardY = this.currentY;
330
+		this.forwardY32 = this.currentY32;
317 331
 		this.forwardCr = this.currentCr;
332
+		this.forwardCr32 = this.currentCr32;
318 333
 		this.forwardCb = this.currentCb;
334
+		this.forwardCb32 = this.currentCb32;
319 335
 
320 336
 		this.currentY = tmpY;
337
+		this.currentY32 = tmpY32;
321 338
 		this.currentCr = tmpCr;
339
+		this.currentCr32 = tmpCr32;
322 340
 		this.currentCb = tmpCb;
341
+		this.currentCb32 = tmpCb32;
323 342
 	}
324 343
 };
325 344
 
@@ -328,7 +347,7 @@ jsmpeg.prototype.YCbCrToRGBA = function() {
328 347
 	var pCb = this.currentCb;
329 348
 	var pCr = this.currentCr;
330 349
 	var pRGBA = this.currentRGBA.data;
331
-	
350
+
332 351
 
333 352
 
334 353
 	// Chroma values are the same for each block of 4 pixels, so we process
@@ -609,9 +628,9 @@ jsmpeg.prototype.copyMacroblock = function(motionH, motionV, sY, sCr, sCb ) {
609 628
 		H, V, oddH, oddV,
610 629
 		src, dest, last;
611 630
 
612
-	var dY = this.currentY;
613
-	var dCb = this.currentCb;
614
-	var dCr = this.currentCr;
631
+	var dY = this.currentY32;
632
+	var dCb = this.currentCb32;
633
+	var dCr = this.currentCr32;
615 634
 
616 635
 	// Luminance
617 636
 	width = this.codedWidth;
@@ -623,47 +642,79 @@ jsmpeg.prototype.copyMacroblock = function(motionH, motionV, sY, sCr, sCb ) {
623 642
 	oddV = (motionV & 1) == 1;
624 643
 	
625 644
 	src = ((this.mbRow << 4) + V) * width + (this.mbCol << 4) + H;
626
-	dest = (this.mbRow * width + this.mbCol) << 4;
627
-	last = dest + (width << 4);
628
-
645
+	dest = (this.mbRow * width + this.mbCol) << 2;
646
+	last = dest + (width << 2);
629 647
 
648
+	var y11, y21, y12, y22, y;
630 649
 	if( oddH ) {
631 650
 		if( oddV ) {
632 651
 			while( dest < last ) {
633
-				for( var x = 0; x < 16; x++ ) {
634
-					dY[dest] = (sY[src] + sY[src+1] + sY[src+width] + sY[src+width+1] + 2) >> 2;
635
-					dest++; src++;
652
+				y21 = sY[src]; y22 = sY[src+width]; src++;
653
+				for( var x = 0; x < 4; x++ ) {
654
+					y11 = y21; y12 = y22; y21 = sY[src]; y22 = sY[src+width]; src++;
655
+					y = (((y11 + y21 + y12 + y22 + 2) >> 2) & 0xff);
656
+
657
+					y11 = y21; y12 = y22; y21 = sY[src]; y22 = sY[src+width]; src++;
658
+					y |= (((y11 + y21 + y12 + y22 + 2) << 6) & 0xff00);
659
+					
660
+					y11 = y21; y12 = y22; y21 = sY[src]; y22 = sY[src+width]; src++;
661
+					y |= (((y11 + y21 + y12 + y22 + 2) << 14) & 0xff0000);
662
+
663
+					y11 = y21; y12 = y22; y21 = sY[src]; y22 = sY[src+width]; src++;
664
+					y |= (((y11 + y21 + y12 + y22 + 2) << 22) & 0xff000000);
665
+
666
+					dY[dest++] = y;
636 667
 				}
637
-				dest += scan; src += scan;
668
+				dest += scan >> 2; src += scan-1;
638 669
 			}
639 670
 		}
640 671
 		else {
641 672
 			while( dest < last ) {
642
-				for( var x = 0; x < 16; x++ ) {
643
-					dY[dest] = (sY[src] + sY[src+1] + 1) >> 1;
644
-					dest++; src++;
673
+				y21 = sY[src]; src++;
674
+				for( var x = 0; x < 4; x++ ) {
675
+					y11 = y21; y21 = sY[src]; src++;
676
+					y = (((y11 + y21 + 1) >> 1) & 0xff);
677
+					
678
+					y11 = y21; y21 = sY[src]; src++;
679
+					y |= (((y11 + y21 + 1) << 7) & 0xff00);
680
+					
681
+					y11 = y21; y21 = sY[src]; src++;
682
+					y |= (((y11 + y21 + 1) << 15) & 0xff0000);
683
+					
684
+					y11 = y21; y21 = sY[src]; src++;
685
+					y |= (((y11 + y21 + 1) << 23) & 0xff000000);
686
+
687
+					dY[dest++] = y;
645 688
 				}
646
-				dest += scan; src += scan;
689
+				dest += scan >> 2; src += scan-1;
647 690
 			}
648 691
 		}
649 692
 	}
650 693
 	else {
651 694
 		if( oddV ) {
652 695
 			while( dest < last ) {
653
-				for( var x = 0; x < 16; x++ ) {
654
-					dY[dest] = (sY[src] + sY[src+width] + 1) >> 1;
655
-					dest++; src++;
696
+				for( var x = 0; x < 4; x++ ) {
697
+					y = (((sY[src] + sY[src+width] + 1) >> 1) & 0xff); src++;
698
+					y |= (((sY[src] + sY[src+width] + 1) << 7) & 0xff00); src++;
699
+					y |= (((sY[src] + sY[src+width] + 1) << 15) & 0xff0000); src++;
700
+					y |= (((sY[src] + sY[src+width] + 1) << 23) & 0xff000000); src++;
701
+					
702
+					dY[dest++] = y;
656 703
 				}
657
-				dest += scan; src += scan;
704
+				dest += scan >> 2; src += scan;
658 705
 			}
659 706
 		}
660 707
 		else {
661 708
 			while( dest < last ) {
662
-				for( var x = 0; x < 16; x++ ) {
663
-					dY[dest] = sY[src];
664
-					dest++; src++;
709
+				for( var x = 0; x < 4; x++ ) {
710
+					y = sY[src]; src++;
711
+					y |= sY[src] << 8; src++;
712
+					y |= sY[src] << 16; src++;
713
+					y |= sY[src] << 24; src++;
714
+
715
+					dY[dest++] = y;
665 716
 				}
666
-				dest += scan; src += scan;
717
+				dest += scan >> 2; src += scan;
667 718
 			}
668 719
 		}
669 720
 	}
@@ -680,50 +731,122 @@ jsmpeg.prototype.copyMacroblock = function(motionH, motionV, sY, sCr, sCb ) {
680 731
 	oddV = ((motionV/2) & 1) == 1;
681 732
 	
682 733
 	src = ((this.mbRow << 3) + V) * width + (this.mbCol << 3) + H;
683
-	dest = (this.mbRow * width + this.mbCol) << 3;
684
-	last = dest + (width << 3);
734
+	dest = (this.mbRow * width + this.mbCol) << 1;
735
+	last = dest + (width << 1);
685 736
 	
737
+	var cr11, cr21, cr12, cr22, cr;
738
+	var cb11, cb21, cb12, cb22, cb;
686 739
 	if( oddH ) {
687 740
 		if( oddV ) {
688 741
 			while( dest < last ) {
689
-				for( var x = 0; x < 8; x++ ) {
690
-					dCr[dest] = (sCr[src] + sCr[src+1] + sCr[src+width] + sCr[src+width+1] + 2) >> 2;
691
-					dCb[dest] = (sCb[src] + sCb[src+1] + sCb[src+width] + sCb[src+width+1] + 2) >> 2;
692
-					dest++; src++;
742
+				cr21 = sCr[src]; cr22 = sCr[src+width];
743
+				cb21 = sCb[src]; cb22 = sCb[src+width];
744
+				src++;
745
+				for( var x = 0; x < 2; x++ ) {
746
+					cr11 = cr21; cr12 = cr22; cr21 = sCr[src]; cr22 = sCr[src+width];
747
+					cb11 = cb21; cb12 = cb22; cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
748
+					cr = (((cr11 + cr21 + cr12 + cr22 + 2) >> 2) & 0xff);
749
+					cb = (((cb11 + cb21 + cb12 + cb22 + 2) >> 2) & 0xff);
750
+
751
+					cr11 = cr21; cr12 = cr22; cr21 = sCr[src]; cr22 = sCr[src+width];
752
+					cb11 = cb21; cb12 = cb22; cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
753
+					cr |= (((cr11 + cr21 + cr12 + cr22 + 2) << 6) & 0xff00);
754
+					cb |= (((cb11 + cb21 + cb12 + cb22 + 2) << 6) & 0xff00);
755
+
756
+					cr11 = cr21; cr12 = cr22; cr21 = sCr[src]; cr22 = sCr[src+width];
757
+					cb11 = cb21; cb12 = cb22; cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
758
+					cr |= (((cr11 + cr21 + cr12 + cr22 + 2) << 14) & 0xff0000);
759
+					cb |= (((cb11 + cb21 + cb12 + cb22 + 2) << 14) & 0xff0000);
760
+
761
+					cr11 = cr21; cr12 = cr22; cr21 = sCr[src]; cr22 = sCr[src+width];
762
+					cb11 = cb21; cb12 = cb22; cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
763
+					cr |= (((cr11 + cr21 + cr12 + cr22 + 2) << 22) & 0xff000000);
764
+					cb |= (((cb11 + cb21 + cb12 + cb22 + 2) << 22) & 0xff000000);
765
+
766
+					dCr[dest] = cr;
767
+					dCb[dest] = cb;
768
+					dest++;
693 769
 				}
694
-				dest += scan; src += scan;
770
+				dest += scan >> 2; src += scan-1;
695 771
 			}
696 772
 		}
697 773
 		else {
698 774
 			while( dest < last ) {
699
-				for( var x = 0; x < 8; x++ ) {
700
-					dCr[dest] = (sCr[src] + sCr[src+1] + 1) >> 1;
701
-					dCb[dest] = (sCb[src] + sCb[src+1] + 1) >> 1;
702
-					dest++; src++;
775
+				cr21 = sCr[src];
776
+				cb21 = sCb[src];
777
+				src++;
778
+				for( var x = 0; x < 2; x++ ) {
779
+					cr11 = cr21; cr21 = sCr[src];
780
+					cb11 = cb21; cb21 = sCb[src]; src++;
781
+					cr = (((cr11 + cr21 + 1) >> 1) & 0xff);
782
+					cb = (((cb11 + cb21 + 1) >> 1) & 0xff);
783
+
784
+					cr11 = cr21; cr21 = sCr[src];
785
+					cb11 = cb21; cb21 = sCb[src]; src++;
786
+					cr |= (((cr11 + cr21 + 1) << 7) & 0xff00);
787
+					cb |= (((cb11 + cb21 + 1) << 7) & 0xff00);
788
+
789
+					cr11 = cr21; cr21 = sCr[src];
790
+					cb11 = cb21; cb21 = sCb[src]; src++;
791
+					cr |= (((cr11 + cr21 + 1) << 15) & 0xff0000);
792
+					cb |= (((cb11 + cb21 + 1) << 15) & 0xff0000);
793
+
794
+					cr11 = cr21; cr21 = sCr[src];
795
+					cb11 = cb21; cb21 = sCb[src]; src++;
796
+					cr |= (((cr11 + cr21 + 1) << 23) & 0xff000000);
797
+					cb |= (((cb11 + cb21 + 1) << 23) & 0xff000000);
798
+
799
+					dCr[dest] = cr;
800
+					dCb[dest] = cb;
801
+					dest++;
703 802
 				}
704
-				dest += scan; src += scan;
803
+				dest += scan >> 2; src += scan-1;
705 804
 			}
706 805
 		}
707 806
 	}
708 807
 	else {
709 808
 		if( oddV ) {
710 809
 			while( dest < last ) {
711
-				for( var x = 0; x < 8; x++ ) {
712
-					dCr[dest] = (sCr[src] + sCr[src+width] + 1) >> 1;
713
-					dCb[dest] = (sCb[src] + sCb[src+width] + 1) >> 1;
714
-					dest++; src++;
810
+				for( var x = 0; x < 2; x++ ) {
811
+					cr = (((sCr[src] + sCr[src+width] + 1) >> 1) & 0xff);
812
+					cb = (((sCb[src] + sCb[src+width] + 1) >> 1) & 0xff); src++;
813
+
814
+					cr |= (((sCr[src] + sCr[src+width] + 1) << 7) & 0xff00);
815
+					cb |= (((sCb[src] + sCb[src+width] + 1) << 7) & 0xff00); src++;
816
+
817
+					cr |= (((sCr[src] + sCr[src+width] + 1) << 15) & 0xff0000);
818
+					cb |= (((sCb[src] + sCb[src+width] + 1) << 15) & 0xff0000); src++;
819
+
820
+					cr |= (((sCr[src] + sCr[src+width] + 1) << 23) & 0xff000000);
821
+					cb |= (((sCb[src] + sCb[src+width] + 1) << 23) & 0xff000000); src++;
822
+					
823
+					dCr[dest] = cr;
824
+					dCb[dest] = cb;
825
+					dest++;
715 826
 				}
716
-				dest += scan; src += scan;
827
+				dest += scan >> 2; src += scan;
717 828
 			}
718 829
 		}
719 830
 		else {
720 831
 			while( dest < last ) {
721
-				for( var x = 0; x < 8; x++ ) {
722
-					dCr[dest] = sCr[src];
723
-					dCb[dest] = sCb[src];
724
-					dest++; src++;
832
+				for( var x = 0; x < 2; x++ ) {
833
+					cr = sCr[src];
834
+					cb = sCb[src]; src++;
835
+
836
+					cr |= sCr[src] << 8;
837
+					cb |= sCb[src] << 8; src++;
838
+
839
+					cr |= sCr[src] << 16;
840
+					cb |= sCb[src] << 16; src++;
841
+
842
+					cr |= sCr[src] << 24;
843
+					cb |= sCb[src] << 24; src++;
844
+
845
+					dCr[dest] = cr;
846
+					dCb[dest] = cb;
847
+					dest++;
725 848
 				}
726
-				dest += scan; src += scan;
849
+				dest += scan >> 2; src += scan;
727 850
 			}
728 851
 		}
729 852
 	}
@@ -892,7 +1015,6 @@ jsmpeg.prototype.decodeBlock = function(block) {
892 1015
 	
893 1016
 	var blockData = this.blockData;
894 1017
 	if( this.macroblockIntra ) {
895
-		var mult = 0;
896 1018
 		// Overwrite (no prediction)
897 1019
 		for( var i = 0; i < 8; i++ ) {
898 1020
 			for( var j = 0; j < 8; j++ ) {