Sfoglia il codice sorgente

Optimize copyMacroblock by keeping partial sums and reusing them. Also clean up.

Maik Merten 12 anni fa
parent
commit
b494fe061e
1 file modificato, con 63 aggiunte e 88 eliminazioni
  1. 63 88
      jsmpg.js

+ 63 - 88
jsmpg.js Vedi File

@@ -935,27 +935,23 @@ jsmpeg.prototype.copyMacroblock = function(motionH, motionV, sY, sCr, sCb ) {
935 935
 	dest = (this.mbRow * width + this.mbCol) << 2;
936 936
 	last = dest + (width << 2);
937 937
 
938
-	var y11, y21, y12, y22, y;
938
+	var y1, y2, y;
939 939
 	if( oddH ) {
940 940
 		if( oddV ) {
941 941
 			while( dest < last ) {
942
-				y21 = sY[src]; y22 = sY[src+width]; src++;
942
+				y1 = sY[src] + sY[src+width]; src++;
943 943
 				for( var x = 0; x < 4; x++ ) {
944
-					//y11 = y21; y12 = y22; y21 = sY[src]; y22 = sY[src+width]; src++;
945
-					y11 = sY[src]; y12 = sY[src+width]; src++;
946
-					y = (((y11 + y21 + y12 + y22 + 2) >> 2) & 0xff);
944
+					y2 = sY[src] + sY[src+width]; src++;
945
+					y = (((y1 + y2 + 2) >> 2) & 0xff);
947 946
 
948
-					//y11 = y21; y12 = y22; y21 = sY[src]; y22 = sY[src+width]; src++;
949
-					y21 = sY[src]; y22 = sY[src+width]; src++;
950
-					y |= (((y11 + y21 + y12 + y22 + 2) << 6) & 0xff00);
947
+					y1 = sY[src] + sY[src+width]; src++;
948
+					y |= (((y1 + y2 + 2) << 6) & 0xff00);
951 949
 					
952
-					//y11 = y21; y12 = y22; y21 = sY[src]; y22 = sY[src+width]; src++;
953
-					y11 = sY[src]; y12 = sY[src+width]; src++;
954
-					y |= (((y11 + y21 + y12 + y22 + 2) << 14) & 0xff0000);
950
+					y2 = sY[src] + sY[src+width]; src++;
951
+					y |= (((y1 + y2 + 2) << 14) & 0xff0000);
955 952
 
956
-					//y11 = y21; y12 = y22; y21 = sY[src]; y22 = sY[src+width]; src++;
957
-					y21 = sY[src]; y22 = sY[src+width]; src++;
958
-					y |= (((y11 + y21 + y12 + y22 + 2) << 22) & 0xff000000);
953
+					y1 = sY[src] + sY[src+width]; src++;
954
+					y |= (((y1 + y2 + 2) << 22) & 0xff000000);
959 955
 
960 956
 					dY[dest++] = y;
961 957
 				}
@@ -964,24 +960,19 @@ jsmpeg.prototype.copyMacroblock = function(motionH, motionV, sY, sCr, sCb ) {
964 960
 		}
965 961
 		else {
966 962
 			while( dest < last ) {
967
-				//y21 = sY[src]; src++;
968
-				y11 = sY[src++];
963
+				y1 = sY[src++];
969 964
 				for( var x = 0; x < 4; x++ ) {
970
-					//y11 = y21; y21 = sY[src]; src++;
971
-					y21 = sY[src++];
972
-					y = (((y11 + y21 + 1) >> 1) & 0xff);
965
+					y2 = sY[src++];
966
+					y = (((y1 + y2 + 1) >> 1) & 0xff);
973 967
 					
974
-					//y11 = y21; y21 = sY[src]; src++;
975
-					y11 = sY[src++];
976
-					y |= (((y11 + y21 + 1) << 7) & 0xff00);
968
+					y1 = sY[src++];
969
+					y |= (((y1 + y2 + 1) << 7) & 0xff00);
977 970
 					
978
-					//y11 = y21; y21 = sY[src]; src++;
979
-					y21 = sY[src++];
980
-					y |= (((y11 + y21 + 1) << 15) & 0xff0000);
971
+					y2 = sY[src++];
972
+					y |= (((y1 + y2 + 1) << 15) & 0xff0000);
981 973
 					
982
-					//y11 = y21; y21 = sY[src]; src++;
983
-					y11 = sY[src++];
984
-					y |= (((y11 + y21 + 1) << 23) & 0xff000000);
974
+					y1 = sY[src++];
975
+					y |= (((y1 + y2 + 1) << 23) & 0xff000000);
985 976
 
986 977
 					dY[dest++] = y;
987 978
 				}
@@ -1038,42 +1029,34 @@ jsmpeg.prototype.copyMacroblock = function(motionH, motionV, sY, sCr, sCb ) {
1038 1029
 	dest = (this.mbRow * width + this.mbCol) << 1;
1039 1030
 	last = dest + (width << 1);
1040 1031
 	
1041
-	var cr11, cr21, cr12, cr22, cr;
1042
-	var cb11, cb21, cb12, cb22, cb;
1032
+	var cr1, cr2, cr;
1033
+	var cb1, cb2, cb;
1043 1034
 	if( oddH ) {
1044 1035
 		if( oddV ) {
1045 1036
 			while( dest < last ) {
1046
-				cr21 = sCr[src]; cr22 = sCr[src+width];
1047
-				cb21 = sCb[src]; cb22 = sCb[src+width];
1037
+				cr1 = sCr[src] + sCr[src+width];
1038
+				cb1 = sCb[src] + sCb[src+width];
1048 1039
 				src++;
1049 1040
 				for( var x = 0; x < 2; x++ ) {
1050
-					//cr11 = cr21; cr12 = cr22; cr21 = sCr[src]; cr22 = sCr[src+width];
1051
-					cr11 = sCr[src]; cr12 = sCr[src+width];
1052
-					//cb11 = cb21; cb12 = cb22; cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
1053
-					cb11 = sCb[src]; cb12 = sCb[src+width]; src++;
1054
-					cr = (((cr11 + cr21 + cr12 + cr22 + 2) >> 2) & 0xff);
1055
-					cb = (((cb11 + cb21 + cb12 + cb22 + 2) >> 2) & 0xff);
1056
-
1057
-					//cr11 = cr21; cr12 = cr22; cr21 = sCr[src]; cr22 = sCr[src+width];
1058
-					cr21 = sCr[src]; cr22 = sCr[src+width];
1059
-					//cb11 = cb21; cb12 = cb22; cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
1060
-					cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
1061
-					cr |= (((cr11 + cr21 + cr12 + cr22 + 2) << 6) & 0xff00);
1062
-					cb |= (((cb11 + cb21 + cb12 + cb22 + 2) << 6) & 0xff00);
1063
-
1064
-					//cr11 = cr21; cr12 = cr22; cr21 = sCr[src]; cr22 = sCr[src+width];
1065
-					cr11 = sCr[src]; cr12 = sCr[src+width];
1066
-					//cb11 = cb21; cb12 = cb22; cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
1067
-					cb11 = sCb[src]; cb12 = sCb[src+width]; src++;
1068
-					cr |= (((cr11 + cr21 + cr12 + cr22 + 2) << 14) & 0xff0000);
1069
-					cb |= (((cb11 + cb21 + cb12 + cb22 + 2) << 14) & 0xff0000);
1070
-
1071
-					//cr11 = cr21; cr12 = cr22; cr21 = sCr[src]; cr22 = sCr[src+width];
1072
-					cr21 = sCr[src]; cr22 = sCr[src+width];
1073
-					//cb11 = cb21; cb12 = cb22; cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
1074
-					cb21 = sCb[src]; cb22 = sCb[src+width]; src++;
1075
-					cr |= (((cr11 + cr21 + cr12 + cr22 + 2) << 22) & 0xff000000);
1076
-					cb |= (((cb11 + cb21 + cb12 + cb22 + 2) << 22) & 0xff000000);
1041
+					cr2 = sCr[src] + sCr[src+width];
1042
+					cb2 = sCb[src] + sCb[src+width]; src++;
1043
+					cr = (((cr1 + cr2 + 2) >> 2) & 0xff);
1044
+					cb = (((cb1 + cb2 + 2) >> 2) & 0xff);
1045
+
1046
+					cr1 = sCr[src] + sCr[src+width];
1047
+					cb1 = sCb[src] + sCb[src+width]; src++;
1048
+					cr |= (((cr1 + cr2 + 2) << 6) & 0xff00);
1049
+					cb |= (((cb1 + cb2 + 2) << 6) & 0xff00);
1050
+
1051
+					cr2 = sCr[src] + sCr[src+width];
1052
+					cb2 = sCb[src] + sCb[src+width]; src++;
1053
+					cr |= (((cr1 + cr2 + 2) << 14) & 0xff0000);
1054
+					cb |= (((cb1 + cb2 + 2) << 14) & 0xff0000);
1055
+
1056
+					cr1 = sCr[src] + sCr[src+width];
1057
+					cb1 = sCb[src] + sCb[src+width]; src++;
1058
+					cr |= (((cr1 + cr2 + 2) << 22) & 0xff000000);
1059
+					cb |= (((cb1 + cb2 + 2) << 22) & 0xff000000);
1077 1060
 
1078 1061
 					dCr[dest] = cr;
1079 1062
 					dCb[dest] = cb;
@@ -1084,37 +1067,29 @@ jsmpeg.prototype.copyMacroblock = function(motionH, motionV, sY, sCr, sCb ) {
1084 1067
 		}
1085 1068
 		else {
1086 1069
 			while( dest < last ) {
1087
-				cr21 = sCr[src];
1088
-				cb21 = sCb[src];
1070
+				cr1 = sCr[src];
1071
+				cb1 = sCb[src];
1089 1072
 				src++;
1090 1073
 				for( var x = 0; x < 2; x++ ) {
1091
-					//cr11 = cr21; cr21 = sCr[src];
1092
-					cr11 = sCr[src];
1093
-					//cb11 = cb21; cb21 = sCb[src]; src++;
1094
-					cb11 = sCb[src++];
1095
-					cr = (((cr11 + cr21 + 1) >> 1) & 0xff);
1096
-					cb = (((cb11 + cb21 + 1) >> 1) & 0xff);
1097
-
1098
-					//cr11 = cr21; cr21 = sCr[src];
1099
-					cr21 = sCr[src];
1100
-					//cb11 = cb21; cb21 = sCb[src]; src++;
1101
-					cb21 = sCb[src++];
1102
-					cr |= (((cr11 + cr21 + 1) << 7) & 0xff00);
1103
-					cb |= (((cb11 + cb21 + 1) << 7) & 0xff00);
1104
-
1105
-					//cr11 = cr21; cr21 = sCr[src];
1106
-					cr11 = sCr[src];
1107
-					//cb11 = cb21; cb21 = sCb[src]; src++;
1108
-					cb11 = sCb[src++];
1109
-					cr |= (((cr11 + cr21 + 1) << 15) & 0xff0000);
1110
-					cb |= (((cb11 + cb21 + 1) << 15) & 0xff0000);
1111
-
1112
-					//cr11 = cr21; cr21 = sCr[src];
1113
-					cr21 = sCr[src];
1114
-					//cb11 = cb21; cb21 = sCb[src]; src++;
1115
-					cb21 = sCb[src++];
1116
-					cr |= (((cr11 + cr21 + 1) << 23) & 0xff000000);
1117
-					cb |= (((cb11 + cb21 + 1) << 23) & 0xff000000);
1074
+					cr2 = sCr[src];
1075
+					cb2 = sCb[src++];
1076
+					cr = (((cr1 + cr2 + 1) >> 1) & 0xff);
1077
+					cb = (((cb1 + cb2 + 1) >> 1) & 0xff);
1078
+
1079
+					cr1 = sCr[src];
1080
+					cb1 = sCb[src++];
1081
+					cr |= (((cr1 + cr2 + 1) << 7) & 0xff00);
1082
+					cb |= (((cb1 + cb2 + 1) << 7) & 0xff00);
1083
+
1084
+					cr2 = sCr[src];
1085
+					cb2 = sCb[src++];
1086
+					cr |= (((cr1 + cr2 + 1) << 15) & 0xff0000);
1087
+					cb |= (((cb1 + cb2 + 1) << 15) & 0xff0000);
1088
+
1089
+					cr1 = sCr[src];
1090
+					cb1 = sCb[src++];
1091
+					cr |= (((cr1 + cr2 + 1) << 23) & 0xff000000);
1092
+					cb |= (((cb1 + cb2 + 1) << 23) & 0xff000000);
1118 1093
 
1119 1094
 					dCr[dest] = cr;
1120 1095
 					dCb[dest] = cb;