5 yıl önce · a6a84fca55
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -0,0 +1,181 @@
 
				+#folder
			
 
				+obj
			
 
				+libs
			
 
				+.vscode/
			
 
				+
			
 
				+#appcode folder or file
			
 
				+.idea
			
 
				+.gradle
			
 
				+.vscode
			
 
				+
			
 
				+#mac
			
 
				+.DS_Store
			
 
				+
			
 
				+# c/c++
			
 
				+# Prerequisites
			
 
				+*.d
			
 
				+
			
 
				+# Object files
			
 
				+*.ko
			
 
				+*.obj
			
 
				+*.elf
			
 
				+
			
 
				+# Linker output
			
 
				+*.ilk
			
 
				+*.map
			
 
				+*.exp
			
 
				+
			
 
				+# Precompiled Headers
			
 
				+*.gch
			
 
				+*.pch
			
 
				+
			
 
				+# Debug files
			
 
				+*.dSYM/
			
 
				+*.su
			
 
				+*.idb
			
 
				+*.pdb
			
 
				+
			
 
				+# Kernel Module Compile Results
			
 
				+*.mod*
			
 
				+*.cmd
			
 
				+modules.order
			
 
				+Module.symvers
			
 
				+Mkfile.old
			
 
				+dkms.conf
			
 
				+
			
 
				+# Xcode Project
			
 
				+#
			
 
				+# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
			
 
				+
			
 
				+## Build generated
			
 
				+build/
			
 
				+DerivedData/
			
 
				+
			
 
				+## Various settings
			
 
				+*.pbxuser
			
 
				+!default.pbxuser
			
 
				+*.mode1v3
			
 
				+!default.mode1v3
			
 
				+*.mode2v3
			
 
				+!default.mode2v3
			
 
				+*.perspectivev3
			
 
				+!default.perspectivev3
			
 
				+xcuserdata/
			
 
				+
			
 
				+## Other
			
 
				+*.moved-aside
			
 
				+*.xccheckout
			
 
				+*.xcscmblueprint
			
 
				+
			
 
				+## Obj-C/Swift specific
			
 
				+*.hmap
			
 
				+*.ipa
			
 
				+*.dSYM.zip
			
 
				+*.dSYM
			
 
				+
			
 
				+# CocoaPods
			
 
				+#
			
 
				+# We recommend against adding the Pods directory to your .gitignore. However
			
 
				+# you should judge for yourself, the pros and cons are mentioned at:
			
 
				+# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
			
 
				+#
			
 
				+# Pods/
			
 
				+
			
 
				+# Carthage
			
 
				+#
			
 
				+# Add this line if you want to avoid checking in source code from Carthage dependencies.
			
 
				+# Carthage/Checkouts
			
 
				+
			
 
				+Carthage/Build
			
 
				+
			
 
				+# fastlane
			
 
				+#
			
 
				+# It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the
			
 
				+# screenshots whenever they are needed.
			
 
				+# For more information about the recommended setup visit:
			
 
				+# https://docs.fastlane.tools/best-practices/source-control/#source-control
			
 
				+
			
 
				+fastlane/report.xml
			
 
				+fastlane/Preview.html
			
 
				+fastlane/screenshots
			
 
				+fastlane/test_output
			
 
				+
			
 
				+# Code Injection
			
 
				+#
			
 
				+# After new code Injection tools there's a generated folder /iOSInjectionProject
			
 
				+# https://github.com/johnno1962/injectionforxcode
			
 
				+
			
 
				+iOSInjectionProject/
			
 
				+
			
 
				+### android
			
 
				+# Built application files
			
 
				+*.apk
			
 
				+*.ap_
			
 
				+
			
 
				+# Files for the ART/Dalvik VM
			
 
				+*.dex
			
 
				+
			
 
				+# Java class files
			
 
				+*.class
			
 
				+
			
 
				+# Generated files
			
 
				+bin/
			
 
				+gen/
			
 
				+out/
			
 
				+
			
 
				+# Log Files
			
 
				+*.log
			
 
				+
			
 
				+# Android Studio Navigation editor temp files
			
 
				+.navigation/
			
 
				+
			
 
				+# Android Studio captures folder
			
 
				+captures/
			
 
				+
			
 
				+#other
			
 
				+demo_Android/local.properties
			
 
				+
			
 
				+# Intellij
			
 
				+*.iml
			
 
				+.idea/workspace.xml
			
 
				+.idea/tasks.xml
			
 
				+.idea/gradle.xml
			
 
				+.idea/dictionaries
			
 
				+.idea/libraries
			
 
				+
			
 
				+# Keystore files
			
 
				+*.jks
			
 
				+
			
 
				+# External native build folder generated in Android Studio 2.2 and later
			
 
				+.externalNativeBuild
			
 
				+
			
 
				+# Google Services (e.g. APIs or Firebase)
			
 
				+google-services.json
			
 
				+
			
 
				+# Freeline
			
 
				+freeline.py
			
 
				+freeline/
			
 
				+freeline_project_description.json
			
 
				+
			
 
				+#vc
			
 
				+ECMedia/Ecmedia.aps
			
 
				+demo_Win/.vs/
			
 
				+demo_Win/MyWebRtc.VC.VC.opendb
			
 
				+demo_Win/MyWebRtc.VC.db
			
 
				+demo_Win/Win32/
			
 
				+demo_Win/ipch/
			
 
				+demo_Win/serphonetest/serphonetest.aps
			
 
				+demo_Win/MyWebRtc.sln
			
 
				+demo_Win/MyWebRtc.sln
			
 
				+ECMedia/source/temp
			
 
				+third_party/protobuf/python/
			
 
				+third_party/protobuf_261/
			
 
				+third_party/srtp/crypto/include/config.h
			
 
				+demo_Win/Debug/
			
 
				+demo_Win/Release/
			
 
				+third_party/protobuf/cmake/solution/CMakeFiles/3.7.1/CompilerIdC/
			
 
				+module/congestion_controller/congestion_controller/Debug/
			
 
				+third_party/protobuf/cmake/solution/CMakeFiles/3.7.1/CompilerIdCXX/CompilerIdCXX.exe
			
 
				+third_party/protobuf/cmake/solution/Release/
			
 
				+third_party/protobuf/cmake/solution/libprotobuf-lite.dir/Release/
			
 
				+*.lib
			
--- a/src/.project
+++ b/src/.project
@@ -0,0 +1,17 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<projectDescription>
			
 
				+	<name>android_live_pusher</name>
			
 
				+	<comment>Project android_live_pusher created by Buildship.</comment>
			
 
				+	<projects>
			
 
				+	</projects>
			
 
				+	<buildSpec>
			
 
				+		<buildCommand>
			
 
				+			<name>org.eclipse.buildship.core.gradleprojectbuilder</name>
			
 
				+			<arguments>
			
 
				+			</arguments>
			
 
				+		</buildCommand>
			
 
				+	</buildSpec>
			
 
				+	<natures>
			
 
				+		<nature>org.eclipse.buildship.core.gradleprojectnature</nature>
			
 
				+	</natures>
			
 
				+</projectDescription>
			
--- a/src/app/.classpath
+++ b/src/app/.classpath
@@ -0,0 +1,6 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<classpath>
			
 
				+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8/"/>
			
 
				+	<classpathentry kind="con" path="org.eclipse.buildship.core.gradleclasspathcontainer"/>
			
 
				+	<classpathentry kind="output" path="bin/default"/>
			
 
				+</classpath>
			
--- a/src/app/.project
+++ b/src/app/.project
@@ -0,0 +1,23 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<projectDescription>
			
 
				+	<name>app</name>
			
 
				+	<comment>Project app created by Buildship.</comment>
			
 
				+	<projects>
			
 
				+	</projects>
			
 
				+	<buildSpec>
			
 
				+		<buildCommand>
			
 
				+			<name>org.eclipse.jdt.core.javabuilder</name>
			
 
				+			<arguments>
			
 
				+			</arguments>
			
 
				+		</buildCommand>
			
 
				+		<buildCommand>
			
 
				+			<name>org.eclipse.buildship.core.gradleprojectbuilder</name>
			
 
				+			<arguments>
			
 
				+			</arguments>
			
 
				+		</buildCommand>
			
 
				+	</buildSpec>
			
 
				+	<natures>
			
 
				+		<nature>org.eclipse.jdt.core.javanature</nature>
			
 
				+		<nature>org.eclipse.buildship.core.gradleprojectnature</nature>
			
 
				+	</natures>
			
 
				+</projectDescription>
			
--- a/src/app/build.gradle
+++ b/src/app/build.gradle
@@ -0,0 +1,26 @@
 
				+apply plugin: 'com.android.application'
			
 
				+
			
 
				+android {
			
 
				+    compileSdkVersion 23
			
 
				+    buildToolsVersion '25.0.0'
			
 
				+
			
 
				+    defaultConfig {
			
 
				+        applicationId "com.spark.live.kit"
			
 
				+        minSdkVersion 19
			
 
				+        targetSdkVersion 23
			
 
				+        versionCode 1
			
 
				+        versionName "1.0"
			
 
				+    }
			
 
				+    buildTypes {
			
 
				+        release {
			
 
				+            minifyEnabled false
			
 
				+            proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+dependencies {
			
 
				+    // 'compile'/'testCompile' are deprecated by Android Gradle plugin 3.x;
			
 
				+    // 'implementation'/'testImplementation' are the supported replacements.
			
 
				+    implementation fileTree(include: ['*.jar'], dir: 'libs')
			
 
				+    testImplementation 'junit:junit:4.12'
			
 
				+    implementation project(':library')
			
 
				+}
			
--- a/src/app/proguard-rules.pro
+++ b/src/app/proguard-rules.pro
@@ -0,0 +1,17 @@
 
				+# Add project specific ProGuard rules here.
			
 
				+# By default, the flags in this file are appended to flags specified
			
 
				+# in E:\AndroidWorkspace\sdk/tools/proguard/proguard-android.txt
			
 
				+# You can edit the include path and order by changing the proguardFiles
			
 
				+# directive in build.gradle.
			
 
				+#
			
 
				+# For more details, see
			
 
				+#   http://developer.android.com/guide/developing/tools/proguard.html
			
 
				+
			
 
				+# Add any project specific keep options here:
			
 
				+
			
 
				+# If your project uses WebView with JS, uncomment the following
			
 
				+# and specify the fully qualified class name to the JavaScript interface
			
 
				+# class:
			
 
				+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
			
 
				+#   public *;
			
 
				+#}
			
--- a/src/app/src/main/AndroidManifest.xml
+++ b/src/app/src/main/AndroidManifest.xml
@@ -0,0 +1,36 @@
 
				+<?xml version="1.0" encoding="utf-8"?>
			
 
				+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
			
 
				+    xmlns:tools="http://schemas.android.com/tools"
			
 
				+    package="com.spark.live">
			
 
				+
			
 
				+    <uses-permission android:name="android.permission.CHANGE_CONFIGURATION"
			
 
				+        tools:ignore="ProtectedPermissions" />
			
 
				+    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
			
 
				+    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
			
 
				+    <uses-permission android:name="android.permission.RECORD_AUDIO"/>
			
 
				+    <uses-permission android:name="android.permission.CAMERA"/>
			
 
				+
			
 
				+    <application
			
 
				+        android:allowBackup="true"
			
 
				+        android:icon="@mipmap/ic_launcher"
			
 
				+        android:label="@string/app_name"
			
 
				+        android:supportsRtl="true"
			
 
				+        android:theme="@style/AppTheme">
			
 
				+        <activity
			
 
				+            android:name=".MainActivity"
			
 
				+            android:label="@string/app_name"
			
 
				+            android:theme="@style/AppTheme.NoActionBar">
			
 
				+            <intent-filter>
			
 
				+                <action android:name="android.intent.action.MAIN" />
			
 
				+
			
 
				+                <category android:name="android.intent.category.LAUNCHER" />
			
 
				+            </intent-filter>
			
 
				+        </activity>
			
 
				+        <activity android:name=".PreviewActivity"
			
 
				+            android:configChanges="keyboardHidden|orientation|screenSize"
			
 
				+            android:theme="@android:style/Theme.NoTitleBar.Fullscreen"
			
 
				+            android:screenOrientation="fullSensor">
			
 
				+        </activity>
			
 
				+    </application>
			
 
				+
			
 
				+</manifest>
			
--- a/src/app/src/main/java/com/spark/live/MainActivity.java
+++ b/src/app/src/main/java/com/spark/live/MainActivity.java
@@ -0,0 +1,122 @@
 
				+package com.spark.live;
			
 
				+
			
 
				+import android.app.Activity;
			
 
				+import android.content.Intent;
			
 
				+import android.content.SharedPreferences;
			
 
				+import android.os.Bundle;
			
 
				+import android.text.Editable;
			
 
				+import android.text.TextWatcher;
			
 
				+import android.util.Log;
			
 
				+import android.view.View;
			
 
				+import android.view.WindowManager;
			
 
				+import android.widget.Button;
			
 
				+import android.widget.EditText;
			
 
				+
			
 
				+public class MainActivity extends Activity {
			
 
				+
			
 
				+    private String rtmpUrl = "rtmp://192.168.0.166/live/livedemo";
			
 
				+
			
 
				+    // the bitrate in kbps.
			
 
				+    private int vbitrate_kbps = 800;
			
 
				+
			
 
				+    // settings storage
			
 
				+    private SharedPreferences sp;
			
 
				+    private static final String TAG = "SimpleLivePublisher";
			
 
				+    private Button btnPublish = null;
			
 
				+
			
 
				+    @Override
			
 
				+    protected void onCreate(Bundle savedInstanceState) {
			
 
				+        super.onCreate(savedInstanceState);
			
 
				+
			
 
				+        sp = getSharedPreferences("SrsPublisher", MODE_PRIVATE);
			
 
				+
			
 
				+        getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON);
			
 
				+        setContentView(R.layout.activity_main);
			
 
				+
			
 
				+        // restore data.
			
 
				+        rtmpUrl = sp.getString("FLV_URL", rtmpUrl);
			
 
				+        vbitrate_kbps = sp.getInt("VBITRATE", vbitrate_kbps);
			
 
				+        Log.i(TAG, String.format("initialize flv url to %s, vbitrate=%dkbps", rtmpUrl, vbitrate_kbps));
			
 
				+
			
 
				+        // initialize url.
			
 
				+        final EditText efu = (EditText) findViewById(R.id.rtmp_url);
			
 
				+        efu.setText(rtmpUrl);
			
 
				+        efu.addTextChangedListener(new TextWatcher() {
			
 
				+            @Override
			
 
				+            public void beforeTextChanged(CharSequence s, int start, int count, int after) {
			
 
				+            }
			
 
				+
			
 
				+            @Override
			
 
				+            public void onTextChanged(CharSequence s, int start, int before, int count) {
			
 
				+            }
			
 
				+
			
 
				+            /**
			
 
				+             * Stores the edited stream URL after the text field changes.
			
 
				+             * Empty input and input equal to the current URL are ignored.
			
 
				+             */
			
 
				+            @Override
			
 
				+            public void afterTextChanged(Editable s) {
			
 
				+                String fu = efu.getText().toString();
			
 
				+                // Compare by value: == on String only tests reference identity,
			
 
				+                // so it does not detect that the text equals the current URL.
			
 
				+                if (fu.isEmpty() || fu.equals(rtmpUrl)) {
			
 
				+                    return;
			
 
				+                }
			
 
				+
			
 
				+                rtmpUrl = fu;
			
 
				+                Log.i(TAG, String.format("flv url changed to %s", rtmpUrl));
			
 
				+
			
 
				+                // Persist the new URL under the same key onCreate restores from.
			
 
				+                SharedPreferences.Editor editor = sp.edit();
			
 
				+                editor.putString("FLV_URL", rtmpUrl);
			
 
				+                editor.commit();
			
 
				+            }
			
 
				+        });
			
 
				+
			
 
				+        final EditText evb = (EditText) findViewById(R.id.vbitrate);
			
 
				+        evb.setText(String.format("%dkbps", vbitrate_kbps));
			
 
				+        evb.addTextChangedListener(new TextWatcher() {
			
 
				+            @Override
			
 
				+            public void beforeTextChanged(CharSequence s, int start, int count, int after) {
			
 
				+            }
			
 
				+
			
 
				+            @Override
			
 
				+            public void onTextChanged(CharSequence s, int start, int before, int count) {
			
 
				+            }
			
 
				+
			
 
				+            /**
			
 
				+             * Parses the edited bitrate (any "kbps" suffix is stripped) and
			
 
				+             * stores it when it is a positive number different from the
			
 
				+             * current value. Unparseable input is ignored.
			
 
				+             */
			
 
				+            @Override
			
 
				+            public void afterTextChanged(Editable s) {
			
 
				+                String text = evb.getText().toString().replace("kbps", "").trim();
			
 
				+                int vb;
			
 
				+                try {
			
 
				+                    vb = Integer.parseInt(text);
			
 
				+                } catch (NumberFormatException e) {
			
 
				+                    // Intermediate edits (empty field, partial suffix, stray
			
 
				+                    // characters) are not numbers; keep the previous bitrate
			
 
				+                    // instead of crashing the activity.
			
 
				+                    return;
			
 
				+                }
			
 
				+                // A non-positive bitrate is not usable, so it is ignored too.
			
 
				+                if (vb <= 0 || vb == vbitrate_kbps) {
			
 
				+                    return;
			
 
				+                }
			
 
				+
			
 
				+                vbitrate_kbps = vb;
			
 
				+                Log.i(TAG, String.format("video bitrate changed to %d", vbitrate_kbps));
			
 
				+
			
 
				+                // Persist the new bitrate under the same key onCreate restores from.
			
 
				+                SharedPreferences.Editor editor = sp.edit();
			
 
				+                editor.putInt("VBITRATE", vbitrate_kbps);
			
 
				+                editor.commit();
			
 
				+            }
			
 
				+        });
			
 
				+
			
 
				+        // for camera, @see https://developer.android.com/reference/android/hardware/Camera.html
			
 
				+        btnPublish = (Button) findViewById(R.id.capture);
			
 
				+        btnPublish.setOnClickListener(new View.OnClickListener() {
			
 
				+            @Override
			
 
				+            public void onClick(View v) {
			
 
				+                Intent publishIntent = new Intent(MainActivity.this, PreviewActivity.class);
			
 
				+                publishIntent.putExtra("url", rtmpUrl);
			
 
				+                publishIntent.putExtra("vBitrate", vbitrate_kbps);
			
 
				+                startActivity(publishIntent);
			
 
				+            }
			
 
				+        });
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    @Override
			
 
				+    protected void onResume() {
			
 
				+        super.onResume();
			
 
				+
			
 
				+        final Button btn = (Button) findViewById(R.id.capture);
			
 
				+        btn.setEnabled(true);
			
 
				+    }
			
 
				+
			
 
				+    @Override
			
 
				+    protected void onPause() {
			
 
				+        super.onPause();
			
 
				+    }
			
 
				+}
			
--- a/src/app/src/main/java/com/spark/live/PreviewActivity.java
+++ b/src/app/src/main/java/com/spark/live/PreviewActivity.java
@@ -0,0 +1,82 @@
 
				+package com.spark.live;
			
 
				+
			
 
				+import android.app.Activity;
			
 
				+import android.content.Intent;
			
 
				+import android.content.res.Configuration;
			
 
				+import android.os.Bundle;
			
 
				+import android.view.SurfaceHolder;
			
 
				+import android.view.SurfaceView;
			
 
				+import android.view.View;
			
 
				+import android.widget.Button;
			
 
				+import android.widget.Toast;
			
 
				+
			
 
				+import com.spark.live.sdk.engine.ISimpleLiveEngine;
			
 
				+import com.spark.live.sdk.engine.SimpleLivePulisherEngine;
			
 
				+
			
 
				+public class PreviewActivity extends Activity implements View.OnClickListener{
			
 
				+
			
 
				+    Button btnSwitch;
			
 
				+    ISimpleLiveEngine engine;
			
 
				+    String url;
			
 
				+    long lastTimeFlag = 0;
			
 
				+    boolean isRunning = false;
			
 
				+    @Override
			
 
				+    protected void onCreate(Bundle savedInstanceState) {
			
 
				+        super.onCreate(savedInstanceState);
			
 
				+
			
 
				+        setContentView(R.layout.activity_play);
			
 
				+        Intent intent = getIntent();
			
 
				+        url = intent.getStringExtra("url");
			
 
				+
			
 
				+        SurfaceView preview = (SurfaceView)findViewById(R.id.camera_preview);
			
 
				+        btnSwitch = (Button) findViewById(R.id.btn_switch);
			
 
				+        btnSwitch.setOnClickListener(this);
			
 
				+        engine = SimpleLivePulisherEngine.getInstance();
			
 
				+        engine.Init(this);
			
 
				+        assert preview != null;
			
 
				+        SurfaceHolder holder = preview.getHolder();
			
 
				+        holder.addCallback(engine);
			
 
				+    }
			
 
				+
			
 
				+    @Override
			
 
				+    protected void onResume() {
			
 
				+        if(!isRunning) {
			
 
				+            engine.Start(url);
			
 
				+            isRunning = true;
			
 
				+        }
			
 
				+        super.onResume();
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * Handles taps on the camera-switch button. A switch is performed at
			
 
				+     * most once every 3 seconds; taps that arrive sooner only show a toast.
			
 
				+     */
			
 
				+    @Override
			
 
				+    public void onClick(View v) {
			
 
				+        if (v != btnSwitch) {
			
 
				+            return;
			
 
				+        }
			
 
				+        // Monotonic clock: unlike System.currentTimeMillis() it does not jump
			
 
				+        // when the wall-clock time is changed, so the throttle stays reliable.
			
 
				+        long curTime = android.os.SystemClock.elapsedRealtime();
			
 
				+        if (curTime - lastTimeFlag > 3000) {
			
 
				+            lastTimeFlag = curTime;
			
 
				+            engine.SwitchCamera();
			
 
				+        } else {
			
 
				+            Toast.makeText(getApplicationContext(), "切换太快了，休息下", Toast.LENGTH_SHORT).show();
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    @Override
			
 
				+    protected void onDestroy() {
			
 
				+        engine.Destroy();
			
 
				+        super.onDestroy();
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    @Override
			
 
				+    public void onBackPressed() {
			
 
				+        finish();
			
 
				+        super.onBackPressed();
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    @Override
			
 
				+    public void onConfigurationChanged(Configuration newConfig) {
			
 
				+        super.onConfigurationChanged(newConfig);
			
 
				+    }
			
 
				+}
			
--- a/src/app/src/main/res/ic_launcher.png
+++ b/src/app/src/main/res/ic_launcher.png
--- a/src/app/src/main/res/layout/activity_main.xml
+++ b/src/app/src/main/res/layout/activity_main.xml
@@ -0,0 +1,34 @@
 
				+<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
			
 
				+                xmlns:tools="http://schemas.android.com/tools"
			
 
				+                android:layout_width="match_parent"
			
 
				+                android:layout_height="match_parent"
			
 
				+                android:paddingBottom="@dimen/activity_vertical_margin"
			
 
				+                android:paddingLeft="@dimen/activity_horizontal_margin"
			
 
				+                android:paddingRight="@dimen/activity_horizontal_margin"
			
 
				+                android:paddingTop="@dimen/activity_vertical_margin"
			
 
				+                tools:context=".MainActivity">
			
 
				+
			
 
				+    <Button
			
 
				+        android:id="@+id/capture"
			
 
				+        android:layout_width="wrap_content"
			
 
				+        android:layout_height="wrap_content"
			
 
				+        android:layout_marginTop="0dp"
			
 
				+        android:text="@string/btn_capture"/>
			
 
				+
			
 
				+    <EditText
			
 
				+        android:id="@+id/vbitrate"
			
 
				+        android:layout_width="wrap_content"
			
 
				+        android:layout_height="wrap_content"
			
 
				+        android:layout_marginTop="0dp"
			
 
				+        android:layout_below="@+id/capture"
			
 
				+        android:textSize="14dp"/>
			
 
				+
			
 
				+    <EditText
			
 
				+        android:id="@+id/rtmp_url"
			
 
				+        android:layout_width="wrap_content"
			
 
				+        android:layout_height="wrap_content"
			
 
				+        android:layout_below="@+id/vbitrate"
			
 
				+        android:layout_marginTop="0dp"
			
 
				+        android:textSize="14dp"/>
			
 
				+
			
 
				+</RelativeLayout>
			
--- a/src/app/src/main/res/layout/activity_play.xml
+++ b/src/app/src/main/res/layout/activity_play.xml
@@ -0,0 +1,44 @@
 
				+<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
			
 
				+                xmlns:tools="http://schemas.android.com/tools"
			
 
				+                android:layout_width="match_parent"
			
 
				+                android:layout_height="match_parent"
			
 
				+                tools:context=".PreviewActivity"
			
 
				+                android:keepScreenOn="true">
			
 
				+
			
 
				+    <FrameLayout
			
 
				+        android:id="@+id/video_frame"
			
 
				+        android:layout_width="match_parent"
			
 
				+        android:layout_height="match_parent">
			
 
				+
			
 
				+        <SurfaceView
			
 
				+            android:id="@+id/camera_preview"
			
 
				+            android:layout_width="match_parent"
			
 
				+            android:layout_height="match_parent"
			
 
				+            android:layout_gravity="center"/>
			
 
				+    </FrameLayout>
			
 
				+
			
 
				+    <Button
			
 
				+        android:id="@+id/stop"
			
 
				+        android:layout_width="wrap_content"
			
 
				+        android:layout_height="wrap_content"
			
 
				+        android:layout_marginTop="0dp"
			
 
				+        android:text="@string/btn_stop"/>
			
 
				+
			
 
				+    <Button
			
 
				+        android:id="@+id/btn_switch"
			
 
				+        android:layout_width="wrap_content"
			
 
				+        android:layout_height="wrap_content"
			
 
				+        android:layout_marginTop="0dp"
			
 
				+        android:layout_toRightOf="@+id/stop"
			
 
				+        android:text="Switch"/>
			
 
				+
			
 
				+
			
 
				+    <TextView
			
 
				+        android:id="@+id/status_indication"
			
 
				+        android:layout_width="wrap_content"
			
 
				+        android:layout_height="wrap_content"
			
 
				+        android:layout_toRightOf="@id/btn_switch"
			
 
				+        android:layout_marginTop="10dp"
			
 
				+        android:layout_marginLeft="50dp"
			
 
				+        android:layout_alignParentRight="true"/>
			
 
				+</RelativeLayout>
			
--- a/src/app/src/main/res/menu/menu_main.xml
+++ b/src/app/src/main/res/menu/menu_main.xml
@@ -0,0 +1,9 @@
 
				+<menu xmlns:android="http://schemas.android.com/apk/res/android"
			
 
				+      xmlns:app="http://schemas.android.com/apk/res-auto"
			
 
				+      xmlns:tools="http://schemas.android.com/tools"
			
 
				+      tools:context=".MainActivity">
			
 
				+    <item android:id="@+id/action_settings"
			
 
				+          android:title="@string/action_settings"
			
 
				+          android:orderInCategory="100"
			
 
				+          app:showAsAction="never"/>
			
 
				+</menu>
			
--- a/src/app/src/main/res/mipmap-hdpi/ic_launcher.png
+++ b/src/app/src/main/res/mipmap-hdpi/ic_launcher.png
--- a/src/app/src/main/res/mipmap-ldpi/ic_launcher.png
+++ b/src/app/src/main/res/mipmap-ldpi/ic_launcher.png
--- a/src/app/src/main/res/mipmap-mdpi/ic_launcher.png
+++ b/src/app/src/main/res/mipmap-mdpi/ic_launcher.png
--- a/src/app/src/main/res/mipmap-xhdpi/ic_launcher.png
+++ b/src/app/src/main/res/mipmap-xhdpi/ic_launcher.png
--- a/src/app/src/main/res/mipmap-xxhdpi/ic_launcher.png
+++ b/src/app/src/main/res/mipmap-xxhdpi/ic_launcher.png
--- a/src/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png
+++ b/src/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png
--- a/src/app/src/main/res/values-v21/styles.xml
+++ b/src/app/src/main/res/values-v21/styles.xml
@@ -0,0 +1,8 @@
 
				+<resources>
			
 
				+    <style name="AppTheme.NoActionBar">
			
 
				+        <item name="windowActionBar">false</item>
			
 
				+        <item name="windowNoTitle">true</item>
			
 
				+        <item name="android:windowDrawsSystemBarBackgrounds">true</item>
			
 
				+        <item name="android:statusBarColor">@android:color/transparent</item>
			
 
				+    </style>
			
 
				+</resources>
			
--- a/src/app/src/main/res/values-w820dp/dimens.xml
+++ b/src/app/src/main/res/values-w820dp/dimens.xml
@@ -0,0 +1,6 @@
 
				+<resources>
			
 
				+    <!-- Example customization of dimensions originally defined in res/values/dimens.xml
			
 
				+         (such as screen margins) for screens with more than 820dp of available width. This
			
 
				+         would include 7" and 10" devices in landscape (~960dp and ~1280dp respectively). -->
			
 
				+    <dimen name="activity_horizontal_margin">64dp</dimen>
			
 
				+</resources>
			
--- a/src/app/src/main/res/values/colors.xml
+++ b/src/app/src/main/res/values/colors.xml
@@ -0,0 +1,6 @@
 
				+<?xml version="1.0" encoding="utf-8"?>
			
 
				+<resources>
			
 
				+    <color name="colorPrimary">#3F51B5</color>
			
 
				+    <color name="colorPrimaryDark">#303F9F</color>
			
 
				+    <color name="colorAccent">#FF4081</color>
			
 
				+</resources>
			
--- a/src/app/src/main/res/values/dimens.xml
+++ b/src/app/src/main/res/values/dimens.xml
@@ -0,0 +1,6 @@
 
				+<resources>
			
 
				+    <!-- Default screen margins, per the Android Design guidelines. -->
			
 
				+    <dimen name="activity_horizontal_margin">16dp</dimen>
			
 
				+    <dimen name="activity_vertical_margin">16dp</dimen>
			
 
				+    <dimen name="fab_margin">16dp</dimen>
			
 
				+</resources>
			
--- a/src/app/src/main/res/values/strings.xml
+++ b/src/app/src/main/res/values/strings.xml
@@ -0,0 +1,6 @@
 
				+<resources>
			
 
				+    <string name="app_name">SimpleLive</string>
			
 
				+    <string name="action_settings">Settings</string>
			
 
				+    <string name="btn_capture">Publish</string>
			
 
				+    <string name="btn_stop">Stop</string>
			
 
				+</resources>
			
--- a/src/app/src/main/res/values/styles.xml
+++ b/src/app/src/main/res/values/styles.xml
@@ -0,0 +1,17 @@
 
				+<resources>
			
 
				+
			
 
				+    <!-- Base application theme. -->
			
 
				+    <style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
			
 
				+        <!-- Customize your theme here. -->
			
 
				+        <item name="colorPrimary">@color/colorPrimary</item>
			
 
				+        <item name="colorPrimaryDark">@color/colorPrimaryDark</item>
			
 
				+        <item name="colorAccent">@color/colorAccent</item>
			
 
				+    </style>
			
 
				+    <style name="AppTheme.NoActionBar">
			
 
				+        <item name="windowActionBar">false</item>
			
 
				+        <item name="windowNoTitle">true</item>
			
 
				+    </style>
			
 
				+    <style name="AppTheme.AppBarOverlay" parent="ThemeOverlay.AppCompat.Dark.ActionBar"/>
			
 
				+    <style name="AppTheme.PopupOverlay" parent="ThemeOverlay.AppCompat.Light"/>
			
 
				+
			
 
				+</resources>
			
--- a/src/build.gradle
+++ b/src/build.gradle
@@ -0,0 +1,25 @@
 
				+// Top-level build file where you can add configuration options common to all sub-projects/modules.
			
 
				+
			
 
				+buildscript {
			
 
				+    repositories {
			
 
				+        jcenter()
			
 
				+        google()
			
 
				+    }
			
 
				+    dependencies {
			
 
				+        classpath 'com.android.tools.build:gradle:3.6.3'
			
 
				+
			
 
				+        // NOTE: Do not place your application dependencies here; they belong
			
 
				+        // in the individual module build.gradle files
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+allprojects {
			
 
				+    repositories {
			
 
				+        jcenter()
			
 
				+        google()
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+task clean(type: Delete) {
			
 
				+    delete rootProject.buildDir
			
 
				+}
			
--- a/src/gradle.properties
+++ b/src/gradle.properties
@@ -0,0 +1,17 @@
 
				+## Project-wide Gradle settings.
			
 
				+#
			
 
				+# For more details on how to configure your build environment visit
			
 
				+# http://www.gradle.org/docs/current/userguide/build_environment.html
			
 
				+#
			
 
				+# Specifies the JVM arguments used for the daemon process.
			
 
				+# The setting is particularly useful for tweaking memory settings.
			
 
				+# Default value: -Xmx10248m -XX:MaxPermSize=256m
			
 
				+# org.gradle.jvmargs=-Xmx2048m -XX:MaxPermSize=512m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
			
 
				+#
			
 
				+# When configured, Gradle will run in incubating parallel mode.
			
 
				+# This option should only be used with decoupled projects. More details, visit
			
 
				+# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
			
 
				+# org.gradle.parallel=true
			
 
				+#Thu May 19 14:58:46 CST 2016
			
 
				+systemProp.http.proxyHost=127.0.0.1
			
 
				+systemProp.http.proxyPort=8787
			
--- a/src/gradle/wrapper/gradle-wrapper.jar
+++ b/src/gradle/wrapper/gradle-wrapper.jar
--- a/src/gradle/wrapper/gradle-wrapper.properties
+++ b/src/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
 
				+#Fri Apr 24 11:15:28 CST 2020
			
 
				+distributionBase=GRADLE_USER_HOME
			
 
				+distributionPath=wrapper/dists
			
 
				+zipStoreBase=GRADLE_USER_HOME
			
 
				+zipStorePath=wrapper/dists
			
 
				+distributionUrl=https\://services.gradle.org/distributions/gradle-5.6.4-all.zip
			
--- a/src/gradlew
+++ b/src/gradlew
@@ -0,0 +1,164 @@
 
				+#!/usr/bin/env bash
			
 
				+
			
 
				+##############################################################################
			
 
				+##
			
 
				+##  Gradle start up script for UN*X
			
 
				+##
			
 
				+##############################################################################
			
 
				+
			
 
				+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
			
 
				+DEFAULT_JVM_OPTS=""
			
 
				+
			
 
				+APP_NAME="Gradle"
			
 
				+APP_BASE_NAME=`basename "$0"`
			
 
				+
			
 
				+# Use the maximum available, or set MAX_FD != -1 to use that value.
			
 
				+MAX_FD="maximum"
			
 
				+
			
 
				+warn ( ) {
			
 
				+    echo "$*"
			
 
				+}
			
 
				+
			
 
				+die ( ) {
			
 
				+    echo
			
 
				+    echo "$*"
			
 
				+    echo
			
 
				+    exit 1
			
 
				+}
			
 
				+
			
 
				+# OS specific support (must be 'true' or 'false').
			
 
				+cygwin=false
			
 
				+msys=false
			
 
				+darwin=false
			
 
				+case "`uname`" in
			
 
				+  CYGWIN* )
			
 
				+    cygwin=true
			
 
				+    ;;
			
 
				+  Darwin* )
			
 
				+    darwin=true
			
 
				+    ;;
			
 
				+  MINGW* )
			
 
				+    msys=true
			
 
				+    ;;
			
 
				+esac
			
 
				+
			
 
				+# For Cygwin, ensure paths are in UNIX format before anything is touched.
			
 
				+if $cygwin ; then
			
 
				+    [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
			
 
				+fi
			
 
				+
			
 
				+# Attempt to set APP_HOME
			
 
				+# Resolve links: $0 may be a link
			
 
				+PRG="$0"
			
 
				+# Need this for relative symlinks.
			
 
				+while [ -h "$PRG" ] ; do
			
 
				+    ls=`ls -ld "$PRG"`
			
 
				+    link=`expr "$ls" : '.*-> \(.*\)$'`
			
 
				+    if expr "$link" : '/.*' > /dev/null; then
			
 
				+        PRG="$link"
			
 
				+    else
			
 
				+        PRG=`dirname "$PRG"`"/$link"
			
 
				+    fi
			
 
				+done
			
 
				+SAVED="`pwd`"
			
 
				+cd "`dirname \"$PRG\"`/" >/dev/null  # discard any path cd prints (e.g. via CDPATH) without closing stdout
			
 
				+APP_HOME="`pwd -P`"
			
 
				+cd "$SAVED" >/dev/null  # return to the caller's directory; output discarded, stdout stays open
			
 
				+
			
 
				+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
			
 
				+
			
 
				+# Determine the Java command to use to start the JVM.
			
 
				+if [ -n "$JAVA_HOME" ] ; then
			
 
				+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
			
 
				+        # IBM's JDK on AIX uses strange locations for the executables
			
 
				+        JAVACMD="$JAVA_HOME/jre/sh/java"
			
 
				+    else
			
 
				+        JAVACMD="$JAVA_HOME/bin/java"
			
 
				+    fi
			
 
				+    if [ ! -x "$JAVACMD" ] ; then
			
 
				+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
			
 
				+
			
 
				+Please set the JAVA_HOME variable in your environment to match the
			
 
				+location of your Java installation."
			
 
				+    fi
			
 
				+else
			
 
				+    JAVACMD="java"
			
 
				+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
			
 
				+
			
 
				+Please set the JAVA_HOME variable in your environment to match the
			
 
				+location of your Java installation."
			
 
				+fi
			
 
				+
			
 
				+# Increase the maximum file descriptors if we can.
			
 
				+if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
			
 
				+    MAX_FD_LIMIT=`ulimit -H -n`
			
 
				+    if [ $? -eq 0 ] ; then
			
 
				+        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
			
 
				+            MAX_FD="$MAX_FD_LIMIT"
			
 
				+        fi
			
 
				+        ulimit -n $MAX_FD
			
 
				+        if [ $? -ne 0 ] ; then
			
 
				+            warn "Could not set maximum file descriptor limit: $MAX_FD"
			
 
				+        fi
			
 
				+    else
			
 
				+        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
			
 
				+    fi
			
 
				+fi
			
 
				+
			
 
				+# For Darwin, add options to specify how the application appears in the dock
			
 
				+if $darwin; then
			
 
				+    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
			
 
				+fi
			
 
				+
			
 
				+# For Cygwin, switch paths to Windows format before running java
			
 
				+if $cygwin ; then
			
 
				+    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
			
 
				+    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
			
 
				+
			
 
				+    # We build the pattern for arguments to be converted via cygpath
			
 
				+    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
			
 
				+    SEP=""
			
 
				+    for dir in $ROOTDIRSRAW ; do
			
 
				+        ROOTDIRS="$ROOTDIRS$SEP$dir"
			
 
				+        SEP="|"
			
 
				+    done
			
 
				+    OURCYGPATTERN="(^($ROOTDIRS))"
			
 
				+    # Add a user-defined pattern to the cygpath arguments
			
 
				+    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
			
 
				+        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
			
 
				+    fi
			
 
				+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
			
 
				+    i=0
			
 
				+    for arg in "$@" ; do
			
 
				+        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
			
 
				+        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
			
 
				+
			
 
				+        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
			
 
				+            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
			
 
				+        else
			
 
				+            eval `echo args$i`="\"$arg\""
			
 
				+        fi
			
 
				+        i=$((i+1))
			
 
				+    done
			
 
				+    case $i in
			
 
				+        (0) set -- ;;
			
 
				+        (1) set -- "$args0" ;;
			
 
				+        (2) set -- "$args0" "$args1" ;;
			
 
				+        (3) set -- "$args0" "$args1" "$args2" ;;
			
 
				+        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
			
 
				+        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
			
 
				+        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
			
 
				+        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
			
 
				+        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
			
 
				+        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
			
 
				+    esac
			
 
				+fi
			
 
				+
			
 
				+# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
			
 
				+function splitJvmOpts() {
			
 
				+    JVM_OPTS=("$@")
			
 
				+}
			
 
				+eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
			
 
				+JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
			
 
				+
			
 
				+exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
			
--- a/src/gradlew.bat
+++ b/src/gradlew.bat
@@ -0,0 +1,90 @@
 
				+@if "%DEBUG%" == "" @echo off
			
 
				+@rem ##########################################################################
			
 
				+@rem
			
 
				+@rem  Gradle startup script for Windows
			
 
				+@rem
			
 
				+@rem ##########################################################################
			
 
				+
			
 
				+@rem Set local scope for the variables with windows NT shell
			
 
				+if "%OS%"=="Windows_NT" setlocal
			
 
				+
			
 
				+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
			
 
				+set DEFAULT_JVM_OPTS=
			
 
				+
			
 
				+set DIRNAME=%~dp0
			
 
				+if "%DIRNAME%" == "" set DIRNAME=.
			
 
				+set APP_BASE_NAME=%~n0
			
 
				+set APP_HOME=%DIRNAME%
			
 
				+
			
 
				+@rem Find java.exe
			
 
				+if defined JAVA_HOME goto findJavaFromJavaHome
			
 
				+
			
 
				+set JAVA_EXE=java.exe
			
 
				+%JAVA_EXE% -version >NUL 2>&1
			
 
				+if "%ERRORLEVEL%" == "0" goto init
			
 
				+
			
 
				+echo.
			
 
				+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
			
 
				+echo.
			
 
				+echo Please set the JAVA_HOME variable in your environment to match the
			
 
				+echo location of your Java installation.
			
 
				+
			
 
				+goto fail
			
 
				+
			
 
				+:findJavaFromJavaHome
			
 
				+set JAVA_HOME=%JAVA_HOME:"=%
			
 
				+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
			
 
				+
			
 
				+if exist "%JAVA_EXE%" goto init
			
 
				+
			
 
				+echo.
			
 
				+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
			
 
				+echo.
			
 
				+echo Please set the JAVA_HOME variable in your environment to match the
			
 
				+echo location of your Java installation.
			
 
				+
			
 
				+goto fail
			
 
				+
			
 
				+:init
			
 
				+@rem Get command-line arguments, handling Windows variants
			
 
				+
			
 
				+if not "%OS%" == "Windows_NT" goto win9xME_args
			
 
				+if "%@eval[2+2]" == "4" goto 4NT_args
			
 
				+
			
 
				+:win9xME_args
			
 
				+@rem Slurp the command line arguments.
			
 
				+set CMD_LINE_ARGS=
			
 
				+set _SKIP=2
			
 
				+
			
 
				+:win9xME_args_slurp
			
 
				+if "x%~1" == "x" goto execute
			
 
				+
			
 
				+set CMD_LINE_ARGS=%*
			
 
				+goto execute
			
 
				+
			
 
				+:4NT_args
			
 
				+@rem Get arguments from the 4NT Shell from JP Software
			
 
				+set CMD_LINE_ARGS=%$
			
 
				+
			
 
				+:execute
			
 
				+@rem Setup the command line
			
 
				+
			
 
				+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
			
 
				+
			
 
				+@rem Execute Gradle
			
 
				+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
			
 
				+
			
 
				+:end
			
 
				+@rem End local scope for the variables with windows NT shell
			
 
				+if "%ERRORLEVEL%"=="0" goto mainEnd
			
 
				+
			
 
				+:fail
			
 
				+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
			
 
				+rem the _cmd.exe /c_ return code!
			
 
				+if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
			
 
				+exit /b 1
			
 
				+
			
 
				+:mainEnd
			
 
				+if "%OS%"=="Windows_NT" endlocal
			
 
				+
			
 
				+:omega
			
--- a/src/jni/Android.mk
+++ b/src/jni/Android.mk
@@ -0,0 +1,19 @@
 
				+JNI_PATH := $(call my-dir)
			
 
				+include $(JNI_PATH)/libyuv/Android.mk
			
 
				+
			
 
				+include $(CLEAR_VARS)
			
 
				+LOCAL_ARM_MODE := arm
			
 
				+LOCAL_MODULE := libyuv_util
			
 
				+LOCAL_MODULE_TAGS := optional
			
 
				+
			
 
				+LOCAL_SRC_FILES := $(JNI_PATH)/yuv_util/yuv_util.cc
			
 
				+
			
 
				+
			
 
				+
			
 
				+LOCAL_C_INCLUDES += $(JNI_PATH)/libyuv/include
			
 
				+
			
 
				+LOCAL_SHARED_LIBRARIES := \
			
 
				+					libyuv
			
 
				+
			
 
				+
			
 
				+include $(BUILD_SHARED_LIBRARY)
			
--- a/src/jni/Application.mk
+++ b/src/jni/Application.mk
@@ -0,0 +1,4 @@
 
				+APP_STL := c++_static
			
 
				+APP_ABI := armeabi-v7a arm64-v8a x86
			
 
				+APP_PLATFORM := android-21
			
 
				+APP_ALLOW_MISSING_DEPS=true
			
--- a/src/jni/libyuv/Android.mk
+++ b/src/jni/libyuv/Android.mk
@@ -0,0 +1,72 @@
 
				+# This is the Android makefile for libyuv for both platform and NDK.
			
 
				+LOCAL_PATH:= $(call my-dir)
			
 
				+
			
 
				+include $(CLEAR_VARS)
			
 
				+
			
 
				+LOCAL_CPP_EXTENSION := .cc
			
 
				+
			
 
				+LOCAL_SRC_FILES := \
			
 
				+    source/compare.cc           \
			
 
				+    source/compare_common.cc    \
			
 
				+    source/convert.cc           \
			
 
				+    source/convert_argb.cc      \
			
 
				+    source/convert_from.cc      \
			
 
				+    source/convert_from_argb.cc \
			
 
				+    source/convert_to_argb.cc   \
			
 
				+    source/convert_to_i420.cc   \
			
 
				+    source/cpu_id.cc            \
			
 
				+    source/planar_functions.cc  \
			
 
				+    source/rotate.cc            \
			
 
				+    source/rotate_any.cc        \
			
 
				+    source/rotate_argb.cc       \
			
 
				+    source/rotate_common.cc     \
			
 
				+    source/row_any.cc           \
			
 
				+    source/row_common.cc        \
			
 
				+    source/scale.cc             \
			
 
				+    source/scale_any.cc         \
			
 
				+    source/scale_argb.cc        \
			
 
				+    source/scale_common.cc      \
			
 
				+    source/video_common.cc
			
 
				+
			
 
				+ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
			
 
				+    LOCAL_ARM_NEON := true
			
 
				+    LOCAL_CFLAGS += -DLIBYUV_NEON
			
 
				+    LOCAL_SRC_FILES += \
			
 
				+        source/compare_neon.cc.neon    \
			
 
				+        source/rotate_neon.cc.neon     \
			
 
				+        source/row_neon.cc.neon        \
			
 
				+        source/scale_neon.cc.neon
			
 
				+endif
			
 
				+
			
 
				+ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
			
 
				+    LOCAL_ARM_NEON := true
			
 
				+    LOCAL_CFLAGS += -DLIBYUV_NEON
			
 
				+    LOCAL_SRC_FILES += \
			
 
				+        source/compare_neon64.cc    \
			
 
				+        source/rotate_neon64.cc     \
			
 
				+        source/row_neon64.cc        \
			
 
				+        source/scale_neon64.cc 
			
 
				+endif
			
 
				+
			
 
				+ifeq ($(TARGET_ARCH_ABI),$(filter $(TARGET_ARCH_ABI), x86 x86_64))
			
 
				+    LOCAL_SRC_FILES += \
			
 
				+        source/compare_gcc.cc       \
			
 
				+        source/rotate_gcc.cc        \
			
 
				+        source/row_gcc.cc           \
			
 
				+        source/scale_gcc.cc
			
 
				+endif
			
 
				+
			
 
				+ifeq ($(TARGET_ARCH_ABI),$(filter $(TARGET_ARCH_ABI), mips mips64))
			
 
				+    LOCAL_SRC_FILES += \
			
 
				+        source/rotate_mips.cc        \
			
 
				+        source/row_mips.cc           \
			
 
				+        source/scale_mips.cc
			
 
				+endif
			
 
				+
			
 
				+LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include
			
 
				+LOCAL_C_INCLUDES += $(LOCAL_PATH)/include
			
 
				+LOCAL_EXPORT_LDLIBS := -llog
			
 
				+LOCAL_MODULE := libyuv
			
 
				+
			
 
				+include $(BUILD_SHARED_LIBRARY)
			
 
				+# include $(BUILD_STATIC_LIBRARY)
			
--- a/src/jni/libyuv/LICENSE
+++ b/src/jni/libyuv/LICENSE
@@ -0,0 +1,29 @@
 
				+Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+
			
 
				+Redistribution and use in source and binary forms, with or without
			
 
				+modification, are permitted provided that the following conditions are
			
 
				+met:
			
 
				+
			
 
				+  * Redistributions of source code must retain the above copyright
			
 
				+    notice, this list of conditions and the following disclaimer.
			
 
				+
			
 
				+  * Redistributions in binary form must reproduce the above copyright
			
 
				+    notice, this list of conditions and the following disclaimer in
			
 
				+    the documentation and/or other materials provided with the
			
 
				+    distribution.
			
 
				+
			
 
				+  * Neither the name of Google nor the names of its contributors may
			
 
				+    be used to endorse or promote products derived from this software
			
 
				+    without specific prior written permission.
			
 
				+
			
 
				+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
--- a/src/jni/libyuv/include/libyuv.h
+++ b/src/jni/libyuv/include/libyuv.h
@@ -0,0 +1,32 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+#include "libyuv/compare.h"
			
 
				+#include "libyuv/convert.h"
			
 
				+#include "libyuv/convert_argb.h"
			
 
				+#include "libyuv/convert_from.h"
			
 
				+#include "libyuv/convert_from_argb.h"
			
 
				+#include "libyuv/cpu_id.h"
			
 
				+#include "libyuv/mjpeg_decoder.h"
			
 
				+#include "libyuv/planar_functions.h"
			
 
				+#include "libyuv/rotate.h"
			
 
				+#include "libyuv/rotate_argb.h"
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/scale.h"
			
 
				+#include "libyuv/scale_argb.h"
			
 
				+#include "libyuv/scale_row.h"
			
 
				+#include "libyuv/version.h"
			
 
				+#include "libyuv/video_common.h"
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/basic_types.h
+++ b/src/jni/libyuv/include/libyuv/basic_types.h
@@ -0,0 +1,118 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_BASIC_TYPES_H_
			
 
				+
			
 
				+#include <stddef.h>  // for NULL, size_t
			
 
				+
			
 
				+#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600))
			
 
				+#include <sys/types.h>  // for uintptr_t on x86
			
 
				+#else
			
 
				+#include <stdint.h>  // for uintptr_t
			
 
				+#endif
			
 
				+
			
 
				+#ifndef GG_LONGLONG
			
 
				+#ifndef INT_TYPES_DEFINED
			
 
				+#define INT_TYPES_DEFINED
			
 
				+#ifdef COMPILER_MSVC
			
 
				+typedef unsigned __int64 uint64;
			
 
				+typedef __int64 int64;
			
 
				+#ifndef INT64_C
			
 
				+#define INT64_C(x) x ## I64
			
 
				+#endif
			
 
				+#ifndef UINT64_C
			
 
				+#define UINT64_C(x) x ## UI64
			
 
				+#endif
			
 
				+#define INT64_F "I64"
			
 
				+#else  // COMPILER_MSVC
			
 
				+#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
			
 
				+typedef unsigned long uint64;  // NOLINT
			
 
				+typedef long int64;  // NOLINT
			
 
				+#ifndef INT64_C
			
 
				+#define INT64_C(x) x ## L
			
 
				+#endif
			
 
				+#ifndef UINT64_C
			
 
				+#define UINT64_C(x) x ## UL
			
 
				+#endif
			
 
				+#define INT64_F "l"
			
 
				+#else  // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
			
 
				+typedef unsigned long long uint64;  // NOLINT
			
 
				+typedef long long int64;  // NOLINT
			
 
				+#ifndef INT64_C
			
 
				+#define INT64_C(x) x ## LL
			
 
				+#endif
			
 
				+#ifndef UINT64_C
			
 
				+#define UINT64_C(x) x ## ULL
			
 
				+#endif
			
 
				+#define INT64_F "ll"
			
 
				+#endif  // __LP64__
			
 
				+#endif  // COMPILER_MSVC
			
 
				+typedef unsigned int uint32;
			
 
				+typedef int int32;
			
 
				+typedef unsigned short uint16;  // NOLINT
			
 
				+typedef short int16;  // NOLINT
			
 
				+typedef unsigned char uint8;
			
 
				+typedef signed char int8;
			
 
				+#endif  // INT_TYPES_DEFINED
			
 
				+#endif  // GG_LONGLONG
			
 
				+
			
 
				+// Detect compiler is for x86 or x64.
			
 
				+#if defined(__x86_64__) || defined(_M_X64) || \
			
 
				+    defined(__i386__) || defined(_M_IX86)
			
 
				+#define CPU_X86 1
			
 
				+#endif
			
 
				+// Detect compiler is for ARM.
			
 
				+#if defined(__arm__) || defined(_M_ARM)
			
 
				+#define CPU_ARM 1
			
 
				+#endif
			
 
				+
			
 
				+#ifndef ALIGNP
			
 
				+#ifdef __cplusplus
			
 
				+#define ALIGNP(p, t) \
			
 
				+    (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
			
 
				+    ((t) - 1)) & ~((t) - 1))))
			
 
				+#else
			
 
				+#define ALIGNP(p, t) \
			
 
				+    ((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1))))  /* NOLINT */
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#if !defined(LIBYUV_API)
			
 
				+#if defined(_WIN32) || defined(__CYGWIN__)
			
 
				+#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
			
 
				+#define LIBYUV_API __declspec(dllexport)
			
 
				+#elif defined(LIBYUV_USING_SHARED_LIBRARY)
			
 
				+#define LIBYUV_API __declspec(dllimport)
			
 
				+#else
			
 
				+#define LIBYUV_API
			
 
				+#endif  // LIBYUV_BUILDING_SHARED_LIBRARY
			
 
				+#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
			
 
				+    (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
			
 
				+    defined(LIBYUV_USING_SHARED_LIBRARY))
			
 
				+#define LIBYUV_API __attribute__ ((visibility ("default")))
			
 
				+#else
			
 
				+#define LIBYUV_API
			
 
				+#endif  // __GNUC__
			
 
				+#endif  // LIBYUV_API
			
 
				+
			
 
				+#define LIBYUV_BOOL int
			
 
				+#define LIBYUV_FALSE 0
			
 
				+#define LIBYUV_TRUE 1
			
 
				+
			
 
				+// Visual C x86 or GCC little endian.
			
 
				+#if defined(__x86_64__) || defined(_M_X64) || \
			
 
				+  defined(__i386__) || defined(_M_IX86) || \
			
 
				+  defined(__arm__) || defined(_M_ARM) || \
			
 
				+  (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
			
 
				+#define LIBYUV_LITTLE_ENDIAN
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_BASIC_TYPES_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/compare.h
+++ b/src/jni/libyuv/include/libyuv/compare.h
@@ -0,0 +1,78 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_COMPARE_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_COMPARE_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Compute a hash for specified memory. Seed of 5381 recommended.
			
 
				+LIBYUV_API
			
 
				+uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
			
 
				+
			
 
				+// Scan an opaque argb image and return fourcc based on alpha offset.
			
 
				+// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
			
 
				+LIBYUV_API
			
 
				+uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
			
 
				+
			
 
				+// Sum Square Error - used to compute Mean Square Error or PSNR.
			
 
				+LIBYUV_API
			
 
				+uint64 ComputeSumSquareError(const uint8* src_a,
			
 
				+                             const uint8* src_b, int count);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
			
 
				+                                  const uint8* src_b, int stride_b,
			
 
				+                                  int width, int height);
			
 
				+
			
 
				+static const int kMaxPsnr = 128;
			
 
				+
			
 
				+LIBYUV_API
			
 
				+double SumSquareErrorToPsnr(uint64 sse, uint64 count);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+double CalcFramePsnr(const uint8* src_a, int stride_a,
			
 
				+                     const uint8* src_b, int stride_b,
			
 
				+                     int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+double I420Psnr(const uint8* src_y_a, int stride_y_a,
			
 
				+                const uint8* src_u_a, int stride_u_a,
			
 
				+                const uint8* src_v_a, int stride_v_a,
			
 
				+                const uint8* src_y_b, int stride_y_b,
			
 
				+                const uint8* src_u_b, int stride_u_b,
			
 
				+                const uint8* src_v_b, int stride_v_b,
			
 
				+                int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+double CalcFrameSsim(const uint8* src_a, int stride_a,
			
 
				+                     const uint8* src_b, int stride_b,
			
 
				+                     int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+double I420Ssim(const uint8* src_y_a, int stride_y_a,
			
 
				+                const uint8* src_u_a, int stride_u_a,
			
 
				+                const uint8* src_v_a, int stride_v_a,
			
 
				+                const uint8* src_y_b, int stride_y_b,
			
 
				+                const uint8* src_u_b, int stride_u_b,
			
 
				+                const uint8* src_v_b, int stride_v_b,
			
 
				+                int width, int height);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_COMPARE_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/compare_row.h
+++ b/src/jni/libyuv/include/libyuv/compare_row.h
@@ -0,0 +1,84 @@
 
				+/*
			
 
				+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_COMPARE_ROW_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_COMPARE_ROW_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__pnacl__) || defined(__CLR_VER) || \
			
 
				+    (defined(__i386__) && !defined(__SSE2__))
			
 
				+#define LIBYUV_DISABLE_X86
			
 
				+#endif
			
 
				+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
			
 
				+#if defined(__has_feature)
			
 
				+#if __has_feature(memory_sanitizer)
			
 
				+#define LIBYUV_DISABLE_X86
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+// Visual C 2012 required for AVX2.
			
 
				+#if defined(_M_IX86) && !defined(__clang__) && \
			
 
				+    defined(_MSC_VER) && _MSC_VER >= 1700
			
 
				+#define VISUALC_HAS_AVX2 1
			
 
				+#endif  // VisualStudio >= 2012
			
 
				+
			
 
				+// clang >= 3.4.0 required for AVX2.
			
 
				+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
			
 
				+#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
			
 
				+#define CLANG_HAS_AVX2 1
			
 
				+#endif  // clang >= 3.4
			
 
				+#endif  // __clang__
			
 
				+
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && \
			
 
				+    defined(_M_IX86) && (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
			
 
				+#define HAS_HASHDJB2_AVX2
			
 
				+#endif
			
 
				+
			
 
				+// The following are available for Visual C and GCC:
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && \
			
 
				+    (defined(__x86_64__) || (defined(__i386__) || defined(_M_IX86)))
			
 
				+#define HAS_HASHDJB2_SSE41
			
 
				+#define HAS_SUMSQUAREERROR_SSE2
			
 
				+#endif
			
 
				+
			
 
				+// The following are available for Visual C and clangcl 32 bit:
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
			
 
				+    (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
			
 
				+#define HAS_HASHDJB2_AVX2
			
 
				+#define HAS_SUMSQUAREERROR_AVX2
			
 
				+#endif
			
 
				+
			
 
				+// The following are available for Neon:
			
 
				+#if !defined(LIBYUV_DISABLE_NEON) && \
			
 
				+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
			
 
				+#define HAS_SUMSQUAREERROR_NEON
			
 
				+#endif
			
 
				+
			
 
				+uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
			
 
				+uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
			
 
				+uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
			
 
				+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
			
 
				+
			
 
				+uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
			
 
				+uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
			
 
				+uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_COMPARE_ROW_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/convert.h
+++ b/src/jni/libyuv/include/libyuv/convert.h
@@ -0,0 +1,245 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_CONVERT_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_CONVERT_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+// TODO(fbarchard): Remove the following header includes.
			
 
				+#include "libyuv/convert_from.h"
			
 
				+#include "libyuv/planar_functions.h"
			
 
				+#include "libyuv/rotate.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Convert I444 to I420.
			
 
				+LIBYUV_API
			
 
				+int I444ToI420(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I422 to I420.
			
 
				+LIBYUV_API
			
 
				+int I422ToI420(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I411 to I420.
			
 
				+LIBYUV_API
			
 
				+int I411ToI420(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Copy I420 to I420.
			
 
				+#define I420ToI420 I420Copy
			
 
				+LIBYUV_API
			
 
				+int I420Copy(const uint8* src_y, int src_stride_y,
			
 
				+             const uint8* src_u, int src_stride_u,
			
 
				+             const uint8* src_v, int src_stride_v,
			
 
				+             uint8* dst_y, int dst_stride_y,
			
 
				+             uint8* dst_u, int dst_stride_u,
			
 
				+             uint8* dst_v, int dst_stride_v,
			
 
				+             int width, int height);
			
 
				+
			
 
				+// Convert I400 (grey) to I420.
			
 
				+LIBYUV_API
			
 
				+int I400ToI420(const uint8* src_y, int src_stride_y,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+#define J400ToJ420 I400ToI420
			
 
				+
			
 
				+// Convert NV12 to I420.
			
 
				+LIBYUV_API
			
 
				+int NV12ToI420(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_uv, int src_stride_uv,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert NV21 to I420.
			
 
				+LIBYUV_API
			
 
				+int NV21ToI420(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_vu, int src_stride_vu,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert YUY2 to I420.
			
 
				+LIBYUV_API
			
 
				+int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert UYVY to I420.
			
 
				+LIBYUV_API
			
 
				+int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert M420 to I420.
			
 
				+LIBYUV_API
			
 
				+int M420ToI420(const uint8* src_m420, int src_stride_m420,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// ARGB little endian (bgra in memory) to I420.
			
 
				+LIBYUV_API
			
 
				+int ARGBToI420(const uint8* src_frame, int src_stride_frame,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// BGRA little endian (argb in memory) to I420.
			
 
				+LIBYUV_API
			
 
				+int BGRAToI420(const uint8* src_frame, int src_stride_frame,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// ABGR little endian (rgba in memory) to I420.
			
 
				+LIBYUV_API
			
 
				+int ABGRToI420(const uint8* src_frame, int src_stride_frame,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// RGBA little endian (abgr in memory) to I420.
			
 
				+LIBYUV_API
			
 
				+int RGBAToI420(const uint8* src_frame, int src_stride_frame,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// RGB little endian (bgr in memory) to I420.
			
 
				+LIBYUV_API
			
 
				+int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
			
 
				+                uint8* dst_y, int dst_stride_y,
			
 
				+                uint8* dst_u, int dst_stride_u,
			
 
				+                uint8* dst_v, int dst_stride_v,
			
 
				+                int width, int height);
			
 
				+
			
 
				+// RGB big endian (rgb in memory) to I420.
			
 
				+LIBYUV_API
			
 
				+int RAWToI420(const uint8* src_frame, int src_stride_frame,
			
 
				+              uint8* dst_y, int dst_stride_y,
			
 
				+              uint8* dst_u, int dst_stride_u,
			
 
				+              uint8* dst_v, int dst_stride_v,
			
 
				+              int width, int height);
			
 
				+
			
 
				+// RGB16 (RGBP fourcc) little endian to I420.
			
 
				+LIBYUV_API
			
 
				+int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
			
 
				+                 uint8* dst_y, int dst_stride_y,
			
 
				+                 uint8* dst_u, int dst_stride_u,
			
 
				+                 uint8* dst_v, int dst_stride_v,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+// RGB15 (RGBO fourcc) little endian to I420.
			
 
				+LIBYUV_API
			
 
				+int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
			
 
				+                   uint8* dst_y, int dst_stride_y,
			
 
				+                   uint8* dst_u, int dst_stride_u,
			
 
				+                   uint8* dst_v, int dst_stride_v,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+// RGB12 (R444 fourcc) little endian to I420.
			
 
				+LIBYUV_API
			
 
				+int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
			
 
				+                   uint8* dst_y, int dst_stride_y,
			
 
				+                   uint8* dst_u, int dst_stride_u,
			
 
				+                   uint8* dst_v, int dst_stride_v,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+#ifdef HAVE_JPEG
			
 
				+// src_width/height provided by capture.
			
 
				+// dst_width/height for clipping determine final size.
			
 
				+LIBYUV_API
			
 
				+int MJPGToI420(const uint8* sample, size_t sample_size,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int src_width, int src_height,
			
 
				+               int dst_width, int dst_height);
			
 
				+
			
 
				+// Query size of MJPG in pixels.
			
 
				+LIBYUV_API
			
 
				+int MJPGSize(const uint8* sample, size_t sample_size,
			
 
				+             int* width, int* height);
			
 
				+#endif
			
 
				+
			
 
				+// Convert camera sample to I420 with cropping, rotation and vertical flip.
			
 
				+// "src_size" is needed to parse MJPG.
			
 
				+// "dst_stride_y" number of bytes in a row of the dst_y plane.
			
 
				+//   Normally this would be the same as dst_width, with recommended alignment
			
 
				+//   to 16 bytes for better efficiency.
			
 
				+//   If rotation of 90 or 270 is used, stride is affected. The caller should
			
 
				+//   allocate the I420 buffer according to rotation.
			
 
				+// "dst_stride_u" number of bytes in a row of the dst_u plane.
			
 
				+//   Normally this would be the same as (dst_width + 1) / 2, with
			
 
				+//   recommended alignment to 16 bytes for better efficiency.
			
 
				+//   If rotation of 90 or 270 is used, stride is affected.
			
 
				+// "crop_x" and "crop_y" are starting position for cropping.
			
 
				+//   To center, crop_x = (src_width - dst_width) / 2
			
 
				+//              crop_y = (src_height - dst_height) / 2
			
 
				+// "src_width" / "src_height" is size of src_frame in pixels.
			
 
				+//   "src_height" can be negative indicating a vertically flipped image source.
			
 
				+// "crop_width" / "crop_height" is the size to crop the src to.
			
 
				+//    Must be less than or equal to src_width/src_height
			
 
				+//    Cropping parameters are pre-rotation.
			
 
				+// "rotation" can be 0, 90, 180 or 270.
			
 
				+// "format" is a fourcc, e.g. 'I420', 'YUY2'.
			
 
				+// Returns 0 on success; -1 for an invalid parameter; non-zero on any failure.
			
 
				+LIBYUV_API
			
 
				+int ConvertToI420(const uint8* src_frame, size_t src_size,
			
 
				+                  uint8* dst_y, int dst_stride_y,
			
 
				+                  uint8* dst_u, int dst_stride_u,
			
 
				+                  uint8* dst_v, int dst_stride_v,
			
 
				+                  int crop_x, int crop_y,
			
 
				+                  int src_width, int src_height,
			
 
				+                  int crop_width, int crop_height,
			
 
				+                  enum RotationMode rotation,
			
 
				+                  uint32 format);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_CONVERT_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/convert_argb.h
+++ b/src/jni/libyuv/include/libyuv/convert_argb.h
@@ -0,0 +1,313 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+// TODO(fbarchard): Remove the following header includes.
			
 
				+#include "libyuv/convert_from.h"
			
 
				+#include "libyuv/planar_functions.h"
			
 
				+#include "libyuv/rotate.h"
			
 
				+
			
 
				+// TODO(fbarchard): This set of functions should exactly match convert.h
			
 
				+// TODO(fbarchard): Add tests. Create random content of right size and convert
			
 
				+// with C vs Opt and or to I420 and compare.
			
 
				+// TODO(fbarchard): Some of these functions lack parameter setting.
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Alias.
			
 
				+#define ARGBToARGB ARGBCopy
			
 
				+
			
 
				+// Copy ARGB to ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGBCopy(const uint8* src_argb, int src_stride_argb,
			
 
				+             uint8* dst_argb, int dst_stride_argb,
			
 
				+             int width, int height);
			
 
				+
			
 
				+// Convert I420 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int I420ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I422 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int I422ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I444 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int I444ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert J444 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int J444ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I444 to ABGR.
			
 
				+LIBYUV_API
			
 
				+int I444ToABGR(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_abgr, int dst_stride_abgr,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I411 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int I411ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I420 with Alpha to preattenuated ARGB.
			
 
				+LIBYUV_API
			
 
				+int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
			
 
				+                    const uint8* src_u, int src_stride_u,
			
 
				+                    const uint8* src_v, int src_stride_v,
			
 
				+                    const uint8* src_a, int src_stride_a,
			
 
				+                    uint8* dst_argb, int dst_stride_argb,
			
 
				+                    int width, int height, int attenuate);
			
 
				+
			
 
				+// Convert I420 with Alpha to preattenuated ABGR.
			
 
				+LIBYUV_API
			
 
				+int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
			
 
				+                    const uint8* src_u, int src_stride_u,
			
 
				+                    const uint8* src_v, int src_stride_v,
			
 
				+                    const uint8* src_a, int src_stride_a,
			
 
				+                    uint8* dst_abgr, int dst_stride_abgr,
			
 
				+                    int width, int height, int attenuate);
			
 
				+
			
 
				+// Convert I400 (grey) to ARGB.  Reverse of ARGBToI400.
			
 
				+LIBYUV_API
			
 
				+int I400ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert J400 (jpeg grey) to ARGB.
			
 
				+LIBYUV_API
			
 
				+int J400ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Alias.
			
 
				+#define YToARGB I400ToARGB
			
 
				+
			
 
				+// Convert NV12 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int NV12ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_uv, int src_stride_uv,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert NV21 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int NV21ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_vu, int src_stride_vu,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert M420 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int M420ToARGB(const uint8* src_m420, int src_stride_m420,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert YUY2 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert UYVY to ARGB.
			
 
				+LIBYUV_API
			
 
				+int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert J420 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int J420ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert J422 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int J422ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert J420 to ABGR.
			
 
				+LIBYUV_API
			
 
				+int J420ToABGR(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_abgr, int dst_stride_abgr,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert J422 to ABGR.
			
 
				+LIBYUV_API
			
 
				+int J422ToABGR(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_abgr, int dst_stride_abgr,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert H420 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int H420ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert H422 to ARGB.
			
 
				+LIBYUV_API
			
 
				+int H422ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert H420 to ABGR.
			
 
				+LIBYUV_API
			
 
				+int H420ToABGR(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_abgr, int dst_stride_abgr,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert H422 to ABGR.
			
 
				+LIBYUV_API
			
 
				+int H422ToABGR(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_abgr, int dst_stride_abgr,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// BGRA little endian (argb in memory) to ARGB.
			
 
				+LIBYUV_API
			
 
				+int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// ABGR little endian (rgba in memory) to ARGB.
			
 
				+LIBYUV_API
			
 
				+int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// RGBA little endian (abgr in memory) to ARGB.
			
 
				+LIBYUV_API
			
 
				+int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Deprecated function name.
			
 
				+#define BG24ToARGB RGB24ToARGB
			
 
				+
			
 
				+// RGB little endian (bgr in memory) to ARGB.
			
 
				+LIBYUV_API
			
 
				+int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
			
 
				+                uint8* dst_argb, int dst_stride_argb,
			
 
				+                int width, int height);
			
 
				+
			
 
				+// RGB big endian (rgb in memory) to ARGB.
			
 
				+LIBYUV_API
			
 
				+int RAWToARGB(const uint8* src_frame, int src_stride_frame,
			
 
				+              uint8* dst_argb, int dst_stride_argb,
			
 
				+              int width, int height);
			
 
				+
			
 
				+// RGB16 (RGBP fourcc) little endian to ARGB.
			
 
				+LIBYUV_API
			
 
				+int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
			
 
				+                 uint8* dst_argb, int dst_stride_argb,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+// RGB15 (RGBO fourcc) little endian to ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
			
 
				+                   uint8* dst_argb, int dst_stride_argb,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+// RGB12 (R444 fourcc) little endian to ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
			
 
				+                   uint8* dst_argb, int dst_stride_argb,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+#ifdef HAVE_JPEG
			
 
				+// src_width/height provided by capture
			
 
				+// dst_width/height for clipping determine final size.
			
 
				+LIBYUV_API
			
 
				+int MJPGToARGB(const uint8* sample, size_t sample_size,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int src_width, int src_height,
			
 
				+               int dst_width, int dst_height);
			
 
				+#endif
			
 
				+
			
 
				+// Convert camera sample to ARGB with cropping, rotation and vertical flip.
			
 
				+// "src_size" is needed to parse MJPG.
			
 
				+// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
			
 
				+//   Normally this would be the same as dst_width * 4, with recommended alignment
			
 
				+//   to 16 bytes for better efficiency.
			
 
				+//   If rotation of 90 or 270 is used, stride is affected. The caller should
			
 
				+//   allocate the ARGB buffer according to rotation.
			
 
				+// "dst_stride_u" (ConvertToI420 only; not a parameter here): bytes in a row of dst_u.
			
 
				+//   Normally this would be the same as (dst_width + 1) / 2, with
			
 
				+//   recommended alignment to 16 bytes for better efficiency.
			
 
				+//   If rotation of 90 or 270 is used, stride is affected.
			
 
				+// "crop_x" and "crop_y" are starting position for cropping.
			
 
				+//   To center, crop_x = (src_width - dst_width) / 2
			
 
				+//              crop_y = (src_height - dst_height) / 2
			
 
				+// "src_width" / "src_height" is size of src_frame in pixels.
			
 
				+//   "src_height" can be negative indicating a vertically flipped image source.
			
 
				+// "crop_width" / "crop_height" is the size to crop the src to.
			
 
				+//    Must be less than or equal to src_width/src_height
			
 
				+//    Cropping parameters are pre-rotation.
			
 
				+// "rotation" can be 0, 90, 180 or 270.
			
 
				+// "format" is a fourcc, e.g. 'I420', 'YUY2'.
			
 
				+// Returns 0 on success; -1 for an invalid parameter; non-zero on any failure.
			
 
				+LIBYUV_API
			
 
				+int ConvertToARGB(const uint8* src_frame, size_t src_size,
			
 
				+                  uint8* dst_argb, int dst_stride_argb,
			
 
				+                  int crop_x, int crop_y,
			
 
				+                  int src_width, int src_height,
			
 
				+                  int crop_width, int crop_height,
			
 
				+                  enum RotationMode rotation,
			
 
				+                  uint32 format);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_CONVERT_ARGB_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/convert_from.h
+++ b/src/jni/libyuv/include/libyuv/convert_from.h
@@ -0,0 +1,181 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_CONVERT_FROM_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+#include "libyuv/rotate.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// See Also convert.h for conversions from formats to I420.
			
 
				+
			
 
				+// See I420Copy in convert.h for I420ToI420.
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToI422(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToI444(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToI411(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
			
 
				+LIBYUV_API
			
 
				+int I400Copy(const uint8* src_y, int src_stride_y,
			
 
				+             uint8* dst_y, int dst_stride_y,
			
 
				+             int width, int height);
			
 
				+
			
 
				+// TODO(fbarchard): I420ToM420
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToNV12(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_uv, int dst_stride_uv,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToNV21(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_vu, int dst_stride_vu,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToYUY2(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_frame, int dst_stride_frame,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToUYVY(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_frame, int dst_stride_frame,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToARGB(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToBGRA(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToABGR(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToRGBA(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_rgba, int dst_stride_rgba,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToRGB24(const uint8* src_y, int src_stride_y,
			
 
				+                const uint8* src_u, int src_stride_u,
			
 
				+                const uint8* src_v, int src_stride_v,
			
 
				+                uint8* dst_frame, int dst_stride_frame,
			
 
				+                int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToRAW(const uint8* src_y, int src_stride_y,
			
 
				+              const uint8* src_u, int src_stride_u,
			
 
				+              const uint8* src_v, int src_stride_v,
			
 
				+              uint8* dst_frame, int dst_stride_frame,
			
 
				+              int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToRGB565(const uint8* src_y, int src_stride_y,
			
 
				+                 const uint8* src_u, int src_stride_u,
			
 
				+                 const uint8* src_v, int src_stride_v,
			
 
				+                 uint8* dst_frame, int dst_stride_frame,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
			
 
				+// Values in dither matrix from 0 to 7 recommended.
			
 
				+// The order of the dither matrix is first byte is upper left.
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
			
 
				+                       const uint8* src_u, int src_stride_u,
			
 
				+                       const uint8* src_v, int src_stride_v,
			
 
				+                       uint8* dst_frame, int dst_stride_frame,
			
 
				+                       const uint8* dither4x4, int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToARGB1555(const uint8* src_y, int src_stride_y,
			
 
				+                   const uint8* src_u, int src_stride_u,
			
 
				+                   const uint8* src_v, int src_stride_v,
			
 
				+                   uint8* dst_frame, int dst_stride_frame,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420ToARGB4444(const uint8* src_y, int src_stride_y,
			
 
				+                   const uint8* src_u, int src_stride_u,
			
 
				+                   const uint8* src_v, int src_stride_v,
			
 
				+                   uint8* dst_frame, int dst_stride_frame,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+// Convert I420 to specified format.
			
 
				+// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
			
 
				+//    buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
			
 
				+LIBYUV_API
			
 
				+int ConvertFromI420(const uint8* y, int y_stride,
			
 
				+                    const uint8* u, int u_stride,
			
 
				+                    const uint8* v, int v_stride,
			
 
				+                    uint8* dst_sample, int dst_sample_stride,
			
 
				+                    int width, int height,
			
 
				+                    uint32 format);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_CONVERT_FROM_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/convert_from_argb.h
+++ b/src/jni/libyuv/include/libyuv/convert_from_argb.h
@@ -0,0 +1,190 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Copy ARGB to ARGB.
			
 
				+#define ARGBToARGB ARGBCopy
			
 
				+LIBYUV_API
			
 
				+int ARGBCopy(const uint8* src_argb, int src_stride_argb,
			
 
				+             uint8* dst_argb, int dst_stride_argb,
			
 
				+             int width, int height);
			
 
				+
			
 
				+// Convert ARGB To BGRA.
			
 
				+LIBYUV_API
			
 
				+int ARGBToBGRA(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_bgra, int dst_stride_bgra,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To ABGR.
			
 
				+LIBYUV_API
			
 
				+int ARGBToABGR(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_abgr, int dst_stride_abgr,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To RGBA.
			
 
				+LIBYUV_API
			
 
				+int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_rgba, int dst_stride_rgba,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To RGB24.
			
 
				+LIBYUV_API
			
 
				+int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
			
 
				+                uint8* dst_rgb24, int dst_stride_rgb24,
			
 
				+                int width, int height);
			
 
				+
			
 
				+// Convert ARGB To RAW.
			
 
				+LIBYUV_API
			
 
				+int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
			
 
				+              uint8* dst_rgb, int dst_stride_rgb,
			
 
				+              int width, int height);
			
 
				+
			
 
				+// Convert ARGB To RGB565.
			
 
				+LIBYUV_API
			
 
				+int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
			
 
				+                 uint8* dst_rgb565, int dst_stride_rgb565,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
			
 
				+// Values in dither matrix from 0 to 7 recommended.
			
 
				+// The order of the dither matrix is first byte is upper left.
			
 
				+// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
			
 
				+// const uint8(*dither)[4][4];
			
 
				+LIBYUV_API
			
 
				+int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
			
 
				+                       uint8* dst_rgb565, int dst_stride_rgb565,
			
 
				+                       const uint8* dither4x4, int width, int height);
			
 
				+
			
 
				+// Convert ARGB To ARGB1555.
			
 
				+LIBYUV_API
			
 
				+int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
			
 
				+                   uint8* dst_argb1555, int dst_stride_argb1555,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+// Convert ARGB To ARGB4444.
			
 
				+LIBYUV_API
			
 
				+int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
			
 
				+                   uint8* dst_argb4444, int dst_stride_argb4444,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+// Convert ARGB To I444.
			
 
				+LIBYUV_API
			
 
				+int ARGBToI444(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To I422.
			
 
				+LIBYUV_API
			
 
				+int ARGBToI422(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To I420. (also in convert.h)
			
 
				+LIBYUV_API
			
 
				+int ARGBToI420(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB to J420. (JPeg full range I420).
			
 
				+LIBYUV_API
			
 
				+int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_yj, int dst_stride_yj,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB to J422.
			
 
				+LIBYUV_API
			
 
				+int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_yj, int dst_stride_yj,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To I411.
			
 
				+LIBYUV_API
			
 
				+int ARGBToI411(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB to J400. (JPeg full range).
			
 
				+LIBYUV_API
			
 
				+int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_yj, int dst_stride_yj,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB to I400.
			
 
				+LIBYUV_API
			
 
				+int ARGBToI400(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
			
 
				+LIBYUV_API
			
 
				+int ARGBToG(const uint8* src_argb, int src_stride_argb,
			
 
				+            uint8* dst_g, int dst_stride_g,
			
 
				+            int width, int height);
			
 
				+
			
 
				+// Convert ARGB To NV12.
			
 
				+LIBYUV_API
			
 
				+int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_uv, int dst_stride_uv,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To NV21.
			
 
				+LIBYUV_API
			
 
				+int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_vu, int dst_stride_vu,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To NV21. (Redundant repeat of the ARGBToNV21 prototype above.)
			
 
				+LIBYUV_API
			
 
				+int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_vu, int dst_stride_vu,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To YUY2.
			
 
				+LIBYUV_API
			
 
				+int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_yuy2, int dst_stride_yuy2,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert ARGB To UYVY.
			
 
				+LIBYUV_API
			
 
				+int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_uyvy, int dst_stride_uyvy,
			
 
				+               int width, int height);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/cpu_id.h
+++ b/src/jni/libyuv/include/libyuv/cpu_id.h
@@ -0,0 +1,80 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_CPU_ID_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_CPU_ID_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Internal flag to indicate cpuid has been initialized.
			
 
				+static const int kCpuInitialized = 0x1;
			
 
				+
			
 
				+// These flags are only valid on ARM processors.
			
 
				+static const int kCpuHasARM = 0x2;
			
 
				+static const int kCpuHasNEON = 0x4;
			
 
				+// 0x8 reserved for future ARM flag.
			
 
				+
			
 
				+// These flags are only valid on x86 processors.
			
 
				+static const int kCpuHasX86 = 0x10;
			
 
				+static const int kCpuHasSSE2 = 0x20;
			
 
				+static const int kCpuHasSSSE3 = 0x40;
			
 
				+static const int kCpuHasSSE41 = 0x80;
			
 
				+static const int kCpuHasSSE42 = 0x100;
			
 
				+static const int kCpuHasAVX = 0x200;
			
 
				+static const int kCpuHasAVX2 = 0x400;
			
 
				+static const int kCpuHasERMS = 0x800;
			
 
				+static const int kCpuHasFMA3 = 0x1000;
			
 
				+static const int kCpuHasAVX3 = 0x2000;
			
 
				+// 0x4000, 0x8000 reserved for future X86 flags.
			
 
				+
			
 
				+// These flags are only valid on MIPS processors.
			
 
				+static const int kCpuHasMIPS = 0x10000;
			
 
				+static const int kCpuHasDSPR2 = 0x20000;
			
 
				+
			
 
				+// Internal function used to auto-init.
			
 
				+LIBYUV_API
			
 
				+int InitCpuFlags(void);
			
 
				+
			
 
				+// Internal function for parsing /proc/cpuinfo.
			
 
				+LIBYUV_API
			
 
				+int ArmCpuCaps(const char* cpuinfo_name);
			
 
				+
			
 
				+// Detect CPU has SSE2 etc.
			
 
				+// Test_flag parameter should be one of kCpuHas constants above.
			
 
				+// returns non-zero if instruction set is detected
			
 
				+static __inline int TestCpuFlag(int test_flag) {
			
 
				+  LIBYUV_API extern int cpu_info_;
			
 
				+  return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
			
 
				+}
			
 
				+
			
 
				+// For testing, allow CPU flags to be disabled.
			
 
				+// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
			
 
				+// MaskCpuFlags(-1) to enable all cpu specific optimizations.
			
 
				+// MaskCpuFlags(0) to disable all cpu specific optimizations.
			
 
				+LIBYUV_API
			
 
				+void MaskCpuFlags(int enable_flags);
			
 
				+
			
 
				+// Low level cpuid for X86. Returns zeros on other CPUs.
			
 
				+// eax is the info type that you want.
			
 
				+// ecx is typically the cpu number, and should normally be zero.
			
 
				+LIBYUV_API
			
 
				+void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_CPU_ID_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/mjpeg_decoder.h
+++ b/src/jni/libyuv/include/libyuv/mjpeg_decoder.h
@@ -0,0 +1,192 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+// NOTE: For a simplified public API use convert.h MJPGToI420().
			
 
				+
			
 
				+struct jpeg_common_struct;
			
 
				+struct jpeg_decompress_struct;
			
 
				+struct jpeg_source_mgr;
			
 
				+
			
 
				+namespace libyuv {
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+#endif
			
 
				+
			
 
				+static const uint32 kUnknownDataSize = 0xFFFFFFFF;
			
 
				+
			
 
				+enum JpegSubsamplingType {
			
 
				+  kJpegYuv420,
			
 
				+  kJpegYuv422,
			
 
				+  kJpegYuv411,
			
 
				+  kJpegYuv444,
			
 
				+  kJpegYuv400,
			
 
				+  kJpegUnknown
			
 
				+};
			
 
				+
			
 
				+struct Buffer {
			
 
				+  const uint8* data;
			
 
				+  int len;
			
 
				+};
			
 
				+
			
 
				+struct BufferVector {
			
 
				+  Buffer* buffers;
			
 
				+  int len;
			
 
				+  int pos;
			
 
				+};
			
 
				+
			
 
				+struct SetJmpErrorMgr;
			
 
				+
			
 
				+// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
			
 
				+// simply independent JPEG images with a fixed huffman table (which is omitted).
			
 
				+// It is rarely used in video transmission, but is common as a camera capture
			
 
				+// format, especially in Logitech devices. This class implements a decoder for
			
 
				+// MJPEG frames.
			
 
				+//
			
 
				+// See http://tools.ietf.org/html/rfc2435
			
 
				+class LIBYUV_API MJpegDecoder {
			
 
				+ public:
			
 
				+  typedef void (*CallbackFunction)(void* opaque,
			
 
				+                                   const uint8* const* data,
			
 
				+                                   const int* strides,
			
 
				+                                   int rows);
			
 
				+
			
 
				+  static const int kColorSpaceUnknown;
			
 
				+  static const int kColorSpaceGrayscale;
			
 
				+  static const int kColorSpaceRgb;
			
 
				+  static const int kColorSpaceYCbCr;
			
 
				+  static const int kColorSpaceCMYK;
			
 
				+  static const int kColorSpaceYCCK;
			
 
				+
			
 
				+  MJpegDecoder();
			
 
				+  ~MJpegDecoder();
			
 
				+
			
 
				+  // Loads a new frame, reads its headers, and determines the uncompressed
			
 
				+  // image format.
			
 
				+  // Returns LIBYUV_TRUE if image looks valid and format is supported.
			
 
				+  // If return value is LIBYUV_TRUE, then the values for all the following
			
 
				+  // getters are populated.
			
 
				+  // src_len is the size of the compressed mjpeg frame in bytes.
			
 
				+  LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
			
 
				+
			
 
				+  // Returns width of the last loaded frame in pixels.
			
 
				+  int GetWidth();
			
 
				+
			
 
				+  // Returns height of the last loaded frame in pixels.
			
 
				+  int GetHeight();
			
 
				+
			
 
				+  // Returns format of the last loaded frame. The return value is one of the
			
 
				+  // kColorSpace* constants.
			
 
				+  int GetColorSpace();
			
 
				+
			
 
				+  // Number of color components in the color space.
			
 
				+  int GetNumComponents();
			
 
				+
			
 
				+  // Sample factors of the n-th component.
			
 
				+  int GetHorizSampFactor(int component);
			
 
				+
			
 
				+  int GetVertSampFactor(int component);
			
 
				+
			
 
				+  int GetHorizSubSampFactor(int component);
			
 
				+
			
 
				+  int GetVertSubSampFactor(int component);
			
 
				+
			
 
				+  // Public for testability.
			
 
				+  int GetImageScanlinesPerImcuRow();
			
 
				+
			
 
				+  // Public for testability.
			
 
				+  int GetComponentScanlinesPerImcuRow(int component);
			
 
				+
			
 
				+  // Width of a component in bytes.
			
 
				+  int GetComponentWidth(int component);
			
 
				+
			
 
				+  // Height of a component.
			
 
				+  int GetComponentHeight(int component);
			
 
				+
			
 
				+  // Width of a component in bytes with padding for DCTSIZE. Public for testing.
			
 
				+  int GetComponentStride(int component);
			
 
				+
			
 
				+  // Size of a component in bytes.
			
 
				+  int GetComponentSize(int component);
			
 
				+
			
 
				+  // Call this after LoadFrame() if you decide you don't want to decode it
			
 
				+  // after all.
			
 
				+  LIBYUV_BOOL UnloadFrame();
			
 
				+
			
 
				+  // Decodes the entire image into a one-buffer-per-color-component format.
			
 
				+  // dst_width must match exactly. dst_height must be <= image height; if
			
 
				+  // less, the image is cropped. "planes" must have size equal to at least
			
 
				+  // GetNumComponents() and they must point to non-overlapping buffers of size
			
 
				+  // at least GetComponentSize(i). The pointers in planes are incremented
			
 
				+  // to point to after the end of the written data.
			
 
				+  // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
			
 
				+  LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
			
 
				+
			
 
				+  // Decodes the entire image and passes the data via repeated calls to a
			
 
				+  // callback function. Each call will get the data for a whole number of
			
 
				+  // image scanlines.
			
 
				+  // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
			
 
				+  LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
			
 
				+                        int dst_width, int dst_height);
			
 
				+
			
 
				+  // The helper function which recognizes the jpeg sub-sampling type.
			
 
				+  static JpegSubsamplingType JpegSubsamplingTypeHelper(
			
 
				+     int* subsample_x, int* subsample_y, int number_of_components);
			
 
				+
			
 
				+ private:
			
 
				+  void AllocOutputBuffers(int num_outbufs);
			
 
				+  void DestroyOutputBuffers();
			
 
				+
			
 
				+  LIBYUV_BOOL StartDecode();
			
 
				+  LIBYUV_BOOL FinishDecode();
			
 
				+
			
 
				+  void SetScanlinePointers(uint8** data);
			
 
				+  LIBYUV_BOOL DecodeImcuRow();
			
 
				+
			
 
				+  int GetComponentScanlinePadding(int component);
			
 
				+
			
 
				+  // A buffer holding the input data for a frame.
			
 
				+  Buffer buf_;
			
 
				+  BufferVector buf_vec_;
			
 
				+
			
 
				+  jpeg_decompress_struct* decompress_struct_;
			
 
				+  jpeg_source_mgr* source_mgr_;
			
 
				+  SetJmpErrorMgr* error_mgr_;
			
 
				+
			
 
				+  // LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
			
 
				+  // GetComponentScanlinePadding() != 0.)
			
 
				+  LIBYUV_BOOL has_scanline_padding_;
			
 
				+
			
 
				+  // Temporaries used to point to scanline outputs.
			
 
				+  int num_outbufs_;  // Outermost size of all arrays below.
			
 
				+  uint8*** scanlines_;
			
 
				+  int* scanlines_sizes_;
			
 
				+  // Temporary buffer used for decoding when we can't decode directly to the
			
 
				+  // output buffers. Large enough for just one iMCU row.
			
 
				+  uint8** databuf_;
			
 
				+  int* databuf_strides_;
			
 
				+};
			
 
				+
			
 
				+}  // namespace libyuv
			
 
				+
			
 
				+#endif  //  __cplusplus
			
 
				+#endif  // INCLUDE_LIBYUV_MJPEG_DECODER_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/planar_functions.h
+++ b/src/jni/libyuv/include/libyuv/planar_functions.h
@@ -0,0 +1,501 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+// TODO(fbarchard): Remove the following headers includes.
			
 
				+#include "libyuv/convert.h"
			
 
				+#include "libyuv/convert_argb.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Copy a plane of data.
			
 
				+LIBYUV_API
			
 
				+void CopyPlane(const uint8* src_y, int src_stride_y,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void CopyPlane_16(const uint16* src_y, int src_stride_y,
			
 
				+                  uint16* dst_y, int dst_stride_y,
			
 
				+                  int width, int height);
			
 
				+
			
 
				+// Set a plane of data to a 32 bit value.
			
 
				+LIBYUV_API
			
 
				+void SetPlane(uint8* dst_y, int dst_stride_y,
			
 
				+              int width, int height,
			
 
				+              uint32 value);
			
 
				+
			
 
				+// Copy I400.  Supports inverting.
			
 
				+LIBYUV_API
			
 
				+int I400ToI400(const uint8* src_y, int src_stride_y,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               int width, int height);
			
 
				+
			
 
				+#define J400ToJ400 I400ToI400
			
 
				+
			
 
				+// Copy I422 to I422.
			
 
				+#define I422ToI422 I422Copy
			
 
				+LIBYUV_API
			
 
				+int I422Copy(const uint8* src_y, int src_stride_y,
			
 
				+             const uint8* src_u, int src_stride_u,
			
 
				+             const uint8* src_v, int src_stride_v,
			
 
				+             uint8* dst_y, int dst_stride_y,
			
 
				+             uint8* dst_u, int dst_stride_u,
			
 
				+             uint8* dst_v, int dst_stride_v,
			
 
				+             int width, int height);
			
 
				+
			
 
				+// Copy I444 to I444.
			
 
				+#define I444ToI444 I444Copy
			
 
				+LIBYUV_API
			
 
				+int I444Copy(const uint8* src_y, int src_stride_y,
			
 
				+             const uint8* src_u, int src_stride_u,
			
 
				+             const uint8* src_v, int src_stride_v,
			
 
				+             uint8* dst_y, int dst_stride_y,
			
 
				+             uint8* dst_u, int dst_stride_u,
			
 
				+             uint8* dst_v, int dst_stride_v,
			
 
				+             int width, int height);
			
 
				+
			
 
				+// Convert YUY2 to I422.
			
 
				+LIBYUV_API
			
 
				+int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert UYVY to I422.
			
 
				+LIBYUV_API
			
 
				+int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_uv, int dst_stride_uv,
			
 
				+               int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_uv, int dst_stride_uv,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I420 to I400. (calls CopyPlane ignoring u/v).
			
 
				+LIBYUV_API
			
 
				+int I420ToI400(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Alias
			
 
				+#define J420ToJ400 I420ToI400
			
 
				+#define I420ToI420Mirror I420Mirror
			
 
				+
			
 
				+// I420 mirror.
			
 
				+LIBYUV_API
			
 
				+int I420Mirror(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Alias
			
 
				+#define I400ToI400Mirror I400Mirror
			
 
				+
			
 
				+// I400 mirror.  A single plane is mirrored horizontally.
			
 
				+// Pass negative height to achieve 180 degree rotation.
			
 
				+LIBYUV_API
			
 
				+int I400Mirror(const uint8* src_y, int src_stride_y,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Alias
			
 
				+#define ARGBToARGBMirror ARGBMirror
			
 
				+
			
 
				+// ARGB mirror.
			
 
				+LIBYUV_API
			
 
				+int ARGBMirror(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert NV12 to RGB565.
			
 
				+LIBYUV_API
			
 
				+int NV12ToRGB565(const uint8* src_y, int src_stride_y,
			
 
				+                 const uint8* src_uv, int src_stride_uv,
			
 
				+                 uint8* dst_rgb565, int dst_stride_rgb565,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+// I422ToARGB is in convert_argb.h
			
 
				+// Convert I422 to BGRA.
			
 
				+LIBYUV_API
			
 
				+int I422ToBGRA(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_bgra, int dst_stride_bgra,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I422 to ABGR.
			
 
				+LIBYUV_API
			
 
				+int I422ToABGR(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_abgr, int dst_stride_abgr,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I422 to RGBA.
			
 
				+LIBYUV_API
			
 
				+int I422ToRGBA(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_rgba, int dst_stride_rgba,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Alias
			
 
				+#define RGB24ToRAW RAWToRGB24
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
			
 
				+               uint8* dst_rgb24, int dst_stride_rgb24,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Draw a rectangle into I420.
			
 
				+LIBYUV_API
			
 
				+int I420Rect(uint8* dst_y, int dst_stride_y,
			
 
				+             uint8* dst_u, int dst_stride_u,
			
 
				+             uint8* dst_v, int dst_stride_v,
			
 
				+             int x, int y, int width, int height,
			
 
				+             int value_y, int value_u, int value_v);
			
 
				+
			
 
				+// Draw a rectangle into ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGBRect(uint8* dst_argb, int dst_stride_argb,
			
 
				+             int x, int y, int width, int height, uint32 value);
			
 
				+
			
 
				+// Convert ARGB to gray scale ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Make a rectangle of ARGB gray scale.
			
 
				+LIBYUV_API
			
 
				+int ARGBGray(uint8* dst_argb, int dst_stride_argb,
			
 
				+             int x, int y, int width, int height);
			
 
				+
			
 
				+// Make a rectangle of ARGB Sepia tone.
			
 
				+LIBYUV_API
			
 
				+int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
			
 
				+              int x, int y, int width, int height);
			
 
				+
			
 
				+// Apply a matrix rotation to each ARGB pixel.
			
 
				+// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2.
			
 
				+// The first 4 coefficients apply to B, G, R, A and produce B of the output.
			
 
				+// The next 4 coefficients apply to B, G, R, A and produce G of the output.
			
 
				+// The next 4 coefficients apply to B, G, R, A and produce R of the output.
			
 
				+// The last 4 coefficients apply to B, G, R, A and produce A of the output.
			
 
				+LIBYUV_API
			
 
				+int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
			
 
				+                    uint8* dst_argb, int dst_stride_argb,
			
 
				+                    const int8* matrix_argb,
			
 
				+                    int width, int height);
			
 
				+
			
 
				+// Deprecated. Use ARGBColorMatrix instead.
			
 
				+// Apply a matrix rotation to each ARGB pixel.
			
 
				+// matrix_rgb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
			
 
				+// The first 4 coefficients apply to B, G, R, A and produce B of the output.
			
 
				+// The next 4 coefficients apply to B, G, R, A and produce G of the output.
			
 
				+// The last 4 coefficients apply to B, G, R, A and produce R of the output.
			
 
				+LIBYUV_API
			
 
				+int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
			
 
				+                   const int8* matrix_rgb,
			
 
				+                   int x, int y, int width, int height);
			
 
				+
			
 
				+// Apply a color table to each ARGB pixel.
			
 
				+// Table contains 256 ARGB values.
			
 
				+LIBYUV_API
			
 
				+int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
			
 
				+                   const uint8* table_argb,
			
 
				+                   int x, int y, int width, int height);
			
 
				+
			
 
				+// Apply a color table to each ARGB pixel but preserve destination alpha.
			
 
				+// Table contains 256 ARGB values.
			
 
				+LIBYUV_API
			
 
				+int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
			
 
				+                  const uint8* table_argb,
			
 
				+                  int x, int y, int width, int height);
			
 
				+
			
 
				+// Apply a luma/color table to each ARGB pixel but preserve destination alpha.
			
 
				+// Table contains 32768 values indexed by [Y][C] where Y is 7 bit luma from
			
 
				+// RGB (YJ style) and C is an 8 bit color component (R, G or B).
			
 
				+LIBYUV_API
			
 
				+int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
			
 
				+                       uint8* dst_argb, int dst_stride_argb,
			
 
				+                       const uint8* luma_rgb_table,
			
 
				+                       int width, int height);
			
 
				+
			
 
				+// Apply a 3 term polynomial to ARGB values.
			
 
				+// poly points to a 4x4 matrix.  The first row is constants.  The 2nd row is
			
 
				+// coefficients for b, g, r and a.  The 3rd row is coefficients for b squared,
			
 
				+// g squared, r squared and a squared.  The 4th row is coefficients for b to
			
 
				+// the 3, g to the 3, r to the 3 and a to the 3.  The values are summed and
			
 
				+// result clamped to 0 to 255.
			
 
				+// A polynomial approximation can be derived using software such as 'R'.
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
			
 
				+                   uint8* dst_argb, int dst_stride_argb,
			
 
				+                   const float* poly,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+// Quantize a rectangle of ARGB. Alpha unaffected.
			
 
				+// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
			
 
				+// interval_size should be a value between 1 and 255.
			
 
				+// interval_offset should be a value between 0 and 255.
			
 
				+LIBYUV_API
			
 
				+int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
			
 
				+                 int scale, int interval_size, int interval_offset,
			
 
				+                 int x, int y, int width, int height);
			
 
				+
			
 
				+// Copy ARGB to ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGBCopy(const uint8* src_argb, int src_stride_argb,
			
 
				+             uint8* dst_argb, int dst_stride_argb,
			
 
				+             int width, int height);
			
 
				+
			
 
				+// Copy Alpha channel of ARGB to alpha of ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
			
 
				+                  uint8* dst_argb, int dst_stride_argb,
			
 
				+                  int width, int height);
			
 
				+
			
 
				+// Copy Y channel to Alpha of ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
			
 
				+                     uint8* dst_argb, int dst_stride_argb,
			
 
				+                     int width, int height);
			
 
				+
			
 
				+typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1,
			
 
				+                             uint8* dst_argb, int width);
			
 
				+
			
 
				+// Get the row function that Alpha Blends ARGB pixels and stores to destination.
			
 
				+LIBYUV_API
			
 
				+ARGBBlendRow GetARGBBlend(void);
			
 
				+
			
 
				+// Alpha Blend ARGB images and store to destination.
			
 
				+// Source is pre-multiplied by alpha using ARGBAttenuate.
			
 
				+// Alpha of destination is set to 255.
			
 
				+LIBYUV_API
			
 
				+int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
			
 
				+              const uint8* src_argb1, int src_stride_argb1,
			
 
				+              uint8* dst_argb, int dst_stride_argb,
			
 
				+              int width, int height);
			
 
				+
			
 
				+// Alpha Blend plane and store to destination.
			
 
				+// Source is not pre-multiplied by alpha.
			
 
				+LIBYUV_API
			
 
				+int BlendPlane(const uint8* src_y0, int src_stride_y0,
			
 
				+               const uint8* src_y1, int src_stride_y1,
			
 
				+               const uint8* alpha, int alpha_stride,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Alpha Blend YUV images and store to destination.
			
 
				+// Source is not pre-multiplied by alpha.
			
 
				+// Alpha is full width x height and subsampled to half size to apply to UV.
			
 
				+LIBYUV_API
			
 
				+int I420Blend(const uint8* src_y0, int src_stride_y0,
			
 
				+              const uint8* src_u0, int src_stride_u0,
			
 
				+              const uint8* src_v0, int src_stride_v0,
			
 
				+              const uint8* src_y1, int src_stride_y1,
			
 
				+              const uint8* src_u1, int src_stride_u1,
			
 
				+              const uint8* src_v1, int src_stride_v1,
			
 
				+              const uint8* alpha, int alpha_stride,
			
 
				+              uint8* dst_y, int dst_stride_y,
			
 
				+              uint8* dst_u, int dst_stride_u,
			
 
				+              uint8* dst_v, int dst_stride_v,
			
 
				+              int width, int height);
			
 
				+
			
 
				+// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
			
 
				+LIBYUV_API
			
 
				+int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
			
 
				+                 const uint8* src_argb1, int src_stride_argb1,
			
 
				+                 uint8* dst_argb, int dst_stride_argb,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+// Add ARGB image with ARGB image. Saturates to 255.
			
 
				+LIBYUV_API
			
 
				+int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
			
 
				+            const uint8* src_argb1, int src_stride_argb1,
			
 
				+            uint8* dst_argb, int dst_stride_argb,
			
 
				+            int width, int height);
			
 
				+
			
 
				+// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
			
 
				+LIBYUV_API
			
 
				+int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
			
 
				+                 const uint8* src_argb1, int src_stride_argb1,
			
 
				+                 uint8* dst_argb, int dst_stride_argb,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+// Convert I422 to YUY2.
			
 
				+LIBYUV_API
			
 
				+int I422ToYUY2(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_frame, int dst_stride_frame,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert I422 to UYVY.
			
 
				+LIBYUV_API
			
 
				+int I422ToUYVY(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_frame, int dst_stride_frame,
			
 
				+               int width, int height);
			
 
				+
			
 
				+// Convert unattenuated ARGB to preattenuated ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
			
 
				+                  uint8* dst_argb, int dst_stride_argb,
			
 
				+                  int width, int height);
			
 
				+
			
 
				+// Convert preattenuated ARGB to unattenuated ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
			
 
				+                    uint8* dst_argb, int dst_stride_argb,
			
 
				+                    int width, int height);
			
 
				+
			
 
				+// Internal function - do not call directly.
			
 
				+// Computes table of cumulative sum for image where the value is the sum
			
 
				+// of all values above and to the left of the entry. Used by ARGBBlur.
			
 
				+LIBYUV_API
			
 
				+int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
			
 
				+                             int32* dst_cumsum, int dst_stride32_cumsum,
			
 
				+                             int width, int height);
			
 
				+
			
 
				+// Blur ARGB image.
			
 
				+// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
			
 
				+//   16 byte boundary.
			
 
				+// dst_stride32_cumsum is number of ints in a row (width * 4).
			
 
				+// radius is number of pixels around the center.  e.g. 1 = 3x3. 2=5x5.
			
 
				+// Blur is optimized for radius of 5 (11x11) or less.
			
 
				+LIBYUV_API
			
 
				+int ARGBBlur(const uint8* src_argb, int src_stride_argb,
			
 
				+             uint8* dst_argb, int dst_stride_argb,
			
 
				+             int32* dst_cumsum, int dst_stride32_cumsum,
			
 
				+             int width, int height, int radius);
			
 
				+
			
 
				+// Multiply ARGB image by ARGB value.
			
 
				+LIBYUV_API
			
 
				+int ARGBShade(const uint8* src_argb, int src_stride_argb,
			
 
				+              uint8* dst_argb, int dst_stride_argb,
			
 
				+              int width, int height, uint32 value);
			
 
				+
			
 
				+// Interpolate between two images using specified amount of interpolation
			
 
				+// (0 to 255) and store to destination.
			
 
				+// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
			
 
				+// and 255 means 1% src0 and 99% src1.
			
 
				+LIBYUV_API
			
 
				+int InterpolatePlane(const uint8* src0, int src_stride0,
			
 
				+                     const uint8* src1, int src_stride1,
			
 
				+                     uint8* dst, int dst_stride,
			
 
				+                     int width, int height, int interpolation);
			
 
				+
			
 
				+// Interpolate between two ARGB images using specified amount of interpolation.
			
 
				+// Internally calls InterpolatePlane with width * 4 (bpp).
			
 
				+LIBYUV_API
			
 
				+int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
			
 
				+                    const uint8* src_argb1, int src_stride_argb1,
			
 
				+                    uint8* dst_argb, int dst_stride_argb,
			
 
				+                    int width, int height, int interpolation);
			
 
				+
			
 
				+// Interpolate between two YUV images using specified amount of interpolation.
			
 
				+// Internally calls InterpolatePlane on each plane where the U and V planes
			
 
				+// are half width and half height.
			
 
				+LIBYUV_API
			
 
				+int I420Interpolate(const uint8* src0_y, int src0_stride_y,
			
 
				+                    const uint8* src0_u, int src0_stride_u,
			
 
				+                    const uint8* src0_v, int src0_stride_v,
			
 
				+                    const uint8* src1_y, int src1_stride_y,
			
 
				+                    const uint8* src1_u, int src1_stride_u,
			
 
				+                    const uint8* src1_v, int src1_stride_v,
			
 
				+                    uint8* dst_y, int dst_stride_y,
			
 
				+                    uint8* dst_u, int dst_stride_u,
			
 
				+                    uint8* dst_v, int dst_stride_v,
			
 
				+                    int width, int height, int interpolation);
			
 
				+
			
 
				+#if defined(__pnacl__) || defined(__CLR_VER) || \
			
 
				+    (defined(__i386__) && !defined(__SSE2__))
			
 
				+#define LIBYUV_DISABLE_X86
			
 
				+#endif
			
 
				+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
			
 
				+#if defined(__has_feature)
			
 
				+#if __has_feature(memory_sanitizer)
			
 
				+#define LIBYUV_DISABLE_X86
			
 
				+#endif
			
 
				+#endif
			
 
				+// The following are available on all x86 platforms:
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && \
			
 
				+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
			
 
				+#define HAS_ARGBAFFINEROW_SSE2
			
 
				+#endif
			
 
				+
			
 
				+// Row function for copying pixels from a source with a slope to a row
			
 
				+// of destination. Useful for scaling, rotation, mirror, texture mapping.
			
 
				+LIBYUV_API
			
 
				+void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
			
 
				+                     uint8* dst_argb, const float* uv_dudv, int width);
			
 
				+LIBYUV_API
			
 
				+void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
			
 
				+                        uint8* dst_argb, const float* uv_dudv, int width);
			
 
				+
			
 
				+// Shuffle ARGB channel order.  e.g. BGRA to ARGB.
			
 
				+// shuffler is 16 bytes and must be aligned.
			
 
				+LIBYUV_API
			
 
				+int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
			
 
				+                uint8* dst_argb, int dst_stride_argb,
			
 
				+                const uint8* shuffler, int width, int height);
			
 
				+
			
 
				+// Sobel ARGB effect with planar output.
			
 
				+LIBYUV_API
			
 
				+int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
			
 
				+                     uint8* dst_y, int dst_stride_y,
			
 
				+                     int width, int height);
			
 
				+
			
 
				+// Sobel ARGB effect.
			
 
				+LIBYUV_API
			
 
				+int ARGBSobel(const uint8* src_argb, int src_stride_argb,
			
 
				+              uint8* dst_argb, int dst_stride_argb,
			
 
				+              int width, int height);
			
 
				+
			
 
				+// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
			
 
				+LIBYUV_API
			
 
				+int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
			
 
				+                uint8* dst_argb, int dst_stride_argb,
			
 
				+                int width, int height);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/rotate.h
+++ b/src/jni/libyuv/include/libyuv/rotate.h
@@ -0,0 +1,117 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_ROTATE_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_ROTATE_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Supported rotation.
			
 
				+typedef enum RotationMode {
			
 
				+  kRotate0 = 0,  // No rotation.
			
 
				+  kRotate90 = 90,  // Rotate 90 degrees clockwise.
			
 
				+  kRotate180 = 180,  // Rotate 180 degrees.
			
 
				+  kRotate270 = 270,  // Rotate 270 degrees clockwise.
			
 
				+
			
 
				+  // Deprecated.
			
 
				+  kRotateNone = 0,
			
 
				+  kRotateClockwise = 90,
			
 
				+  kRotateCounterClockwise = 270,
			
 
				+} RotationModeEnum;
			
 
				+
			
 
				+// Rotate I420 frame.
			
 
				+LIBYUV_API
			
 
				+int I420Rotate(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int src_width, int src_height, enum RotationMode mode);
			
 
				+
			
 
				+// Rotate NV12 input and store in I420.
			
 
				+LIBYUV_API
			
 
				+int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
			
 
				+                     const uint8* src_uv, int src_stride_uv,
			
 
				+                     uint8* dst_y, int dst_stride_y,
			
 
				+                     uint8* dst_u, int dst_stride_u,
			
 
				+                     uint8* dst_v, int dst_stride_v,
			
 
				+                     int src_width, int src_height, enum RotationMode mode);
			
 
				+
			
 
				+// Rotate a plane by 0, 90, 180, or 270.
			
 
				+LIBYUV_API
			
 
				+int RotatePlane(const uint8* src, int src_stride,
			
 
				+                uint8* dst, int dst_stride,
			
 
				+                int src_width, int src_height, enum RotationMode mode);
			
 
				+
			
 
				+// Rotate planes by 90, 180, 270. Deprecated.
			
 
				+LIBYUV_API
			
 
				+void RotatePlane90(const uint8* src, int src_stride,
			
 
				+                   uint8* dst, int dst_stride,
			
 
				+                   int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void RotatePlane180(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride,
			
 
				+                    int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void RotatePlane270(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride,
			
 
				+                    int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void RotateUV90(const uint8* src, int src_stride,
			
 
				+                uint8* dst_a, int dst_stride_a,
			
 
				+                uint8* dst_b, int dst_stride_b,
			
 
				+                int width, int height);
			
 
				+
			
 
				+// Rotations for when U and V are interleaved.
			
 
				+// These functions take one input pointer and
			
 
				+// split the data into two buffers while
			
 
				+// rotating them. Deprecated.
			
 
				+LIBYUV_API
			
 
				+void RotateUV180(const uint8* src, int src_stride,
			
 
				+                 uint8* dst_a, int dst_stride_a,
			
 
				+                 uint8* dst_b, int dst_stride_b,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void RotateUV270(const uint8* src, int src_stride,
			
 
				+                 uint8* dst_a, int dst_stride_a,
			
 
				+                 uint8* dst_b, int dst_stride_b,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+// The 90 and 270 functions are based on transposes.
			
 
				+// Doing a transpose with reversing the read/write
			
 
				+// order will result in a rotation by +- 90 degrees.
			
 
				+// Deprecated.
			
 
				+LIBYUV_API
			
 
				+void TransposePlane(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride,
			
 
				+                    int width, int height);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void TransposeUV(const uint8* src, int src_stride,
			
 
				+                 uint8* dst_a, int dst_stride_a,
			
 
				+                 uint8* dst_b, int dst_stride_b,
			
 
				+                 int width, int height);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_ROTATE_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/rotate_argb.h
+++ b/src/jni/libyuv/include/libyuv/rotate_argb.h
@@ -0,0 +1,33 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+#include "libyuv/rotate.h"  // For RotationMode.
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Rotate ARGB frame
			
 
				+LIBYUV_API
			
 
				+int ARGBRotate(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_argb, int dst_stride_argb,
			
 
				+               int src_width, int src_height, enum RotationMode mode);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_ROTATE_ARGB_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/rotate_row.h
+++ b/src/jni/libyuv/include/libyuv/rotate_row.h
@@ -0,0 +1,121 @@
 
				+/*
			
 
				+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_ROTATE_ROW_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__pnacl__) || defined(__CLR_VER) || \
			
 
				+    (defined(__i386__) && !defined(__SSE2__))
			
 
				+#define LIBYUV_DISABLE_X86
			
 
				+#endif
			
 
				+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
			
 
				+#if defined(__has_feature)
			
 
				+#if __has_feature(memory_sanitizer)
			
 
				+#define LIBYUV_DISABLE_X86
			
 
				+#endif
			
 
				+#endif
			
 
				+// The following are available for Visual C and clangcl 32 bit:
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
			
 
				+#define HAS_TRANSPOSEWX8_SSSE3
			
 
				+#define HAS_TRANSPOSEUVWX8_SSE2
			
 
				+#endif
			
 
				+
			
 
				+// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && \
			
 
				+    (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
			
 
				+#define HAS_TRANSPOSEWX8_SSSE3
			
 
				+#endif
			
 
				+
			
 
				+// The following are available for 64 bit GCC but not NaCL:
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
			
 
				+    defined(__x86_64__)
			
 
				+#define HAS_TRANSPOSEWX8_FAST_SSSE3
			
 
				+#define HAS_TRANSPOSEUVWX8_SSE2
			
 
				+#endif
			
 
				+
			
 
				+#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
			
 
				+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
			
 
				+#define HAS_TRANSPOSEWX8_NEON
			
 
				+#define HAS_TRANSPOSEUVWX8_NEON
			
 
				+#endif
			
 
				+
			
 
				+#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
			
 
				+    defined(__mips__) && \
			
 
				+    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
			
 
				+#define HAS_TRANSPOSEWX8_DSPR2
			
 
				+#define HAS_TRANSPOSEUVWX8_DSPR2
			
 
				+#endif  // defined(__mips__)
			
 
				+
			
 
				+void TransposeWxH_C(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride, int width, int height);
			
 
				+
			
 
				+void TransposeWx8_C(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride, int width);
			
 
				+void TransposeWx8_NEON(const uint8* src, int src_stride,
			
 
				+                       uint8* dst, int dst_stride, int width);
			
 
				+void TransposeWx8_SSSE3(const uint8* src, int src_stride,
			
 
				+                        uint8* dst, int dst_stride, int width);
			
 
				+void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
			
 
				+                             uint8* dst, int dst_stride, int width);
			
 
				+void TransposeWx8_DSPR2(const uint8* src, int src_stride,
			
 
				+                        uint8* dst, int dst_stride, int width);
			
 
				+void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
			
 
				+                             uint8* dst, int dst_stride, int width);
			
 
				+
			
 
				+void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
			
 
				+                           uint8* dst, int dst_stride, int width);
			
 
				+void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
			
 
				+                            uint8* dst, int dst_stride, int width);
			
 
				+void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
			
 
				+                                 uint8* dst, int dst_stride, int width);
			
 
				+void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride,
			
 
				+                            uint8* dst, int dst_stride, int width);
			
 
				+
			
 
				+void TransposeUVWxH_C(const uint8* src, int src_stride,
			
 
				+                      uint8* dst_a, int dst_stride_a,
			
 
				+                      uint8* dst_b, int dst_stride_b,
			
 
				+                      int width, int height);
			
 
				+
			
 
				+void TransposeUVWx8_C(const uint8* src, int src_stride,
			
 
				+                      uint8* dst_a, int dst_stride_a,
			
 
				+                      uint8* dst_b, int dst_stride_b, int width);
			
 
				+void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
			
 
				+                         uint8* dst_a, int dst_stride_a,
			
 
				+                         uint8* dst_b, int dst_stride_b, int width);
			
 
				+void TransposeUVWx8_NEON(const uint8* src, int src_stride,
			
 
				+                         uint8* dst_a, int dst_stride_a,
			
 
				+                         uint8* dst_b, int dst_stride_b, int width);
			
 
				+void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
			
 
				+                          uint8* dst_a, int dst_stride_a,
			
 
				+                          uint8* dst_b, int dst_stride_b, int width);
			
 
				+
			
 
				+void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride,
			
 
				+                             uint8* dst_a, int dst_stride_a,
			
 
				+                             uint8* dst_b, int dst_stride_b, int width);
			
 
				+void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride,
			
 
				+                             uint8* dst_a, int dst_stride_a,
			
 
				+                             uint8* dst_b, int dst_stride_b, int width);
			
 
				+void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride,
			
 
				+                              uint8* dst_a, int dst_stride_a,
			
 
				+                              uint8* dst_b, int dst_stride_b, int width);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_ROTATE_ROW_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/row.h
+++ b/src/jni/libyuv/include/libyuv/row.h
--- a/src/jni/libyuv/include/libyuv/scale.h
+++ b/src/jni/libyuv/include/libyuv/scale.h
@@ -0,0 +1,103 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_SCALE_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_SCALE_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Supported filtering.
			
 
				+typedef enum FilterMode {
			
 
				+  kFilterNone = 0,  // Point sample; Fastest.
			
 
				+  kFilterLinear = 1,  // Filter horizontally only.
			
 
				+  kFilterBilinear = 2,  // Faster than box, but lower quality scaling down.
			
 
				+  kFilterBox = 3  // Highest quality.
			
 
				+} FilterModeEnum;
			
 
				+
			
 
				+// Scale a YUV plane.
			
 
				+LIBYUV_API
			
 
				+void ScalePlane(const uint8* src, int src_stride,
			
 
				+                int src_width, int src_height,
			
 
				+                uint8* dst, int dst_stride,
			
 
				+                int dst_width, int dst_height,
			
 
				+                enum FilterMode filtering);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void ScalePlane_16(const uint16* src, int src_stride,
			
 
				+                   int src_width, int src_height,
			
 
				+                   uint16* dst, int dst_stride,
			
 
				+                   int dst_width, int dst_height,
			
 
				+                   enum FilterMode filtering);
			
 
				+
			
 
				+// Scales a YUV 4:2:0 image from the src width and height to the
			
 
				+// dst width and height.
			
 
				+// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
			
 
				+// used. This produces basic (blocky) quality at the fastest speed.
			
 
				+// If filtering is kFilterBilinear, interpolation is used to produce a better
			
 
				+// quality image, at the expense of speed.
			
 
				+// If filtering is kFilterBox, averaging is used to produce even better
			
 
				+// quality image, at further expense of speed.
			
 
				+// Returns 0 if successful.
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420Scale(const uint8* src_y, int src_stride_y,
			
 
				+              const uint8* src_u, int src_stride_u,
			
 
				+              const uint8* src_v, int src_stride_v,
			
 
				+              int src_width, int src_height,
			
 
				+              uint8* dst_y, int dst_stride_y,
			
 
				+              uint8* dst_u, int dst_stride_u,
			
 
				+              uint8* dst_v, int dst_stride_v,
			
 
				+              int dst_width, int dst_height,
			
 
				+              enum FilterMode filtering);
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420Scale_16(const uint16* src_y, int src_stride_y,
			
 
				+                 const uint16* src_u, int src_stride_u,
			
 
				+                 const uint16* src_v, int src_stride_v,
			
 
				+                 int src_width, int src_height,
			
 
				+                 uint16* dst_y, int dst_stride_y,
			
 
				+                 uint16* dst_u, int dst_stride_u,
			
 
				+                 uint16* dst_v, int dst_stride_v,
			
 
				+                 int dst_width, int dst_height,
			
 
				+                 enum FilterMode filtering);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+// Legacy API.  Deprecated.
			
 
				+LIBYUV_API
			
 
				+int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
			
 
				+          int src_stride_y, int src_stride_u, int src_stride_v,
			
 
				+          int src_width, int src_height,
			
 
				+          uint8* dst_y, uint8* dst_u, uint8* dst_v,
			
 
				+          int dst_stride_y, int dst_stride_u, int dst_stride_v,
			
 
				+          int dst_width, int dst_height,
			
 
				+          LIBYUV_BOOL interpolate);
			
 
				+
			
 
				+// Legacy API.  Deprecated.
			
 
				+LIBYUV_API
			
 
				+int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
			
 
				+                uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
			
 
				+                LIBYUV_BOOL interpolate);
			
 
				+
			
 
				+// For testing, allow disabling of specialized scalers.
			
 
				+LIBYUV_API
			
 
				+void SetUseReferenceImpl(LIBYUV_BOOL use);
			
 
				+#endif  // __cplusplus
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_SCALE_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/scale_argb.h
+++ b/src/jni/libyuv/include/libyuv/scale_argb.h
@@ -0,0 +1,56 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_SCALE_ARGB_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+#include "libyuv/scale.h"  // For FilterMode
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int ARGBScale(const uint8* src_argb, int src_stride_argb,
			
 
				+              int src_width, int src_height,
			
 
				+              uint8* dst_argb, int dst_stride_argb,
			
 
				+              int dst_width, int dst_height,
			
 
				+              enum FilterMode filtering);
			
 
				+
			
 
				+// Clipped scale takes destination rectangle coordinates for clip values.
			
 
				+LIBYUV_API
			
 
				+int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
			
 
				+                  int src_width, int src_height,
			
 
				+                  uint8* dst_argb, int dst_stride_argb,
			
 
				+                  int dst_width, int dst_height,
			
 
				+                  int clip_x, int clip_y, int clip_width, int clip_height,
			
 
				+                  enum FilterMode filtering);
			
 
				+
			
 
				+// Scale with YUV conversion to ARGB and clipping.
			
 
				+LIBYUV_API
			
 
				+int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
			
 
				+                       const uint8* src_u, int src_stride_u,
			
 
				+                       const uint8* src_v, int src_stride_v,
			
 
				+                       uint32 src_fourcc,
			
 
				+                       int src_width, int src_height,
			
 
				+                       uint8* dst_argb, int dst_stride_argb,
			
 
				+                       uint32 dst_fourcc,
			
 
				+                       int dst_width, int dst_height,
			
 
				+                       int clip_x, int clip_y, int clip_width, int clip_height,
			
 
				+                       enum FilterMode filtering);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_SCALE_ARGB_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/scale_row.h
+++ b/src/jni/libyuv/include/libyuv/scale_row.h
@@ -0,0 +1,503 @@
 
				+/*
			
 
				+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_SCALE_ROW_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+#include "libyuv/scale.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__pnacl__) || defined(__CLR_VER) || \
			
 
				+    (defined(__i386__) && !defined(__SSE2__))
			
 
				+#define LIBYUV_DISABLE_X86
			
 
				+#endif
			
 
				+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
			
 
				+#if defined(__has_feature)
			
 
				+#if __has_feature(memory_sanitizer)
			
 
				+#define LIBYUV_DISABLE_X86
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+// GCC >= 4.7.0 required for AVX2.
			
 
				+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
			
 
				+#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
			
 
				+#define GCC_HAS_AVX2 1
			
 
				+#endif  // GNUC >= 4.7
			
 
				+#endif  // __GNUC__
			
 
				+
			
 
				+// clang >= 3.4.0 required for AVX2.
			
 
				+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
			
 
				+#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
			
 
				+#define CLANG_HAS_AVX2 1
			
 
				+#endif  // clang >= 3.4
			
 
				+#endif  // __clang__
			
 
				+
			
 
				+// Visual C 2012 required for AVX2.
			
 
				+#if defined(_M_IX86) && !defined(__clang__) && \
			
 
				+    defined(_MSC_VER) && _MSC_VER >= 1700
			
 
				+#define VISUALC_HAS_AVX2 1
			
 
				+#endif  // VisualStudio >= 2012
			
 
				+
			
 
				+// The following are available on all x86 platforms:
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && \
			
 
				+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
			
 
				+#define HAS_FIXEDDIV1_X86
			
 
				+#define HAS_FIXEDDIV_X86
			
 
				+#define HAS_SCALEARGBCOLS_SSE2
			
 
				+#define HAS_SCALEARGBCOLSUP2_SSE2
			
 
				+#define HAS_SCALEARGBFILTERCOLS_SSSE3
			
 
				+#define HAS_SCALEARGBROWDOWN2_SSE2
			
 
				+#define HAS_SCALEARGBROWDOWNEVEN_SSE2
			
 
				+#define HAS_SCALECOLSUP2_SSE2
			
 
				+#define HAS_SCALEFILTERCOLS_SSSE3
			
 
				+#define HAS_SCALEROWDOWN2_SSSE3
			
 
				+#define HAS_SCALEROWDOWN34_SSSE3
			
 
				+#define HAS_SCALEROWDOWN38_SSSE3
			
 
				+#define HAS_SCALEROWDOWN4_SSSE3
			
 
				+#define HAS_SCALEADDROW_SSE2
			
 
				+#endif
			
 
				+
			
 
				+// The following are available on all x86 platforms, but
			
 
				+// require VS2012, clang 3.4 or gcc 4.7.
			
 
				+// The code supports NaCl but requires a new compiler and validator.
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
			
 
				+    defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
			
 
				+#define HAS_SCALEADDROW_AVX2
			
 
				+#define HAS_SCALEROWDOWN2_AVX2
			
 
				+#define HAS_SCALEROWDOWN4_AVX2
			
 
				+#endif
			
 
				+
			
 
				+// The following are available on Neon platforms:
			
 
				+#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
			
 
				+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
			
 
				+#define HAS_SCALEARGBCOLS_NEON
			
 
				+#define HAS_SCALEARGBROWDOWN2_NEON
			
 
				+#define HAS_SCALEARGBROWDOWNEVEN_NEON
			
 
				+#define HAS_SCALEFILTERCOLS_NEON
			
 
				+#define HAS_SCALEROWDOWN2_NEON
			
 
				+#define HAS_SCALEROWDOWN34_NEON
			
 
				+#define HAS_SCALEROWDOWN38_NEON
			
 
				+#define HAS_SCALEROWDOWN4_NEON
			
 
				+#define HAS_SCALEARGBFILTERCOLS_NEON
			
 
				+#endif
			
 
				+
			
 
				+// The following are available on MIPS platforms:
			
 
				+#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
			
 
				+    defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
			
 
				+#define HAS_SCALEROWDOWN2_DSPR2
			
 
				+#define HAS_SCALEROWDOWN4_DSPR2
			
 
				+#define HAS_SCALEROWDOWN34_DSPR2
			
 
				+#define HAS_SCALEROWDOWN38_DSPR2
			
 
				+#endif
			
 
				+
			
 
				+// Scale a plane (e.g. ARGB) vertically with bilinear interpolation.
			
 
				+void ScalePlaneVertical(int src_height,
			
 
				+                        int dst_width, int dst_height,
			
 
				+                        int src_stride, int dst_stride,
			
 
				+                        const uint8* src_argb, uint8* dst_argb,
			
 
				+                        int x, int y, int dy,
			
 
				+                        int bpp, enum FilterMode filtering);
			
 
				+
			
 
				+void ScalePlaneVertical_16(int src_height,
			
 
				+                           int dst_width, int dst_height,
			
 
				+                           int src_stride, int dst_stride,
			
 
				+                           const uint16* src_argb, uint16* dst_argb,
			
 
				+                           int x, int y, int dy,
			
 
				+                           int wpp, enum FilterMode filtering);
			
 
				+
			
 
				+// Simplify the filtering based on scale factors.
			
 
				+enum FilterMode ScaleFilterReduce(int src_width, int src_height,
			
 
				+                                  int dst_width, int dst_height,
			
 
				+                                  enum FilterMode filtering);
			
 
				+
			
 
				+// Divide num by div and return as 16.16 fixed point result.
			
 
				+int FixedDiv_C(int num, int div);
			
 
				+int FixedDiv_X86(int num, int div);
			
 
				+// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
			
 
				+int FixedDiv1_C(int num, int div);
			
 
				+int FixedDiv1_X86(int num, int div);
			
 
				+#ifdef HAS_FIXEDDIV_X86
			
 
				+#define FixedDiv FixedDiv_X86
			
 
				+#define FixedDiv1 FixedDiv1_X86
			
 
				+#else
			
 
				+#define FixedDiv FixedDiv_C
			
 
				+#define FixedDiv1 FixedDiv1_C
			
 
				+#endif
			
 
				+
			
 
				+// Compute slope values for stepping.
			
 
				+void ScaleSlope(int src_width, int src_height,
			
 
				+                int dst_width, int dst_height,
			
 
				+                enum FilterMode filtering,
			
 
				+                int* x, int* y, int* dx, int* dy);
			
 
				+
			
 
				+void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                     uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                        uint16* dst, int dst_width);
			
 
				+void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                           uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                              uint16* dst, int dst_width);
			
 
				+void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                        uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                           uint16* dst, int dst_width);
			
 
				+void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                     uint8* dst, int dst_width);
			
 
				+void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                        uint16* dst, int dst_width);
			
 
				+void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                        uint8* dst, int dst_width);
			
 
				+void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                           uint16* dst, int dst_width);
			
 
				+void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                      uint8* dst, int dst_width);
			
 
				+void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                         uint16* dst, int dst_width);
			
 
				+void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* d, int dst_width);
			
 
				+void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint16* d, int dst_width);
			
 
				+void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* d, int dst_width);
			
 
				+void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint16* d, int dst_width);
			
 
				+void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
			
 
				+                 int dst_width, int x, int dx);
			
 
				+void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
			
 
				+                    int dst_width, int x, int dx);
			
 
				+void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
			
 
				+                    int dst_width, int, int);
			
 
				+void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
			
 
				+                       int dst_width, int, int);
			
 
				+void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
			
 
				+                       int dst_width, int x, int dx);
			
 
				+void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
			
 
				+                          int dst_width, int x, int dx);
			
 
				+void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
			
 
				+                         int dst_width, int x, int dx);
			
 
				+void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
			
 
				+                            int dst_width, int x, int dx);
			
 
				+void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                      uint8* dst, int dst_width);
			
 
				+void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                         uint16* dst, int dst_width);
			
 
				+void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
			
 
				+                            ptrdiff_t src_stride,
			
 
				+                            uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
			
 
				+                               ptrdiff_t src_stride,
			
 
				+                               uint16* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint16* dst_ptr, int dst_width);
			
 
				+void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
			
 
				+void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
			
 
				+void ScaleARGBRowDown2_C(const uint8* src_argb,
			
 
				+                         ptrdiff_t src_stride,
			
 
				+                         uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
			
 
				+                               ptrdiff_t src_stride,
			
 
				+                               uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                            uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                            int src_stepx,
			
 
				+                            uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
			
 
				+                               ptrdiff_t src_stride,
			
 
				+                               int src_stepx,
			
 
				+                               uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
			
 
				+                     int dst_width, int x, int dx);
			
 
				+void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
			
 
				+                       int dst_width, int x, int dx);
			
 
				+void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
			
 
				+                        int dst_width, int, int);
			
 
				+void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
			
 
				+                           int dst_width, int x, int dx);
			
 
				+void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
			
 
				+                             int dst_width, int x, int dx);
			
 
				+
			
 
				+// Specialized scalers for x86.
			
 
				+void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                         uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                        uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                              uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                           uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                         uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                        uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                           uint8* dst_ptr, int dst_width);
			
 
				+
			
 
				+void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                          uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
			
 
				+                                ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
			
 
				+                                ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                          uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
			
 
				+                                ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
			
 
				+                                ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                             uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                   uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                  uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                             uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+
			
 
				+void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                              uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
			
 
				+                                    ptrdiff_t src_stride,
			
 
				+                                    uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
			
 
				+                                    ptrdiff_t src_stride,
			
 
				+                                    uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                              uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
			
 
				+                                    ptrdiff_t src_stride,
			
 
				+                                    uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
			
 
				+                                    ptrdiff_t src_stride,
			
 
				+                                    uint8* dst_ptr, int dst_width);
			
 
				+
			
 
				+void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
			
 
				+void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
			
 
				+void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
			
 
				+void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
			
 
				+
			
 
				+void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
			
 
				+                           int dst_width, int x, int dx);
			
 
				+void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
			
 
				+                       int dst_width, int x, int dx);
			
 
				+
			
 
				+
			
 
				+// ARGB Column functions
			
 
				+void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
			
 
				+                        int dst_width, int x, int dx);
			
 
				+void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
			
 
				+                               int dst_width, int x, int dx);
			
 
				+void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
			
 
				+                           int dst_width, int x, int dx);
			
 
				+void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
			
 
				+                              int dst_width, int x, int dx);
			
 
				+void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
			
 
				+                        int dst_width, int x, int dx);
			
 
				+void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
			
 
				+                                  int dst_width, int x, int dx);
			
 
				+void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
			
 
				+                            int dst_width, int x, int dx);
			
 
				+
			
 
				+// ARGB Row functions
			
 
				+void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                            uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                                  uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                               uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst, int dst_width);
			
 
				+void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                                  uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst, int dst_width);
			
 
				+void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                                uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
			
 
				+                                      ptrdiff_t src_stride,
			
 
				+                                      uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                                   uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* dst, int dst_width);
			
 
				+void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
			
 
				+                                      ptrdiff_t src_stride,
			
 
				+                                      uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                   uint8* dst, int dst_width);
			
 
				+
			
 
				+void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                               int src_stepx, uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                                  int src_stepx,
			
 
				+                                  uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                               int src_stepx,
			
 
				+                               uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                                  int src_stepx,
			
 
				+                                  uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                                   int src_stepx,
			
 
				+                                   uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
			
 
				+                                      ptrdiff_t src_stride,
			
 
				+                                      int src_stepx,
			
 
				+                                      uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                                   int src_stepx,
			
 
				+                                   uint8* dst_argb, int dst_width);
			
 
				+void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
			
 
				+                                      ptrdiff_t src_stride,
			
 
				+                                      int src_stepx,
			
 
				+                                      uint8* dst_argb, int dst_width);
			
 
				+
			
 
				+// ScaleRowDown2Box also used by planar functions
			
 
				+// NEON downscalers with interpolation.
			
 
				+
			
 
				+// Note - not static due to reuse in convert for 444 to 420.
			
 
				+void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                        uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                              uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                           uint8* dst, int dst_width);
			
 
				+
			
 
				+void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                        uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                           uint8* dst_ptr, int dst_width);
			
 
				+
			
 
				+// Down scale from 4 to 3 pixels. Use the neon multilane read/write
			
 
				+//  to load every 4th pixel into 4 different registers.
			
 
				+// Point samples 32 pixels to 24 pixels.
			
 
				+void ScaleRowDown34_NEON(const uint8* src_ptr,
			
 
				+                         ptrdiff_t src_stride,
			
 
				+                         uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
			
 
				+                               ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
			
 
				+                               ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+
			
 
				+// 32 -> 12
			
 
				+void ScaleRowDown38_NEON(const uint8* src_ptr,
			
 
				+                         ptrdiff_t src_stride,
			
 
				+                         uint8* dst_ptr, int dst_width);
			
 
				+// 32x3 -> 12x1
			
 
				+void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
			
 
				+                               ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+// 32x2 -> 12x1
			
 
				+void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
			
 
				+                               ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+
			
 
				+void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                  uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst, int dst_width);
			
 
				+void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                             uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                   uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                   uint8* dst_ptr, int dst_width);
			
 
				+// 32 -> 12
			
 
				+void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                             uint8* dst_ptr, int dst_width);
			
 
				+// 32x3 -> 12x1
			
 
				+void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+// 32x2 -> 12x1
			
 
				+void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                               uint8* dst_ptr, int dst_width);
			
 
				+
			
 
				+void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
			
 
				+void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
			
 
				+
			
 
				+void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
			
 
				+                          int dst_width, int x, int dx);
			
 
				+
			
 
				+void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
			
 
				+                              int dst_width, int x, int dx);
			
 
				+
			
 
				+void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                         uint8* dst, int dst_width);
			
 
				+void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst, int dst_width);
			
 
				+void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                         uint8* dst, int dst_width);
			
 
				+void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst, int dst_width);
			
 
				+void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                          uint8* dst, int dst_width);
			
 
				+void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* d, int dst_width);
			
 
				+void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* d, int dst_width);
			
 
				+void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                          uint8* dst, int dst_width);
			
 
				+void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width);
			
 
				+void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_SCALE_ROW_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/version.h
+++ b/src/jni/libyuv/include/libyuv/version.h
@@ -0,0 +1,16 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_VERSION_H_
			
 
				+
			
 
				+#define LIBYUV_VERSION 1580
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
			
--- a/src/jni/libyuv/include/libyuv/video_common.h
+++ b/src/jni/libyuv/include/libyuv/video_common.h
@@ -0,0 +1,184 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+// Common definitions for video, including fourcc and VideoFormat.
			
 
				+
			
 
				+#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_  // NOLINT
			
 
				+#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+//////////////////////////////////////////////////////////////////////////////
			
 
				+// Definition of FourCC codes
			
 
				+//////////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+// Convert four characters to a FourCC code: a is the low byte (bits 0-7), d the high byte (bits 24-31).
			
 
				+// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
			
 
				+// constants are used in a switch.
			
 
				+#ifdef __cplusplus  // C++ build: widen each character with static_cast.
			
 
				+#define FOURCC(a, b, c, d) ( \
			
 
				+    (static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
			
 
				+    (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
			
 
				+#else  // C build: same packing using C-style casts.
			
 
				+#define FOURCC(a, b, c, d) ( \
			
 
				+    ((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
			
 
				+    ((uint32)(c) << 16) | ((uint32)(d) << 24))  /* NOLINT */
			
 
				+#endif  // __cplusplus
			
 
				+
			
 
				+// Some pages discussing FourCC codes:
			
 
				+//   http://www.fourcc.org/yuv.php
			
 
				+//   http://v4l2spec.bytesex.org/spec/book1.htm
			
 
				+//   http://developer.apple.com/quicktime/icefloe/dispatch020.html
			
 
				+//   http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
			
 
				+//   http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
			
 
				+
			
 
				+// FourCC codes grouped according to implementation efficiency.
			
 
				+// Primary formats should convert in 1 efficient step.
			
 
				+// Secondary formats are converted in 2 steps.
			
 
				+// Auxiliary formats call primary converters.
			
 
				+enum FourCC {  // Each value packs four characters with FOURCC(); the first character is the low byte.
			
 
				+  // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
			
 
				+  FOURCC_I420 = FOURCC('I', '4', '2', '0'),
			
 
				+  FOURCC_I422 = FOURCC('I', '4', '2', '2'),
			
 
				+  FOURCC_I444 = FOURCC('I', '4', '4', '4'),
			
 
				+  FOURCC_I411 = FOURCC('I', '4', '1', '1'),
			
 
				+  FOURCC_I400 = FOURCC('I', '4', '0', '0'),
			
 
				+  FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
			
 
				+  FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
			
 
				+  FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
			
 
				+  FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
			
 
				+
			
 
				+  // 2 Secondary YUV formats: row biplanar.
			
 
				+  FOURCC_M420 = FOURCC('M', '4', '2', '0'),
			
 
				+  FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),  // deprecated.
			
 
				+
			
 
				+  // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
			
 
				+  FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
			
 
				+  FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
			
 
				+  FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
			
 
				+  FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
			
 
				+  FOURCC_RAW  = FOURCC('r', 'a', 'w', ' '),
			
 
				+  FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
			
 
				+  FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'),  // rgb565 LE.
			
 
				+  FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'),  // argb1555 LE.
			
 
				+  FOURCC_R444 = FOURCC('R', '4', '4', '4'),  // argb4444 LE.
			
 
				+
			
 
				+  // 4 Secondary RGB formats: 4 Bayer Patterns. deprecated.
			
 
				+  FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
			
 
				+  FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
			
 
				+  FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
			
 
				+  FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
			
 
				+
			
 
				+  // 1 Primary Compressed YUV format.
			
 
				+  FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
			
 
				+
			
 
				+  // 7 Auxiliary YUV variations: 3 with U and V planes swapped, 1 alias (YU12), plus J420, J400 and H420.
			
 
				+  FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
			
 
				+  FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
			
 
				+  FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
			
 
				+  FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Linux version of I420.
			
 
				+  FOURCC_J420 = FOURCC('J', '4', '2', '0'),
			
 
				+  FOURCC_J400 = FOURCC('J', '4', '0', '0'),  // unofficial fourcc
			
 
				+  FOURCC_H420 = FOURCC('H', '4', '2', '0'),  // unofficial fourcc
			
 
				+
			
 
				+  // 17 Auxiliary aliases.  CanonicalFourCC() maps these to canonical fourcc.
			
 
				+  FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'),  // Alias for I420.
			
 
				+  FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'),  // Alias for I422.
			
 
				+  FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'),  // Alias for I444.
			
 
				+  FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'),  // Alias for YUY2.
			
 
				+  FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'),  // Alias for YUY2 on Mac.
			
 
				+  FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'),  // Alias for UYVY.
			
 
				+  FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'),  // Alias for UYVY on Mac.
			
 
				+  FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'),  // Alias for MJPG.
			
 
				+  FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'),  // Alias for MJPG on Mac.
			
 
				+  FOURCC_BA81 = FOURCC('B', 'A', '8', '1'),  // Alias for BGGR.
			
 
				+  FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'),  // Alias for RAW.
			
 
				+  FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'),  // Alias for 24BG.
			
 
				+  FOURCC_CM32 = FOURCC(0, 0, 0, 32),  // Alias for BGRA kCMPixelFormat_32ARGB
			
 
				+  FOURCC_CM24 = FOURCC(0, 0, 0, 24),  // Alias for RAW kCMPixelFormat_24RGB
			
 
				+  FOURCC_L555 = FOURCC('L', '5', '5', '5'),  // Alias for RGBO.
			
 
				+  FOURCC_L565 = FOURCC('L', '5', '6', '5'),  // Alias for RGBP.
			
 
				+  FOURCC_5551 = FOURCC('5', '5', '5', '1'),  // Alias for RGBO.
			
 
				+
			
 
				+  // 1 Auxiliary compressed YUV format set aside for capturer.
			
 
				+  FOURCC_H264 = FOURCC('H', '2', '6', '4'),
			
 
				+
			
 
				+  // Match any fourcc.
			
 
				+  FOURCC_ANY = -1,
			
 
				+};
			
 
				+
			
 
				+enum FourCCBpp {  // One FOURCC_BPP_* value per FourCC name; by the naming these are bits per pixel, and 0 means unknown.
			
 
				+  // Canonical fourcc codes used in our code.
			
 
				+  FOURCC_BPP_I420 = 12,
			
 
				+  FOURCC_BPP_I422 = 16,
			
 
				+  FOURCC_BPP_I444 = 24,
			
 
				+  FOURCC_BPP_I411 = 12,
			
 
				+  FOURCC_BPP_I400 = 8,
			
 
				+  FOURCC_BPP_NV21 = 12,
			
 
				+  FOURCC_BPP_NV12 = 12,
			
 
				+  FOURCC_BPP_YUY2 = 16,
			
 
				+  FOURCC_BPP_UYVY = 16,
			
 
				+  FOURCC_BPP_M420 = 12,
			
 
				+  FOURCC_BPP_Q420 = 12,
			
 
				+  FOURCC_BPP_ARGB = 32,
			
 
				+  FOURCC_BPP_BGRA = 32,
			
 
				+  FOURCC_BPP_ABGR = 32,
			
 
				+  FOURCC_BPP_RGBA = 32,
			
 
				+  FOURCC_BPP_24BG = 24,
			
 
				+  FOURCC_BPP_RAW  = 24,
			
 
				+  FOURCC_BPP_RGBP = 16,
			
 
				+  FOURCC_BPP_RGBO = 16,
			
 
				+  FOURCC_BPP_R444 = 16,
			
 
				+  FOURCC_BPP_RGGB = 8,
			
 
				+  FOURCC_BPP_BGGR = 8,
			
 
				+  FOURCC_BPP_GRBG = 8,
			
 
				+  FOURCC_BPP_GBRG = 8,
			
 
				+  FOURCC_BPP_YV12 = 12,
			
 
				+  FOURCC_BPP_YV16 = 16,
			
 
				+  FOURCC_BPP_YV24 = 24,
			
 
				+  FOURCC_BPP_YU12 = 12,
			
 
				+  FOURCC_BPP_J420 = 12,
			
 
				+  FOURCC_BPP_J400 = 8,
			
 
				+  FOURCC_BPP_H420 = 12,
			
 
				+  FOURCC_BPP_MJPG = 0,  // 0 means unknown.
			
 
				+  FOURCC_BPP_H264 = 0,
			
 
				+  FOURCC_BPP_IYUV = 12,
			
 
				+  FOURCC_BPP_YU16 = 16,
			
 
				+  FOURCC_BPP_YU24 = 24,
			
 
				+  FOURCC_BPP_YUYV = 16,
			
 
				+  FOURCC_BPP_YUVS = 16,
			
 
				+  FOURCC_BPP_HDYC = 16,
			
 
				+  FOURCC_BPP_2VUY = 16,
			
 
				+  FOURCC_BPP_JPEG = 1,  // NOTE(review): enum FourCC lists JPEG as an alias for MJPG, whose value here is 0; confirm 1 is intended.
			
 
				+  FOURCC_BPP_DMB1 = 1,  // NOTE(review): same question as FOURCC_BPP_JPEG (DMB1 is listed as an MJPG alias).
			
 
				+  FOURCC_BPP_BA81 = 8,
			
 
				+  FOURCC_BPP_RGB3 = 24,
			
 
				+  FOURCC_BPP_BGR3 = 24,
			
 
				+  FOURCC_BPP_CM32 = 32,
			
 
				+  FOURCC_BPP_CM24 = 24,
			
 
				+
			
 
				+  // Match any fourcc.
			
 
				+  FOURCC_BPP_ANY  = 0,  // 0 means unknown.
			
 
				+};
			
 
				+
			
 
				+// Converts fourcc aliases into canonical ones.
			
 
				+LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+#endif  // INCLUDE_LIBYUV_VIDEO_COMMON_H_  NOLINT
			
--- a/src/jni/libyuv/source/compare.cc
+++ b/src/jni/libyuv/source/compare.cc
@@ -0,0 +1,340 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/compare.h"
			
 
				+
			
 
				+#include <float.h>
			
 
				+#include <math.h>
			
 
				+#ifdef _OPENMP
			
 
				+#include <omp.h>
			
 
				+#endif
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+#include "libyuv/compare_row.h"
			
 
				+#include "libyuv/cpu_id.h"
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/video_common.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Computes the djb2 hash of |count| bytes at |src|, continuing from |seed|.
				+// A hash seed of 5381 is recommended.
				+// The buffer is fed to the selected row hasher in 32768-byte chunks, then
				+// in one call covering the largest remaining multiple of 16 bytes; the
				+// final 1..15 bytes always go through HashDjb2_C.
				+LIBYUV_API
				+uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
				+  const int kChunkBytes = 32768;  // 1 << 15.
				+  uint32 (*hash_row)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
				+  int tail_bytes;
				+  int simd_bytes;
				+#if defined(HAS_HASHDJB2_SSE41)
				+  if (TestCpuFlag(kCpuHasSSE41)) {
				+    hash_row = HashDjb2_SSE41;
				+  }
				+#endif
				+#if defined(HAS_HASHDJB2_AVX2)
				+  if (TestCpuFlag(kCpuHasAVX2)) {
				+    hash_row = HashDjb2_AVX2;
				+  }
				+#endif
				+
				+  // Whole chunks.
				+  for (; count >= (uint64)(kChunkBytes); count -= kChunkBytes) {
				+    seed = hash_row(src, kChunkBytes, seed);
				+    src += kChunkBytes;
				+  }
				+
				+  // Fewer than kChunkBytes bytes remain, so the count now fits in an int.
				+  tail_bytes = (int)(count);
				+  simd_bytes = tail_bytes & ~15;  // Largest multiple of 16.
				+  if (simd_bytes) {
				+    seed = hash_row(src, simd_bytes, seed);
				+    src += simd_bytes;
				+  }
				+  tail_bytes &= 15;  // Leftover 0..15 bytes.
				+  if (tail_bytes) {
				+    seed = HashDjb2_C(src, tail_bytes, seed);
				+  }
				+  return seed;
				+}
			
 
				+
			
 
				+// Scans one row of 4-byte pixels, testing byte 0 and then byte 3 of each
				+// pixel against 255.
				+// Returns FOURCC_BGRA at the first pixel whose byte 0 is not 255,
				+// FOURCC_ARGB at the first pixel whose byte 3 is not 255, or 0 when every
				+// tested byte is 255.
				+static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
				+  // Pixels covered by the pairwise part of the scan (width rounded down to
				+  // an even number); an odd last pixel is handled separately below.
				+  const int even_width = width & ~1;
				+  int x;
				+  for (x = 0; x < even_width; ++x) {
				+    if (argb[0] != 255) {  // Byte 0 is not an alpha of 255.
				+      return FOURCC_BGRA;
				+    }
				+    if (argb[3] != 255) {  // Byte 3 is not an alpha of 255.
				+      return FOURCC_ARGB;
				+    }
				+    argb += 4;
				+  }
				+  if (width & 1) {
				+    if (argb[0] != 255) {  // Byte 0 is not an alpha of 255.
				+      return FOURCC_BGRA;
				+    }
				+    if (argb[3] != 255) {  // Byte 3 is not an alpha of 255.
				+      return FOURCC_ARGB;
				+    }
				+  }
				+  return 0;
				+}
			
 
				+
			
 
				+// Scans an opaque argb image row by row and reports which fourcc the
				+// position of the alpha bytes indicates.
				+// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if no row was conclusive.
				+LIBYUV_API
				+uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
				+  int row;
				+
				+  // A stride of exactly width * 4 means the rows are contiguous, so the
				+  // whole image can be scanned as a single long row.
				+  if (stride_argb == width * 4) {
				+    width *= height;
				+    height = 1;
				+    stride_argb = 0;
				+  }
				+  for (row = 0; row < height; ++row) {
				+    const uint32 detected = ARGBDetectRow_C(argb, width);
				+    if (detected != 0) {
				+      return detected;  // Stop at the first conclusive row.
				+    }
				+    argb += stride_argb;
				+  }
				+  return 0;
				+}
			
 
				+
			
 
				+// TODO(fbarchard): Refactor into row function.
				+// Returns the sum of squared differences between the first |count| bytes
				+// of |src_a| and |src_b|.
				+LIBYUV_API
				+uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
				+                             int count) {
				+  // A squared byte difference is at most 255 * 255 = 65025, so 65536 of
				+  // them still fit in the uint32 returned by the row functions. Each block
				+  // of 65536 bytes is therefore summed in 32 bits and accumulated into a
				+  // uint64.
				+  const int kBlockSize = 65536;
				+  // Bytes covered by whole blocks.
				+  const int block_bytes = count & ~(kBlockSize - 1);
				+  // Multiple-of-32 run that follows the whole blocks.
				+  const int simd_bytes = count & (kBlockSize - 1) & ~31;
				+  // Final 0..31 bytes, always summed by the C row function.
				+  const int tail_bytes = count & 31;
				+  uint64 total = 0;
				+  int offset;
				+  uint32 (*sum_row)(const uint8* src_a, const uint8* src_b, int count) =
				+      SumSquareError_C;
				+#if defined(HAS_SUMSQUAREERROR_NEON)
				+  if (TestCpuFlag(kCpuHasNEON)) {
				+    sum_row = SumSquareError_NEON;
				+  }
				+#endif
				+#if defined(HAS_SUMSQUAREERROR_SSE2)
				+  if (TestCpuFlag(kCpuHasSSE2)) {
				+    // Note only used for multiples of 16 so count is not checked.
				+    sum_row = SumSquareError_SSE2;
				+  }
				+#endif
				+#if defined(HAS_SUMSQUAREERROR_AVX2)
				+  if (TestCpuFlag(kCpuHasAVX2)) {
				+    // Note only used for multiples of 32 so count is not checked.
				+    sum_row = SumSquareError_AVX2;
				+  }
				+#endif
				+#ifdef _OPENMP
				+#pragma omp parallel for reduction(+: total)
				+#endif
				+  for (offset = 0; offset < block_bytes; offset += kBlockSize) {
				+    total += sum_row(src_a + offset, src_b + offset, kBlockSize);
				+  }
				+  src_a += block_bytes;
				+  src_b += block_bytes;
				+  if (simd_bytes) {
				+    total += sum_row(src_a, src_b, simd_bytes);
				+    src_a += simd_bytes;
				+    src_b += simd_bytes;
				+  }
				+  if (tail_bytes) {
				+    total += SumSquareError_C(src_a, src_b, tail_bytes);
				+  }
				+  return total;
				+}
			
 
				+
			
 
				+// Returns the sum of squared differences between two planes of
				+// |width| x |height| bytes, walking each plane with its own stride.
				+LIBYUV_API
				+uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
				+                                  const uint8* src_b, int stride_b,
				+                                  int width, int height) {
				+  uint64 total = 0;
				+  int row_bytes = width;
				+  int rows = height;
				+  int y;
				+  // When both strides equal the width the rows are contiguous, so the
				+  // whole plane is handled as one long row.
				+  if (stride_a == width &&
				+      stride_b == width) {
				+    row_bytes = width * height;
				+    rows = 1;
				+    stride_a = 0;
				+    stride_b = 0;
				+  }
				+  for (y = 0; y < rows; ++y) {
				+    total += ComputeSumSquareError(src_a, src_b, row_bytes);
				+    src_a += stride_a;
				+    src_b += stride_b;
				+  }
				+  return total;
				+}
			
 
				+
			
 
				+// Converts a sum of squared errors over |count| samples into
				+// 10 * log10(255^2 * count / sse), limited to at most kMaxPsnr.
				+// An |sse| of 0 yields kMaxPsnr, which also avoids the divide by 0.
				+LIBYUV_API
				+double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
				+  double inverse_mse;
				+  double psnr;
				+  if (sse == 0) {
				+    return kMaxPsnr;      // Limit to prevent divide by 0
				+  }
				+  // count / sse is the reciprocal of the mean squared error.
				+  inverse_mse = (double)(count) / (double)(sse);
				+  psnr = 10.0 * log10(255.0 * 255.0 * inverse_mse);
				+  return (psnr > kMaxPsnr) ? kMaxPsnr : psnr;
				+}
			
 
				+
			
 
				+// Returns the PSNR between two |width| x |height| planes of 8-bit
				+// samples, as computed by SumSquareErrorToPsnr() from the plane's sum of
				+// squared errors.
				+LIBYUV_API
				+double CalcFramePsnr(const uint8* src_a, int stride_a,
				+                     const uint8* src_b, int stride_b,
				+                     int width, int height) {
				+  // Form the sample count in 64 bits: width * height evaluated in int
				+  // overflows (undefined behavior) once the plane exceeds INT_MAX samples.
				+  const uint64 samples = (uint64)((int64)(width) * height);
				+  const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
				+                                                src_b, stride_b,
				+                                                width, height);
				+  return SumSquareErrorToPsnr(sse, samples);
				+}
			
 
				+
			
 
				+// Returns the combined PSNR of two I420 frames: the squared errors of the
				+// Y, U and V planes are summed and converted by SumSquareErrorToPsnr()
				+// over the total number of samples. The U and V planes are
				+// (width + 1) / 2 by (height + 1) / 2.
				+LIBYUV_API
				+double I420Psnr(const uint8* src_y_a, int stride_y_a,
				+                const uint8* src_u_a, int stride_u_a,
				+                const uint8* src_v_a, int stride_v_a,
				+                const uint8* src_y_b, int stride_y_b,
				+                const uint8* src_u_b, int stride_u_b,
				+                const uint8* src_v_b, int stride_v_b,
				+                int width, int height) {
				+  const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
				+                                                  src_y_b, stride_y_b,
				+                                                  width, height);
				+  const int width_uv = (width + 1) >> 1;
				+  const int height_uv = (height + 1) >> 1;
				+  const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
				+                                                  src_u_b, stride_u_b,
				+                                                  width_uv, height_uv);
				+  const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
				+                                                  src_v_b, stride_v_b,
				+                                                  width_uv, height_uv);
				+  // Count samples in 64 bits: the total is about 1.5 * width * height and
				+  // overflows int (undefined behavior) for very large frames when it is
				+  // evaluated in int arithmetic.
				+  const int64 samples_y = (int64)(width) * height;
				+  const int64 samples_uv = (int64)(width_uv) * height_uv;
				+  const uint64 samples = (uint64)(samples_y + 2 * samples_uv);
				+  const uint64 sse = sse_y + sse_u + sse_v;
				+  return SumSquareErrorToPsnr(sse, samples);
				+}
			
 
				+
			
 
				+// SSIM constants, pre-scaled by 64^2.
				+static const int64 cc1 =  26634;  // 64^2 * (.01 * 255)^2
				+static const int64 cc2 = 239708;  // 64^2 * (.03 * 255)^2
				+
				+// Computes the SSIM of one 8x8 window whose top-left samples are at
				+// |src_a| and |src_b|, using integer sums over the 64 pixels.
				+// Returns DBL_MAX when the denominator is 0.
				+static double Ssim8x8_C(const uint8* src_a, int stride_a,
				+                        const uint8* src_b, int stride_b) {
				+  int64 total_a = 0;   // Sum of a.
				+  int64 total_b = 0;   // Sum of b.
				+  int64 total_aa = 0;  // Sum of a * a.
				+  int64 total_bb = 0;  // Sum of b * b.
				+  int64 total_ab = 0;  // Sum of a * b.
				+  int row;
				+
				+  for (row = 0; row < 8; ++row) {
				+    int col;
				+    for (col = 0; col < 8; ++col) {
				+      const int a = src_a[col];
				+      const int b = src_b[col];
				+      total_a += a;
				+      total_b += b;
				+      total_aa += a * a;
				+      total_bb += b * b;
				+      total_ab += a * b;
				+    }
				+    src_a += stride_a;
				+    src_b += stride_b;
				+  }
				+
				+  {
				+    const int64 pixels = 64;
				+    // Scale the constants by the number of pixels.
				+    const int64 c1 = (cc1 * pixels * pixels) >> 12;
				+    const int64 c2 = (cc2 * pixels * pixels) >> 12;
				+
				+    const int64 cross = total_a * total_b;
				+    const int64 square_a = total_a * total_a;
				+    const int64 square_b = total_b * total_b;
				+
				+    const int64 numerator = (2 * cross + c1) *
				+                            (2 * pixels * total_ab - 2 * cross + c2);
				+    const int64 denominator = (square_a + square_b + c1) *
				+                              (pixels * total_aa - square_a +
				+                               pixels * total_bb - square_b + c2);
				+
				+    if (denominator == 0) {
				+      return DBL_MAX;
				+    }
				+    return (double)(numerator) / (double)(denominator);
				+  }
				+}
			
 
				+
			
 
				+// Averages the 8x8 SSIM over a moving window whose top-left corner steps
				+// along a 4x4 pixel grid. The overlap lets windows straddle block
				+// boundaries, which penalizes blocking artifacts.
				+// Window origins satisfy y < height - 8 and x < width - 8; when no window
				+// fits, the final division is 0.0 / 0.
				+LIBYUV_API
				+double CalcFrameSsim(const uint8* src_a, int stride_a,
				+                     const uint8* src_b, int stride_b,
				+                     int width, int height) {
				+  double (*ssim_window)(const uint8* src_a, int stride_a,
				+                        const uint8* src_b, int stride_b) = Ssim8x8_C;
				+  double sum = 0;
				+  int windows = 0;
				+  int y;
				+
				+  // One sample per 4x4 grid location.
				+  for (y = 0; y < height - 8; y += 4) {
				+    int x;
				+    for (x = 0; x < width - 8; x += 4) {
				+      sum += ssim_window(src_a + x, stride_a, src_b + x, stride_b);
				+      ++windows;
				+    }
				+    // Advance both planes by four rows.
				+    src_a += stride_a * 4;
				+    src_b += stride_b * 4;
				+  }
				+
				+  return sum / windows;
				+}
			
 
				+
			
 
				+// Returns the weighted SSIM of two I420 frames: 0.8 for the Y plane plus
				+// 0.1 for each of the U and V planes. The U and V planes are
				+// (width + 1) / 2 by (height + 1) / 2.
				+LIBYUV_API
				+double I420Ssim(const uint8* src_y_a, int stride_y_a,
				+                const uint8* src_u_a, int stride_u_a,
				+                const uint8* src_v_a, int stride_v_a,
				+                const uint8* src_y_b, int stride_y_b,
				+                const uint8* src_u_b, int stride_u_b,
				+                const uint8* src_v_b, int stride_v_b,
				+                int width, int height) {
				+  const int chroma_width = (width + 1) >> 1;
				+  const int chroma_height = (height + 1) >> 1;
				+  const double luma = CalcFrameSsim(src_y_a, stride_y_a,
				+                                    src_y_b, stride_y_b, width, height);
				+  const double chroma_u = CalcFrameSsim(src_u_a, stride_u_a,
				+                                        src_u_b, stride_u_b,
				+                                        chroma_width, chroma_height);
				+  const double chroma_v = CalcFrameSsim(src_v_a, stride_v_a,
				+                                        src_v_b, stride_v_b,
				+                                        chroma_width, chroma_height);
				+  return luma * 0.8 + 0.1 * (chroma_u + chroma_v);
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/compare_common.cc
+++ b/src/jni/libyuv/source/compare_common.cc
@@ -0,0 +1,44 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#include "libyuv/compare_row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Portable C row function: returns the sum of squared differences between
				+// the first |count| bytes of |src_a| and |src_b|. A |count| of 0 or less
				+// returns 0.
				+uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
				+  uint32 total = 0u;
				+  int remaining;
				+  for (remaining = count; remaining > 0; --remaining) {
				+    const int delta = *src_a++ - *src_b++;
				+    total += (uint32)(delta * delta);
				+  }
				+  return total;
				+}
			
 
				+
			
 
				+// Internal C version of HashDjb2 with an int sized count for efficiency.
				+// Each byte updates the hash as hash = hash * 33 + byte (32-bit
				+// wraparound). A hash seed of 5381 is recommended.
				+uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
				+  uint32 hash = seed;
				+  for (; count > 0; --count) {
				+    hash = hash * 33u + *src++;
				+  }
				+  return hash;
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/compare_gcc.cc
+++ b/src/jni/libyuv/source/compare_gcc.cc
@@ -0,0 +1,151 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#include "libyuv/compare_row.h"
			
 
				+#include "libyuv/row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// This module is for GCC x86 and x64.
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && \
			
 
				+    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
			
 
				+
			
 
				+uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {  // SSE2 sum of squared byte differences; consumes 16 bytes per loop pass and loops while the remaining count is > 0.
			
 
				+  uint32 sse;
			
 
				+  asm volatile (
			
 
				+    "pxor      %%xmm0,%%xmm0                   \n"  // xmm0 = 0: four 32-bit partial sums.
			
 
				+    "pxor      %%xmm5,%%xmm5                   \n"  // xmm5 = 0: zero source for the byte->word unpacks.
			
 
				+    LABELALIGN
			
 
				+  "1:                                          \n"
			
 
				+    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"  // xmm1 = 16 bytes of src_a (unaligned load).
			
 
				+    "lea       " MEMLEA(0x10, 0) ",%0          \n"  // src_a += 16.
			
 
				+    "movdqu    " MEMACCESS(1) ",%%xmm2         \n"  // xmm2 = 16 bytes of src_b.
			
 
				+    "lea       " MEMLEA(0x10, 1) ",%1          \n"  // src_b += 16.
			
 
				+    "movdqa    %%xmm1,%%xmm3                   \n"  // Keep a copy of a.
			
 
				+    "psubusb   %%xmm2,%%xmm1                   \n"  // xmm1 = saturate(a - b).
			
 
				+    "psubusb   %%xmm3,%%xmm2                   \n"  // xmm2 = saturate(b - a).
			
 
				+    "por       %%xmm2,%%xmm1                   \n"  // One of the two is 0, so OR gives |a - b|.
			
 
				+    "movdqa    %%xmm1,%%xmm2                   \n"
			
 
				+    "punpcklbw %%xmm5,%%xmm1                   \n"  // Low 8 differences widened to 16 bits.
			
 
				+    "punpckhbw %%xmm5,%%xmm2                   \n"  // High 8 differences widened to 16 bits.
			
 
				+    "pmaddwd   %%xmm1,%%xmm1                   \n"  // Square and add adjacent pairs -> 4 dwords.
			
 
				+    "pmaddwd   %%xmm2,%%xmm2                   \n"
			
 
				+    "paddd     %%xmm1,%%xmm0                   \n"  // Accumulate into the partial sums.
			
 
				+    "paddd     %%xmm2,%%xmm0                   \n"
			
 
				+    "sub       $0x10,%2                        \n"  // count -= 16.
			
 
				+    "jg        1b                              \n"  // Loop while count > 0.
			
 
				+    // Horizontal add of the four partial sums into the low dword.
			
 
				+    "pshufd    $0xee,%%xmm0,%%xmm1             \n"  // Bring dwords 2,3 down to 0,1.
			
 
				+    "paddd     %%xmm1,%%xmm0                   \n"
			
 
				+    "pshufd    $0x1,%%xmm0,%%xmm1              \n"  // Bring dword 1 down to 0.
			
 
				+    "paddd     %%xmm1,%%xmm0                   \n"
			
 
				+    "movd      %%xmm0,%3                       \n"  // sse = low dword of xmm0.
			
 
				+
			
 
				+  : "+r"(src_a),      // %0
			
 
				+    "+r"(src_b),      // %1
			
 
				+    "+r"(count),      // %2
			
 
				+    "=g"(sse)         // %3
			
 
				+  :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
			
 
				+  );
			
 
				+  return sse;
			
 
				+}
			
 
				+
			
 
				+// Multiplier tables for HashDjb2_SSE41: powers of 33 reduced modulo 2^32.
				+// They are only read (as "m" asm inputs), so they are declared const.
				+// kHash16x33 scales the running hash across a 16-byte step; only its low
				+// lane is non-zero.
				+static const uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
				+// Multipliers for bytes 0..3 of a 16-byte block.
				+static const uvec32 kHashMul0 = {
				+  0x0c3525e1,  // 33 ^ 15
				+  0xa3476dc1,  // 33 ^ 14
				+  0x3b4039a1,  // 33 ^ 13
				+  0x4f5f0981,  // 33 ^ 12
				+};
				+// Multipliers for bytes 4..7.
				+static const uvec32 kHashMul1 = {
				+  0x30f35d61,  // 33 ^ 11
				+  0x855cb541,  // 33 ^ 10
				+  0x040a9121,  // 33 ^ 9
				+  0x747c7101,  // 33 ^ 8
				+};
				+// Multipliers for bytes 8..11.
				+static const uvec32 kHashMul2 = {
				+  0xec41d4e1,  // 33 ^ 7
				+  0x4cfa3cc1,  // 33 ^ 6
				+  0x025528a1,  // 33 ^ 5
				+  0x00121881,  // 33 ^ 4
				+};
				+// Multipliers for bytes 12..15.
				+static const uvec32 kHashMul3 = {
				+  0x00008c61,  // 33 ^ 3
				+  0x00000441,  // 33 ^ 2
				+  0x00000021,  // 33 ^ 1
				+  0x00000001,  // 33 ^ 0
				+};
			
 
				+
			
 
				+uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {  // SSE4.1 djb2 row hasher: each loop pass folds 16 bytes as hash = hash * 33^16 + sum(byte[i] * 33^(15 - i)).
			
 
				+  uint32 hash;
			
 
				+  asm volatile (
			
 
				+    "movd      %2,%%xmm0                       \n"  // xmm0 low dword = seed (running hash).
			
 
				+    "pxor      %%xmm7,%%xmm7                   \n"  // xmm7 = 0: zero source for the unpacks.
			
 
				+    "movdqa    %4,%%xmm6                       \n"  // xmm6 = kHash16x33.
			
 
				+    LABELALIGN
			
 
				+  "1:                                          \n"
			
 
				+    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"  // xmm1 = next 16 source bytes.
			
 
				+    "lea       " MEMLEA(0x10, 0) ",%0          \n"  // src += 16.
			
 
				+    "pmulld    %%xmm6,%%xmm0                   \n"  // hash *= 33^16; the other lanes are multiplied by 0.
			
 
				+    "movdqa    %5,%%xmm5                       \n"  // xmm5 = kHashMul0.
			
 
				+    "movdqa    %%xmm1,%%xmm2                   \n"
			
 
				+    "punpcklbw %%xmm7,%%xmm2                   \n"  // Bytes 0..7 widened to words.
			
 
				+    "movdqa    %%xmm2,%%xmm3                   \n"
			
 
				+    "punpcklwd %%xmm7,%%xmm3                   \n"  // Bytes 0..3 widened to dwords.
			
 
				+    "pmulld    %%xmm5,%%xmm3                   \n"  // ... times kHashMul0.
			
 
				+    "movdqa    %6,%%xmm5                       \n"  // xmm5 = kHashMul1.
			
 
				+    "movdqa    %%xmm2,%%xmm4                   \n"
			
 
				+    "punpckhwd %%xmm7,%%xmm4                   \n"  // Bytes 4..7 widened to dwords.
			
 
				+    "pmulld    %%xmm5,%%xmm4                   \n"  // ... times kHashMul1.
			
 
				+    "movdqa    %7,%%xmm5                       \n"  // xmm5 = kHashMul2.
			
 
				+    "punpckhbw %%xmm7,%%xmm1                   \n"  // Bytes 8..15 widened to words.
			
 
				+    "movdqa    %%xmm1,%%xmm2                   \n"
			
 
				+    "punpcklwd %%xmm7,%%xmm2                   \n"  // Bytes 8..11 widened to dwords.
			
 
				+    "pmulld    %%xmm5,%%xmm2                   \n"  // ... times kHashMul2.
			
 
				+    "movdqa    %8,%%xmm5                       \n"  // xmm5 = kHashMul3.
			
 
				+    "punpckhwd %%xmm7,%%xmm1                   \n"  // Bytes 12..15 widened to dwords.
			
 
				+    "pmulld    %%xmm5,%%xmm1                   \n"  // ... times kHashMul3.
			
 
				+    "paddd     %%xmm4,%%xmm3                   \n"  // Add the four product vectors together.
			
 
				+    "paddd     %%xmm2,%%xmm1                   \n"
			
 
				+    "paddd     %%xmm3,%%xmm1                   \n"
			
 
				+    "pshufd    $0xe,%%xmm1,%%xmm2              \n"  // Horizontal add: dwords 2,3 onto 0,1.
			
 
				+    "paddd     %%xmm2,%%xmm1                   \n"
			
 
				+    "pshufd    $0x1,%%xmm1,%%xmm2              \n"  // Horizontal add: dword 1 onto 0.
			
 
				+    "paddd     %%xmm2,%%xmm1                   \n"
			
 
				+    "paddd     %%xmm1,%%xmm0                   \n"  // hash += sum of the 16 products (low dword).
			
 
				+    "sub       $0x10,%1                        \n"  // count -= 16.
			
 
				+    "jg        1b                              \n"  // Loop while count > 0.
			
 
				+    "movd      %%xmm0,%3                       \n"  // hash = low dword of xmm0.
			
 
				+  : "+r"(src),        // %0
			
 
				+    "+r"(count),      // %1
			
 
				+    "+rm"(seed),      // %2
			
 
				+    "=g"(hash)        // %3
			
 
				+  : "m"(kHash16x33),  // %4
			
 
				+    "m"(kHashMul0),   // %5
			
 
				+    "m"(kHashMul1),   // %6
			
 
				+    "m"(kHashMul2),   // %7
			
 
				+    "m"(kHashMul3)    // %8
			
 
				+  : "memory", "cc"
			
 
				+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
			
 
				+  );
			
 
				+  return hash;
			
 
				+}
			
 
				+#endif  // defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
--- a/src/jni/libyuv/source/compare_neon.cc
+++ b/src/jni/libyuv/source/compare_neon.cc
@@ -0,0 +1,66 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#include "libyuv/compare_row.h"
			
 
				+#include "libyuv/row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
			
 
				+    !defined(__aarch64__)
			
 
				+
			
 
				+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {  // returns the sum of squared byte differences between src_a and src_b (32-bit ARM NEON)
			
 
				+  volatile uint32 sse;  // written by the asm block as output operand %3
			
 
				+  asm volatile (  // consumes 16 bytes per pass and repeats while count stays > 0; presumably count is a positive multiple of 16 - TODO confirm with callers
			
 
				+    "vmov.u8    q8, #0                         \n"  // clear the four 32-bit accumulators q8-q11
			
 
				+    "vmov.u8    q10, #0                        \n"
			
 
				+    "vmov.u8    q9, #0                         \n"
			
 
				+    "vmov.u8    q11, #0                        \n"
			
 
				+
			
 
				+  "1:                                          \n"  // loop head
			
 
				+    MEMACCESS(0)
			
 
				+    "vld1.8     {q0}, [%0]!                    \n"  // load 16 bytes of src_a, post-increment the pointer
			
 
				+    MEMACCESS(1)
			
 
				+    "vld1.8     {q1}, [%1]!                    \n"  // load 16 bytes of src_b, post-increment the pointer
			
 
				+    "subs       %2, %2, #16                    \n"  // count -= 16; sets the flags tested by bgt below
			
 
				+    "vsubl.u8   q2, d0, d2                     \n"  // widen to 16 bits: a - b for bytes 0-7
			
 
				+    "vsubl.u8   q3, d1, d3                     \n"  // widen to 16 bits: a - b for bytes 8-15
			
 
				+    "vmlal.s16  q8, d4, d4                     \n"  // accumulate squared differences into 32-bit lanes
			
 
				+    "vmlal.s16  q9, d6, d6                     \n"
			
 
				+    "vmlal.s16  q10, d5, d5                    \n"
			
 
				+    "vmlal.s16  q11, d7, d7                    \n"
			
 
				+    "bgt        1b                             \n"  // repeat while count > 0
			
 
				+
			
 
				+    "vadd.u32   q8, q8, q9                     \n"  // fold the four accumulators into q11
			
 
				+    "vadd.u32   q10, q10, q11                  \n"
			
 
				+    "vadd.u32   q11, q8, q10                   \n"
			
 
				+    "vpaddl.u32 q1, q11                        \n"  // pairwise add the four 32-bit lanes into two 64-bit sums
			
 
				+    "vadd.u64   d0, d2, d3                     \n"  // add the two 64-bit sums
			
 
				+    "vmov.32    %3, d0[0]                      \n"  // low 32 bits of the total become the result
			
 
				+    : "+r"(src_a),  // %0
			
 
				+      "+r"(src_b),  // %1
			
 
				+      "+r"(count),  // %2
			
 
				+      "=r"(sse)  // %3
			
 
				+    :
			
 
				+    : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");  // clobbers: every NEON register touched above
			
 
				+  return sse;
			
 
				+}
			
 
				+
			
 
				+#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && !defined(__aarch64__)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/compare_neon64.cc
+++ b/src/jni/libyuv/source/compare_neon64.cc
@@ -0,0 +1,64 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#include "libyuv/compare_row.h"
			
 
				+#include "libyuv/row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
			
 
				+
			
 
				+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {  // returns the sum of squared byte differences between src_a and src_b (AArch64 NEON)
			
 
				+  volatile uint32 sse;  // written by the asm block as output operand %3
			
 
				+  asm volatile (  // consumes 16 bytes per pass and repeats while count stays > 0; presumably count is a positive multiple of 16 - TODO confirm with callers
			
 
				+    "eor        v16.16b, v16.16b, v16.16b      \n"  // clear the four 32-bit accumulators v16-v19
			
 
				+    "eor        v18.16b, v18.16b, v18.16b      \n"
			
 
				+    "eor        v17.16b, v17.16b, v17.16b      \n"
			
 
				+    "eor        v19.16b, v19.16b, v19.16b      \n"
			
 
				+
			
 
				+  "1:                                          \n"  // loop head
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1        {v0.16b}, [%0], #16            \n"  // load 16 bytes of src_a, post-increment the pointer
			
 
				+    MEMACCESS(1)
			
 
				+    "ld1        {v1.16b}, [%1], #16            \n"  // load 16 bytes of src_b, post-increment the pointer
			
 
				+    "subs       %w2, %w2, #16                  \n"  // count -= 16; sets the flags tested by b.gt below
			
 
				+    "usubl      v2.8h, v0.8b, v1.8b            \n"  // widen to 16 bits: a - b for bytes 0-7
			
 
				+    "usubl2     v3.8h, v0.16b, v1.16b          \n"  // widen to 16 bits: a - b for bytes 8-15
			
 
				+    "smlal      v16.4s, v2.4h, v2.4h           \n"  // accumulate squared differences (low four halfwords)
			
 
				+    "smlal      v17.4s, v3.4h, v3.4h           \n"
			
 
				+    "smlal2     v18.4s, v2.8h, v2.8h           \n"  // accumulate squared differences (high four halfwords)
			
 
				+    "smlal2     v19.4s, v3.8h, v3.8h           \n"
			
 
				+    "b.gt       1b                             \n"  // repeat while count > 0
			
 
				+
			
 
				+    "add        v16.4s, v16.4s, v17.4s         \n"  // fold the four accumulators into v19
			
 
				+    "add        v18.4s, v18.4s, v19.4s         \n"
			
 
				+    "add        v19.4s, v16.4s, v18.4s         \n"
			
 
				+    "addv       s0, v19.4s                     \n"  // horizontal add of the four 32-bit lanes
			
 
				+    "fmov       %w3, s0                        \n"  // move the 32-bit total to the result register
			
 
				+    : "+r"(src_a),  // %0
			
 
				+      "+r"(src_b),  // %1
			
 
				+      "+r"(count),  // %2
			
 
				+      "=r"(sse)  // %3
			
 
				+    :
			
 
				+    : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");  // NOTE(review): no "memory" clobber here, unlike the 32-bit ARM version - confirm this is intentional
			
 
				+  return sse;
			
 
				+}
			
 
				+
			
 
				+#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/compare_win.cc
+++ b/src/jni/libyuv/source/compare_win.cc
@@ -0,0 +1,222 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#include "libyuv/compare_row.h"
			
 
				+#include "libyuv/row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// This module is for 32 bit Visual C x86 and clangcl
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
			
 
				+
			
 
				+__declspec(naked)  // naked: the asm body reads its arguments from the stack and issues its own ret
			
 
				+uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {  // returns the sum of squared byte differences between src_a and src_b
			
 
				+  __asm {
			
 
				+    mov        eax, [esp + 4]    // src_a
			
 
				+    mov        edx, [esp + 8]    // src_b
			
 
				+    mov        ecx, [esp + 12]   // count
			
 
				+    pxor       xmm0, xmm0  // running sum, four 32-bit lanes
			
 
				+    pxor       xmm5, xmm5  // constant 0 used to widen bytes to words
			
 
				+
			
 
				+  wloop:  // one pass per 16 bytes; runs at least once and repeats while count stays > 0
			
 
				+    movdqu     xmm1, [eax]  // 16 bytes of src_a
			
 
				+    lea        eax,  [eax + 16]
			
 
				+    movdqu     xmm2, [edx]  // 16 bytes of src_b
			
 
				+    lea        edx,  [edx + 16]
			
 
				+    movdqa     xmm3, xmm1  // abs trick: |a - b| = sat(a - b) | sat(b - a)
			
 
				+    psubusb    xmm1, xmm2  // saturating a - b
			
 
				+    psubusb    xmm2, xmm3  // saturating b - a
			
 
				+    por        xmm1, xmm2  // absolute difference per byte
			
 
				+    movdqa     xmm2, xmm1
			
 
				+    punpcklbw  xmm1, xmm5  // low 8 differences widened to 16 bits
			
 
				+    punpckhbw  xmm2, xmm5  // high 8 differences widened to 16 bits
			
 
				+    pmaddwd    xmm1, xmm1  // square each word and add adjacent pairs into 32-bit lanes
			
 
				+    pmaddwd    xmm2, xmm2
			
 
				+    paddd      xmm0, xmm1
			
 
				+    paddd      xmm0, xmm2
			
 
				+    sub        ecx, 16
			
 
				+    jg         wloop
			
 
				+
			
 
				+    pshufd     xmm1, xmm0, 0xee  // bring the upper two dwords down
			
 
				+    paddd      xmm0, xmm1
			
 
				+    pshufd     xmm1, xmm0, 0x01  // bring dword 1 down to lane 0
			
 
				+    paddd      xmm0, xmm1
			
 
				+    movd       eax, xmm0  // return the total in eax
			
 
				+    ret
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// Visual C 2012 required for AVX2.
			
 
				+#if _MSC_VER >= 1700
			
 
				+// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
			
 
				+#pragma warning(disable: 4752)
			
 
				+__declspec(naked)  // naked: the asm body reads its arguments from the stack and issues its own ret
			
 
				+uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {  // returns the sum of squared byte differences between src_a and src_b, 32 bytes per pass
			
 
				+  __asm {
			
 
				+    mov        eax, [esp + 4]    // src_a
			
 
				+    mov        edx, [esp + 8]    // src_b
			
 
				+    mov        ecx, [esp + 12]   // count
			
 
				+    vpxor      ymm0, ymm0, ymm0  // sum
			
 
				+    vpxor      ymm5, ymm5, ymm5  // constant 0 for unpck
			
 
				+    sub        edx, eax  // edx = src_b - src_a, so [eax + edx] reads src_b and only eax needs advancing
			
 
				+
			
 
				+  wloop:  // runs at least once and repeats while count stays > 0 after subtracting 32
			
 
				+    vmovdqu    ymm1, [eax]  // 32 bytes of src_a
			
 
				+    vmovdqu    ymm2, [eax + edx]  // 32 bytes of src_b
			
 
				+    lea        eax,  [eax + 32]
			
 
				+    vpsubusb   ymm3, ymm1, ymm2  // abs difference trick
			
 
				+    vpsubusb   ymm2, ymm2, ymm1
			
 
				+    vpor       ymm1, ymm2, ymm3  // absolute difference per byte
			
 
				+    vpunpcklbw ymm2, ymm1, ymm5  // u16.  mutates order.
			
 
				+    vpunpckhbw ymm1, ymm1, ymm5
			
 
				+    vpmaddwd   ymm2, ymm2, ymm2  // square + hadd to u32.
			
 
				+    vpmaddwd   ymm1, ymm1, ymm1
			
 
				+    vpaddd     ymm0, ymm0, ymm1
			
 
				+    vpaddd     ymm0, ymm0, ymm2
			
 
				+    sub        ecx, 32
			
 
				+    jg         wloop
			
 
				+
			
 
				+    vpshufd    ymm1, ymm0, 0xee  // 3, 2 + 1, 0 both lanes.
			
 
				+    vpaddd     ymm0, ymm0, ymm1
			
 
				+    vpshufd    ymm1, ymm0, 0x01  // 1 + 0 both lanes.
			
 
				+    vpaddd     ymm0, ymm0, ymm1
			
 
				+    vpermq     ymm1, ymm0, 0x02  // high + low lane.
			
 
				+    vpaddd     ymm0, ymm0, ymm1
			
 
				+    vmovd      eax, xmm0  // return the total in eax
			
 
				+    vzeroupper
			
 
				+    ret
			
 
				+  }
			
 
				+}
			
 
				+#endif  // _MSC_VER >= 1700
			
 
				+
			
 
				+uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16 (33 to the 16th power, low 32 bits); only lane 0 is non-zero
			
 
				+uvec32 kHashMul0 = {  // multipliers applied to src[0-3] by the HashDjb2 routines in this file
			
 
				+  0x0c3525e1,  // 33 ^ 15
			
 
				+  0xa3476dc1,  // 33 ^ 14
			
 
				+  0x3b4039a1,  // 33 ^ 13
			
 
				+  0x4f5f0981,  // 33 ^ 12
			
 
				+};
			
 
				+uvec32 kHashMul1 = {  // multipliers applied to src[4-7]
			
 
				+  0x30f35d61,  // 33 ^ 11
			
 
				+  0x855cb541,  // 33 ^ 10
			
 
				+  0x040a9121,  // 33 ^ 9
			
 
				+  0x747c7101,  // 33 ^ 8
			
 
				+};
			
 
				+uvec32 kHashMul2 = {  // multipliers applied to src[8-11]
			
 
				+  0xec41d4e1,  // 33 ^ 7
			
 
				+  0x4cfa3cc1,  // 33 ^ 6
			
 
				+  0x025528a1,  // 33 ^ 5
			
 
				+  0x00121881,  // 33 ^ 4
			
 
				+};
			
 
				+uvec32 kHashMul3 = {  // multipliers applied to src[12-15]
			
 
				+  0x00008c61,  // 33 ^ 3
			
 
				+  0x00000441,  // 33 ^ 2
			
 
				+  0x00000021,  // 33 ^ 1
			
 
				+  0x00000001,  // 33 ^ 0
			
 
				+};
			
 
				+
			
 
				+__declspec(naked)  // naked: the asm body reads its arguments from the stack and issues its own ret
			
 
				+uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {  // per 16-byte pass: hash = hash * 33^16 + sum(src[i] * 33^(15 - i)), kept to 32 bits
			
 
				+  __asm {
			
 
				+    mov        eax, [esp + 4]    // src
			
 
				+    mov        ecx, [esp + 8]    // count
			
 
				+    movd       xmm0, [esp + 12]  // seed
			
 
				+
			
 
				+    pxor       xmm7, xmm7        // constant 0 for unpck
			
 
				+    movdqa     xmm6, xmmword ptr kHash16x33
			
 
				+
			
 
				+  wloop:  // runs at least once and repeats while count stays > 0 after subtracting 16
			
 
				+    movdqu     xmm1, [eax]       // src[0-15]
			
 
				+    lea        eax, [eax + 16]
			
 
				+    pmulld     xmm0, xmm6        // hash *= 33 ^ 16
			
 
				+    movdqa     xmm5, xmmword ptr kHashMul0
			
 
				+    movdqa     xmm2, xmm1
			
 
				+    punpcklbw  xmm2, xmm7        // src[0-7]
			
 
				+    movdqa     xmm3, xmm2
			
 
				+    punpcklwd  xmm3, xmm7        // src[0-3]
			
 
				+    pmulld     xmm3, xmm5  // src[0-3] * 33^(15..12)
			
 
				+    movdqa     xmm5, xmmword ptr kHashMul1
			
 
				+    movdqa     xmm4, xmm2
			
 
				+    punpckhwd  xmm4, xmm7        // src[4-7]
			
 
				+    pmulld     xmm4, xmm5  // src[4-7] * 33^(11..8)
			
 
				+    movdqa     xmm5, xmmword ptr kHashMul2
			
 
				+    punpckhbw  xmm1, xmm7        // src[8-15]
			
 
				+    movdqa     xmm2, xmm1
			
 
				+    punpcklwd  xmm2, xmm7        // src[8-11]
			
 
				+    pmulld     xmm2, xmm5  // src[8-11] * 33^(7..4)
			
 
				+    movdqa     xmm5, xmmword ptr kHashMul3
			
 
				+    punpckhwd  xmm1, xmm7        // src[12-15]
			
 
				+    pmulld     xmm1, xmm5  // src[12-15] * 33^(3..0)
			
 
				+    paddd      xmm3, xmm4        // add 16 results
			
 
				+    paddd      xmm1, xmm2
			
 
				+    paddd      xmm1, xmm3
			
 
				+
			
 
				+    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
			
 
				+    paddd      xmm1, xmm2
			
 
				+    pshufd     xmm2, xmm1, 0x01  // dword 1 down to lane 0
			
 
				+    paddd      xmm1, xmm2
			
 
				+    paddd      xmm0, xmm1  // fold this pass into the running hash
			
 
				+    sub        ecx, 16
			
 
				+    jg         wloop
			
 
				+
			
 
				+    movd       eax, xmm0         // return hash
			
 
				+    ret
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// Visual C 2012 required for AVX2.
			
 
				+#if _MSC_VER >= 1700
			
 
				+__declspec(naked)  // naked: the asm body reads its arguments from the stack and issues its own ret
			
 
				+uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {  // per 16-byte pass: hash = hash * 33^16 + sum(src[i] * 33^(15 - i)), kept to 32 bits
			
 
				+  __asm {
			
 
				+    mov        eax, [esp + 4]    // src
			
 
				+    mov        ecx, [esp + 8]    // count
			
 
				+    vmovd      xmm0, [esp + 12]  // seed
			
 
				+
			
 
				+  wloop:  // runs at least once and repeats while count stays > 0 after subtracting 16
			
 
				+    vpmovzxbd  xmm3, [eax]  // src[0-3]
			
 
				+    vpmulld    xmm0, xmm0, xmmword ptr kHash16x33  // hash *= 33 ^ 16
			
 
				+    vpmovzxbd  xmm4, [eax + 4]  // src[4-7]
			
 
				+    vpmulld    xmm3, xmm3, xmmword ptr kHashMul0  // src[0-3] * 33^(15..12)
			
 
				+    vpmovzxbd  xmm2, [eax + 8]  // src[8-11]
			
 
				+    vpmulld    xmm4, xmm4, xmmword ptr kHashMul1  // src[4-7] * 33^(11..8)
			
 
				+    vpmovzxbd  xmm1, [eax + 12]  // src[12-15]
			
 
				+    vpmulld    xmm2, xmm2, xmmword ptr kHashMul2  // src[8-11] * 33^(7..4)
			
 
				+    lea        eax, [eax + 16]
			
 
				+    vpmulld    xmm1, xmm1, xmmword ptr kHashMul3  // src[12-15] * 33^(3..0)
			
 
				+    vpaddd     xmm3, xmm3, xmm4        // add 16 results
			
 
				+    vpaddd     xmm1, xmm1, xmm2
			
 
				+    vpaddd     xmm1, xmm1, xmm3
			
 
				+    vpshufd    xmm2, xmm1, 0x0e  // upper 2 dwords
			
 
				+    vpaddd     xmm1, xmm1,xmm2
			
 
				+    vpshufd    xmm2, xmm1, 0x01  // dword 1 down to lane 0
			
 
				+    vpaddd     xmm1, xmm1, xmm2
			
 
				+    vpaddd     xmm0, xmm0, xmm1  // fold this pass into the running hash
			
 
				+    sub        ecx, 16
			
 
				+    jg         wloop
			
 
				+
			
 
				+    vmovd      eax, xmm0         // return hash
			
 
				+    vzeroupper
			
 
				+    ret
			
 
				+  }
			
 
				+}
			
 
				+#endif  // _MSC_VER >= 1700
			
 
				+
			
 
				+#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/convert.cc
+++ b/src/jni/libyuv/source/convert.cc
--- a/src/jni/libyuv/source/convert_argb.cc
+++ b/src/jni/libyuv/source/convert_argb.cc
--- a/src/jni/libyuv/source/convert_from.cc
+++ b/src/jni/libyuv/source/convert_from.cc
--- a/src/jni/libyuv/source/convert_from_argb.cc
+++ b/src/jni/libyuv/source/convert_from_argb.cc
--- a/src/jni/libyuv/source/convert_jpeg.cc
+++ b/src/jni/libyuv/source/convert_jpeg.cc
@@ -0,0 +1,392 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/convert.h"
			
 
				+
			
 
				+#ifdef HAVE_JPEG
			
 
				+#include "libyuv/mjpeg_decoder.h"
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#ifdef HAVE_JPEG
			
 
				+struct I420Buffers {  // destination state handed from MJPGToI420 to its decode callbacks through the opaque pointer
			
 
				+  uint8* y;  // current write position in the Y plane; each callback advances it
			
 
				+  int y_stride;  // step added to y for every row written
			
 
				+  uint8* u;  // current write position in the U plane
			
 
				+  int u_stride;
			
 
				+  uint8* v;  // current write position in the V plane
			
 
				+  int v_stride;
			
 
				+  int w;  // width passed to the row converters (initialised from dw)
			
 
				+  int h;  // initialised from dh; each callback subtracts the rows it handled
			
 
				+};
			
 
				+
			
 
				+// Decode callback used for 4:2:0 JPEGs: copies |rows| decoded rows from
			
 
				+// |data| into the I420Buffers behind |opaque| with I420Copy, then moves the
			
 
				+// destination pointers and remaining height on for the next call.
			
 
				+static void JpegCopyI420(void* opaque,
			
 
				+                         const uint8* const* data,
			
 
				+                         const int* strides,
			
 
				+                         int rows) {
			
 
				+  I420Buffers* out = (I420Buffers*)(opaque);
			
 
				+  // U and V advance by half as many rows as Y, rounded up.
			
 
				+  const int chroma_rows = (rows + 1) >> 1;
			
 
				+  I420Copy(data[0], strides[0], data[1], strides[1], data[2], strides[2],
			
 
				+           out->y, out->y_stride, out->u, out->u_stride,
			
 
				+           out->v, out->v_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->y += rows * out->y_stride;
			
 
				+  out->u += chroma_rows * out->u_stride;
			
 
				+  out->v += chroma_rows * out->v_stride;
			
 
				+}
			
 
				+
			
 
				+// Decode callback used for 4:2:2 JPEGs: converts |rows| decoded rows from
			
 
				+// |data| into the I420Buffers behind |opaque| with I422ToI420, then moves
			
 
				+// the destination pointers and remaining height on for the next call.
			
 
				+static void JpegI422ToI420(void* opaque,
			
 
				+                           const uint8* const* data,
			
 
				+                           const int* strides,
			
 
				+                           int rows) {
			
 
				+  I420Buffers* out = (I420Buffers*)(opaque);
			
 
				+  // U and V advance by half as many rows as Y, rounded up.
			
 
				+  const int chroma_rows = (rows + 1) >> 1;
			
 
				+  I422ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
			
 
				+             out->y, out->y_stride, out->u, out->u_stride,
			
 
				+             out->v, out->v_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->y += rows * out->y_stride;
			
 
				+  out->u += chroma_rows * out->u_stride;
			
 
				+  out->v += chroma_rows * out->v_stride;
			
 
				+}
			
 
				+
			
 
				+// Decode callback used for 4:4:4 JPEGs: converts |rows| decoded rows from
			
 
				+// |data| into the I420Buffers behind |opaque| with I444ToI420, then moves
			
 
				+// the destination pointers and remaining height on for the next call.
			
 
				+static void JpegI444ToI420(void* opaque,
			
 
				+                           const uint8* const* data,
			
 
				+                           const int* strides,
			
 
				+                           int rows) {
			
 
				+  I420Buffers* out = (I420Buffers*)(opaque);
			
 
				+  // U and V advance by half as many rows as Y, rounded up.
			
 
				+  const int chroma_rows = (rows + 1) >> 1;
			
 
				+  I444ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
			
 
				+             out->y, out->y_stride, out->u, out->u_stride,
			
 
				+             out->v, out->v_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->y += rows * out->y_stride;
			
 
				+  out->u += chroma_rows * out->u_stride;
			
 
				+  out->v += chroma_rows * out->v_stride;
			
 
				+}
			
 
				+
			
 
				+// Decode callback used for 4:1:1 JPEGs: converts |rows| decoded rows from
			
 
				+// |data| into the I420Buffers behind |opaque| with I411ToI420, then moves
			
 
				+// the destination pointers and remaining height on for the next call.
			
 
				+static void JpegI411ToI420(void* opaque,
			
 
				+                           const uint8* const* data,
			
 
				+                           const int* strides,
			
 
				+                           int rows) {
			
 
				+  I420Buffers* out = (I420Buffers*)(opaque);
			
 
				+  // U and V advance by half as many rows as Y, rounded up.
			
 
				+  const int chroma_rows = (rows + 1) >> 1;
			
 
				+  I411ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
			
 
				+             out->y, out->y_stride, out->u, out->u_stride,
			
 
				+             out->v, out->v_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->y += rows * out->y_stride;
			
 
				+  out->u += chroma_rows * out->u_stride;
			
 
				+  out->v += chroma_rows * out->v_stride;
			
 
				+}
			
 
				+
			
 
				+// Decode callback used for grayscale JPEGs: converts |rows| decoded rows of
			
 
				+// the single plane in |data| into the I420Buffers behind |opaque| with
			
 
				+// I400ToI420, then moves the destination pointers and remaining height on.
			
 
				+static void JpegI400ToI420(void* opaque,
			
 
				+                           const uint8* const* data,
			
 
				+                           const int* strides,
			
 
				+                           int rows) {
			
 
				+  I420Buffers* out = (I420Buffers*)(opaque);
			
 
				+  // U and V advance by half as many rows as Y, rounded up.
			
 
				+  const int chroma_rows = (rows + 1) >> 1;
			
 
				+  I400ToI420(data[0], strides[0],
			
 
				+             out->y, out->y_stride, out->u, out->u_stride,
			
 
				+             out->v, out->v_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->y += rows * out->y_stride;
			
 
				+  out->u += chroma_rows * out->u_stride;
			
 
				+  out->v += chroma_rows * out->v_stride;
			
 
				+}
			
 
				+
			
 
				+// Query size of MJPG in pixels.
			
 
				+// On success stores the frame dimensions in |*width| and |*height| and
			
 
				+// returns 0. If the frame cannot be loaded the outputs are left untouched
			
 
				+// and -1 is returned.
			
 
				+LIBYUV_API
			
 
				+int MJPGSize(const uint8* sample, size_t sample_size,
			
 
				+             int* width, int* height) {
			
 
				+  MJpegDecoder decoder;
			
 
				+  int status = -1;  // -1 for runtime failure.
			
 
				+  if (decoder.LoadFrame(sample, sample_size)) {
			
 
				+    *width = decoder.GetWidth();
			
 
				+    *height = decoder.GetHeight();
			
 
				+    status = 0;
			
 
				+  }
			
 
				+  // The frame is unloaded whether or not loading succeeded.
			
 
				+  decoder.UnloadFrame();
			
 
				+  return status;
			
 
				+}
			
 
				+
			
 
				+// MJPG (Motion JPEG) to I420.
			
 
				+// Decodes the compressed frame in |sample| into the destination Y, U and V
			
 
				+// planes. The decoded frame must be exactly |w| x |h|; |dw| and |dh| seed
			
 
				+// the destination size and are passed on to the decoder.
			
 
				+// Returns 0 on success, -1 if |sample_size| is unknown, and 1 if the frame
			
 
				+// cannot be loaded, has unexpected dimensions, uses an unsupported
			
 
				+// colorspace/sampling layout, or fails to decode.
			
 
				+// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
			
 
				+LIBYUV_API
			
 
				+int MJPGToI420(const uint8* sample,
			
 
				+               size_t sample_size,
			
 
				+               uint8* y, int y_stride,
			
 
				+               uint8* u, int u_stride,
			
 
				+               uint8* v, int v_stride,
			
 
				+               int w, int h,
			
 
				+               int dw, int dh) {
			
 
				+  if (sample_size == kUnknownDataSize) {
			
 
				+    return -1;  // ERROR: MJPEG frame size unknown
			
 
				+  }
			
 
				+
			
 
				+  // TODO(fbarchard): Port MJpeg to C.
			
 
				+  MJpegDecoder decoder;
			
 
				+  if (!decoder.LoadFrame(sample, sample_size)) {
			
 
				+    return 1;  // frame could not be loaded
			
 
				+  }
			
 
				+  if (decoder.GetWidth() != w || decoder.GetHeight() != h) {
			
 
				+    // ERROR: MJPEG frame has unexpected dimensions
			
 
				+    decoder.UnloadFrame();
			
 
				+    return 1;  // runtime failure
			
 
				+  }
			
 
				+
			
 
				+  // Pick the callback that matches the frame's colorspace and sampling.
			
 
				+  I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
			
 
				+  LIBYUV_BOOL decoded;
			
 
				+  if (decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
			
 
				+      decoder.GetNumComponents() == 3 &&
			
 
				+      decoder.GetVertSampFactor(0) == 2 && decoder.GetHorizSampFactor(0) == 2 &&
			
 
				+      decoder.GetVertSampFactor(1) == 1 && decoder.GetHorizSampFactor(1) == 1 &&
			
 
				+      decoder.GetVertSampFactor(2) == 1 && decoder.GetHorizSampFactor(2) == 1) {
			
 
				+    // YUV420
			
 
				+    decoded = decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh);
			
 
				+  } else if (
			
 
				+      decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
			
 
				+      decoder.GetNumComponents() == 3 &&
			
 
				+      decoder.GetVertSampFactor(0) == 1 && decoder.GetHorizSampFactor(0) == 2 &&
			
 
				+      decoder.GetVertSampFactor(1) == 1 && decoder.GetHorizSampFactor(1) == 1 &&
			
 
				+      decoder.GetVertSampFactor(2) == 1 && decoder.GetHorizSampFactor(2) == 1) {
			
 
				+    // YUV422
			
 
				+    decoded = decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh);
			
 
				+  } else if (
			
 
				+      decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
			
 
				+      decoder.GetNumComponents() == 3 &&
			
 
				+      decoder.GetVertSampFactor(0) == 1 && decoder.GetHorizSampFactor(0) == 1 &&
			
 
				+      decoder.GetVertSampFactor(1) == 1 && decoder.GetHorizSampFactor(1) == 1 &&
			
 
				+      decoder.GetVertSampFactor(2) == 1 && decoder.GetHorizSampFactor(2) == 1) {
			
 
				+    // YUV444
			
 
				+    decoded = decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
			
 
				+  } else if (
			
 
				+      decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
			
 
				+      decoder.GetNumComponents() == 3 &&
			
 
				+      decoder.GetVertSampFactor(0) == 1 && decoder.GetHorizSampFactor(0) == 4 &&
			
 
				+      decoder.GetVertSampFactor(1) == 1 && decoder.GetHorizSampFactor(1) == 1 &&
			
 
				+      decoder.GetVertSampFactor(2) == 1 && decoder.GetHorizSampFactor(2) == 1) {
			
 
				+    // YUV411
			
 
				+    decoded = decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh);
			
 
				+  } else if (
			
 
				+      decoder.GetColorSpace() == MJpegDecoder::kColorSpaceGrayscale &&
			
 
				+      decoder.GetNumComponents() == 1 &&
			
 
				+      decoder.GetVertSampFactor(0) == 1 && decoder.GetHorizSampFactor(0) == 1) {
			
 
				+    // YUV400
			
 
				+    decoded = decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh);
			
 
				+  } else {
			
 
				+    // TODO(fbarchard): Implement conversion for any other colorspace/sample
			
 
				+    // factors that occur in practice. 411 is supported by libjpeg
			
 
				+    // ERROR: Unable to convert MJPEG frame because format is not supported
			
 
				+    decoder.UnloadFrame();
			
 
				+    return 1;
			
 
				+  }
			
 
				+  return decoded ? 0 : 1;
			
 
				+}
			
 
				+
			
 
				+#ifdef HAVE_JPEG
			
 
				+struct ARGBBuffers {  // destination state handed from MJPGToARGB to its decode callbacks through the opaque pointer
			
 
				+  uint8* argb;  // current write position in the ARGB buffer; each callback advances it
			
 
				+  int argb_stride;  // step added to argb for every row written
			
 
				+  int w;  // width passed to the row converters (initialised from dw)
			
 
				+  int h;  // initialised from dh; each callback subtracts the rows it handled
			
 
				+};
			
 
				+
			
 
				+// Decode callback used for 4:2:0 JPEGs: converts |rows| decoded rows from
			
 
				+// |data| into the ARGBBuffers behind |opaque| with I420ToARGB, then moves
			
 
				+// the destination pointer and remaining height on for the next call.
			
 
				+static void JpegI420ToARGB(void* opaque,
			
 
				+                           const uint8* const* data,
			
 
				+                           const int* strides,
			
 
				+                           int rows) {
			
 
				+  ARGBBuffers* out = (ARGBBuffers*)(opaque);
			
 
				+  I420ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
			
 
				+             out->argb, out->argb_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->argb += rows * out->argb_stride;
			
 
				+}
			
 
				+
			
 
				+// Decode callback used for 4:2:2 JPEGs: converts |rows| decoded rows from
			
 
				+// |data| into the ARGBBuffers behind |opaque| with I422ToARGB, then moves
			
 
				+// the destination pointer and remaining height on for the next call.
			
 
				+static void JpegI422ToARGB(void* opaque,
			
 
				+                           const uint8* const* data,
			
 
				+                           const int* strides,
			
 
				+                           int rows) {
			
 
				+  ARGBBuffers* out = (ARGBBuffers*)(opaque);
			
 
				+  I422ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
			
 
				+             out->argb, out->argb_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->argb += rows * out->argb_stride;
			
 
				+}
			
 
				+
			
 
				+// Decode callback used for 4:4:4 JPEGs: converts |rows| decoded rows from
			
 
				+// |data| into the ARGBBuffers behind |opaque| with I444ToARGB, then moves
			
 
				+// the destination pointer and remaining height on for the next call.
			
 
				+static void JpegI444ToARGB(void* opaque,
			
 
				+                           const uint8* const* data,
			
 
				+                           const int* strides,
			
 
				+                           int rows) {
			
 
				+  ARGBBuffers* out = (ARGBBuffers*)(opaque);
			
 
				+  I444ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
			
 
				+             out->argb, out->argb_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->argb += rows * out->argb_stride;
			
 
				+}
			
 
				+
			
 
				+// Decode callback used for 4:1:1 JPEGs: converts |rows| decoded rows from
			
 
				+// |data| into the ARGBBuffers behind |opaque| with I411ToARGB, then moves
			
 
				+// the destination pointer and remaining height on for the next call.
			
 
				+static void JpegI411ToARGB(void* opaque,
			
 
				+                           const uint8* const* data,
			
 
				+                           const int* strides,
			
 
				+                           int rows) {
			
 
				+  ARGBBuffers* out = (ARGBBuffers*)(opaque);
			
 
				+  I411ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
			
 
				+             out->argb, out->argb_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->argb += rows * out->argb_stride;
			
 
				+}
			
 
				+
			
 
				+// Decode callback used for grayscale JPEGs: converts |rows| decoded rows of
			
 
				+// the single plane in |data| into the ARGBBuffers behind |opaque| with
			
 
				+// I400ToARGB, then moves the destination pointer and remaining height on.
			
 
				+static void JpegI400ToARGB(void* opaque,
			
 
				+                           const uint8* const* data,
			
 
				+                           const int* strides,
			
 
				+                           int rows) {
			
 
				+  ARGBBuffers* out = (ARGBBuffers*)(opaque);
			
 
				+  I400ToARGB(data[0], strides[0],
			
 
				+             out->argb, out->argb_stride, out->w, rows);
			
 
				+  out->h -= rows;
			
 
				+  out->argb += rows * out->argb_stride;
			
 
				+}
			
 
				+
			
 
				+// MJPG (Motion JPEG) to ARGB.
			
 
				+// Decodes the compressed frame in |sample| into the destination ARGB
			
 
				+// buffer. The decoded frame must be exactly |w| x |h|; |dw| and |dh| seed
			
 
				+// the destination size and are passed on to the decoder.
			
 
				+// Returns 0 on success, -1 if |sample_size| is unknown, and 1 if the frame
			
 
				+// cannot be loaded, has unexpected dimensions, uses an unsupported
			
 
				+// colorspace/sampling layout, or fails to decode.
			
 
				+// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
			
 
				+LIBYUV_API
			
 
				+int MJPGToARGB(const uint8* sample,
			
 
				+               size_t sample_size,
			
 
				+               uint8* argb, int argb_stride,
			
 
				+               int w, int h,
			
 
				+               int dw, int dh) {
			
 
				+  if (sample_size == kUnknownDataSize) {
			
 
				+    return -1;  // ERROR: MJPEG frame size unknown
			
 
				+  }
			
 
				+
			
 
				+  // TODO(fbarchard): Port MJpeg to C.
			
 
				+  MJpegDecoder decoder;
			
 
				+  if (!decoder.LoadFrame(sample, sample_size)) {
			
 
				+    return 1;  // frame could not be loaded
			
 
				+  }
			
 
				+  if (decoder.GetWidth() != w || decoder.GetHeight() != h) {
			
 
				+    // ERROR: MJPEG frame has unexpected dimensions
			
 
				+    decoder.UnloadFrame();
			
 
				+    return 1;  // runtime failure
			
 
				+  }
			
 
				+
			
 
				+  // Pick the callback that matches the frame's colorspace and sampling.
			
 
				+  ARGBBuffers bufs = { argb, argb_stride, dw, dh };
			
 
				+  LIBYUV_BOOL decoded;
			
 
				+  if (decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
			
 
				+      decoder.GetNumComponents() == 3 &&
			
 
				+      decoder.GetVertSampFactor(0) == 2 && decoder.GetHorizSampFactor(0) == 2 &&
			
 
				+      decoder.GetVertSampFactor(1) == 1 && decoder.GetHorizSampFactor(1) == 1 &&
			
 
				+      decoder.GetVertSampFactor(2) == 1 && decoder.GetHorizSampFactor(2) == 1) {
			
 
				+    // YUV420
			
 
				+    decoded = decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh);
			
 
				+  } else if (
			
 
				+      decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
			
 
				+      decoder.GetNumComponents() == 3 &&
			
 
				+      decoder.GetVertSampFactor(0) == 1 && decoder.GetHorizSampFactor(0) == 2 &&
			
 
				+      decoder.GetVertSampFactor(1) == 1 && decoder.GetHorizSampFactor(1) == 1 &&
			
 
				+      decoder.GetVertSampFactor(2) == 1 && decoder.GetHorizSampFactor(2) == 1) {
			
 
				+    // YUV422
			
 
				+    decoded = decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh);
			
 
				+  } else if (
			
 
				+      decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
			
 
				+      decoder.GetNumComponents() == 3 &&
			
 
				+      decoder.GetVertSampFactor(0) == 1 && decoder.GetHorizSampFactor(0) == 1 &&
			
 
				+      decoder.GetVertSampFactor(1) == 1 && decoder.GetHorizSampFactor(1) == 1 &&
			
 
				+      decoder.GetVertSampFactor(2) == 1 && decoder.GetHorizSampFactor(2) == 1) {
			
 
				+    // YUV444
			
 
				+    decoded = decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
			
 
				+  } else if (
			
 
				+      decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
			
 
				+      decoder.GetNumComponents() == 3 &&
			
 
				+      decoder.GetVertSampFactor(0) == 1 && decoder.GetHorizSampFactor(0) == 4 &&
			
 
				+      decoder.GetVertSampFactor(1) == 1 && decoder.GetHorizSampFactor(1) == 1 &&
			
 
				+      decoder.GetVertSampFactor(2) == 1 && decoder.GetHorizSampFactor(2) == 1) {
			
 
				+    // YUV411
			
 
				+    decoded = decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh);
			
 
				+  } else if (
			
 
				+      decoder.GetColorSpace() == MJpegDecoder::kColorSpaceGrayscale &&
			
 
				+      decoder.GetNumComponents() == 1 &&
			
 
				+      decoder.GetVertSampFactor(0) == 1 && decoder.GetHorizSampFactor(0) == 1) {
			
 
				+    // YUV400
			
 
				+    decoded = decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh);
			
 
				+  } else {
			
 
				+    // TODO(fbarchard): Implement conversion for any other colorspace/sample
			
 
				+    // factors that occur in practice. 411 is supported by libjpeg
			
 
				+    // ERROR: Unable to convert MJPEG frame because format is not supported
			
 
				+    decoder.UnloadFrame();
			
 
				+    return 1;
			
 
				+  }
			
 
				+  return decoded ? 0 : 1;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/convert_to_argb.cc
+++ b/src/jni/libyuv/source/convert_to_argb.cc
@@ -0,0 +1,306 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/convert_argb.h"
			
 
				+
			
 
				+#include "libyuv/cpu_id.h"
			
 
				+#ifdef HAVE_JPEG
			
 
				+#include "libyuv/mjpeg_decoder.h"
			
 
				+#endif
			
 
				+#include "libyuv/rotate_argb.h"
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/video_common.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Convert camera sample to ARGB with cropping, rotation and vertical flip.
				+//
				+// sample       Source frame, laid out as described by fourcc.
				+// sample_size  Size of the frame in bytes. Only forwarded to the MJPEG
				+//              decoder, where it is the compressed size of the frame.
				+// crop_argb    Destination ARGB buffer; argb_stride is its row pitch in bytes.
				+// crop_x/y     Top-left corner of the crop window inside the source frame.
				+// src_width    Used for source stride computation.
				+// src_height   Used to compute location of planes; a negative value
				+//              indicates inversion (vertical flip).
				+// crop_width, crop_height  Size of the region to convert.
				+// rotation     Rotation applied to the converted image.
				+// fourcc       Source format; canonicalized with CanonicalFourCC().
				+//
				+// Returns -1 for invalid arguments or an unknown fourcc, 1 when the temporary
				+// buffer cannot be allocated, otherwise the result of the selected converter
				+// (and of ARGBRotate when a rotation pass is run).
				+LIBYUV_API
				+int ConvertToARGB(const uint8* sample, size_t sample_size,
				+                  uint8* crop_argb, int argb_stride,
				+                  int crop_x, int crop_y,
				+                  int src_width, int src_height,
				+                  int crop_width, int crop_height,
				+                  enum RotationMode rotation,
				+                  uint32 fourcc) {
				+  uint32 format = CanonicalFourCC(fourcc);
				+  int aligned_src_width = (src_width + 1) & ~1;
				+  const uint8* src;
				+  const uint8* src_uv;
				+  int abs_src_height = (src_height < 0) ? -src_height : src_height;
				+  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
				+  int r = 0;
				+
				+  // ARGB sources are rotated in one pass. For every other format, convert
				+  // to ARGB (with optional vertical flipping) into a temporary ARGB buffer,
				+  // and then rotate that buffer into the final destination.
				+  // For in-place conversion, if destination crop_argb is same as source sample,
				+  // also enable temporary buffer.
				+  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
				+      crop_argb == sample;
				+  uint8* tmp_argb = crop_argb;
				+  int tmp_argb_stride = argb_stride;
				+  uint8* rotate_buffer = NULL;
				+  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
				+
				+  if (crop_argb == NULL || sample == NULL ||
				+      src_width <= 0 || crop_width <= 0 ||
				+      src_height == 0 || crop_height == 0) {
				+    return -1;
				+  }
				+  // An inverted source is expressed to the converters as a negative height.
				+  if (src_height < 0) {
				+    inv_crop_height = -inv_crop_height;
				+  }
				+
				+  if (need_buf) {
				+    // Size is computed in size_t so large frames do not wrap an int.
				+    size_t argb_size = (size_t)crop_width * (size_t)abs_crop_height * 4;
				+    rotate_buffer = (uint8*)malloc(argb_size);
				+    if (!rotate_buffer) {
				+      return 1;  // Out of memory runtime error.
				+    }
				+    crop_argb = rotate_buffer;
				+    // Strides are in bytes and ARGB is 4 bytes per pixel.
				+    argb_stride = crop_width * 4;
				+  }
				+
				+  switch (format) {
				+    // Single plane formats
				+    case FOURCC_YUY2:
				+      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
				+      r = YUY2ToARGB(src, aligned_src_width * 2,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_UYVY:
				+      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
				+      r = UYVYToARGB(src, aligned_src_width * 2,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_24BG:
				+      src = sample + (src_width * crop_y + crop_x) * 3;
				+      r = RGB24ToARGB(src, src_width * 3,
				+                      crop_argb, argb_stride,
				+                      crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_RAW:
				+      src = sample + (src_width * crop_y + crop_x) * 3;
				+      r = RAWToARGB(src, src_width * 3,
				+                    crop_argb, argb_stride,
				+                    crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_ARGB:
				+      src = sample + (src_width * crop_y + crop_x) * 4;
				+      if (rotation && !need_buf) {
				+        // No temporary buffer is used for ARGB sources, so the rotation has
				+        // to happen here; a plain copy would silently drop it.
				+        r = ARGBRotate(src, src_width * 4,
				+                       crop_argb, argb_stride,
				+                       crop_width, inv_crop_height, rotation);
				+      } else {
				+        r = ARGBToARGB(src, src_width * 4,
				+                       crop_argb, argb_stride,
				+                       crop_width, inv_crop_height);
				+      }
				+      break;
				+    case FOURCC_BGRA:
				+      src = sample + (src_width * crop_y + crop_x) * 4;
				+      r = BGRAToARGB(src, src_width * 4,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_ABGR:
				+      src = sample + (src_width * crop_y + crop_x) * 4;
				+      r = ABGRToARGB(src, src_width * 4,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_RGBA:
				+      src = sample + (src_width * crop_y + crop_x) * 4;
				+      r = RGBAToARGB(src, src_width * 4,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_RGBP:
				+      src = sample + (src_width * crop_y + crop_x) * 2;
				+      r = RGB565ToARGB(src, src_width * 2,
				+                       crop_argb, argb_stride,
				+                       crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_RGBO:
				+      src = sample + (src_width * crop_y + crop_x) * 2;
				+      r = ARGB1555ToARGB(src, src_width * 2,
				+                         crop_argb, argb_stride,
				+                         crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_R444:
				+      src = sample + (src_width * crop_y + crop_x) * 2;
				+      r = ARGB4444ToARGB(src, src_width * 2,
				+                         crop_argb, argb_stride,
				+                         crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_I400:
				+      src = sample + src_width * crop_y + crop_x;
				+      r = I400ToARGB(src, src_width,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+
				+    // Biplanar formats
				+    case FOURCC_NV12:
				+      src = sample + (src_width * crop_y + crop_x);
				+      // The UV plane follows the Y plane; use the absolute height so an
				+      // inverted (negative) src_height does not move the pointer backwards.
				+      src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) +
				+          crop_x;
				+      r = NV12ToARGB(src, src_width,
				+                     src_uv, aligned_src_width,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_NV21:
				+      src = sample + (src_width * crop_y + crop_x);
				+      // Same plane offsets as NV12; the dedicated NV21 converter is used.
				+      src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) +
				+          crop_x;
				+      r = NV21ToARGB(src, src_width,
				+                     src_uv, aligned_src_width,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_M420:
				+      src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
				+      r = M420ToARGB(src, src_width,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    // Triplanar formats
				+    case FOURCC_I420:
				+    case FOURCC_YU12:
				+    case FOURCC_YV12: {
				+      const uint8* src_y = sample + (src_width * crop_y + crop_x);
				+      const uint8* src_u;
				+      const uint8* src_v;
				+      int halfwidth = (src_width + 1) / 2;
				+      int halfheight = (abs_src_height + 1) / 2;
				+      // YV12 stores the V plane before the U plane.
				+      if (format == FOURCC_YV12) {
				+        src_v = sample + src_width * abs_src_height +
				+            (halfwidth * crop_y + crop_x) / 2;
				+        src_u = sample + src_width * abs_src_height +
				+            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
				+      } else {
				+        src_u = sample + src_width * abs_src_height +
				+            (halfwidth * crop_y + crop_x) / 2;
				+        src_v = sample + src_width * abs_src_height +
				+            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
				+      }
				+      r = I420ToARGB(src_y, src_width,
				+                     src_u, halfwidth,
				+                     src_v, halfwidth,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    }
				+
				+    case FOURCC_J420: {
				+      const uint8* src_y = sample + (src_width * crop_y + crop_x);
				+      const uint8* src_u;
				+      const uint8* src_v;
				+      int halfwidth = (src_width + 1) / 2;
				+      int halfheight = (abs_src_height + 1) / 2;
				+      src_u = sample + src_width * abs_src_height +
				+          (halfwidth * crop_y + crop_x) / 2;
				+      src_v = sample + src_width * abs_src_height +
				+          halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
				+      r = J420ToARGB(src_y, src_width,
				+                     src_u, halfwidth,
				+                     src_v, halfwidth,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    }
				+
				+    case FOURCC_I422:
				+    case FOURCC_YV16: {
				+      const uint8* src_y = sample + src_width * crop_y + crop_x;
				+      const uint8* src_u;
				+      const uint8* src_v;
				+      int halfwidth = (src_width + 1) / 2;
				+      // YV16 stores the V plane before the U plane.
				+      if (format == FOURCC_YV16) {
				+        src_v = sample + src_width * abs_src_height +
				+            halfwidth * crop_y + crop_x / 2;
				+        src_u = sample + src_width * abs_src_height +
				+            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
				+      } else {
				+        src_u = sample + src_width * abs_src_height +
				+            halfwidth * crop_y + crop_x / 2;
				+        src_v = sample + src_width * abs_src_height +
				+            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
				+      }
				+      r = I422ToARGB(src_y, src_width,
				+                     src_u, halfwidth,
				+                     src_v, halfwidth,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    }
				+    case FOURCC_I444:
				+    case FOURCC_YV24: {
				+      const uint8* src_y = sample + src_width * crop_y + crop_x;
				+      const uint8* src_u;
				+      const uint8* src_v;
				+      // YV24 stores the V plane before the U plane.
				+      if (format == FOURCC_YV24) {
				+        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
				+        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
				+      } else {
				+        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
				+        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
				+      }
				+      r = I444ToARGB(src_y, src_width,
				+                     src_u, src_width,
				+                     src_v, src_width,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    }
				+    case FOURCC_I411: {
				+      int quarterwidth = (src_width + 3) / 4;
				+      const uint8* src_y = sample + src_width * crop_y + crop_x;
				+      const uint8* src_u = sample + src_width * abs_src_height +
				+          quarterwidth * crop_y + crop_x / 4;
				+      const uint8* src_v = sample + src_width * abs_src_height +
				+          quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
				+      r = I411ToARGB(src_y, src_width,
				+                     src_u, quarterwidth,
				+                     src_v, quarterwidth,
				+                     crop_argb, argb_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    }
				+#ifdef HAVE_JPEG
				+    case FOURCC_MJPG:
				+      r = MJPGToARGB(sample, sample_size,
				+                     crop_argb, argb_stride,
				+                     src_width, abs_src_height, crop_width, inv_crop_height);
				+      break;
				+#endif
				+    default:
				+      r = -1;  // unknown fourcc - return failure code.
				+  }
				+
				+  if (need_buf) {
				+    // Second pass: rotate (or copy back, for in-place use) from the temporary
				+    // buffer into the caller's destination. Skipped if conversion failed.
				+    if (!r) {
				+      r = ARGBRotate(crop_argb, argb_stride,
				+                     tmp_argb, tmp_argb_stride,
				+                     crop_width, abs_crop_height, rotation);
				+    }
				+    free(rotate_buffer);
				+  }
				+
				+  return r;
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/convert_to_i420.cc
+++ b/src/jni/libyuv/source/convert_to_i420.cc
@@ -0,0 +1,340 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+#include "libyuv/convert.h"
			
 
				+
			
 
				+#include "libyuv/video_common.h"
			
 
				+#include "../include/libyuv.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Convert camera sample to I420 with cropping, rotation and vertical flip.
				+//
				+// sample       Source frame, laid out as described by fourcc.
				+// sample_size  Size of the frame in bytes. Only forwarded to the MJPEG
				+//              decoder, where it is the compressed size of the frame.
				+// y, u, v      Destination planes with their strides.
				+// crop_x/y     Top-left corner of the crop window inside the source frame.
				+// src_width    Used for source stride computation.
				+// src_height   Used to compute location of planes; a negative value
				+//              indicates inversion (vertical flip).
				+// crop_width, crop_height  Size of the region to convert.
				+// rotation     Rotation applied to the converted image.
				+// fourcc       Source format; canonicalized with CanonicalFourCC().
				+//
				+// Returns -1 for invalid arguments or an unknown fourcc, 1 when the temporary
				+// buffer cannot be allocated, otherwise the result of the selected converter
				+// (and of I420Rotate when a rotation pass is run).
				+LIBYUV_API
				+int ConvertToI420(const uint8* sample,
				+                  size_t sample_size,
				+                  uint8* y, int y_stride,
				+                  uint8* u, int u_stride,
				+                  uint8* v, int v_stride,
				+                  int crop_x, int crop_y,
				+                  int src_width, int src_height,
				+                  int crop_width, int crop_height,
				+                  enum RotationMode rotation,
				+                  uint32 fourcc) {
				+  uint32 format = CanonicalFourCC(fourcc);
				+  int aligned_src_width = (src_width + 1) & ~1;
				+  const uint8* src;
				+  const uint8* src_uv;
				+  int abs_src_height = (src_height < 0) ? -src_height : src_height;
				+  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
				+  int r = 0;
				+  // NOTE(review): when y == sample and the format is one of the one-pass
				+  // formats below, the switch already applies `rotation` into the temporary
				+  // buffer and the final I420Rotate applies it again - confirm whether
				+  // in-place use with rotation is meant to be supported for those formats.
				+  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
				+      format != FOURCC_NV12 && format != FOURCC_NV21 &&
				+      format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
				+  uint8* tmp_y = y;
				+  uint8* tmp_u = u;
				+  uint8* tmp_v = v;
				+  int tmp_y_stride = y_stride;
				+  int tmp_u_stride = u_stride;
				+  int tmp_v_stride = v_stride;
				+  uint8* rotate_buffer = NULL;
				+  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
				+
				+  if (!y || !u || !v || !sample ||
				+      src_width <= 0 || crop_width <= 0  ||
				+      src_height == 0 || crop_height == 0) {
				+    return -1;
				+  }
				+  // An inverted source is expressed to the converters as a negative height.
				+  if (src_height < 0) {
				+    inv_crop_height = -inv_crop_height;
				+  }
				+
				+  // One pass rotation is available for some formats. For the rest, convert
				+  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
				+  // and then rotate the I420 to the final destination buffer.
				+  // For in-place conversion, if destination y is same as source sample,
				+  // also enable temporary buffer.
				+  if (need_buf) {
				+    int y_size = crop_width * abs_crop_height;
				+    int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
				+    rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
				+    if (!rotate_buffer) {
				+      return 1;  // Out of memory runtime error.
				+    }
				+    // The three planes are packed back to back in the temporary buffer.
				+    y = rotate_buffer;
				+    u = y + y_size;
				+    v = u + uv_size;
				+    y_stride = crop_width;
				+    u_stride = v_stride = ((crop_width + 1) / 2);
				+  }
				+
				+  switch (format) {
				+    // Single plane formats
				+    case FOURCC_YUY2:
				+      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
				+      r = YUY2ToI420(src, aligned_src_width * 2,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_UYVY:
				+      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
				+      r = UYVYToI420(src, aligned_src_width * 2,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_RGBP:
				+      src = sample + (src_width * crop_y + crop_x) * 2;
				+      r = RGB565ToI420(src, src_width * 2,
				+                       y, y_stride,
				+                       u, u_stride,
				+                       v, v_stride,
				+                       crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_RGBO:
				+      src = sample + (src_width * crop_y + crop_x) * 2;
				+      r = ARGB1555ToI420(src, src_width * 2,
				+                         y, y_stride,
				+                         u, u_stride,
				+                         v, v_stride,
				+                         crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_R444:
				+      src = sample + (src_width * crop_y + crop_x) * 2;
				+      r = ARGB4444ToI420(src, src_width * 2,
				+                         y, y_stride,
				+                         u, u_stride,
				+                         v, v_stride,
				+                         crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_24BG:
				+      src = sample + (src_width * crop_y + crop_x) * 3;
				+      r = RGB24ToI420(src, src_width * 3,
				+                      y, y_stride,
				+                      u, u_stride,
				+                      v, v_stride,
				+                      crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_RAW:
				+      src = sample + (src_width * crop_y + crop_x) * 3;
				+      r = RAWToI420(src, src_width * 3,
				+                    y, y_stride,
				+                    u, u_stride,
				+                    v, v_stride,
				+                    crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_ARGB:
				+      src = sample + (src_width * crop_y + crop_x) * 4;
				+      r = ARGBToI420(src, src_width * 4,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_BGRA:
				+      src = sample + (src_width * crop_y + crop_x) * 4;
				+      r = BGRAToI420(src, src_width * 4,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_ABGR:
				+      src = sample + (src_width * crop_y + crop_x) * 4;
				+      r = ABGRToI420(src, src_width * 4,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_RGBA:
				+      src = sample + (src_width * crop_y + crop_x) * 4;
				+      r = RGBAToI420(src, src_width * 4,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    case FOURCC_I400:
				+      src = sample + src_width * crop_y + crop_x;
				+      r = I400ToI420(src, src_width,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    // Biplanar formats
				+    case FOURCC_NV12:
				+      src = sample + (src_width * crop_y + crop_x);
				+      // The UV plane follows the Y plane; use the absolute height so an
				+      // inverted (negative) src_height does not move the pointer backwards.
				+      src_uv = sample + (src_width * abs_src_height) +
				+        ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
				+      r = NV12ToI420Rotate(src, src_width,
				+                           src_uv, aligned_src_width,
				+                           y, y_stride,
				+                           u, u_stride,
				+                           v, v_stride,
				+                           crop_width, inv_crop_height, rotation);
				+      break;
				+    case FOURCC_NV21:
				+      src = sample + (src_width * crop_y + crop_x);
				+      src_uv = sample + (src_width * abs_src_height) +
				+        ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
				+      // Call NV12 but with u and v parameters swapped.
				+      r = NV12ToI420Rotate(src, src_width,
				+                           src_uv, aligned_src_width,
				+                           y, y_stride,
				+                           v, v_stride,
				+                           u, u_stride,
				+                           crop_width, inv_crop_height, rotation);
				+      break;
				+    case FOURCC_M420:
				+      src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
				+      r = M420ToI420(src, src_width,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    // Triplanar formats
				+    case FOURCC_I420:
				+    case FOURCC_YU12:
				+    case FOURCC_YV12: {
				+      const uint8* src_y = sample + (src_width * crop_y + crop_x);
				+      const uint8* src_u;
				+      const uint8* src_v;
				+      int halfwidth = (src_width + 1) / 2;
				+      int halfheight = (abs_src_height + 1) / 2;
				+      // YV12 stores the V plane before the U plane.
				+      if (format == FOURCC_YV12) {
				+        src_v = sample + src_width * abs_src_height +
				+            (halfwidth * crop_y + crop_x) / 2;
				+        src_u = sample + src_width * abs_src_height +
				+            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
				+      } else {
				+        src_u = sample + src_width * abs_src_height +
				+            (halfwidth * crop_y + crop_x) / 2;
				+        src_v = sample + src_width * abs_src_height +
				+            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
				+      }
				+      r = I420Rotate(src_y, src_width,
				+                     src_u, halfwidth,
				+                     src_v, halfwidth,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height, rotation);
				+      break;
				+    }
				+    case FOURCC_I422:
				+    case FOURCC_YV16: {
				+      const uint8* src_y = sample + src_width * crop_y + crop_x;
				+      const uint8* src_u;
				+      const uint8* src_v;
				+      int halfwidth = (src_width + 1) / 2;
				+      // YV16 stores the V plane before the U plane.
				+      if (format == FOURCC_YV16) {
				+        src_v = sample + src_width * abs_src_height +
				+            halfwidth * crop_y + crop_x / 2;
				+        src_u = sample + src_width * abs_src_height +
				+            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
				+      } else {
				+        src_u = sample + src_width * abs_src_height +
				+            halfwidth * crop_y + crop_x / 2;
				+        src_v = sample + src_width * abs_src_height +
				+            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
				+      }
				+      r = I422ToI420(src_y, src_width,
				+                     src_u, halfwidth,
				+                     src_v, halfwidth,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    }
				+    case FOURCC_I444:
				+    case FOURCC_YV24: {
				+      const uint8* src_y = sample + src_width * crop_y + crop_x;
				+      const uint8* src_u;
				+      const uint8* src_v;
				+      // YV24 stores the V plane before the U plane.
				+      if (format == FOURCC_YV24) {
				+        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
				+        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
				+      } else {
				+        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
				+        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
				+      }
				+      r = I444ToI420(src_y, src_width,
				+                     src_u, src_width,
				+                     src_v, src_width,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    }
				+    case FOURCC_I411: {
				+      int quarterwidth = (src_width + 3) / 4;
				+      const uint8* src_y = sample + src_width * crop_y + crop_x;
				+      const uint8* src_u = sample + src_width * abs_src_height +
				+          quarterwidth * crop_y + crop_x / 4;
				+      const uint8* src_v = sample + src_width * abs_src_height +
				+          quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
				+      r = I411ToI420(src_y, src_width,
				+                     src_u, quarterwidth,
				+                     src_v, quarterwidth,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     crop_width, inv_crop_height);
				+      break;
				+    }
				+#ifdef HAVE_JPEG
				+    case FOURCC_MJPG:
				+      r = MJPGToI420(sample, sample_size,
				+                     y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     src_width, abs_src_height, crop_width, inv_crop_height);
				+      break;
				+#endif
				+    default:
				+      r = -1;  // unknown fourcc - return failure code.
				+  }
				+
				+  if (need_buf) {
				+    // Second pass: rotate from the temporary planes into the caller's
				+    // destination planes. Skipped if the conversion failed.
				+    if (!r) {
				+      r = I420Rotate(y, y_stride,
				+                     u, u_stride,
				+                     v, v_stride,
				+                     tmp_y, tmp_y_stride,
				+                     tmp_u, tmp_u_stride,
				+                     tmp_v, tmp_v_stride,
				+                     crop_width, abs_crop_height, rotation);
				+    }
				+    free(rotate_buffer);
				+  }
				+
				+  return r;
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/cpu_id.cc
+++ b/src/jni/libyuv/source/cpu_id.cc
@@ -0,0 +1,299 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/cpu_id.h"
			
 
				+
			
 
				+#if defined(_MSC_VER)
			
 
				+#include <intrin.h>  // For __cpuidex()
			
 
				+#endif
			
 
				+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
			
 
				+    !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
			
 
				+    defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
			
 
				+#include <immintrin.h>  // For _xgetbv()
			
 
				+#endif
			
 
				+
			
 
				+#if !defined(__native_client__)
			
 
				+#include <stdlib.h>  // For getenv()
			
 
				+#endif
			
 
				+
			
 
				+// For ArmCpuCaps() but unittested on all platforms
			
 
				+#include <stdio.h>
			
 
				+#include <string.h>
			
 
				+
			
 
				+#include "libyuv/basic_types.h"  // For CPU_X86
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// For functions that use the stack and have runtime checks for overflow,
			
 
				+// use SAFEBUFFERS to avoid additional check.
			
 
				+#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
			
 
				+#define SAFEBUFFERS __declspec(safebuffers)
			
 
				+#else
			
 
				+#define SAFEBUFFERS
			
 
				+#endif
			
 
				+
			
 
				+// Low level cpuid for X86.
			
 
				+#if (defined(_M_IX86) || defined(_M_X64) || \
			
 
				+    defined(__i386__) || defined(__x86_64__)) && \
			
 
				+    !defined(__pnacl__) && !defined(__CLR_VER)
			
 
				+// Executes the x86 CPUID instruction.
				+//   info_eax: value loaded into EAX (the CPUID leaf).
				+//   info_ecx: value loaded into ECX (the CPUID sub-leaf).
				+//   cpu_info: receives four values: [0] = EAX, [1] = EBX, [2] = ECX,
				+//             [3] = EDX as left by the instruction.
				+LIBYUV_API
				+void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
				+#if defined(_MSC_VER)
				+// Visual C version uses intrinsic or inline x86 assembly.
				+#if (_MSC_FULL_VER >= 160040219)
				+  __cpuidex((int*)(cpu_info), info_eax, info_ecx);
				+#elif defined(_M_IX86)
				+  __asm {
				+    mov        eax, info_eax
				+    mov        ecx, info_ecx
				+    mov        edi, cpu_info
				+    cpuid
				+    mov        [edi], eax
				+    mov        [edi + 4], ebx
				+    mov        [edi + 8], ecx
				+    mov        [edi + 12], edx
				+  }
				+#else  // Visual C but not x86
				+  // This path only passes a leaf to __cpuid, so any non-zero sub-leaf
				+  // request is answered with all zeros instead.
				+  if (info_ecx == 0) {
				+    __cpuid((int*)(cpu_info), info_eax);
				+  } else {
				+    cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
				+  }
				+#endif
				+// GCC version uses inline x86 assembly.
				+#else  // defined(_MSC_VER)
				+  uint32 info_ebx, info_edx;
				+  asm volatile (
				+#if defined( __i386__) && defined(__PIC__)
				+    // Preserve ebx for fpic 32 bit.
				+    // ebx is parked in edi across cpuid; the xchg restores ebx and leaves
				+    // the cpuid result in edi, which is the "=D" output below.
				+    "mov %%ebx, %%edi                          \n"
				+    "cpuid                                     \n"
				+    "xchg %%edi, %%ebx                         \n"
				+    : "=D" (info_ebx),
				+#else
				+    "cpuid                                     \n"
				+    : "=b" (info_ebx),
				+#endif  //  defined( __i386__) && defined(__PIC__)
				+      "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx));
				+  cpu_info[0] = info_eax;
				+  cpu_info[1] = info_ebx;
				+  cpu_info[2] = info_ecx;
				+  cpu_info[3] = info_edx;
				+#endif  // defined(_MSC_VER)
				+}
			
 
				+#else  // (defined(_M_IX86) || defined(_M_X64) ...
			
 
				+// Fallback for targets without a usable CPUID instruction: reports all four
				+// result words as zero so callers detect no x86 features.
				+LIBYUV_API
				+void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
				+  for (int reg = 0; reg < 4; ++reg) {
				+    cpu_info[reg] = 0;
				+  }
				+}
			
 
				+#endif
			
 
				+
			
 
				+// For VS2010 and earlier emit can be used:
			
 
				+//   _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
			
 
				+//  __asm {
			
 
				+//    xor        ecx, ecx    // xcr 0
			
 
				+//    xgetbv
			
 
				+//    mov        xcr0, eax
			
 
				+//  }
			
 
				+// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
			
 
				+// https://code.google.com/p/libyuv/issues/detail?id=529
			
 
				+#if defined(_M_IX86) && (_MSC_VER < 1900)
			
 
				+#pragma optimize("g", off)
			
 
				+#endif
			
 
				+#if (defined(_M_IX86) || defined(_M_X64) || \
			
 
				+    defined(__i386__) || defined(__x86_64__)) && \
			
 
				+    !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
			
 
				+#define HAS_XGETBV
			
 
				+// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
				+// Returns the low 32 bits of extended control register 0 (xgetbv with
				+// ecx = 0), or 0 when neither implementation below is compiled in.
				+int GetXCR0() {
				+  uint32 xcr0 = 0u;
				+#if (_MSC_FULL_VER >= 160040219)
				+  xcr0 = (uint32)(_xgetbv(0));  // VS2010 SP1 required.
				+#elif defined(__i386__) || defined(__x86_64__)
				+  // 0x0f 0x01 0xd0 is the xgetbv opcode, emitted as raw bytes; edx is
				+  // listed as clobbered because the instruction also writes it.
				+  asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
				+#endif  // defined(__i386__) || defined(__x86_64__)
				+  return xcr0;
				+}
			
 
				+#endif  // defined(_M_IX86) || defined(_M_X64) ..
			
 
				+// Return optimization to previous setting.
			
 
				+#if defined(_M_IX86) && (_MSC_VER < 1900)
			
 
				+#pragma optimize("g", on)
			
 
				+#endif
			
 
				+
			
 
				+// Scans a /proc/cpuinfo style text file for Neon support.
				+// Based on libvpx arm_cpudetect.c.
				+// For Arm, but public to allow testing on any CPU.
				+//   cpuinfo_name: path of the text file to parse.
				+// Returns kCpuHasNEON when a "Features" line lists "neon" (or "asimd", the
				+// aarch64 name for Neon) as a whole word, or when the file cannot be
				+// opened; returns 0 otherwise.
				+LIBYUV_API SAFEBUFFERS
				+int ArmCpuCaps(const char* cpuinfo_name) {
				+  char cpuinfo_line[512];
				+  int caps = 0;
				+  FILE* f = fopen(cpuinfo_name, "r");
				+  if (!f) {
				+    // Assume Neon if /proc/cpuinfo is unavailable.
				+    // This will occur for Chrome sandbox for Pepper or Render process.
				+    return kCpuHasNEON;
				+  }
				+  while (!caps && fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
				+    // strncmp stops at the terminator, so a line shorter than 8 characters
				+    // is never compared against bytes left over from an earlier read.
				+    if (strncmp(cpuinfo_line, "Features", 8) != 0) {
				+      continue;
				+    }
				+    // The name must end at a space, the newline, or the end of the string
				+    // (last line of a file that has no trailing newline).
				+    const char* p = strstr(cpuinfo_line, " neon");
				+    if (p && (p[5] == ' ' || p[5] == '\n' || p[5] == '\0')) {
				+      caps = kCpuHasNEON;
				+      break;
				+    }
				+    // aarch64 uses asimd for Neon.
				+    p = strstr(cpuinfo_line, " asimd");
				+    if (p && (p[6] == ' ' || p[6] == '\n' || p[6] == '\0')) {
				+      caps = kCpuHasNEON;
				+    }
				+  }
				+  fclose(f);
				+  return caps;
				+}
			
 
				+
			
 
				+// Cached CPU feature flags for SIMD instruction set detection.
				+// Stays 0 until InitCpuFlags() or MaskCpuFlags() stores a value.
				+LIBYUV_API
				+int cpu_info_ = 0;  // cpu_info is not initialized yet.
			
 
				+
			
 
				+// Reads an environment variable used to switch a CPU feature off.
				+// The override is active when the variable exists and its first character
				+// is not '0', so setting it to 0 leaves the feature enabled.
				+#if !defined(__native_client__) && !defined(_M_ARM)
				+
				+static LIBYUV_BOOL TestEnv(const char* name) {
				+  const char* value = getenv(name);
				+  if (!value || value[0] == '0') {
				+    return LIBYUV_FALSE;
				+  }
				+  return LIBYUV_TRUE;
				+}
				+#else  // nacl does not support getenv().
				+static LIBYUV_BOOL TestEnv(const char*) {
				+  // No environment is consulted here, so no override is ever active.
				+  return LIBYUV_FALSE;
				+}
				+#endif
			
 
				+
			
 
				+// Detects the SIMD capabilities of the running CPU, applies the
				+// LIBYUV_DISABLE_* environment overrides, stores the result in cpu_info_
				+// and returns it. The returned value always has kCpuInitialized set.
				+LIBYUV_API SAFEBUFFERS
				+int InitCpuFlags(void) {
				+  // TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized.
				+  int cpu_info = 0;
				+#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
				+  // CpuId() fills [0] = EAX, [1] = EBX, [2] = ECX, [3] = EDX.
				+  uint32 cpu_info0[4] = { 0, 0, 0, 0 };
				+  uint32 cpu_info1[4] = { 0, 0, 0, 0 };
				+  uint32 cpu_info7[4] = { 0, 0, 0, 0 };
				+  CpuId(0, 0, cpu_info0);
				+  CpuId(1, 0, cpu_info1);
				+  // Leaf 0 EAX is the highest supported leaf; only query leaf 7 if present.
				+  if (cpu_info0[0] >= 7) {
				+    CpuId(7, 0, cpu_info7);
				+  }
				+  cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
				+             ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
				+             ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
				+             ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
				+             ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
				+             ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
				+             kCpuHasX86;
				+
				+#ifdef HAS_XGETBV
				+  // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv
				+  if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) &&  // AVX and OSXSave
				+      ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers
				+    cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX;
				+
				+    // Detect AVX512bw
				+    if ((GetXCR0() & 0xe0) == 0xe0) {
				+      cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
				+    }
				+  }
				+#endif
				+
				+  // Environment variable overrides for testing.
				+  if (TestEnv("LIBYUV_DISABLE_X86")) {
				+    cpu_info &= ~kCpuHasX86;
				+  }
				+  if (TestEnv("LIBYUV_DISABLE_SSE2")) {
				+    cpu_info &= ~kCpuHasSSE2;
				+  }
				+  if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
				+    cpu_info &= ~kCpuHasSSSE3;
				+  }
				+  if (TestEnv("LIBYUV_DISABLE_SSE41")) {
				+    cpu_info &= ~kCpuHasSSE41;
				+  }
				+  if (TestEnv("LIBYUV_DISABLE_SSE42")) {
				+    cpu_info &= ~kCpuHasSSE42;
				+  }
				+  if (TestEnv("LIBYUV_DISABLE_AVX")) {
				+    cpu_info &= ~kCpuHasAVX;
				+  }
				+  if (TestEnv("LIBYUV_DISABLE_AVX2")) {
				+    cpu_info &= ~kCpuHasAVX2;
				+  }
				+  if (TestEnv("LIBYUV_DISABLE_ERMS")) {
				+    cpu_info &= ~kCpuHasERMS;
				+  }
				+  if (TestEnv("LIBYUV_DISABLE_FMA3")) {
				+    cpu_info &= ~kCpuHasFMA3;
				+  }
				+  if (TestEnv("LIBYUV_DISABLE_AVX3")) {
				+    cpu_info &= ~kCpuHasAVX3;
				+  }
				+#endif
				+#if defined(__mips__) && defined(__linux__)
				+#if defined(__mips_dspr2)
				+  cpu_info |= kCpuHasDSPR2;
				+#endif
				+  cpu_info |= kCpuHasMIPS;
				+  // Use TestEnv() like every other override so that a value of 0 leaves
				+  // the feature enabled.
				+  if (TestEnv("LIBYUV_DISABLE_DSPR2")) {
				+    cpu_info &= ~kCpuHasDSPR2;
				+  }
				+#endif
				+#if defined(__arm__) || defined(__aarch64__)
				+// gcc -mfpu=neon defines __ARM_NEON__
				+// __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon.
				+// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
				+// flag in it, so Neon is hard coded for aarch64 as well.
				+// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
				+#if defined(__ARM_NEON__) || defined(__native_client__) || \
				+    !defined(__linux__) || defined(__aarch64__)
				+  cpu_info = kCpuHasNEON;
				+#else
				+  // Linux arm parse text file for neon detect.
				+  cpu_info = ArmCpuCaps("/proc/cpuinfo");
				+#endif
				+  cpu_info |= kCpuHasARM;
				+  if (TestEnv("LIBYUV_DISABLE_NEON")) {
				+    cpu_info &= ~kCpuHasNEON;
				+  }
				+#endif  // __arm__
				+  // Disabling assembly clears every feature flag detected above.
				+  if (TestEnv("LIBYUV_DISABLE_ASM")) {
				+    cpu_info = 0;
				+  }
				+  cpu_info  |= kCpuInitialized;
				+  cpu_info_ = cpu_info;
				+  return cpu_info;
				+}
			
 
				+
			
 
				+// Re-detects the CPU features and keeps only those also set in
				+// enable_flags, storing the result in cpu_info_.
				+// Note that use of this function is not thread safe.
				+LIBYUV_API
				+void MaskCpuFlags(int enable_flags) {
				+  const int detected_flags = InitCpuFlags();
				+  cpu_info_ = detected_flags & enable_flags;
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/mjpeg_decoder.cc
+++ b/src/jni/libyuv/source/mjpeg_decoder.cc
@@ -0,0 +1,570 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/mjpeg_decoder.h"
			
 
				+
			
 
				+#ifdef HAVE_JPEG
			
 
				+#include <assert.h>
			
 
				+
			
 
				+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
			
 
				+    !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
			
 
				+// Must be included before jpeglib.
			
 
				+#include <setjmp.h>
			
 
				+#define HAVE_SETJMP
			
 
				+
			
 
				+#if defined(_MSC_VER)
			
 
				+// disable warning 4324: structure was padded due to __declspec(align())
			
 
				+#pragma warning(disable:4324)
			
 
				+#endif
			
 
				+
			
 
				+#endif
			
 
				+struct FILE;  // For jpeglib.h.
			
 
				+
			
 
				+// C++ build requires extern C for jpeg internals.
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#include <jpeglib.h>
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+#endif
			
 
				+
			
 
				+#include "libyuv/planar_functions.h"  // For CopyPlane().
			
 
				+
			
 
				+namespace libyuv {
			
 
				+
			
 
				+#ifdef HAVE_SETJMP
			
 
				+// jpeglib error manager extended with the jump target used to recover from
				+// a jpeglib error. ErrorHandler() casts the jpeg_error_mgr pointer held by
				+// jpeglib back to this struct, which is why base has to be the first member.
				+struct SetJmpErrorMgr {
				+  jpeg_error_mgr base;  // Must be at the top
				+  jmp_buf setjmp_buffer;  // Armed with setjmp() before each jpeglib call.
				+};
			
 
				+#endif
			
 
				+
			
 
				+// Color space constants exposed by MJpegDecoder; each one takes the value
				+// of the matching jpeglib JCS_* enumerator.
				+const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
				+const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
				+const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
				+const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
				+const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
				+const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
			
 
				+
			
 
				+// Callbacks handed to jpeglib: the first four are installed in the source
				+// manager by the MJpegDecoder constructor, ErrorHandler replaces the
				+// default error_exit handler.
				+boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
				+void init_source(jpeg_decompress_struct* cinfo);
				+void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes);  // NOLINT
				+void term_source(jpeg_decompress_struct* cinfo);
				+void ErrorHandler(jpeg_common_struct* cinfo);
			
 
				+
			
 
				+// Creates the jpeglib decompressor, wires up the in-memory source manager
				+// callbacks and (with HAVE_SETJMP) the longjmp based error handler. No
				+// output buffers are allocated here; LoadFrame() does that per frame.
				+MJpegDecoder::MJpegDecoder()
				+    : has_scanline_padding_(LIBYUV_FALSE),
				+      num_outbufs_(0),
				+      scanlines_(NULL),
				+      scanlines_sizes_(NULL),
				+      databuf_(NULL),
				+      databuf_strides_(NULL) {
				+  decompress_struct_ = new jpeg_decompress_struct;
				+  source_mgr_ = new jpeg_source_mgr;
				+#ifdef HAVE_SETJMP
				+  error_mgr_ = new SetJmpErrorMgr;
				+  decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
				+  // Override standard exit()-based error handler.
				+  error_mgr_->base.error_exit = &ErrorHandler;
				+#endif
				+  // NOTE(review): without HAVE_SETJMP nothing assigns decompress_struct_->err
				+  // before jpeg_create_decompress() - confirm that configuration is supported.
				+  decompress_struct_->client_data = NULL;
				+  source_mgr_->init_source = &init_source;
				+  source_mgr_->fill_input_buffer = &fill_input_buffer;
				+  source_mgr_->skip_input_data = &skip_input_data;
				+  source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
				+  source_mgr_->term_source = &term_source;
				+  jpeg_create_decompress(decompress_struct_);
				+  decompress_struct_->src = source_mgr_;
				+  // The buffer vector always describes exactly one buffer: buf_.
				+  buf_vec_.buffers = &buf_;
				+  buf_vec_.len = 1;
				+}
			
 
				+
			
 
				+// Tears down the jpeglib decompressor, then frees the output buffers and
				+// the helper structs allocated by the constructor.
				+MJpegDecoder::~MJpegDecoder() {
				+  jpeg_destroy_decompress(decompress_struct_);
				+  DestroyOutputBuffers();
				+#ifdef HAVE_SETJMP
				+  delete error_mgr_;
				+#endif
				+  delete source_mgr_;
				+  delete decompress_struct_;
				+}
			
 
				+
			
 
				+// Parses the header of the JPEG in [src, src + src_len) and sizes the
				+// per-component scratch buffers used for decoding. The data is not copied:
				+// src is stored in buf_ and read again by later decode calls.
				+// Returns LIBYUV_FALSE if the data fails ValidateJpeg(), if jpeglib reports
				+// an error, or if the header is not accepted; LIBYUV_TRUE otherwise.
				+LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
				+  if (!ValidateJpeg(src, src_len)) {
				+    return LIBYUV_FALSE;
				+  }
				+
				+  buf_.data = src;
				+  buf_.len = static_cast<int>(src_len);
				+  buf_vec_.pos = 0;
				+  decompress_struct_->client_data = &buf_vec_;
				+#ifdef HAVE_SETJMP
				+  if (setjmp(error_mgr_->setjmp_buffer)) {
				+    // We called jpeg_read_header, it experienced an error, and we called
				+    // longjmp() and rewound the stack to here. Return error.
				+    return LIBYUV_FALSE;
				+  }
				+#endif
				+  if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
				+    // ERROR: Bad MJPEG header
				+    return LIBYUV_FALSE;
				+  }
				+  AllocOutputBuffers(GetNumComponents());
				+  for (int i = 0; i < num_outbufs_; ++i) {
				+    int scanlines_size = GetComponentScanlinesPerImcuRow(i);
				+    const bool scanlines_resized = scanlines_sizes_[i] != scanlines_size;
				+    if (scanlines_resized) {
				+      // Allocated with new[], so it must be released with delete[]
				+      // (which is also a no-op for NULL).
				+      delete [] scanlines_[i];
				+      scanlines_[i] = new uint8* [scanlines_size];
				+      scanlines_sizes_[i] = scanlines_size;
				+    }
				+
				+    // We allocate padding for the final scanline to pad it up to DCTSIZE bytes
				+    // to avoid memory errors, since jpeglib only reads full MCUs blocks. For
				+    // the preceding scanlines, the padding is not needed/wanted because the
				+    // following addresses will already be valid (they are the initial bytes of
				+    // the next scanline) and will be overwritten when jpeglib writes out that
				+    // next scanline.
				+    int databuf_stride = GetComponentStride(i);
				+    int databuf_size = scanlines_size * databuf_stride;
				+    // The buffer holds scanlines_size rows of databuf_stride bytes, so it
				+    // has to be replaced when either factor changes, not only the stride.
				+    if (scanlines_resized || databuf_strides_[i] != databuf_stride) {
				+      delete [] databuf_[i];
				+      databuf_[i] = new uint8[databuf_size];
				+      databuf_strides_[i] = databuf_stride;
				+    }
				+
				+    if (GetComponentStride(i) != GetComponentWidth(i)) {
				+      has_scanline_padding_ = LIBYUV_TRUE;
				+    }
				+  }
				+  return LIBYUV_TRUE;
				+}
			
 
				+
			
 
				+// Integer division that rounds the quotient up, computed by adding
				+// (denominator - 1) to the numerator before dividing.
				+static int DivideAndRoundUp(int numerator, int denominator) {
				+  const int biased = numerator + denominator - 1;
				+  return biased / denominator;
				+}
			
 
				+
			
 
				+// Plain truncating integer division, named to pair with DivideAndRoundUp().
				+static int DivideAndRoundDown(int numerator, int denominator) {
				+  const int quotient = numerator / denominator;
				+  return quotient;
				+}
			
 
				+
			
 
				+// Width of the last loaded frame, as stored in jpeglib's image_width.
				+int MJpegDecoder::GetWidth() {
				+  const int frame_width = decompress_struct_->image_width;
				+  return frame_width;
				+}
			
 
				+
			
 
				+// Height of the last loaded frame, as stored in jpeglib's image_height.
				+int MJpegDecoder::GetHeight() {
				+  const int frame_height = decompress_struct_->image_height;
				+  return frame_height;
				+}
			
 
				+
			
 
				+// Color space of the last loaded frame. The value is one of the
				+// kColorSpace* constants.
				+int MJpegDecoder::GetColorSpace() {
				+  const int color_space = decompress_struct_->jpeg_color_space;
				+  return color_space;
				+}
			
 
				+
			
 
				+// How many color components the loaded frame has.
				+int MJpegDecoder::GetNumComponents() {
				+  const int component_count = decompress_struct_->num_components;
				+  return component_count;
				+}
			
 
				+
			
 
				+// Horizontal sampling factor of the given component, read from jpeglib's
				+// per-component info.
				+int MJpegDecoder::GetHorizSampFactor(int component) {
				+  const int h_factor = decompress_struct_->comp_info[component].h_samp_factor;
				+  return h_factor;
				+}
			
 
				+
			
 
				+// Vertical sampling factor of the given component, read from jpeglib's
				+// per-component info.
				+int MJpegDecoder::GetVertSampFactor(int component) {
				+  const int v_factor = decompress_struct_->comp_info[component].v_samp_factor;
				+  return v_factor;
				+}
			
 
				+
			
 
				+// Horizontal subsampling of the component relative to the component with
				+// the largest horizontal sampling factor (max factor / own factor).
				+int MJpegDecoder::GetHorizSubSampFactor(int component) {
				+  const int max_factor = decompress_struct_->max_h_samp_factor;
				+  const int own_factor = GetHorizSampFactor(component);
				+  return max_factor / own_factor;
				+}
			
 
				+
			
 
				+// Vertical subsampling of the component relative to the component with
				+// the largest vertical sampling factor (max factor / own factor).
				+int MJpegDecoder::GetVertSubSampFactor(int component) {
				+  const int max_factor = decompress_struct_->max_v_samp_factor;
				+  const int own_factor = GetVertSampFactor(component);
				+  return max_factor / own_factor;
				+}
			
 
				+
			
 
				+// Number of full-resolution image scanlines covered by one iMCU row:
				+// DCTSIZE rows for each unit of the largest vertical sampling factor.
				+int MJpegDecoder::GetImageScanlinesPerImcuRow() {
				+  const int max_v_factor = decompress_struct_->max_v_samp_factor;
				+  return max_v_factor * DCTSIZE;
				+}
			
 
				+
			
 
				+// Number of scanlines of the given component in one iMCU row: the image
				+// scanlines per iMCU row divided by the component's vertical subsampling,
				+// rounded up.
				+int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
				+  const int v_subsample = GetVertSubSampFactor(component);
				+  const int image_rows = GetImageScanlinesPerImcuRow();
				+  return DivideAndRoundUp(image_rows, v_subsample);
				+}
			
 
				+
			
 
				+// Width of the given component: the frame width divided by the component's
				+// horizontal subsampling, rounded up.
				+int MJpegDecoder::GetComponentWidth(int component) {
				+  const int h_subsample = GetHorizSubSampFactor(component);
				+  const int frame_width = GetWidth();
				+  return DivideAndRoundUp(frame_width, h_subsample);
				+}
			
 
				+
			
 
				+// Height of the given component: the frame height divided by the
				+// component's vertical subsampling, rounded up.
				+int MJpegDecoder::GetComponentHeight(int component) {
				+  const int v_subsample = GetVertSubSampFactor(component);
				+  const int frame_height = GetHeight();
				+  return DivideAndRoundUp(frame_height, v_subsample);
				+}
			
 
				+
			
 
				+// Component width in bytes, rounded up to the next multiple of DCTSIZE.
				+int MJpegDecoder::GetComponentStride(int component) {
				+  const int align_mask = DCTSIZE - 1;
				+  const int width = GetComponentWidth(component);
				+  return (width + align_mask) & ~align_mask;
				+}
			
 
				+
			
 
				+// Number of samples in the given component when stored without padding
				+// (component width times component height).
				+int MJpegDecoder::GetComponentSize(int component) {
				+  const int width = GetComponentWidth(component);
				+  const int height = GetComponentHeight(component);
				+  return width * height;
				+}
			
 
				+
			
 
				+// Aborts the current decompression so that another frame can be loaded.
				+// Returns LIBYUV_FALSE only if jpeglib raised an error while aborting.
				+LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
				+#ifdef HAVE_SETJMP
				+  if (setjmp(error_mgr_->setjmp_buffer) != 0) {
				+    // Reached through longjmp() from ErrorHandler(): jpeg_abort_decompress
				+    // failed and the stack was rewound to this point.
				+    return LIBYUV_FALSE;
				+  }
				+#endif
				+  jpeg_abort_decompress(decompress_struct_);
				+  return LIBYUV_TRUE;
				+}
			
 
				+
			
 
				+// Decodes the loaded frame into caller supplied planes, one per component.
				+//   planes:     one destination pointer per component; rows are written
				+//               back to back with a pitch of GetComponentWidth(i). Each
				+//               planes[i] is advanced past the rows written to it.
				+//   dst_width:  must equal GetWidth().
				+//   dst_height: must not exceed GetHeight(). When smaller, the first
				+//               (GetHeight() - dst_height) / 2 image rows are decoded and
				+//               discarded before copying starts.
				+// Returns LIBYUV_FALSE on bad dimensions or on any decode failure,
				+// otherwise the result of FinishDecode().
				+// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
				+LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
				+    uint8** planes, int dst_width, int dst_height) {
				+  if (dst_width != GetWidth() ||
				+      dst_height > GetHeight()) {
				+    // ERROR: Bad dimensions
				+    return LIBYUV_FALSE;
				+  }
				+#ifdef HAVE_SETJMP
				+  if (setjmp(error_mgr_->setjmp_buffer)) {
				+    // We called into jpeglib, it experienced an error sometime during this
				+    // function call, and we called longjmp() and rewound the stack to here.
				+    // Return error.
				+    return LIBYUV_FALSE;
				+  }
				+#endif
				+  if (!StartDecode()) {
				+    return LIBYUV_FALSE;
				+  }
				+  SetScanlinePointers(databuf_);
				+  int lines_left = dst_height;
				+  // Compute amount of lines to skip to implement vertical crop.
				+  // TODO(fbarchard): Ensure skip is a multiple of maximum component
				+  // subsample. ie 2
				+  int skip = (GetHeight() - dst_height) / 2;
				+  if (skip > 0) {
				+    // There is no API to skip lines in the output data, so we read them
				+    // into the temp buffer.
				+    while (skip >= GetImageScanlinesPerImcuRow()) {
				+      if (!DecodeImcuRow()) {
				+        FinishDecode();
				+        return LIBYUV_FALSE;
				+      }
				+      skip -= GetImageScanlinesPerImcuRow();
				+    }
				+    if (skip > 0) {
				+      // Have a partial iMCU row left over to skip. Must read it and then
				+      // copy the parts we want into the destination.
				+      // NOTE(review): the rows copied here are not clamped to lines_left;
				+      // confirm the planes are large enough when dst_height is smaller
				+      // than the remainder of this iMCU row.
				+      if (!DecodeImcuRow()) {
				+        FinishDecode();
				+        return LIBYUV_FALSE;
				+      }
				+      for (int i = 0; i < num_outbufs_; ++i) {
				+        // TODO(fbarchard): Compute skip to avoid this
				+        assert(skip % GetVertSubSampFactor(i) == 0);
				+        int rows_to_skip =
				+            DivideAndRoundDown(skip, GetVertSubSampFactor(i));
				+        int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
				+                                rows_to_skip;
				+        int data_to_skip = rows_to_skip * GetComponentStride(i);
				+        CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
				+                  planes[i], GetComponentWidth(i),
				+                  GetComponentWidth(i), scanlines_to_copy);
				+        planes[i] += scanlines_to_copy * GetComponentWidth(i);
				+      }
				+      lines_left -= (GetImageScanlinesPerImcuRow() - skip);
				+    }
				+  }
				+
				+  // Copy whole iMCU rows. Only GetComponentWidth(i) bytes of each
				+  // GetComponentStride(i) wide scanline are copied.
				+  for (; lines_left > GetImageScanlinesPerImcuRow();
				+         lines_left -= GetImageScanlinesPerImcuRow()) {
				+    if (!DecodeImcuRow()) {
				+      FinishDecode();
				+      return LIBYUV_FALSE;
				+    }
				+    for (int i = 0; i < num_outbufs_; ++i) {
				+      int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
				+      CopyPlane(databuf_[i], GetComponentStride(i),
				+                planes[i], GetComponentWidth(i),
				+                GetComponentWidth(i), scanlines_to_copy);
				+      planes[i] += scanlines_to_copy * GetComponentWidth(i);
				+    }
				+  }
				+
				+  if (lines_left > 0) {
				+    // Have a partial iMCU row left over to decode.
				+    // (An exactly full last row also ends up here because the loop above
				+    // uses a strict comparison.)
				+    if (!DecodeImcuRow()) {
				+      FinishDecode();
				+      return LIBYUV_FALSE;
				+    }
				+    for (int i = 0; i < num_outbufs_; ++i) {
				+      int scanlines_to_copy =
				+          DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
				+      CopyPlane(databuf_[i], GetComponentStride(i),
				+                planes[i], GetComponentWidth(i),
				+                GetComponentWidth(i), scanlines_to_copy);
				+      planes[i] += scanlines_to_copy * GetComponentWidth(i);
				+    }
				+  }
				+  return FinishDecode();
				+}
			
 
				+
			
 
				+// Decodes the loaded frame one iMCU row at a time and hands each decoded
				+// row to a callback instead of copying it.
				+//   fn:         called as fn(opaque, databuf_, databuf_strides_, rows) where
				+//               rows counts full-resolution image scanlines.
				+//   opaque:     passed through to fn unchanged.
				+//   dst_width:  must equal GetWidth().
				+//   dst_height: must not exceed GetHeight(). When smaller, the first
				+//               (GetHeight() - dst_height) / 2 image rows are decoded
				+//               without being reported to fn.
				+// Returns LIBYUV_FALSE on bad dimensions or on any decode failure,
				+// otherwise the result of FinishDecode().
				+LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
				+    int dst_width, int dst_height) {
				+  if (dst_width != GetWidth() ||
				+      dst_height > GetHeight()) {
				+    // ERROR: Bad dimensions
				+    return LIBYUV_FALSE;
				+  }
				+#ifdef HAVE_SETJMP
				+  if (setjmp(error_mgr_->setjmp_buffer)) {
				+    // We called into jpeglib, it experienced an error sometime during this
				+    // function call, and we called longjmp() and rewound the stack to here.
				+    // Return error.
				+    return LIBYUV_FALSE;
				+  }
				+#endif
				+  if (!StartDecode()) {
				+    return LIBYUV_FALSE;
				+  }
				+  SetScanlinePointers(databuf_);
				+  int lines_left = dst_height;
				+  // TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
				+  int skip = (GetHeight() - dst_height) / 2;
				+  if (skip > 0) {
				+    // Decode and drop every iMCU row that lies entirely in the skipped area.
				+    while (skip >= GetImageScanlinesPerImcuRow()) {
				+      if (!DecodeImcuRow()) {
				+        FinishDecode();
				+        return LIBYUV_FALSE;
				+      }
				+      skip -= GetImageScanlinesPerImcuRow();
				+    }
				+    if (skip > 0) {
				+      // Have a partial iMCU row left over to skip.
				+      if (!DecodeImcuRow()) {
				+        FinishDecode();
				+        return LIBYUV_FALSE;
				+      }
				+      for (int i = 0; i < num_outbufs_; ++i) {
				+        // TODO(fbarchard): Compute skip to avoid this
				+        assert(skip % GetVertSubSampFactor(i) == 0);
				+        int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
				+        int data_to_skip = rows_to_skip * GetComponentStride(i);
				+        // Change our own data buffer pointers so we can pass them to the
				+        // callback.
				+        databuf_[i] += data_to_skip;
				+      }
				+      int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
				+      (*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
				+      // Now change them back.
				+      for (int i = 0; i < num_outbufs_; ++i) {
				+        int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
				+        int data_to_skip = rows_to_skip * GetComponentStride(i);
				+        databuf_[i] -= data_to_skip;
				+      }
				+      lines_left -= scanlines_to_copy;
				+    }
				+  }
				+  // Read full MCUs until we get to the crop point.
				+  for (; lines_left >= GetImageScanlinesPerImcuRow();
				+         lines_left -= GetImageScanlinesPerImcuRow()) {
				+    if (!DecodeImcuRow()) {
				+      FinishDecode();
				+      return LIBYUV_FALSE;
				+    }
				+    (*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
				+  }
				+  if (lines_left > 0) {
				+    // Have a partial iMCU row left over to decode.
				+    if (!DecodeImcuRow()) {
				+      FinishDecode();
				+      return LIBYUV_FALSE;
				+    }
				+    (*fn)(opaque, databuf_, databuf_strides_, lines_left);
				+  }
				+  return FinishDecode();
				+}
			
 
				+
			
 
				+// jpeglib source-manager hook run before any data is read. It primes the
				+// source with the first buffer; the result of the fill is ignored.
				+void init_source(j_decompress_ptr cinfo) {
				+  (void)fill_input_buffer(cinfo);
				+}
			
 
				+
			
 
				+// jpeglib source-manager hook that supplies the next input buffer.
				+// cinfo->client_data must point to the BufferVector describing the input.
				+// Returns TRUE after pointing the source manager at the next buffer, or
				+// FALSE when every buffer has already been handed out.
				+boolean fill_input_buffer(j_decompress_ptr cinfo) {
				+  BufferVector* buf_vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
				+  if (buf_vec->pos >= buf_vec->len) {
				+    // ERROR: No more data
				+    // Truncated input is expected for untrusted streams, so report it to
				+    // jpeglib instead of asserting (which would abort debug builds).
				+    return FALSE;
				+  }
				+  cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
				+  cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
				+  ++buf_vec->pos;
				+  return TRUE;
				+}
			
 
				+
			
 
				+// jpeglib source-manager hook that skips num_bytes of input.
				+// A zero or negative count is a no-op. A count larger than the data left in
				+// the buffer empties the buffer, so the next read goes through
				+// fill_input_buffer() instead of running past the end of the input.
				+void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {  // NOLINT
				+  if (num_bytes <= 0) {
				+    return;
				+  }
				+  jpeg_source_mgr* src = cinfo->src;
				+  const size_t bytes = static_cast<size_t>(num_bytes);
				+  if (bytes > src->bytes_in_buffer) {
				+    src->next_input_byte = NULL;
				+    src->bytes_in_buffer = 0;
				+  } else {
				+    // Pointer and remaining count must move together; jpeglib trusts
				+    // bytes_in_buffer to know how much it may still read.
				+    src->next_input_byte += bytes;
				+    src->bytes_in_buffer -= bytes;
				+  }
				+}
			
 
				+
			
 
				+// jpeglib source-manager hook for end of input; intentionally a no-op.
				+void term_source(j_decompress_ptr cinfo) {
				+  // Nothing to release.
				+}
			
 
				+
			
 
				+#ifdef HAVE_SETJMP
			
 
				+// Replacement for jpeglib's error_exit hook.
				+// jpeglib expects its error handler never to return (the default one
				+// terminates the program). To recover instead, control is transferred with
				+// longjmp() back to the setjmp() armed by the MJpegDecoder method that
				+// called into jpeglib, following the approach of jpeglib's own example.
				+void ErrorHandler(j_common_ptr cinfo) {
				+  // A formatted message can be output, but is unsafe for release.
				+#ifdef DEBUG
				+  char message[JMSG_LENGTH_MAX];
				+  cinfo->err->format_message(cinfo, message);
				+  // ERROR: Error in jpeglib: message
				+#endif
				+
				+  // cinfo->err points at the base member of a SetJmpErrorMgr.
				+  SetJmpErrorMgr* jump_mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
				+  // Rewind the call stack to the matching setjmp(), which then returns a
				+  // second time, now with the value 1.
				+  longjmp(jump_mgr->setjmp_buffer, 1);
				+}
			
 
				+#endif
			
 
				+
			
 
				+void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
			
 
				+  if (num_outbufs != num_outbufs_) {
			
 
				+    // We could perhaps optimize this case to resize the output buffers without
			
 
				+    // necessarily having to delete and recreate each one, but it's not worth
			
 
				+    // it.
			
 
				+    DestroyOutputBuffers();
			
 
				+
			
 
				+    scanlines_ = new uint8** [num_outbufs];
			
 
				+    scanlines_sizes_ = new int[num_outbufs];
			
 
				+    databuf_ = new uint8* [num_outbufs];
			
 
				+    databuf_strides_ = new int[num_outbufs];
			
 
				+
			
 
				+    for (int i = 0; i < num_outbufs; ++i) {
			
 
				+      scanlines_[i] = NULL;
			
 
				+      scanlines_sizes_[i] = 0;
			
 
				+      databuf_[i] = NULL;
			
 
				+      databuf_strides_[i] = 0;
			
 
				+    }
			
 
				+
			
 
				+    num_outbufs_ = num_outbufs;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+void MJpegDecoder::DestroyOutputBuffers() {
			
 
				+  for (int i = 0; i < num_outbufs_; ++i) {
			
 
				+    delete [] scanlines_[i];
			
 
				+    delete [] databuf_[i];
			
 
				+  }
			
 
				+  delete [] scanlines_;
			
 
				+  delete [] databuf_;
			
 
				+  delete [] scanlines_sizes_;
			
 
				+  delete [] databuf_strides_;
			
 
				+  scanlines_ = NULL;
			
 
				+  databuf_ = NULL;
			
 
				+  scanlines_sizes_ = NULL;
			
 
				+  databuf_strides_ = NULL;
			
 
				+  num_outbufs_ = 0;
			
 
				+}
			
 
				+
			
 
				+// JDCT_IFAST and do_block_smoothing improve performance substantially.
			
 
				+LIBYUV_BOOL MJpegDecoder::StartDecode() {
			
 
				+  decompress_struct_->raw_data_out = TRUE;
			
 
				+  decompress_struct_->dct_method = JDCT_IFAST;  // JDCT_ISLOW is default
			
 
				+  decompress_struct_->dither_mode = JDITHER_NONE;
			
 
				+  // Not applicable to 'raw':
			
 
				+  decompress_struct_->do_fancy_upsampling = (boolean)(LIBYUV_FALSE);
			
 
				+  // Only for buffered mode:
			
 
				+  decompress_struct_->enable_2pass_quant = (boolean)(LIBYUV_FALSE);
			
 
				+  // Blocky but fast:
			
 
				+  decompress_struct_->do_block_smoothing = (boolean)(LIBYUV_FALSE);
			
 
				+
			
 
				+  if (!jpeg_start_decompress(decompress_struct_)) {
			
 
				+    // ERROR: Couldn't start JPEG decompressor";
			
 
				+    return LIBYUV_FALSE;
			
 
				+  }
			
 
				+  return LIBYUV_TRUE;
			
 
				+}
			
 
				+
			
 
				+LIBYUV_BOOL MJpegDecoder::FinishDecode() {
			
 
				+  // jpeglib considers it an error if we finish without decoding the whole
			
 
				+  // image, so we call "abort" rather than "finish".
			
 
				+  jpeg_abort_decompress(decompress_struct_);
			
 
				+  return LIBYUV_TRUE;
			
 
				+}
			
 
				+
			
 
				+void MJpegDecoder::SetScanlinePointers(uint8** data) {
			
 
				+  for (int i = 0; i < num_outbufs_; ++i) {
			
 
				+    uint8* data_i = data[i];
			
 
				+    for (int j = 0; j < scanlines_sizes_[i]; ++j) {
			
 
				+      scanlines_[i][j] = data_i;
			
 
				+      data_i += GetComponentStride(i);
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
			
 
				+  return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
			
 
				+      jpeg_read_raw_data(decompress_struct_,
			
 
				+                         scanlines_,
			
 
				+                         GetImageScanlinesPerImcuRow());
			
 
				+}
			
 
				+
			
 
				+// The helper function which recognizes the jpeg sub-sampling type.
			
 
				+JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
			
 
				+    int* subsample_x, int* subsample_y, int number_of_components) {
			
 
				+  if (number_of_components == 3) {  // Color images.
			
 
				+    if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
			
 
				+        subsample_x[1] == 2 && subsample_y[1] == 2 &&
			
 
				+        subsample_x[2] == 2 && subsample_y[2] == 2) {
			
 
				+      return kJpegYuv420;
			
 
				+    } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
			
 
				+        subsample_x[1] == 2 && subsample_y[1] == 1 &&
			
 
				+        subsample_x[2] == 2 && subsample_y[2] == 1) {
			
 
				+      return kJpegYuv422;
			
 
				+    } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
			
 
				+        subsample_x[1] == 1 && subsample_y[1] == 1 &&
			
 
				+        subsample_x[2] == 1 && subsample_y[2] == 1) {
			
 
				+      return kJpegYuv444;
			
 
				+    }
			
 
				+  } else if (number_of_components == 1) {  // Grey-scale images.
			
 
				+    if (subsample_x[0] == 1 && subsample_y[0] == 1) {
			
 
				+      return kJpegYuv400;
			
 
				+    }
			
 
				+  }
			
 
				+  return kJpegUnknown;
			
 
				+}
			
 
				+
			
 
				+}  // namespace libyuv
			
 
				+#endif  // HAVE_JPEG
			
 
				+
			
--- a/src/jni/libyuv/source/mjpeg_validate.cc
+++ b/src/jni/libyuv/source/mjpeg_validate.cc
@@ -0,0 +1,71 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/mjpeg_decoder.h"
			
 
				+
			
 
				+#include <string.h>  // For memchr.
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Helper function to scan for EOI marker (0xff 0xd9).
			
 
				+static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
			
 
				+  if (sample_size >= 2) {
			
 
				+    const uint8* end = sample + sample_size - 1;
			
 
				+    const uint8* it = sample;
			
 
				+    while (it < end) {
			
 
				+      // TODO(fbarchard): scan for 0xd9 instead.
			
 
				+      it = static_cast<const uint8 *>(memchr(it, 0xff, end - it));
			
 
				+      if (it == NULL) {
			
 
				+        break;
			
 
				+      }
			
 
				+      if (it[1] == 0xd9) {
			
 
				+        return LIBYUV_TRUE;  // Success: Valid jpeg.
			
 
				+      }
			
 
				+      ++it;  // Skip over current 0xff.
			
 
				+    }
			
 
				+  }
			
 
				+  // ERROR: Invalid jpeg end code not found. Size sample_size
			
 
				+  return LIBYUV_FALSE;
			
 
				+}
			
 
				+
			
 
				+// Helper function to validate the jpeg appears intact.
			
 
				+LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
			
 
				+  // Maximum size that ValidateJpeg will consider valid.
			
 
				+  const size_t kMaxJpegSize = 0x7fffffffull;
			
 
				+  const size_t kBackSearchSize = 1024;
			
 
				+  if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) {
			
 
				+    // ERROR: Invalid jpeg size: sample_size
			
 
				+    return LIBYUV_FALSE;
			
 
				+  }
			
 
				+  if (sample[0] != 0xff || sample[1] != 0xd8) {  // SOI marker
			
 
				+    // ERROR: Invalid jpeg initial start code
			
 
				+    return LIBYUV_FALSE;
			
 
				+  }
			
 
				+
			
 
				+  // Look for the End Of Image (EOI) marker near the end of the buffer.
			
 
				+  if (sample_size > kBackSearchSize) {
			
 
				+    if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) {
			
 
				+      return LIBYUV_TRUE;  // Success: Valid jpeg.
			
 
				+    }
			
 
				+    // Reduce search size for forward search.
			
 
				+    sample_size = sample_size - kBackSearchSize + 1;
			
 
				+  }
			
 
				+  // Step over SOI marker and scan for EOI.
			
 
				+  return ScanEOI(sample + 2, sample_size - 2);
			
 
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
--- a/src/jni/libyuv/source/planar_functions.cc
+++ b/src/jni/libyuv/source/planar_functions.cc
--- a/src/jni/libyuv/source/rotate.cc
+++ b/src/jni/libyuv/source/rotate.cc
@@ -0,0 +1,491 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/rotate.h"
			
 
				+
			
 
				+#include "libyuv/cpu_id.h"
			
 
				+#include "libyuv/convert.h"
			
 
				+#include "libyuv/planar_functions.h"
			
 
				+#include "libyuv/rotate_row.h"
			
 
				+#include "libyuv/row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void TransposePlane(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride,
			
 
				+                    int width, int height) {
			
 
				+  int i = height;
			
 
				+  void (*TransposeWx8)(const uint8* src, int src_stride,
			
 
				+                       uint8* dst, int dst_stride, int width) = TransposeWx8_C;
			
 
				+#if defined(HAS_TRANSPOSEWX8_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    TransposeWx8 = TransposeWx8_NEON;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_TRANSPOSEWX8_SSSE3)
			
 
				+  if (TestCpuFlag(kCpuHasSSSE3)) {
			
 
				+    TransposeWx8 = TransposeWx8_Any_SSSE3;
			
 
				+    if (IS_ALIGNED(width, 8)) {
			
 
				+      TransposeWx8 = TransposeWx8_SSSE3;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
			
 
				+  if (TestCpuFlag(kCpuHasSSSE3)) {
			
 
				+    TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
			
 
				+    if (IS_ALIGNED(width, 16)) {
			
 
				+      TransposeWx8 = TransposeWx8_Fast_SSSE3;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_TRANSPOSEWX8_DSPR2)
			
 
				+  if (TestCpuFlag(kCpuHasDSPR2)) {
			
 
				+    if (IS_ALIGNED(width, 4) &&
			
 
				+        IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
			
 
				+      TransposeWx8 = TransposeWx8_Fast_DSPR2;
			
 
				+    } else {
			
 
				+      TransposeWx8 = TransposeWx8_DSPR2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+
			
 
				+  // Work across the source in 8x8 tiles
			
 
				+  while (i >= 8) {
			
 
				+    TransposeWx8(src, src_stride, dst, dst_stride, width);
			
 
				+    src += 8 * src_stride;    // Go down 8 rows.
			
 
				+    dst += 8;                 // Move over 8 columns.
			
 
				+    i -= 8;
			
 
				+  }
			
 
				+
			
 
				+  if (i > 0) {
			
 
				+    TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void RotatePlane90(const uint8* src, int src_stride,
			
 
				+                   uint8* dst, int dst_stride,
			
 
				+                   int width, int height) {
			
 
				+  // Rotate by 90 is a transpose with the source read
			
 
				+  // from bottom to top. So set the source pointer to the end
			
 
				+  // of the buffer and flip the sign of the source stride.
			
 
				+  src += src_stride * (height - 1);
			
 
				+  src_stride = -src_stride;
			
 
				+  TransposePlane(src, src_stride, dst, dst_stride, width, height);
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void RotatePlane270(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride,
			
 
				+                    int width, int height) {
			
 
				+  // Rotate by 270 is a transpose with the destination written
			
 
				+  // from bottom to top. So set the destination pointer to the end
			
 
				+  // of the buffer and flip the sign of the destination stride.
			
 
				+  dst += dst_stride * (width - 1);
			
 
				+  dst_stride = -dst_stride;
			
 
				+  TransposePlane(src, src_stride, dst, dst_stride, width, height);
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void RotatePlane180(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride,
			
 
				+                    int width, int height) {
			
 
				+  // Swap first and last row and mirror the content. Uses a temporary row.
			
 
				+  align_buffer_64(row, width);
			
 
				+  const uint8* src_bot = src + src_stride * (height - 1);
			
 
				+  uint8* dst_bot = dst + dst_stride * (height - 1);
			
 
				+  int half_height = (height + 1) >> 1;
			
 
				+  int y;
			
 
				+  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
			
 
				+  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
			
 
				+#if defined(HAS_MIRRORROW_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    MirrorRow = MirrorRow_Any_NEON;
			
 
				+    if (IS_ALIGNED(width, 16)) {
			
 
				+      MirrorRow = MirrorRow_NEON;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_MIRRORROW_SSSE3)
			
 
				+  if (TestCpuFlag(kCpuHasSSSE3)) {
			
 
				+    MirrorRow = MirrorRow_Any_SSSE3;
			
 
				+    if (IS_ALIGNED(width, 16)) {
			
 
				+      MirrorRow = MirrorRow_SSSE3;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_MIRRORROW_AVX2)
			
 
				+  if (TestCpuFlag(kCpuHasAVX2)) {
			
 
				+    MirrorRow = MirrorRow_Any_AVX2;
			
 
				+    if (IS_ALIGNED(width, 32)) {
			
 
				+      MirrorRow = MirrorRow_AVX2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+// TODO(fbarchard): Mirror on mips handle unaligned memory.
			
 
				+#if defined(HAS_MIRRORROW_DSPR2)
			
 
				+  if (TestCpuFlag(kCpuHasDSPR2) &&
			
 
				+      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
			
 
				+      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
			
 
				+    MirrorRow = MirrorRow_DSPR2;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_SSE2)
			
 
				+  if (TestCpuFlag(kCpuHasSSE2)) {
			
 
				+    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_AVX)
			
 
				+  if (TestCpuFlag(kCpuHasAVX)) {
			
 
				+    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_ERMS)
			
 
				+  if (TestCpuFlag(kCpuHasERMS)) {
			
 
				+    CopyRow = CopyRow_ERMS;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_MIPS)
			
 
				+  if (TestCpuFlag(kCpuHasMIPS)) {
			
 
				+    CopyRow = CopyRow_MIPS;
			
 
				+  }
			
 
				+#endif
			
 
				+
			
 
				+  // Odd height will harmlessly mirror the middle row twice.
			
 
				+  for (y = 0; y < half_height; ++y) {
			
 
				+    MirrorRow(src, row, width);  // Mirror first row into a buffer
			
 
				+    src += src_stride;
			
 
				+    MirrorRow(src_bot, dst, width);  // Mirror last row into first row
			
 
				+    dst += dst_stride;
			
 
				+    CopyRow(row, dst_bot, width);  // Copy first mirrored row into last
			
 
				+    src_bot -= src_stride;
			
 
				+    dst_bot -= dst_stride;
			
 
				+  }
			
 
				+  free_aligned_buffer_64(row);
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void TransposeUV(const uint8* src, int src_stride,
			
 
				+                 uint8* dst_a, int dst_stride_a,
			
 
				+                 uint8* dst_b, int dst_stride_b,
			
 
				+                 int width, int height) {
			
 
				+  int i = height;
			
 
				+  void (*TransposeUVWx8)(const uint8* src, int src_stride,
			
 
				+                         uint8* dst_a, int dst_stride_a,
			
 
				+                         uint8* dst_b, int dst_stride_b,
			
 
				+                         int width) = TransposeUVWx8_C;
			
 
				+#if defined(HAS_TRANSPOSEUVWX8_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    TransposeUVWx8 = TransposeUVWx8_NEON;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_TRANSPOSEUVWX8_SSE2)
			
 
				+  if (TestCpuFlag(kCpuHasSSE2)) {
			
 
				+    TransposeUVWx8 = TransposeUVWx8_Any_SSE2;
			
 
				+    if (IS_ALIGNED(width, 8)) {
			
 
				+      TransposeUVWx8 = TransposeUVWx8_SSE2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_TRANSPOSEUVWX8_DSPR2)
			
 
				+  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) &&
			
 
				+      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
			
 
				+    TransposeUVWx8 = TransposeUVWx8_DSPR2;
			
 
				+  }
			
 
				+#endif
			
 
				+
			
 
				+  // Work through the source in 8x8 tiles.
			
 
				+  while (i >= 8) {
			
 
				+    TransposeUVWx8(src, src_stride,
			
 
				+                   dst_a, dst_stride_a,
			
 
				+                   dst_b, dst_stride_b,
			
 
				+                   width);
			
 
				+    src += 8 * src_stride;    // Go down 8 rows.
			
 
				+    dst_a += 8;               // Move over 8 columns.
			
 
				+    dst_b += 8;               // Move over 8 columns.
			
 
				+    i -= 8;
			
 
				+  }
			
 
				+
			
 
				+  if (i > 0) {
			
 
				+    TransposeUVWxH_C(src, src_stride,
			
 
				+                     dst_a, dst_stride_a,
			
 
				+                     dst_b, dst_stride_b,
			
 
				+                     width, i);
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void RotateUV90(const uint8* src, int src_stride,
			
 
				+                uint8* dst_a, int dst_stride_a,
			
 
				+                uint8* dst_b, int dst_stride_b,
			
 
				+                int width, int height) {
			
 
				+  src += src_stride * (height - 1);
			
 
				+  src_stride = -src_stride;
			
 
				+
			
 
				+  TransposeUV(src, src_stride,
			
 
				+              dst_a, dst_stride_a,
			
 
				+              dst_b, dst_stride_b,
			
 
				+              width, height);
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+void RotateUV270(const uint8* src, int src_stride,
			
 
				+                 uint8* dst_a, int dst_stride_a,
			
 
				+                 uint8* dst_b, int dst_stride_b,
			
 
				+                 int width, int height) {
			
 
				+  dst_a += dst_stride_a * (width - 1);
			
 
				+  dst_b += dst_stride_b * (width - 1);
			
 
				+  dst_stride_a = -dst_stride_a;
			
 
				+  dst_stride_b = -dst_stride_b;
			
 
				+
			
 
				+  TransposeUV(src, src_stride,
			
 
				+              dst_a, dst_stride_a,
			
 
				+              dst_b, dst_stride_b,
			
 
				+              width, height);
			
 
				+}
			
 
				+
			
 
				+// Rotate 180 is a horizontal and vertical flip.
			
 
				+LIBYUV_API
			
 
				+void RotateUV180(const uint8* src, int src_stride,
			
 
				+                 uint8* dst_a, int dst_stride_a,
			
 
				+                 uint8* dst_b, int dst_stride_b,
			
 
				+                 int width, int height) {
			
 
				+  int i;
			
 
				+  void (*MirrorUVRow)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
			
 
				+      MirrorUVRow_C;
			
 
				+#if defined(HAS_MIRRORUVROW_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
			
 
				+    MirrorUVRow = MirrorUVRow_NEON;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_MIRRORUVROW_SSSE3)
			
 
				+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
			
 
				+    MirrorUVRow = MirrorUVRow_SSSE3;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_MIRRORUVROW_DSPR2)
			
 
				+  if (TestCpuFlag(kCpuHasDSPR2) &&
			
 
				+      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
			
 
				+    MirrorUVRow = MirrorUVRow_DSPR2;
			
 
				+  }
			
 
				+#endif
			
 
				+
			
 
				+  dst_a += dst_stride_a * (height - 1);
			
 
				+  dst_b += dst_stride_b * (height - 1);
			
 
				+
			
 
				+  for (i = 0; i < height; ++i) {
			
 
				+    MirrorUVRow(src, dst_a, dst_b, width);
			
 
				+    src += src_stride;
			
 
				+    dst_a -= dst_stride_a;
			
 
				+    dst_b -= dst_stride_b;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int RotatePlane(const uint8* src, int src_stride,
			
 
				+                uint8* dst, int dst_stride,
			
 
				+                int width, int height,
			
 
				+                enum RotationMode mode) {
			
 
				+  if (!src || width <= 0 || height == 0 || !dst) {
			
 
				+    return -1;
			
 
				+  }
			
 
				+
			
 
				+  // Negative height means invert the image.
			
 
				+  if (height < 0) {
			
 
				+    height = -height;
			
 
				+    src = src + (height - 1) * src_stride;
			
 
				+    src_stride = -src_stride;
			
 
				+  }
			
 
				+
			
 
				+  switch (mode) {
			
 
				+    case kRotate0:
			
 
				+      // copy frame
			
 
				+      CopyPlane(src, src_stride,
			
 
				+                dst, dst_stride,
			
 
				+                width, height);
			
 
				+      return 0;
			
 
				+    case kRotate90:
			
 
				+      RotatePlane90(src, src_stride,
			
 
				+                    dst, dst_stride,
			
 
				+                    width, height);
			
 
				+      return 0;
			
 
				+    case kRotate270:
			
 
				+      RotatePlane270(src, src_stride,
			
 
				+                     dst, dst_stride,
			
 
				+                     width, height);
			
 
				+      return 0;
			
 
				+    case kRotate180:
			
 
				+      RotatePlane180(src, src_stride,
			
 
				+                     dst, dst_stride,
			
 
				+                     width, height);
			
 
				+      return 0;
			
 
				+    default:
			
 
				+      break;
			
 
				+  }
			
 
				+  return -1;
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int I420Rotate(const uint8* src_y, int src_stride_y,
			
 
				+               const uint8* src_u, int src_stride_u,
			
 
				+               const uint8* src_v, int src_stride_v,
			
 
				+               uint8* dst_y, int dst_stride_y,
			
 
				+               uint8* dst_u, int dst_stride_u,
			
 
				+               uint8* dst_v, int dst_stride_v,
			
 
				+               int width, int height,
			
 
				+               enum RotationMode mode) {
			
 
				+  int halfwidth = (width + 1) >> 1;
			
 
				+  int halfheight = (height + 1) >> 1;
			
 
				+  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
			
 
				+      !dst_y || !dst_u || !dst_v) {
			
 
				+    return -1;
			
 
				+  }
			
 
				+
			
 
				+  // Negative height means invert the image.
			
 
				+  if (height < 0) {
			
 
				+    height = -height;
			
 
				+    halfheight = (height + 1) >> 1;
			
 
				+    src_y = src_y + (height - 1) * src_stride_y;
			
 
				+    src_u = src_u + (halfheight - 1) * src_stride_u;
			
 
				+    src_v = src_v + (halfheight - 1) * src_stride_v;
			
 
				+    src_stride_y = -src_stride_y;
			
 
				+    src_stride_u = -src_stride_u;
			
 
				+    src_stride_v = -src_stride_v;
			
 
				+  }
			
 
				+
			
 
				+  switch (mode) {
			
 
				+    case kRotate0:
			
 
				+      // copy frame
			
 
				+      return I420Copy(src_y, src_stride_y,
			
 
				+                      src_u, src_stride_u,
			
 
				+                      src_v, src_stride_v,
			
 
				+                      dst_y, dst_stride_y,
			
 
				+                      dst_u, dst_stride_u,
			
 
				+                      dst_v, dst_stride_v,
			
 
				+                      width, height);
			
 
				+    case kRotate90:
			
 
				+      RotatePlane90(src_y, src_stride_y,
			
 
				+                    dst_y, dst_stride_y,
			
 
				+                    width, height);
			
 
				+      RotatePlane90(src_u, src_stride_u,
			
 
				+                    dst_u, dst_stride_u,
			
 
				+                    halfwidth, halfheight);
			
 
				+      RotatePlane90(src_v, src_stride_v,
			
 
				+                    dst_v, dst_stride_v,
			
 
				+                    halfwidth, halfheight);
			
 
				+      return 0;
			
 
				+    case kRotate270:
			
 
				+      RotatePlane270(src_y, src_stride_y,
			
 
				+                     dst_y, dst_stride_y,
			
 
				+                     width, height);
			
 
				+      RotatePlane270(src_u, src_stride_u,
			
 
				+                     dst_u, dst_stride_u,
			
 
				+                     halfwidth, halfheight);
			
 
				+      RotatePlane270(src_v, src_stride_v,
			
 
				+                     dst_v, dst_stride_v,
			
 
				+                     halfwidth, halfheight);
			
 
				+      return 0;
			
 
				+    case kRotate180:
			
 
				+      RotatePlane180(src_y, src_stride_y,
			
 
				+                     dst_y, dst_stride_y,
			
 
				+                     width, height);
			
 
				+      RotatePlane180(src_u, src_stride_u,
			
 
				+                     dst_u, dst_stride_u,
			
 
				+                     halfwidth, halfheight);
			
 
				+      RotatePlane180(src_v, src_stride_v,
			
 
				+                     dst_v, dst_stride_v,
			
 
				+                     halfwidth, halfheight);
			
 
				+      return 0;
			
 
				+    default:
			
 
				+      break;
			
 
				+  }
			
 
				+  return -1;
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
			
 
				+                     const uint8* src_uv, int src_stride_uv,
			
 
				+                     uint8* dst_y, int dst_stride_y,
			
 
				+                     uint8* dst_u, int dst_stride_u,
			
 
				+                     uint8* dst_v, int dst_stride_v,
			
 
				+                     int width, int height,
			
 
				+                     enum RotationMode mode) {
			
 
				+  int halfwidth = (width + 1) >> 1;
			
 
				+  int halfheight = (height + 1) >> 1;
			
 
				+  if (!src_y || !src_uv || width <= 0 || height == 0 ||
			
 
				+      !dst_y || !dst_u || !dst_v) {
			
 
				+    return -1;
			
 
				+  }
			
 
				+
			
 
				+  // Negative height means invert the image.
			
 
				+  if (height < 0) {
			
 
				+    height = -height;
			
 
				+    halfheight = (height + 1) >> 1;
			
 
				+    src_y = src_y + (height - 1) * src_stride_y;
			
 
				+    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
			
 
				+    src_stride_y = -src_stride_y;
			
 
				+    src_stride_uv = -src_stride_uv;
			
 
				+  }
			
 
				+
			
 
				+  switch (mode) {
			
 
				+    case kRotate0:
			
 
				+      // copy frame
			
 
				+      return NV12ToI420(src_y, src_stride_y,
			
 
				+                        src_uv, src_stride_uv,
			
 
				+                        dst_y, dst_stride_y,
			
 
				+                        dst_u, dst_stride_u,
			
 
				+                        dst_v, dst_stride_v,
			
 
				+                        width, height);
			
 
				+    case kRotate90:
			
 
				+      RotatePlane90(src_y, src_stride_y,
			
 
				+                    dst_y, dst_stride_y,
			
 
				+                    width, height);
			
 
				+      RotateUV90(src_uv, src_stride_uv,
			
 
				+                 dst_u, dst_stride_u,
			
 
				+                 dst_v, dst_stride_v,
			
 
				+                 halfwidth, halfheight);
			
 
				+      return 0;
			
 
				+    case kRotate270:
			
 
				+      RotatePlane270(src_y, src_stride_y,
			
 
				+                     dst_y, dst_stride_y,
			
 
				+                     width, height);
			
 
				+      RotateUV270(src_uv, src_stride_uv,
			
 
				+                  dst_u, dst_stride_u,
			
 
				+                  dst_v, dst_stride_v,
			
 
				+                  halfwidth, halfheight);
			
 
				+      return 0;
			
 
				+    case kRotate180:
			
 
				+      RotatePlane180(src_y, src_stride_y,
			
 
				+                     dst_y, dst_stride_y,
			
 
				+                     width, height);
			
 
				+      RotateUV180(src_uv, src_stride_uv,
			
 
				+                  dst_u, dst_stride_u,
			
 
				+                  dst_v, dst_stride_v,
			
 
				+                  halfwidth, halfheight);
			
 
				+      return 0;
			
 
				+    default:
			
 
				+      break;
			
 
				+  }
			
 
				+  return -1;
			
 
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/rotate_any.cc
+++ b/src/jni/libyuv/source/rotate_any.cc
@@ -0,0 +1,80 @@
 
				+/*
			
 
				+ *  Copyright 2015 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/rotate.h"
			
 
				+#include "libyuv/rotate_row.h"
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				// TANY generates an "any width" wrapper around a SIMD 8-row transpose.
				// The widest prefix of the row band whose width is a multiple of (MASK + 1)
				// is handed to TPOS_SIMD; the remaining columns are transposed by the
				// portable TransposeWx8_C.
				#define TANY(NAMEANY, TPOS_SIMD, MASK)                                         \
				    void NAMEANY(const uint8* src, int src_stride,                             \
				                 uint8* dst, int dst_stride, int width) {                      \
				      const int tail_cols = width & (MASK);                                    \
				      const int simd_cols = width - tail_cols;                                 \
				      if (simd_cols > 0) {                                                     \
				        TPOS_SIMD(src, src_stride, dst, dst_stride, simd_cols);                \
				      }                                                                        \
				      TransposeWx8_C(src + simd_cols, src_stride,                              \
				                     dst + simd_cols * dst_stride, dst_stride, tail_cols);     \
				    }

				#ifdef HAS_TRANSPOSEWX8_NEON
				TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
				#endif
				#ifdef HAS_TRANSPOSEWX8_SSSE3
				TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
				#endif
				#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
				TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
				#endif
				#ifdef HAS_TRANSPOSEWX8_DSPR2
				TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7)
				#endif
				#undef TANY
			
 
				+
			
 
				// TUVANY generates an "any width" wrapper around a SIMD 8-row UV transpose.
				// The widest prefix whose width is a multiple of (MASK + 1) is handed to
				// TPOS_SIMD; the remaining columns go through the portable
				// TransposeUVWx8_C. The source advances by two bytes per column.
				#define TUVANY(NAMEANY, TPOS_SIMD, MASK)                                       \
				    void NAMEANY(const uint8* src, int src_stride,                             \
				                uint8* dst_a, int dst_stride_a,                                \
				                uint8* dst_b, int dst_stride_b, int width) {                   \
				      const int tail_cols = width & (MASK);                                    \
				      const int simd_cols = width - tail_cols;                                 \
				      if (simd_cols > 0) {                                                     \
				        TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,   \
				                  simd_cols);                                                  \
				      }                                                                        \
				      TransposeUVWx8_C(src + simd_cols * 2, src_stride,                        \
				                       dst_a + simd_cols * dst_stride_a, dst_stride_a,         \
				                       dst_b + simd_cols * dst_stride_b, dst_stride_b,         \
				                       tail_cols);                                             \
				    }

				#ifdef HAS_TRANSPOSEUVWX8_NEON
				TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
				#endif
				#ifdef HAS_TRANSPOSEUVWX8_SSE2
				TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
				#endif
				#ifdef HAS_TRANSPOSEUVWX8_DSPR2
				TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7)
				#endif
				#undef TUVANY
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/src/jni/libyuv/source/rotate_argb.cc
+++ b/src/jni/libyuv/source/rotate_argb.cc
@@ -0,0 +1,205 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/rotate.h"
			
 
				+
			
 
				+#include "libyuv/cpu_id.h"
			
 
				+#include "libyuv/convert.h"
			
 
				+#include "libyuv/planar_functions.h"
			
 
				+#include "libyuv/row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// ARGBScale has a function to copy pixels to a row, striding each source
			
 
				+// pixel by a constant. The prototypes are repeated in this file; ARGBTranspose below uses them to gather one source column into one destination row.
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && \
			
 
				+    (defined(_M_IX86) || \
			
 
				+    (defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
			
 
				+#define HAS_SCALEARGBROWDOWNEVEN_SSE2
			
 
				+void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
			
 
				+                               int src_stepx, uint8* dst_ptr, int dst_width);
			
 
				+#endif  // x86 build: HAS_SCALEARGBROWDOWNEVEN_SSE2
			
 
				+#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
			
 
				+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
			
 
				+#define HAS_SCALEARGBROWDOWNEVEN_NEON
			
 
				+void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
			
 
				+                               int src_stepx, uint8* dst_ptr, int dst_width);
			
 
				+#endif  // NEON build: HAS_SCALEARGBROWDOWNEVEN_NEON
			
 
				+
			
 
				+void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
			
 
				+                            int src_stepx, uint8* dst_ptr, int dst_width);  // Declared unconditionally; ARGBTranspose starts from this version.
			
 
				+
			
 
				+static void ARGBTranspose(const uint8* src, int src_stride,
			
 
				+                          uint8* dst, int dst_stride, int width, int height) {
			
 
				+  int i;
			
 
				+  int src_pixel_step = src_stride >> 2;
			
 
				+  void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
			
 
				+      int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
			
 
				+#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
			
 
				+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) {  // Width of dest.
			
 
				+    ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) {  // Width of dest.
			
 
				+    ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
			
 
				+  }
			
 
				+#endif
			
 
				+
			
 
				+  for (i = 0; i < width; ++i) {  // column of source to row of dest.
			
 
				+    ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
			
 
				+    dst += dst_stride;
			
 
				+    src += 4;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+void ARGBRotate90(const uint8* src, int src_stride,
			
 
				+                  uint8* dst, int dst_stride, int width, int height) {
			
 
				+  // Rotate by 90 is a ARGBTranspose with the source read
			
 
				+  // from bottom to top. So set the source pointer to the end
			
 
				+  // of the buffer and flip the sign of the source stride.
			
 
				+  src += src_stride * (height - 1);
			
 
				+  src_stride = -src_stride;
			
 
				+  ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
			
 
				+}
			
 
				+
			
 
				+void ARGBRotate270(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride, int width, int height) {
			
 
				+  // Rotate by 270 is a ARGBTranspose with the destination written
			
 
				+  // from bottom to top. So set the destination pointer to the end
			
 
				+  // of the buffer and flip the sign of the destination stride.
			
 
				+  dst += dst_stride * (width - 1);
			
 
				+  dst_stride = -dst_stride;
			
 
				+  ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
			
 
				+}
			
 
				+
			
 
				+void ARGBRotate180(const uint8* src, int src_stride,
			
 
				+                   uint8* dst, int dst_stride, int width, int height) {
			
 
				+  // Swap first and last row and mirror the content. Uses a temporary row.
			
 
				+  align_buffer_64(row, width * 4);
			
 
				+  const uint8* src_bot = src + src_stride * (height - 1);
			
 
				+  uint8* dst_bot = dst + dst_stride * (height - 1);
			
 
				+  int half_height = (height + 1) >> 1;
			
 
				+  int y;
			
 
				+  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
			
 
				+      ARGBMirrorRow_C;
			
 
				+  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
			
 
				+#if defined(HAS_ARGBMIRRORROW_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
			
 
				+    if (IS_ALIGNED(width, 4)) {
			
 
				+      ARGBMirrorRow = ARGBMirrorRow_NEON;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_ARGBMIRRORROW_SSE2)
			
 
				+  if (TestCpuFlag(kCpuHasSSE2)) {
			
 
				+    ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
			
 
				+    if (IS_ALIGNED(width, 4)) {
			
 
				+      ARGBMirrorRow = ARGBMirrorRow_SSE2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_ARGBMIRRORROW_AVX2)
			
 
				+  if (TestCpuFlag(kCpuHasAVX2)) {
			
 
				+    ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
			
 
				+    if (IS_ALIGNED(width, 8)) {
			
 
				+      ARGBMirrorRow = ARGBMirrorRow_AVX2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_SSE2)
			
 
				+  if (TestCpuFlag(kCpuHasSSE2)) {
			
 
				+    CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_AVX)
			
 
				+  if (TestCpuFlag(kCpuHasAVX)) {
			
 
				+    CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_ERMS)
			
 
				+  if (TestCpuFlag(kCpuHasERMS)) {
			
 
				+    CopyRow = CopyRow_ERMS;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_COPYROW_MIPS)
			
 
				+  if (TestCpuFlag(kCpuHasMIPS)) {
			
 
				+    CopyRow = CopyRow_MIPS;
			
 
				+  }
			
 
				+#endif
			
 
				+
			
 
				+  // Odd height will harmlessly mirror the middle row twice.
			
 
				+  for (y = 0; y < half_height; ++y) {
			
 
				+    ARGBMirrorRow(src, row, width);  // Mirror first row into a buffer
			
 
				+    ARGBMirrorRow(src_bot, dst, width);  // Mirror last row into first row
			
 
				+    CopyRow(row, dst_bot, width * 4);  // Copy first mirrored row into last
			
 
				+    src += src_stride;
			
 
				+    dst += dst_stride;
			
 
				+    src_bot -= src_stride;
			
 
				+    dst_bot -= dst_stride;
			
 
				+  }
			
 
				+  free_aligned_buffer_64(row);
			
 
				+}
			
 
				+
			
 
				+LIBYUV_API
			
 
				+int ARGBRotate(const uint8* src_argb, int src_stride_argb,
			
 
				+               uint8* dst_argb, int dst_stride_argb, int width, int height,
			
 
				+               enum RotationMode mode) {
			
 
				+  if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
			
 
				+    return -1;
			
 
				+  }
			
 
				+
			
 
				+  // Negative height means invert the image.
			
 
				+  if (height < 0) {
			
 
				+    height = -height;
			
 
				+    src_argb = src_argb + (height - 1) * src_stride_argb;
			
 
				+    src_stride_argb = -src_stride_argb;
			
 
				+  }
			
 
				+
			
 
				+  switch (mode) {
			
 
				+    case kRotate0:
			
 
				+      // copy frame
			
 
				+      return ARGBCopy(src_argb, src_stride_argb,
			
 
				+                      dst_argb, dst_stride_argb,
			
 
				+                      width, height);
			
 
				+    case kRotate90:
			
 
				+      ARGBRotate90(src_argb, src_stride_argb,
			
 
				+                   dst_argb, dst_stride_argb,
			
 
				+                   width, height);
			
 
				+      return 0;
			
 
				+    case kRotate270:
			
 
				+      ARGBRotate270(src_argb, src_stride_argb,
			
 
				+                    dst_argb, dst_stride_argb,
			
 
				+                    width, height);
			
 
				+      return 0;
			
 
				+    case kRotate180:
			
 
				+      ARGBRotate180(src_argb, src_stride_argb,
			
 
				+                    dst_argb, dst_stride_argb,
			
 
				+                    width, height);
			
 
				+      return 0;
			
 
				+    default:
			
 
				+      break;
			
 
				+  }
			
 
				+  return -1;
			
 
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/rotate_common.cc
+++ b/src/jni/libyuv/source/rotate_common.cc
@@ -0,0 +1,92 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/rotate_row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+void TransposeWx8_C(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride, int width) {
			
 
				+  int i;
			
 
				+  for (i = 0; i < width; ++i) {
			
 
				+    dst[0] = src[0 * src_stride];
			
 
				+    dst[1] = src[1 * src_stride];
			
 
				+    dst[2] = src[2 * src_stride];
			
 
				+    dst[3] = src[3 * src_stride];
			
 
				+    dst[4] = src[4 * src_stride];
			
 
				+    dst[5] = src[5 * src_stride];
			
 
				+    dst[6] = src[6 * src_stride];
			
 
				+    dst[7] = src[7 * src_stride];
			
 
				+    ++src;
			
 
				+    dst += dst_stride;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+void TransposeUVWx8_C(const uint8* src, int src_stride,
			
 
				+                      uint8* dst_a, int dst_stride_a,
			
 
				+                      uint8* dst_b, int dst_stride_b, int width) {
			
 
				+  int i;
			
 
				+  for (i = 0; i < width; ++i) {
			
 
				+    dst_a[0] = src[0 * src_stride + 0];
			
 
				+    dst_b[0] = src[0 * src_stride + 1];
			
 
				+    dst_a[1] = src[1 * src_stride + 0];
			
 
				+    dst_b[1] = src[1 * src_stride + 1];
			
 
				+    dst_a[2] = src[2 * src_stride + 0];
			
 
				+    dst_b[2] = src[2 * src_stride + 1];
			
 
				+    dst_a[3] = src[3 * src_stride + 0];
			
 
				+    dst_b[3] = src[3 * src_stride + 1];
			
 
				+    dst_a[4] = src[4 * src_stride + 0];
			
 
				+    dst_b[4] = src[4 * src_stride + 1];
			
 
				+    dst_a[5] = src[5 * src_stride + 0];
			
 
				+    dst_b[5] = src[5 * src_stride + 1];
			
 
				+    dst_a[6] = src[6 * src_stride + 0];
			
 
				+    dst_b[6] = src[6 * src_stride + 1];
			
 
				+    dst_a[7] = src[7 * src_stride + 0];
			
 
				+    dst_b[7] = src[7 * src_stride + 1];
			
 
				+    src += 2;
			
 
				+    dst_a += dst_stride_a;
			
 
				+    dst_b += dst_stride_b;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+void TransposeWxH_C(const uint8* src, int src_stride,
			
 
				+                    uint8* dst, int dst_stride,
			
 
				+                    int width, int height) {
			
 
				+  int i;
			
 
				+  for (i = 0; i < width; ++i) {
			
 
				+    int j;
			
 
				+    for (j = 0; j < height; ++j) {
			
 
				+      dst[i * dst_stride + j] = src[j * src_stride + i];
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+void TransposeUVWxH_C(const uint8* src, int src_stride,
			
 
				+                      uint8* dst_a, int dst_stride_a,
			
 
				+                      uint8* dst_b, int dst_stride_b,
			
 
				+                      int width, int height) {
			
 
				+  int i;
			
 
				+  for (i = 0; i < width * 2; i += 2) {
			
 
				+    int j;
			
 
				+    for (j = 0; j < height; ++j) {
			
 
				+      dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
			
 
				+      dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/rotate_gcc.cc
+++ b/src/jni/libyuv/source/rotate_gcc.cc
@@ -0,0 +1,368 @@
 
				+/*
			
 
				+ *  Copyright 2015 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/rotate_row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// This module is for GCC x86 and x64.
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && \
			
 
				+    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
			
 
				+
			
 
				+// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit. Each loop pass reads 8 bytes from each of 8 source rows and stores them as 8 destination rows of 8 bytes.
			
 
				+#if defined(HAS_TRANSPOSEWX8_SSSE3)
			
 
				+void TransposeWx8_SSSE3(const uint8* src, int src_stride,  // src: first of 8 rows that are src_stride bytes apart.
			
 
				+                        uint8* dst, int dst_stride, int width) {  // width: source columns to transpose, consumed 8 per loop pass.
			
 
				+  asm volatile (
			
 
				+    // Read in the data from the source pointer.
			
 
				+    // First round of bit swap: interleave the bytes of each pair of adjacent rows.
			
 
				+    LABELALIGN
			
 
				+  "1:                                            \n"  // Loop head; the width test is at the bottom, so one pass always runs.
			
 
				+    "movq       (%0),%%xmm0                      \n"
			
 
				+    "movq       (%0,%3),%%xmm1                   \n"
			
 
				+    "lea        (%0,%3,2),%0                     \n"  // src += 2 rows.
			
 
				+    "punpcklbw  %%xmm1,%%xmm0                    \n"
			
 
				+    "movq       (%0),%%xmm2                      \n"
			
 
				+    "movdqa     %%xmm0,%%xmm1                    \n"
			
 
				+    "palignr    $0x8,%%xmm1,%%xmm1               \n"  // Rotate by 8 bytes: the upper half of the interleave moves to the low 8 bytes.
			
 
				+    "movq       (%0,%3),%%xmm3                   \n"
			
 
				+    "lea        (%0,%3,2),%0                     \n"
			
 
				+    "punpcklbw  %%xmm3,%%xmm2                    \n"
			
 
				+    "movdqa     %%xmm2,%%xmm3                    \n"
			
 
				+    "movq       (%0),%%xmm4                      \n"
			
 
				+    "palignr    $0x8,%%xmm3,%%xmm3               \n"
			
 
				+    "movq       (%0,%3),%%xmm5                   \n"
			
 
				+    "lea        (%0,%3,2),%0                     \n"
			
 
				+    "punpcklbw  %%xmm5,%%xmm4                    \n"
			
 
				+    "movdqa     %%xmm4,%%xmm5                    \n"
			
 
				+    "movq       (%0),%%xmm6                      \n"
			
 
				+    "palignr    $0x8,%%xmm5,%%xmm5               \n"
			
 
				+    "movq       (%0,%3),%%xmm7                   \n"
			
 
				+    "lea        (%0,%3,2),%0                     \n"
			
 
				+    "punpcklbw  %%xmm7,%%xmm6                    \n"
			
 
				+    "neg        %3                               \n"  // Temporarily negate the source stride ...
			
 
				+    "movdqa     %%xmm6,%%xmm7                    \n"
			
 
				+    "lea        0x8(%0,%3,8),%0                  \n"  // ... so src steps back the 8 rows advanced above and forward 8 bytes.
			
 
				+    "palignr    $0x8,%%xmm7,%%xmm7               \n"
			
 
				+    "neg        %3                               \n"  // Restore the source stride.
			
 
				+     // Second round of bit swap: interleave 16-bit units.
			
 
				+    "punpcklwd  %%xmm2,%%xmm0                    \n"
			
 
				+    "punpcklwd  %%xmm3,%%xmm1                    \n"
			
 
				+    "movdqa     %%xmm0,%%xmm2                    \n"
			
 
				+    "movdqa     %%xmm1,%%xmm3                    \n"
			
 
				+    "palignr    $0x8,%%xmm2,%%xmm2               \n"
			
 
				+    "palignr    $0x8,%%xmm3,%%xmm3               \n"
			
 
				+    "punpcklwd  %%xmm6,%%xmm4                    \n"
			
 
				+    "punpcklwd  %%xmm7,%%xmm5                    \n"
			
 
				+    "movdqa     %%xmm4,%%xmm6                    \n"
			
 
				+    "movdqa     %%xmm5,%%xmm7                    \n"
			
 
				+    "palignr    $0x8,%%xmm6,%%xmm6               \n"
			
 
				+    "palignr    $0x8,%%xmm7,%%xmm7               \n"
			
 
				+    // Third round of bit swap: interleave 32-bit units.
			
 
				+    // Write to the destination pointer: 8 rows of 8 bytes, two rows per lea step.
			
 
				+    "punpckldq  %%xmm4,%%xmm0                    \n"
			
 
				+    "movq       %%xmm0,(%1)                      \n"
			
 
				+    "movdqa     %%xmm0,%%xmm4                    \n"
			
 
				+    "palignr    $0x8,%%xmm4,%%xmm4               \n"
			
 
				+    "movq       %%xmm4,(%1,%4)                   \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"  // dst += 2 rows.
			
 
				+    "punpckldq  %%xmm6,%%xmm2                    \n"
			
 
				+    "movdqa     %%xmm2,%%xmm6                    \n"
			
 
				+    "movq       %%xmm2,(%1)                      \n"
			
 
				+    "palignr    $0x8,%%xmm6,%%xmm6               \n"
			
 
				+    "punpckldq  %%xmm5,%%xmm1                    \n"
			
 
				+    "movq       %%xmm6,(%1,%4)                   \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "movdqa     %%xmm1,%%xmm5                    \n"
			
 
				+    "movq       %%xmm1,(%1)                      \n"
			
 
				+    "palignr    $0x8,%%xmm5,%%xmm5               \n"
			
 
				+    "movq       %%xmm5,(%1,%4)                   \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "punpckldq  %%xmm7,%%xmm3                    \n"
			
 
				+    "movq       %%xmm3,(%1)                      \n"
			
 
				+    "movdqa     %%xmm3,%%xmm7                    \n"
			
 
				+    "palignr    $0x8,%%xmm7,%%xmm7               \n"
			
 
				+    "sub        $0x8,%2                          \n"  // 8 source columns done.
			
 
				+    "movq       %%xmm7,(%1,%4)                   \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "jg         1b                               \n"  // Repeat while width > 0; the movq and lea above leave the flags from sub intact.
			
 
				+    : "+r"(src),    // %0
			
 
				+      "+r"(dst),    // %1
			
 
				+      "+r"(width)   // %2
			
 
				+    : "r"((intptr_t)(src_stride)),  // %3
			
 
				+      "r"((intptr_t)(dst_stride))   // %4
			
 
				+    : "memory", "cc",
			
 
				+      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
			
 
				+  );
			
 
				+}
			
 
				+#endif  // defined(HAS_TRANSPOSEWX8_SSSE3)
			
 
				+
			
 
				+// Transpose 16x8. 64 bit. Each loop pass reads 16 bytes from each of 8 source rows and stores them as 16 destination rows of 8 bytes.
			
 
				+#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
			
 
				+void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,  // src: first of 8 rows that are src_stride bytes apart.
			
 
				+                             uint8* dst, int dst_stride, int width) {  // width: source columns to transpose, consumed 16 per loop pass.
			
 
				+  asm volatile (
			
 
				+    // Read in the data from the source pointer.
			
 
				+    // First round of bit swap: interleave the bytes of adjacent rows; xmm0-xmm7 take source columns 0-7, xmm8-xmm15 columns 8-15.
			
 
				+    LABELALIGN
			
 
				+  "1:                                            \n"  // Loop head; the width test is at the bottom, so one pass always runs.
			
 
				+    "movdqu     (%0),%%xmm0                      \n"
			
 
				+    "movdqu     (%0,%3),%%xmm1                   \n"
			
 
				+    "lea        (%0,%3,2),%0                     \n"  // src += 2 rows.
			
 
				+    "movdqa     %%xmm0,%%xmm8                    \n"
			
 
				+    "punpcklbw  %%xmm1,%%xmm0                    \n"
			
 
				+    "punpckhbw  %%xmm1,%%xmm8                    \n"
			
 
				+    "movdqu     (%0),%%xmm2                      \n"
			
 
				+    "movdqa     %%xmm0,%%xmm1                    \n"
			
 
				+    "movdqa     %%xmm8,%%xmm9                    \n"
			
 
				+    "palignr    $0x8,%%xmm1,%%xmm1               \n"  // Rotate by 8 bytes: the upper half of the interleave moves to the low 8 bytes.
			
 
				+    "palignr    $0x8,%%xmm9,%%xmm9               \n"
			
 
				+    "movdqu     (%0,%3),%%xmm3                   \n"
			
 
				+    "lea        (%0,%3,2),%0                     \n"
			
 
				+    "movdqa     %%xmm2,%%xmm10                   \n"
			
 
				+    "punpcklbw  %%xmm3,%%xmm2                    \n"
			
 
				+    "punpckhbw  %%xmm3,%%xmm10                   \n"
			
 
				+    "movdqa     %%xmm2,%%xmm3                    \n"
			
 
				+    "movdqa     %%xmm10,%%xmm11                  \n"
			
 
				+    "movdqu     (%0),%%xmm4                      \n"
			
 
				+    "palignr    $0x8,%%xmm3,%%xmm3               \n"
			
 
				+    "palignr    $0x8,%%xmm11,%%xmm11             \n"
			
 
				+    "movdqu     (%0,%3),%%xmm5                   \n"
			
 
				+    "lea        (%0,%3,2),%0                     \n"
			
 
				+    "movdqa     %%xmm4,%%xmm12                   \n"
			
 
				+    "punpcklbw  %%xmm5,%%xmm4                    \n"
			
 
				+    "punpckhbw  %%xmm5,%%xmm12                   \n"
			
 
				+    "movdqa     %%xmm4,%%xmm5                    \n"
			
 
				+    "movdqa     %%xmm12,%%xmm13                  \n"
			
 
				+    "movdqu     (%0),%%xmm6                      \n"
			
 
				+    "palignr    $0x8,%%xmm5,%%xmm5               \n"
			
 
				+    "palignr    $0x8,%%xmm13,%%xmm13             \n"
			
 
				+    "movdqu     (%0,%3),%%xmm7                   \n"
			
 
				+    "lea        (%0,%3,2),%0                     \n"
			
 
				+    "movdqa     %%xmm6,%%xmm14                   \n"
			
 
				+    "punpcklbw  %%xmm7,%%xmm6                    \n"
			
 
				+    "punpckhbw  %%xmm7,%%xmm14                   \n"
			
 
				+    "neg        %3                               \n"  // Temporarily negate the source stride ...
			
 
				+    "movdqa     %%xmm6,%%xmm7                    \n"
			
 
				+    "movdqa     %%xmm14,%%xmm15                  \n"
			
 
				+    "lea        0x10(%0,%3,8),%0                 \n"  // ... so src steps back the 8 rows advanced above and forward 16 bytes.
			
 
				+    "palignr    $0x8,%%xmm7,%%xmm7               \n"
			
 
				+    "palignr    $0x8,%%xmm15,%%xmm15             \n"
			
 
				+    "neg        %3                               \n"  // Restore the source stride.
			
 
				+     // Second round of bit swap: interleave 16-bit units.
			
 
				+    "punpcklwd  %%xmm2,%%xmm0                    \n"
			
 
				+    "punpcklwd  %%xmm3,%%xmm1                    \n"
			
 
				+    "movdqa     %%xmm0,%%xmm2                    \n"
			
 
				+    "movdqa     %%xmm1,%%xmm3                    \n"
			
 
				+    "palignr    $0x8,%%xmm2,%%xmm2               \n"
			
 
				+    "palignr    $0x8,%%xmm3,%%xmm3               \n"
			
 
				+    "punpcklwd  %%xmm6,%%xmm4                    \n"
			
 
				+    "punpcklwd  %%xmm7,%%xmm5                    \n"
			
 
				+    "movdqa     %%xmm4,%%xmm6                    \n"
			
 
				+    "movdqa     %%xmm5,%%xmm7                    \n"
			
 
				+    "palignr    $0x8,%%xmm6,%%xmm6               \n"
			
 
				+    "palignr    $0x8,%%xmm7,%%xmm7               \n"
			
 
				+    "punpcklwd  %%xmm10,%%xmm8                   \n"
			
 
				+    "punpcklwd  %%xmm11,%%xmm9                   \n"
			
 
				+    "movdqa     %%xmm8,%%xmm10                   \n"
			
 
				+    "movdqa     %%xmm9,%%xmm11                   \n"
			
 
				+    "palignr    $0x8,%%xmm10,%%xmm10             \n"
			
 
				+    "palignr    $0x8,%%xmm11,%%xmm11             \n"
			
 
				+    "punpcklwd  %%xmm14,%%xmm12                  \n"
			
 
				+    "punpcklwd  %%xmm15,%%xmm13                  \n"
			
 
				+    "movdqa     %%xmm12,%%xmm14                  \n"
			
 
				+    "movdqa     %%xmm13,%%xmm15                  \n"
			
 
				+    "palignr    $0x8,%%xmm14,%%xmm14             \n"
			
 
				+    "palignr    $0x8,%%xmm15,%%xmm15             \n"
			
 
				+    // Third round of bit swap: interleave 32-bit units.
			
 
				+    // Write to the destination pointer: 16 rows of 8 bytes, first from xmm0-xmm7, then from xmm8-xmm15.
			
 
				+    "punpckldq  %%xmm4,%%xmm0                    \n"
			
 
				+    "movq       %%xmm0,(%1)                      \n"
			
 
				+    "movdqa     %%xmm0,%%xmm4                    \n"
			
 
				+    "palignr    $0x8,%%xmm4,%%xmm4               \n"
			
 
				+    "movq       %%xmm4,(%1,%4)                   \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"  // dst += 2 rows.
			
 
				+    "punpckldq  %%xmm6,%%xmm2                    \n"
			
 
				+    "movdqa     %%xmm2,%%xmm6                    \n"
			
 
				+    "movq       %%xmm2,(%1)                      \n"
			
 
				+    "palignr    $0x8,%%xmm6,%%xmm6               \n"
			
 
				+    "punpckldq  %%xmm5,%%xmm1                    \n"
			
 
				+    "movq       %%xmm6,(%1,%4)                   \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "movdqa     %%xmm1,%%xmm5                    \n"
			
 
				+    "movq       %%xmm1,(%1)                      \n"
			
 
				+    "palignr    $0x8,%%xmm5,%%xmm5               \n"
			
 
				+    "movq       %%xmm5,(%1,%4)                   \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "punpckldq  %%xmm7,%%xmm3                    \n"
			
 
				+    "movq       %%xmm3,(%1)                      \n"
			
 
				+    "movdqa     %%xmm3,%%xmm7                    \n"
			
 
				+    "palignr    $0x8,%%xmm7,%%xmm7               \n"
			
 
				+    "movq       %%xmm7,(%1,%4)                   \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "punpckldq  %%xmm12,%%xmm8                   \n"
			
 
				+    "movq       %%xmm8,(%1)                      \n"
			
 
				+    "movdqa     %%xmm8,%%xmm12                   \n"
			
 
				+    "palignr    $0x8,%%xmm12,%%xmm12             \n"
			
 
				+    "movq       %%xmm12,(%1,%4)                  \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "punpckldq  %%xmm14,%%xmm10                  \n"
			
 
				+    "movdqa     %%xmm10,%%xmm14                  \n"
			
 
				+    "movq       %%xmm10,(%1)                     \n"
			
 
				+    "palignr    $0x8,%%xmm14,%%xmm14             \n"
			
 
				+    "punpckldq  %%xmm13,%%xmm9                   \n"
			
 
				+    "movq       %%xmm14,(%1,%4)                  \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "movdqa     %%xmm9,%%xmm13                   \n"
			
 
				+    "movq       %%xmm9,(%1)                      \n"
			
 
				+    "palignr    $0x8,%%xmm13,%%xmm13             \n"
			
 
				+    "movq       %%xmm13,(%1,%4)                  \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "punpckldq  %%xmm15,%%xmm11                  \n"
			
 
				+    "movq       %%xmm11,(%1)                     \n"
			
 
				+    "movdqa     %%xmm11,%%xmm15                  \n"
			
 
				+    "palignr    $0x8,%%xmm15,%%xmm15             \n"
			
 
				+    "sub        $0x10,%2                         \n"  // 16 source columns done.
			
 
				+    "movq       %%xmm15,(%1,%4)                  \n"
			
 
				+    "lea        (%1,%4,2),%1                     \n"
			
 
				+    "jg         1b                               \n"  // Repeat while width > 0; the movq and lea above leave the flags from sub intact.
			
 
				+    : "+r"(src),    // %0
			
 
				+      "+r"(dst),    // %1
			
 
				+      "+r"(width)   // %2
			
 
				+    : "r"((intptr_t)(src_stride)),  // %3
			
 
				+      "r"((intptr_t)(dst_stride))   // %4
			
 
				+    : "memory", "cc",
			
 
				+      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
			
 
				+      "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13",  "xmm14",  "xmm15"
			
 
				+  );
			
 
				+}
			
 
				+#endif  // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
			
 
				+
			
 
				+// Transpose UV 8x8.  64 bit. Each loop pass reads 16 bytes (8 interleaved byte pairs) from each of 8 source rows and stores 8 rows of 8 bytes to each of dst_a and dst_b.
			
 
				+#if defined(HAS_TRANSPOSEUVWX8_SSE2)
			
 
				+void TransposeUVWx8_SSE2(const uint8* src, int src_stride,  // src: first of 8 rows of interleaved byte pairs, src_stride bytes apart.
			
 
				+                         uint8* dst_a, int dst_stride_a,  // dst_a receives the first byte of every pair.
			
 
				+                         uint8* dst_b, int dst_stride_b, int width) {  // dst_b receives the second byte; width counts pairs, consumed 8 per loop pass.
			
 
				+  asm volatile (
			
 
				+    // Read in the data from the source pointer.
			
 
				+    // First round of bit swap: interleave the bytes of adjacent rows; xmm8 is scratch for the upper-half interleave.
			
 
				+    LABELALIGN
			
 
				+  "1:                                            \n"  // Loop head; the width test is at the bottom, so one pass always runs.
			
 
				+    "movdqu     (%0),%%xmm0                      \n"
			
 
				+    "movdqu     (%0,%4),%%xmm1                   \n"
			
 
				+    "lea        (%0,%4,2),%0                     \n"  // src += 2 rows.
			
 
				+    "movdqa     %%xmm0,%%xmm8                    \n"
			
 
				+    "punpcklbw  %%xmm1,%%xmm0                    \n"
			
 
				+    "punpckhbw  %%xmm1,%%xmm8                    \n"
			
 
				+    "movdqa     %%xmm8,%%xmm1                    \n"
			
 
				+    "movdqu     (%0),%%xmm2                      \n"
			
 
				+    "movdqu     (%0,%4),%%xmm3                   \n"
			
 
				+    "lea        (%0,%4,2),%0                     \n"
			
 
				+    "movdqa     %%xmm2,%%xmm8                    \n"
			
 
				+    "punpcklbw  %%xmm3,%%xmm2                    \n"
			
 
				+    "punpckhbw  %%xmm3,%%xmm8                    \n"
			
 
				+    "movdqa     %%xmm8,%%xmm3                    \n"
			
 
				+    "movdqu     (%0),%%xmm4                      \n"
			
 
				+    "movdqu     (%0,%4),%%xmm5                   \n"
			
 
				+    "lea        (%0,%4,2),%0                     \n"
			
 
				+    "movdqa     %%xmm4,%%xmm8                    \n"
			
 
				+    "punpcklbw  %%xmm5,%%xmm4                    \n"
			
 
				+    "punpckhbw  %%xmm5,%%xmm8                    \n"
			
 
				+    "movdqa     %%xmm8,%%xmm5                    \n"
			
 
				+    "movdqu     (%0),%%xmm6                      \n"
			
 
				+    "movdqu     (%0,%4),%%xmm7                   \n"
			
 
				+    "lea        (%0,%4,2),%0                     \n"
			
 
				+    "movdqa     %%xmm6,%%xmm8                    \n"
			
 
				+    "punpcklbw  %%xmm7,%%xmm6                    \n"
			
 
				+    "neg        %4                               \n"  // Temporarily negate the source stride ...
			
 
				+    "lea        0x10(%0,%4,8),%0                 \n"  // ... so src steps back the 8 rows advanced above and forward 16 bytes (8 pairs).
			
 
				+    "punpckhbw  %%xmm7,%%xmm8                    \n"
			
 
				+    "movdqa     %%xmm8,%%xmm7                    \n"
			
 
				+    "neg        %4                               \n"  // Restore the source stride.
			
 
				+     // Second round of bit swap: interleave 16-bit units; xmm8/xmm9 are scratch.
			
 
				+    "movdqa     %%xmm0,%%xmm8                    \n"
			
 
				+    "movdqa     %%xmm1,%%xmm9                    \n"
			
 
				+    "punpckhwd  %%xmm2,%%xmm8                    \n"
			
 
				+    "punpckhwd  %%xmm3,%%xmm9                    \n"
			
 
				+    "punpcklwd  %%xmm2,%%xmm0                    \n"
			
 
				+    "punpcklwd  %%xmm3,%%xmm1                    \n"
			
 
				+    "movdqa     %%xmm8,%%xmm2                    \n"
			
 
				+    "movdqa     %%xmm9,%%xmm3                    \n"
			
 
				+    "movdqa     %%xmm4,%%xmm8                    \n"
			
 
				+    "movdqa     %%xmm5,%%xmm9                    \n"
			
 
				+    "punpckhwd  %%xmm6,%%xmm8                    \n"
			
 
				+    "punpckhwd  %%xmm7,%%xmm9                    \n"
			
 
				+    "punpcklwd  %%xmm6,%%xmm4                    \n"
			
 
				+    "punpcklwd  %%xmm7,%%xmm5                    \n"
			
 
				+    "movdqa     %%xmm8,%%xmm6                    \n"
			
 
				+    "movdqa     %%xmm9,%%xmm7                    \n"
			
 
				+    // Third round of bit swap: interleave 32-bit units.
			
 
				+    // Write to the destination pointer: the low 8 bytes of each result go to dst_a (movlpd), the high 8 bytes to dst_b (movhpd).
			
 
				+    "movdqa     %%xmm0,%%xmm8                    \n"
			
 
				+    "punpckldq  %%xmm4,%%xmm0                    \n"
			
 
				+    "movlpd     %%xmm0,(%1)                      \n"  // Write back U channel
			
 
				+    "movhpd     %%xmm0,(%2)                      \n"  // Write back V channel
			
 
				+    "punpckhdq  %%xmm4,%%xmm8                    \n"
			
 
				+    "movlpd     %%xmm8,(%1,%5)                   \n"
			
 
				+    "lea        (%1,%5,2),%1                     \n"  // dst_a += 2 rows.
			
 
				+    "movhpd     %%xmm8,(%2,%6)                   \n"
			
 
				+    "lea        (%2,%6,2),%2                     \n"  // dst_b += 2 rows.
			
 
				+    "movdqa     %%xmm2,%%xmm8                    \n"
			
 
				+    "punpckldq  %%xmm6,%%xmm2                    \n"
			
 
				+    "movlpd     %%xmm2,(%1)                      \n"
			
 
				+    "movhpd     %%xmm2,(%2)                      \n"
			
 
				+    "punpckhdq  %%xmm6,%%xmm8                    \n"
			
 
				+    "movlpd     %%xmm8,(%1,%5)                   \n"
			
 
				+    "lea        (%1,%5,2),%1                     \n"
			
 
				+    "movhpd     %%xmm8,(%2,%6)                   \n"
			
 
				+    "lea        (%2,%6,2),%2                     \n"
			
 
				+    "movdqa     %%xmm1,%%xmm8                    \n"
			
 
				+    "punpckldq  %%xmm5,%%xmm1                    \n"
			
 
				+    "movlpd     %%xmm1,(%1)                      \n"
			
 
				+    "movhpd     %%xmm1,(%2)                      \n"
			
 
				+    "punpckhdq  %%xmm5,%%xmm8                    \n"
			
 
				+    "movlpd     %%xmm8,(%1,%5)                   \n"
			
 
				+    "lea        (%1,%5,2),%1                     \n"
			
 
				+    "movhpd     %%xmm8,(%2,%6)                   \n"
			
 
				+    "lea        (%2,%6,2),%2                     \n"
			
 
				+    "movdqa     %%xmm3,%%xmm8                    \n"
			
 
				+    "punpckldq  %%xmm7,%%xmm3                    \n"
			
 
				+    "movlpd     %%xmm3,(%1)                      \n"
			
 
				+    "movhpd     %%xmm3,(%2)                      \n"
			
 
				+    "punpckhdq  %%xmm7,%%xmm8                    \n"
			
 
				+    "sub        $0x8,%3                          \n"  // 8 byte pairs done.
			
 
				+    "movlpd     %%xmm8,(%1,%5)                   \n"
			
 
				+    "lea        (%1,%5,2),%1                     \n"
			
 
				+    "movhpd     %%xmm8,(%2,%6)                   \n"
			
 
				+    "lea        (%2,%6,2),%2                     \n"
			
 
				+    "jg         1b                               \n"  // Repeat while width > 0; the stores and lea above leave the flags from sub intact.
			
 
				+    : "+r"(src),    // %0
			
 
				+      "+r"(dst_a),  // %1
			
 
				+      "+r"(dst_b),  // %2
			
 
				+      "+r"(width)   // %3
			
 
				+    : "r"((intptr_t)(src_stride)),    // %4
			
 
				+      "r"((intptr_t)(dst_stride_a)),  // %5
			
 
				+      "r"((intptr_t)(dst_stride_b))   // %6
			
 
				+    : "memory", "cc",
			
 
				+      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
			
 
				+      "xmm8", "xmm9"
			
 
				+  );
			
 
				+}
			
 
				+#endif  // defined(HAS_TRANSPOSEUVWX8_SSE2)
			
 
				+#endif  // defined(__x86_64__) || defined(__i386__)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/rotate_mips.cc
+++ b/src/jni/libyuv/source/rotate_mips.cc
@@ -0,0 +1,484 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/rotate_row.h"
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#if !defined(LIBYUV_DISABLE_MIPS) && \
			
 
				+    defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
			
 
				+    (_MIPS_SIM == _MIPS_SIM_ABI32)
			
 
				+
			
 
				+// Transposes a strip of 8 source rows, one source column per iteration.
				+// Each iteration loads one byte from each of the 8 rows (src + k * src_stride
				+// for k = 0..7), packs them into two 32-bit words, stores those 8 bytes at
				+// dst, then advances src by 1 byte and dst by dst_stride bytes.
				+//   src / src_stride: first source row and the byte distance between rows.
				+//   dst / dst_stride: first output row and the byte distance between rows.
				+//   width:            number of source columns to process; 0 is a no-op.
				+// Loop 1 uses aligned word stores and is taken when both dst and dst_stride
				+// are multiples of 4; loop 11 uses swr/swl pairs for the unaligned case.
				+// The block runs under ".set noreorder", so the instruction written after
				+// each branch (shown with a leading space) executes in its delay slot.
				+void TransposeWx8_DSPR2(const uint8* src, int src_stride,
				+                             uint8* dst, int dst_stride, int width) {
				+   __asm__ __volatile__ (
				+      ".set push                                         \n"
				+      ".set noreorder                                    \n"
				+      // Nothing to do for width == 0; without this guard the counter would
				+      // wrap on the first decrement.
				+      "beqz             %[width], 2f                     \n"
				+      " sll             $t2, %[src_stride], 0x1          \n" // src_stride x 2
				+      "sll              $t4, %[src_stride], 0x2          \n" // src_stride x 4
				+      "sll              $t9, %[src_stride], 0x3          \n" // src_stride x 8
				+      "addu             $t3, $t2, %[src_stride]          \n" // src_stride x 3
				+      "addu             $t5, $t4, %[src_stride]          \n" // src_stride x 5
				+      "addu             $t6, $t2, $t4                    \n" // src_stride x 6
				+      "andi             $t0, %[dst], 0x3                 \n"
				+      "andi             $t1, %[dst_stride], 0x3          \n"
				+      "or               $t0, $t0, $t1                    \n"
				+      "bnez             $t0, 11f                         \n"
				+      " subu            $t7, $t9, %[src_stride]          \n" // src_stride x 7
				+// dst + dst_stride word aligned
				+    "1:                                                  \n"
				+      "lbu              $t0, 0(%[src])                   \n"
				+      "lbux             $t1, %[src_stride](%[src])       \n"
				+      "lbux             $t8, $t2(%[src])                 \n"
				+      "lbux             $t9, $t3(%[src])                 \n"
				+      "sll              $t1, $t1, 16                     \n"
				+      "sll              $t9, $t9, 16                     \n"
				+      "or               $t0, $t0, $t1                    \n"
				+      "or               $t8, $t8, $t9                    \n"
				+      "precr.qb.ph      $s0, $t8, $t0                    \n" // rows 0..3
				+      "lbux             $t0, $t4(%[src])                 \n"
				+      "lbux             $t1, $t5(%[src])                 \n"
				+      "lbux             $t8, $t6(%[src])                 \n"
				+      "lbux             $t9, $t7(%[src])                 \n"
				+      "sll              $t1, $t1, 16                     \n"
				+      "sll              $t9, $t9, 16                     \n"
				+      "or               $t0, $t0, $t1                    \n"
				+      "or               $t8, $t8, $t9                    \n"
				+      "precr.qb.ph      $s1, $t8, $t0                    \n" // rows 4..7
				+      "sw               $s0, 0(%[dst])                   \n"
				+      "addiu            %[width], -1                     \n"
				+      "addiu            %[src], 1                        \n"
				+      "sw               $s1, 4(%[dst])                   \n"
				+      "bnez             %[width], 1b                     \n"
				+      " addu            %[dst], %[dst], %[dst_stride]    \n"
				+      "b                2f                               \n"
				+      // Explicit delay slot: otherwise the lbu at label 11 would execute
				+      // here and read one byte past the last processed column.
				+      " nop                                              \n"
				+// dst + dst_stride unaligned
				+   "11:                                                  \n"
				+      "lbu              $t0, 0(%[src])                   \n"
				+      "lbux             $t1, %[src_stride](%[src])       \n"
				+      "lbux             $t8, $t2(%[src])                 \n"
				+      "lbux             $t9, $t3(%[src])                 \n"
				+      "sll              $t1, $t1, 16                     \n"
				+      "sll              $t9, $t9, 16                     \n"
				+      "or               $t0, $t0, $t1                    \n"
				+      "or               $t8, $t8, $t9                    \n"
				+      "precr.qb.ph      $s0, $t8, $t0                    \n" // rows 0..3
				+      "lbux             $t0, $t4(%[src])                 \n"
				+      "lbux             $t1, $t5(%[src])                 \n"
				+      "lbux             $t8, $t6(%[src])                 \n"
				+      "lbux             $t9, $t7(%[src])                 \n"
				+      "sll              $t1, $t1, 16                     \n"
				+      "sll              $t9, $t9, 16                     \n"
				+      "or               $t0, $t0, $t1                    \n"
				+      "or               $t8, $t8, $t9                    \n"
				+      "precr.qb.ph      $s1, $t8, $t0                    \n" // rows 4..7
				+      "swr              $s0, 0(%[dst])                   \n"
				+      "swl              $s0, 3(%[dst])                   \n"
				+      "addiu            %[width], -1                     \n"
				+      "addiu            %[src], 1                        \n"
				+      "swr              $s1, 4(%[dst])                   \n"
				+      "swl              $s1, 7(%[dst])                   \n"
				+      "bnez             %[width], 11b                    \n"
				+      " addu            %[dst], %[dst], %[dst_stride]    \n"
				+    "2:                                                  \n"
				+      ".set pop                                          \n"
				+      :[src] "+r" (src),
				+       [dst] "+r" (dst),
				+       [width] "+r" (width)
				+      :[src_stride] "r" (src_stride),
				+       [dst_stride] "r" (dst_stride)
				+      // "memory": the asm stores through dst, which is not an output operand.
				+      : "t0", "t1",  "t2", "t3", "t4", "t5",
				+        "t6", "t7", "t8", "t9",
				+        "s0", "s1", "memory"
				+  );
				+}
			
 
				+
			
 
				+// Transposes a strip of 8 source rows, four source columns per iteration.
				+// Each iteration loads one 32-bit word from each of the 8 rows
				+// (src + k * src_stride for k = 0..7), regroups the bytes with
				+// precr.qb.ph / precrq.qb.ph and stores 8 bytes to each of four consecutive
				+// output rows (dst, dst + dst_stride, ...), then advances src by 4 bytes and
				+// dst by 4 * dst_stride.
				+//   width: number of source columns; the loop runs width >> 2 times, so any
				+//          remainder below 4 is not processed and width < 4 is a no-op.
				+// Loop 1 uses aligned word stores and is taken when both dst and dst_stride
				+// are multiples of 4; loop 11 uses swr/swl pairs otherwise.
				+// $AT holds the loop counter, hence the ".set noat" / ".set at" bracket.
				+// NOTE(review): source rows are read with lw/lwx; presumably callers
				+// guarantee src and src_stride are word aligned -- confirm.
				+void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
				+                                  uint8* dst, int dst_stride, int width) {
				+  __asm__ __volatile__ (
				+      ".set noat                                         \n"
				+      ".set push                                         \n"
				+      ".set noreorder                                    \n"
				+      // Guard on the iteration count rather than on width: a width of 1..3
				+      // gives a zero count, which would wrap on the first decrement.
				+      "srl              $AT, %[width], 0x2               \n"
				+      "beqz             $AT, 2f                          \n"
				+      " sll             $t2, %[src_stride], 0x1          \n"  // src_stride x 2
				+      "sll              $t4, %[src_stride], 0x2          \n"  // src_stride x 4
				+      "sll              $t9, %[src_stride], 0x3          \n"  // src_stride x 8
				+      "addu             $t3, $t2, %[src_stride]          \n"  // src_stride x 3
				+      "addu             $t5, $t4, %[src_stride]          \n"  // src_stride x 5
				+      "addu             $t6, $t2, $t4                    \n"  // src_stride x 6
				+
				+      "andi             $t0, %[dst], 0x3                 \n"
				+      "andi             $t1, %[dst_stride], 0x3          \n"
				+      "or               $t0, $t0, $t1                    \n"
				+      "bnez             $t0, 11f                         \n"
				+      " subu            $t7, $t9, %[src_stride]          \n"  // src_stride x 7
				+// dst + dst_stride word aligned
				+      "1:                                                \n"
				+      "lw               $t0, 0(%[src])                   \n"
				+      "lwx              $t1, %[src_stride](%[src])       \n"
				+      "lwx              $t8, $t2(%[src])                 \n"
				+      "lwx              $t9, $t3(%[src])                 \n"
				+
				+// t0 = | 30 | 20 | 10 | 00 |
				+// t1 = | 31 | 21 | 11 | 01 |
				+// t8 = | 32 | 22 | 12 | 02 |
				+// t9 = | 33 | 23 | 13 | 03 |
				+
				+      "precr.qb.ph     $s0, $t1, $t0                     \n"
				+      "precr.qb.ph     $s1, $t9, $t8                     \n"
				+      "precrq.qb.ph    $s2, $t1, $t0                     \n"
				+      "precrq.qb.ph    $s3, $t9, $t8                     \n"
				+
				+  // s0 = | 21 | 01 | 20 | 00 |
				+  // s1 = | 23 | 03 | 22 | 02 |
				+  // s2 = | 31 | 11 | 30 | 10 |
				+  // s3 = | 33 | 13 | 32 | 12 |
				+
				+      "precr.qb.ph     $s4, $s1, $s0                     \n"
				+      "precrq.qb.ph    $s5, $s1, $s0                     \n"
				+      "precr.qb.ph     $s6, $s3, $s2                     \n"
				+      "precrq.qb.ph    $s7, $s3, $s2                     \n"
				+
				+  // s4 = | 03 | 02 | 01 | 00 |
				+  // s5 = | 23 | 22 | 21 | 20 |
				+  // s6 = | 13 | 12 | 11 | 10 |
				+  // s7 = | 33 | 32 | 31 | 30 |
				+
				+      "lwx              $t0, $t4(%[src])                 \n"
				+      "lwx              $t1, $t5(%[src])                 \n"
				+      "lwx              $t8, $t6(%[src])                 \n"
				+      "lwx              $t9, $t7(%[src])                 \n"
				+
				+// t0 = | 34 | 24 | 14 | 04 |
				+// t1 = | 35 | 25 | 15 | 05 |
				+// t8 = | 36 | 26 | 16 | 06 |
				+// t9 = | 37 | 27 | 17 | 07 |
				+
				+      "precr.qb.ph     $s0, $t1, $t0                     \n"
				+      "precr.qb.ph     $s1, $t9, $t8                     \n"
				+      "precrq.qb.ph    $s2, $t1, $t0                     \n"
				+      "precrq.qb.ph    $s3, $t9, $t8                     \n"
				+
				+  // s0 = | 25 | 05 | 24 | 04 |
				+  // s1 = | 27 | 07 | 26 | 06 |
				+  // s2 = | 35 | 15 | 34 | 14 |
				+  // s3 = | 37 | 17 | 36 | 16 |
				+
				+      "precr.qb.ph     $t0, $s1, $s0                     \n"
				+      "precrq.qb.ph    $t1, $s1, $s0                     \n"
				+      "precr.qb.ph     $t8, $s3, $s2                     \n"
				+      "precrq.qb.ph    $t9, $s3, $s2                     \n"
				+
				+  // t0 = | 07 | 06 | 05 | 04 |
				+  // t1 = | 27 | 26 | 25 | 24 |
				+  // t8 = | 17 | 16 | 15 | 14 |
				+  // t9 = | 37 | 36 | 35 | 34 |
				+
				+      // s0..s2 = addresses of output rows 1..3 (row 0 is dst itself).
				+      "addu            $s0, %[dst], %[dst_stride]        \n"
				+      "addu            $s1, $s0, %[dst_stride]           \n"
				+      "addu            $s2, $s1, %[dst_stride]           \n"
				+
				+      "sw              $s4, 0(%[dst])                    \n"
				+      "sw              $t0, 4(%[dst])                    \n"
				+      "sw              $s6, 0($s0)                       \n"
				+      "sw              $t8, 4($s0)                       \n"
				+      "sw              $s5, 0($s1)                       \n"
				+      "sw              $t1, 4($s1)                       \n"
				+      "sw              $s7, 0($s2)                       \n"
				+      "sw              $t9, 4($s2)                       \n"
				+
				+      "addiu            $AT, -1                          \n"
				+      "addiu            %[src], 4                        \n"
				+
				+      "bnez             $AT, 1b                          \n"
				+      " addu            %[dst], $s2, %[dst_stride]       \n"
				+      "b                2f                               \n"
				+      // Explicit delay slot: otherwise the lw at label 11 would execute
				+      // here and read a word past the last processed columns.
				+      " nop                                              \n"
				+// dst + dst_stride unaligned
				+      "11:                                               \n"
				+      "lw               $t0, 0(%[src])                   \n"
				+      "lwx              $t1, %[src_stride](%[src])       \n"
				+      "lwx              $t8, $t2(%[src])                 \n"
				+      "lwx              $t9, $t3(%[src])                 \n"
				+
				+// t0 = | 30 | 20 | 10 | 00 |
				+// t1 = | 31 | 21 | 11 | 01 |
				+// t8 = | 32 | 22 | 12 | 02 |
				+// t9 = | 33 | 23 | 13 | 03 |
				+
				+      "precr.qb.ph     $s0, $t1, $t0                     \n"
				+      "precr.qb.ph     $s1, $t9, $t8                     \n"
				+      "precrq.qb.ph    $s2, $t1, $t0                     \n"
				+      "precrq.qb.ph    $s3, $t9, $t8                     \n"
				+
				+  // s0 = | 21 | 01 | 20 | 00 |
				+  // s1 = | 23 | 03 | 22 | 02 |
				+  // s2 = | 31 | 11 | 30 | 10 |
				+  // s3 = | 33 | 13 | 32 | 12 |
				+
				+      "precr.qb.ph     $s4, $s1, $s0                     \n"
				+      "precrq.qb.ph    $s5, $s1, $s0                     \n"
				+      "precr.qb.ph     $s6, $s3, $s2                     \n"
				+      "precrq.qb.ph    $s7, $s3, $s2                     \n"
				+
				+  // s4 = | 03 | 02 | 01 | 00 |
				+  // s5 = | 23 | 22 | 21 | 20 |
				+  // s6 = | 13 | 12 | 11 | 10 |
				+  // s7 = | 33 | 32 | 31 | 30 |
				+
				+      "lwx              $t0, $t4(%[src])                 \n"
				+      "lwx              $t1, $t5(%[src])                 \n"
				+      "lwx              $t8, $t6(%[src])                 \n"
				+      "lwx              $t9, $t7(%[src])                 \n"
				+
				+// t0 = | 34 | 24 | 14 | 04 |
				+// t1 = | 35 | 25 | 15 | 05 |
				+// t8 = | 36 | 26 | 16 | 06 |
				+// t9 = | 37 | 27 | 17 | 07 |
				+
				+      "precr.qb.ph     $s0, $t1, $t0                     \n"
				+      "precr.qb.ph     $s1, $t9, $t8                     \n"
				+      "precrq.qb.ph    $s2, $t1, $t0                     \n"
				+      "precrq.qb.ph    $s3, $t9, $t8                     \n"
				+
				+  // s0 = | 25 | 05 | 24 | 04 |
				+  // s1 = | 27 | 07 | 26 | 06 |
				+  // s2 = | 35 | 15 | 34 | 14 |
				+  // s3 = | 37 | 17 | 36 | 16 |
				+
				+      "precr.qb.ph     $t0, $s1, $s0                     \n"
				+      "precrq.qb.ph    $t1, $s1, $s0                     \n"
				+      "precr.qb.ph     $t8, $s3, $s2                     \n"
				+      "precrq.qb.ph    $t9, $s3, $s2                     \n"
				+
				+  // t0 = | 07 | 06 | 05 | 04 |
				+  // t1 = | 27 | 26 | 25 | 24 |
				+  // t8 = | 17 | 16 | 15 | 14 |
				+  // t9 = | 37 | 36 | 35 | 34 |
				+
				+      // s0..s2 = addresses of output rows 1..3 (row 0 is dst itself).
				+      "addu            $s0, %[dst], %[dst_stride]        \n"
				+      "addu            $s1, $s0, %[dst_stride]           \n"
				+      "addu            $s2, $s1, %[dst_stride]           \n"
				+
				+      "swr              $s4, 0(%[dst])                   \n"
				+      "swl              $s4, 3(%[dst])                   \n"
				+      "swr              $t0, 4(%[dst])                   \n"
				+      "swl              $t0, 7(%[dst])                   \n"
				+      "swr              $s6, 0($s0)                      \n"
				+      "swl              $s6, 3($s0)                      \n"
				+      "swr              $t8, 4($s0)                      \n"
				+      "swl              $t8, 7($s0)                      \n"
				+      "swr              $s5, 0($s1)                      \n"
				+      "swl              $s5, 3($s1)                      \n"
				+      "swr              $t1, 4($s1)                      \n"
				+      "swl              $t1, 7($s1)                      \n"
				+      "swr              $s7, 0($s2)                      \n"
				+      "swl              $s7, 3($s2)                      \n"
				+      "swr              $t9, 4($s2)                      \n"
				+      "swl              $t9, 7($s2)                      \n"
				+
				+      "addiu            $AT, -1                          \n"
				+      "addiu            %[src], 4                        \n"
				+
				+      "bnez             $AT, 11b                         \n"
				+      " addu            %[dst], $s2, %[dst_stride]       \n"
				+      "2:                                                \n"
				+      ".set pop                                          \n"
				+      ".set at                                           \n"
				+      :[src] "+r" (src),
				+       [dst] "+r" (dst),
				+       [width] "+r" (width)
				+      :[src_stride] "r" (src_stride),
				+       [dst_stride] "r" (dst_stride)
				+      // "memory": the asm stores through dst, which is not an output operand.
				+      : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
				+        "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "memory"
				+  );
				+}
			
 
				+
			
 
				+// Transposes a strip of 8 rows of byte-interleaved two-channel data and
				+// splits the channels into two separate planes.
				+// Each iteration loads one 32-bit word (two interleaved a/b byte pairs,
				+// shown below as |B|A|b|a|) from each of the 8 source rows
				+// (src + k * src_stride for k = 0..7). The eight "a" bytes go to the row at
				+// dst_a and the eight "A" bytes to the row at dst_a + dst_stride_a; the "b"
				+// and "B" bytes go likewise to dst_b and dst_b + dst_stride_b. It then
				+// advances src by 4 bytes and each destination by twice its stride.
				+//   width: number of interleaved pairs per row; the loop runs width >> 1
				+//          times, so an odd trailing pair is not processed and width < 2
				+//          is a no-op.
				+// Loop 1 uses aligned word stores and is taken when dst_a, dst_b and both
				+// destination strides are multiples of 4; loop 11 uses swr/swl pairs.
				+// NOTE(review): source rows are read with lw/lwx; presumably callers
				+// guarantee src and src_stride are word aligned -- confirm.
				+void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
				+                               uint8* dst_a, int dst_stride_a,
				+                               uint8* dst_b, int dst_stride_b,
				+                               int width) {
				+  __asm__ __volatile__ (
				+      ".set push                                         \n"
				+      ".set noreorder                                    \n"
				+      // Guard on the iteration count rather than on width: width == 1 gives
				+      // a zero count, which would wrap on the first decrement.
				+      "srl             $t1, %[width], 1                  \n"
				+      "beqz            $t1, 2f                           \n"
				+      " sll            $t2, %[src_stride], 0x1           \n" // src_stride x 2
				+      "sll             $t4, %[src_stride], 0x2           \n" // src_stride x 4
				+      "sll             $t9, %[src_stride], 0x3           \n" // src_stride x 8
				+      "addu            $t3, $t2, %[src_stride]           \n" // src_stride x 3
				+      "addu            $t5, $t4, %[src_stride]           \n" // src_stride x 5
				+      "addu            $t6, $t2, $t4                     \n" // src_stride x 6
				+      "subu            $t7, $t9, %[src_stride]           \n" // src_stride x 7
				+
				+// check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
				+      "andi            $t0, %[dst_a], 0x3                \n"
				+      "andi            $t8, %[dst_b], 0x3                \n"
				+      "or              $t0, $t0, $t8                     \n"
				+      "andi            $t8, %[dst_stride_a], 0x3         \n"
				+      "andi            $s5, %[dst_stride_b], 0x3         \n"
				+      "or              $t8, $t8, $s5                     \n"
				+      "or              $t0, $t0, $t8                     \n"
				+      "bnez            $t0, 11f                          \n"
				+      " nop                                              \n"
				+// dst + dst_stride word aligned (both, a & b dst addresses)
				+    "1:                                                  \n"
				+      "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
				+      "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
				+      "addu            $s5, %[dst_a], %[dst_stride_a]    \n"
				+      "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
				+      "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
				+      "addu            $s6, %[dst_b], %[dst_stride_b]    \n"
				+
				+      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
				+      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
				+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
				+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
				+
				+      "sll             $t0, $t0, 16                      \n"
				+      "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
				+      "sll             $t9, $t9, 16                      \n"
				+      "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
				+
				+      "sw              $s3, 0($s5)                       \n"
				+      "sw              $s4, 0($s6)                       \n"
				+
				+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
				+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
				+
				+      "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
				+      "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
				+      "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
				+      "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
				+      "sw              $s3, 0(%[dst_a])                  \n"
				+      "sw              $s4, 0(%[dst_b])                  \n"
				+
				+      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
				+      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B7|A7|B6|A6|
				+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
				+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
				+
				+      "sll             $t0, $t0, 16                      \n"
				+      "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
				+      "sll             $t9, $t9, 16                      \n"
				+      "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
				+      "sw              $s3, 4($s5)                       \n"
				+      "sw              $s4, 4($s6)                       \n"
				+
				+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
				+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
				+
				+      "addiu           %[src], 4                         \n"
				+      "addiu           $t1, -1                           \n"
				+      "sll             $t0, %[dst_stride_a], 1           \n"
				+      "sll             $t8, %[dst_stride_b], 1           \n"
				+      "sw              $s3, 4(%[dst_a])                  \n"
				+      "sw              $s4, 4(%[dst_b])                  \n"
				+      "addu            %[dst_a], %[dst_a], $t0           \n"
				+      "bnez            $t1, 1b                           \n"
				+      " addu           %[dst_b], %[dst_b], $t8           \n"
				+      "b               2f                                \n"
				+      " nop                                              \n"
				+
				+// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
				+   "11:                                                  \n"
				+      "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
				+      "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
				+      "addu            $s5, %[dst_a], %[dst_stride_a]    \n"
				+      "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
				+      "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
				+      "addu            $s6, %[dst_b], %[dst_stride_b]    \n"
				+
				+      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
				+      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
				+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
				+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
				+
				+      "sll             $t0, $t0, 16                      \n"
				+      "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
				+      "sll             $t9, $t9, 16                      \n"
				+      "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
				+
				+      "swr             $s3, 0($s5)                       \n"
				+      "swl             $s3, 3($s5)                       \n"
				+      "swr             $s4, 0($s6)                       \n"
				+      "swl             $s4, 3($s6)                       \n"
				+
				+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
				+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
				+
				+      "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
				+      "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
				+      "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
				+      "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
				+      "swr             $s3, 0(%[dst_a])                  \n"
				+      "swl             $s3, 3(%[dst_a])                  \n"
				+      "swr             $s4, 0(%[dst_b])                  \n"
				+      "swl             $s4, 3(%[dst_b])                  \n"
				+
				+      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
				+      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B7|A7|B6|A6|
				+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
				+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
				+
				+      "sll             $t0, $t0, 16                      \n"
				+      "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
				+      "sll             $t9, $t9, 16                      \n"
				+      "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
				+
				+      "swr             $s3, 4($s5)                       \n"
				+      "swl             $s3, 7($s5)                       \n"
				+      "swr             $s4, 4($s6)                       \n"
				+      "swl             $s4, 7($s6)                       \n"
				+
				+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
				+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
				+
				+      "addiu           %[src], 4                         \n"
				+      "addiu           $t1, -1                           \n"
				+      "sll             $t0, %[dst_stride_a], 1           \n"
				+      "sll             $t8, %[dst_stride_b], 1           \n"
				+      "swr             $s3, 4(%[dst_a])                  \n"
				+      "swl             $s3, 7(%[dst_a])                  \n"
				+      "swr             $s4, 4(%[dst_b])                  \n"
				+      "swl             $s4, 7(%[dst_b])                  \n"
				+      "addu            %[dst_a], %[dst_a], $t0           \n"
				+      "bnez            $t1, 11b                          \n"
				+      " addu           %[dst_b], %[dst_b], $t8           \n"
				+
				+      "2:                                                \n"
				+      ".set pop                                          \n"
				+      : [src] "+r" (src),
				+        [dst_a] "+r" (dst_a),
				+        [dst_b] "+r" (dst_b),
				+        [width] "+r" (width)
				+      // src_stride is only read by the asm, so it is a plain input.
				+      : [src_stride] "r" (src_stride),
				+        [dst_stride_a] "r" (dst_stride_a),
				+        [dst_stride_b] "r" (dst_stride_b)
				+      // "memory": the asm stores through dst_a/dst_b, which are not
				+      // output operands.
				+      : "t0", "t1",  "t2", "t3",  "t4", "t5",
				+        "t6", "t7", "t8", "t9",
				+        "s0", "s1", "s2", "s3",
				+        "s4", "s5", "s6", "memory"
				+  );
				+}
			
 
				+
			
 
				+#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/rotate_neon.cc
+++ b/src/jni/libyuv/source/rotate_neon.cc
@@ -0,0 +1,533 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/rotate_row.h"
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
			
 
				+    !defined(__aarch64__)
			
 
				+
			
 
				+// vtbl byte indices that gather every 4th byte (0,4,8,12 then 1,5,9,13 ...),
				+// i.e. a 4x4 byte transpose of a 16-byte group. The table is only read, so
				+// it is const.
				+static const uvec8 kVTbl4x4Transpose =
				+  { 0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15 };
			
 
				+
			
 
				+// Transposes an 8-row strip of |src| that is |width| bytes wide into |width|
				+// rows of 8 bytes each at |dst| (32-bit ARM NEON).
				+//
				+//   src, src_stride  first source row and the byte step between source rows.
				+//   dst, dst_stride  first destination row and the byte step between
				+//                    destination rows.
				+//   width            number of source columns (= destination rows) to emit.
				+//
				+// Full 8x8 tiles are handled in the main loop; whatever is left over is
				+// finished as a 4-column, a 2-column and a 1-column tail, in that order.
				+void TransposeWx8_NEON(const uint8* src, int src_stride,
				+                       uint8* dst, int dst_stride,
				+                       int width) {
				+  // Scratch pointer (%0): walks the source rows, then the destination rows.
				+  const uint8* src_temp = NULL;
				+  asm volatile (
				+    // loops are on blocks of 8. the counter is biased by -8 so the loop
				+    // can simply test for "counter >= 0" after each tile. when fewer than
				+    // 8 columns were requested the 8x8 loop is skipped altogether, so no
				+    // tile outside the requested width is read or written.
				+    "subs        %5, #8                        \n"
				+    "blt         5f                            \n"
				+
				+    // handle 8x8 blocks. this should be the majority of the plane
				+    "1:                                        \n"
				+      "mov         %0, %1                      \n"
				+
				+      // load 8 bytes from each of the 8 source rows
				+      MEMACCESS(0)
				+      "vld1.8      {d0}, [%0], %2              \n"
				+      MEMACCESS(0)
				+      "vld1.8      {d1}, [%0], %2              \n"
				+      MEMACCESS(0)
				+      "vld1.8      {d2}, [%0], %2              \n"
				+      MEMACCESS(0)
				+      "vld1.8      {d3}, [%0], %2              \n"
				+      MEMACCESS(0)
				+      "vld1.8      {d4}, [%0], %2              \n"
				+      MEMACCESS(0)
				+      "vld1.8      {d5}, [%0], %2              \n"
				+      MEMACCESS(0)
				+      "vld1.8      {d6}, [%0], %2              \n"
				+      MEMACCESS(0)
				+      "vld1.8      {d7}, [%0]                  \n"
				+
				+      // transpose in three passes: bytes, halfwords, words
				+      "vtrn.8      d1, d0                      \n"
				+      "vtrn.8      d3, d2                      \n"
				+      "vtrn.8      d5, d4                      \n"
				+      "vtrn.8      d7, d6                      \n"
				+
				+      "vtrn.16     d1, d3                      \n"
				+      "vtrn.16     d0, d2                      \n"
				+      "vtrn.16     d5, d7                      \n"
				+      "vtrn.16     d4, d6                      \n"
				+
				+      "vtrn.32     d1, d5                      \n"
				+      "vtrn.32     d0, d4                      \n"
				+      "vtrn.32     d3, d7                      \n"
				+      "vtrn.32     d2, d6                      \n"
				+
				+      // swap the two bytes inside every halfword
				+      "vrev16.8    q0, q0                      \n"
				+      "vrev16.8    q1, q1                      \n"
				+      "vrev16.8    q2, q2                      \n"
				+      "vrev16.8    q3, q3                      \n"
				+
				+      "mov         %0, %3                      \n"
				+
				+    // store 8 destination rows of 8 bytes
				+    MEMACCESS(0)
				+      "vst1.8      {d1}, [%0], %4              \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d0}, [%0], %4              \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d3}, [%0], %4              \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d2}, [%0], %4              \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d5}, [%0], %4              \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d4}, [%0], %4              \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d7}, [%0], %4              \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d6}, [%0]                  \n"
				+
				+      "add         %1, #8                      \n"  // src += 8
				+      "add         %3, %3, %4, lsl #3          \n"  // dst += 8 * dst_stride
				+      "subs        %5,  #8                     \n"  // w   -= 8
				+      "bge         1b                          \n"
				+
				+    // add 8 back to counter. if the result is 0 there are
				+    // no residuals.
				+    "5:                                        \n"
				+    "adds        %5, #8                        \n"
				+    "beq         4f                            \n"
				+
				+    // some residual, so between 1 and 7 lines left to transpose
				+    "cmp         %5, #2                        \n"
				+    "blt         3f                            \n"
				+
				+    "cmp         %5, #4                        \n"
				+    "blt         2f                            \n"
				+
				+    // 4x8 block: 4 bytes from each of the 8 source rows
				+    "mov         %0, %1                        \n"
				+    MEMACCESS(0)
				+    "vld1.32     {d0[0]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.32     {d0[1]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.32     {d1[0]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.32     {d1[1]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.32     {d2[0]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.32     {d2[1]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.32     {d3[0]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.32     {d3[1]}, [%0]                 \n"
				+
				+    "mov         %0, %3                        \n"
				+
				+    // q3 (d6/d7) <- kVTbl4x4Transpose shuffle indices
				+    MEMACCESS(6)
				+    "vld1.8      {q3}, [%6]                    \n"
				+
				+    "vtbl.8      d4, {d0, d1}, d6              \n"
				+    "vtbl.8      d5, {d0, d1}, d7              \n"
				+    "vtbl.8      d0, {d2, d3}, d6              \n"
				+    "vtbl.8      d1, {d2, d3}, d7              \n"
				+
				+    // TODO(frkoenig): Rework shuffle above to
				+    // write out with 4 instead of 8 writes.
				+    MEMACCESS(0)
				+    "vst1.32     {d4[0]}, [%0], %4             \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d4[1]}, [%0], %4             \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d5[0]}, [%0], %4             \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d5[1]}, [%0]                 \n"
				+
				+    // second half of each of the same 4 destination rows (bytes 4..7)
				+    "add         %0, %3, #4                    \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d0[0]}, [%0], %4             \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d0[1]}, [%0], %4             \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d1[0]}, [%0], %4             \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d1[1]}, [%0]                 \n"
				+
				+    "add         %1, #4                        \n"  // src += 4
				+    "add         %3, %3, %4, lsl #2            \n"  // dst += 4 * dst_stride
				+    "subs        %5,  #4                       \n"  // w   -= 4
				+    "beq         4f                            \n"
				+
				+    // some residual, check to see if it includes a 2x8 block,
				+    // or less
				+    "cmp         %5, #2                        \n"
				+    "blt         3f                            \n"
				+
				+    // 2x8 block: 2 bytes from each of the 8 source rows, even rows into
				+    // d0 and odd rows into d1
				+    "2:                                        \n"
				+    "mov         %0, %1                        \n"
				+    MEMACCESS(0)
				+    "vld1.16     {d0[0]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.16     {d1[0]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.16     {d0[1]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.16     {d1[1]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.16     {d0[2]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.16     {d1[2]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.16     {d0[3]}, [%0], %2             \n"
				+    MEMACCESS(0)
				+    "vld1.16     {d1[3]}, [%0]                 \n"
				+
				+    "vtrn.8      d0, d1                        \n"
				+
				+    "mov         %0, %3                        \n"
				+
				+    MEMACCESS(0)
				+    "vst1.64     {d0}, [%0], %4                \n"
				+    MEMACCESS(0)
				+    "vst1.64     {d1}, [%0]                    \n"
				+
				+    "add         %1, #2                        \n"  // src += 2
				+    "add         %3, %3, %4, lsl #1            \n"  // dst += 2 * dst_stride
				+    "subs        %5,  #2                       \n"  // w   -= 2
				+    "beq         4f                            \n"
				+
				+    // 1x8 block: one byte from each of the 8 source rows becomes a
				+    // single 8-byte destination row
				+    "3:                                        \n"
				+    MEMACCESS(1)
				+    "vld1.8      {d0[0]}, [%1], %2             \n"
				+    MEMACCESS(1)
				+    "vld1.8      {d0[1]}, [%1], %2             \n"
				+    MEMACCESS(1)
				+    "vld1.8      {d0[2]}, [%1], %2             \n"
				+    MEMACCESS(1)
				+    "vld1.8      {d0[3]}, [%1], %2             \n"
				+    MEMACCESS(1)
				+    "vld1.8      {d0[4]}, [%1], %2             \n"
				+    MEMACCESS(1)
				+    "vld1.8      {d0[5]}, [%1], %2             \n"
				+    MEMACCESS(1)
				+    "vld1.8      {d0[6]}, [%1], %2             \n"
				+    MEMACCESS(1)
				+    "vld1.8      {d0[7]}, [%1]                 \n"
				+
				+    MEMACCESS(3)
				+    "vst1.64     {d0}, [%3]                    \n"
				+
				+    "4:                                        \n"
				+
				+    : "+r"(src_temp),          // %0
				+      "+r"(src),               // %1
				+      "+r"(src_stride),        // %2
				+      "+r"(dst),               // %3
				+      "+r"(dst_stride),        // %4
				+      "+r"(width)              // %5
				+    : "r"(&kVTbl4x4Transpose)  // %6
				+    : "memory", "cc", "q0", "q1", "q2", "q3"
				+  );
				+}
			
 
				+
			
 
				+// vtbl byte indices that interleave byte i of the first 8 table bytes with
				+// byte i of the second 8 (0,8,1,9,...). The table is only read, so it is
				+// const.
				+static const uvec8 kVTbl4x4TransposeDi =
				+  { 0,  8,  1,  9,  2, 10,  3, 11,  4, 12,  5, 13,  6, 14,  7, 15 };
			
 
				+
			
 
				+// Transposes an 8-row strip of interleaved byte pairs and splits it into two
				+// planes (32-bit ARM NEON). Each source column is a 2-byte pair; the first
				+// byte of every pair ends up in |dst_a|, the second in |dst_b|. Every
				+// processed column produces one 8-byte row in each destination.
				+//
				+//   src, src_stride      first source row and the byte step between rows.
				+//   dst_a, dst_stride_a  first output plane and its row step in bytes.
				+//   dst_b, dst_stride_b  second output plane and its row step in bytes.
				+//   width                number of byte pairs per source row to process.
				+//
				+// Full 8-pair tiles are handled in the main loop; whatever is left over is
				+// finished as a 4-pair, a 2-pair and a 1-pair tail, in that order.
				+void TransposeUVWx8_NEON(const uint8* src, int src_stride,
				+                         uint8* dst_a, int dst_stride_a,
				+                         uint8* dst_b, int dst_stride_b,
				+                         int width) {
				+  // Scratch pointer (%0): walks the source rows, then the destination rows.
				+  const uint8* src_temp = NULL;
				+  asm volatile (
				+    // loops are on blocks of 8. the counter is biased by -8 so the loop
				+    // can simply test for "counter >= 0" after each tile. when fewer than
				+    // 8 pairs were requested the 8x8 loop is skipped altogether, so no
				+    // tile outside the requested width is read or written.
				+    "subs        %7, #8                        \n"
				+    "blt         5f                            \n"
				+
				+    // handle 8x8 blocks. this should be the majority of the plane
				+    "1:                                        \n"
				+      "mov         %0, %1                      \n"
				+
				+      // vld2 de-interleaves: first bytes of the pairs go to the even
				+      // d register, second bytes to the odd one
				+      MEMACCESS(0)
				+      "vld2.8      {d0,  d1},  [%0], %2        \n"
				+      MEMACCESS(0)
				+      "vld2.8      {d2,  d3},  [%0], %2        \n"
				+      MEMACCESS(0)
				+      "vld2.8      {d4,  d5},  [%0], %2        \n"
				+      MEMACCESS(0)
				+      "vld2.8      {d6,  d7},  [%0], %2        \n"
				+      MEMACCESS(0)
				+      "vld2.8      {d16, d17}, [%0], %2        \n"
				+      MEMACCESS(0)
				+      "vld2.8      {d18, d19}, [%0], %2        \n"
				+      MEMACCESS(0)
				+      "vld2.8      {d20, d21}, [%0], %2        \n"
				+      MEMACCESS(0)
				+      "vld2.8      {d22, d23}, [%0]            \n"
				+
				+      // transpose both planes at once (low halves of the q registers
				+      // carry the first plane, high halves the second)
				+      "vtrn.8      q1, q0                      \n"
				+      "vtrn.8      q3, q2                      \n"
				+      "vtrn.8      q9, q8                      \n"
				+      "vtrn.8      q11, q10                    \n"
				+
				+      "vtrn.16     q1, q3                      \n"
				+      "vtrn.16     q0, q2                      \n"
				+      "vtrn.16     q9, q11                     \n"
				+      "vtrn.16     q8, q10                     \n"
				+
				+      "vtrn.32     q1, q9                      \n"
				+      "vtrn.32     q0, q8                      \n"
				+      "vtrn.32     q3, q11                     \n"
				+      "vtrn.32     q2, q10                     \n"
				+
				+      // swap the two bytes inside every halfword
				+      "vrev16.8    q0, q0                      \n"
				+      "vrev16.8    q1, q1                      \n"
				+      "vrev16.8    q2, q2                      \n"
				+      "vrev16.8    q3, q3                      \n"
				+      "vrev16.8    q8, q8                      \n"
				+      "vrev16.8    q9, q9                      \n"
				+      "vrev16.8    q10, q10                    \n"
				+      "vrev16.8    q11, q11                    \n"
				+
				+      "mov         %0, %3                      \n"
				+
				+    // 8 rows of 8 bytes to dst_a (low halves)
				+    MEMACCESS(0)
				+      "vst1.8      {d2},  [%0], %4             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d0},  [%0], %4             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d6},  [%0], %4             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d4},  [%0], %4             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d18}, [%0], %4             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d16}, [%0], %4             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d22}, [%0], %4             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d20}, [%0]                 \n"
				+
				+      "mov         %0, %5                      \n"
				+
				+    // 8 rows of 8 bytes to dst_b (high halves)
				+    MEMACCESS(0)
				+      "vst1.8      {d3},  [%0], %6             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d1},  [%0], %6             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d7},  [%0], %6             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d5},  [%0], %6             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d19}, [%0], %6             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d17}, [%0], %6             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d23}, [%0], %6             \n"
				+    MEMACCESS(0)
				+      "vst1.8      {d21}, [%0]                 \n"
				+
				+      "add         %1, #8*2                    \n"  // src   += 8*2
				+      "add         %3, %3, %4, lsl #3          \n"  // dst_a += 8 * dst_stride_a
				+      "add         %5, %5, %6, lsl #3          \n"  // dst_b += 8 * dst_stride_b
				+      "subs        %7,  #8                     \n"  // w     -= 8
				+      "bge         1b                          \n"
				+
				+    // add 8 back to counter. if the result is 0 there are
				+    // no residuals.
				+    "5:                                        \n"
				+    "adds        %7, #8                        \n"
				+    "beq         4f                            \n"
				+
				+    // some residual, so between 1 and 7 lines left to transpose
				+    "cmp         %7, #2                        \n"
				+    "blt         3f                            \n"
				+
				+    "cmp         %7, #4                        \n"
				+    "blt         2f                            \n"
				+
				+    // TODO(frkoenig): Clean this up
				+    // 4x8 block: 8 bytes (4 pairs) from each of the 8 source rows
				+    "mov         %0, %1                        \n"
				+    MEMACCESS(0)
				+    "vld1.64     {d0}, [%0], %2                \n"
				+    MEMACCESS(0)
				+    "vld1.64     {d1}, [%0], %2                \n"
				+    MEMACCESS(0)
				+    "vld1.64     {d2}, [%0], %2                \n"
				+    MEMACCESS(0)
				+    "vld1.64     {d3}, [%0], %2                \n"
				+    MEMACCESS(0)
				+    "vld1.64     {d4}, [%0], %2                \n"
				+    MEMACCESS(0)
				+    "vld1.64     {d5}, [%0], %2                \n"
				+    MEMACCESS(0)
				+    "vld1.64     {d6}, [%0], %2                \n"
				+    MEMACCESS(0)
				+    "vld1.64     {d7}, [%0]                    \n"
				+
				+    // q15 (d30/d31) <- kVTbl4x4TransposeDi shuffle indices
				+    MEMACCESS(8)
				+    "vld1.8      {q15}, [%8]                   \n"
				+
				+    "vtrn.8      q0, q1                        \n"
				+    "vtrn.8      q2, q3                        \n"
				+
				+    "vtbl.8      d16, {d0, d1}, d30            \n"
				+    "vtbl.8      d17, {d0, d1}, d31            \n"
				+    "vtbl.8      d18, {d2, d3}, d30            \n"
				+    "vtbl.8      d19, {d2, d3}, d31            \n"
				+    "vtbl.8      d20, {d4, d5}, d30            \n"
				+    "vtbl.8      d21, {d4, d5}, d31            \n"
				+    "vtbl.8      d22, {d6, d7}, d30            \n"
				+    "vtbl.8      d23, {d6, d7}, d31            \n"
				+
				+    "mov         %0, %3                        \n"
				+
				+    // dst_a: bytes 0..3 of 4 rows, then bytes 4..7 of the same rows
				+    MEMACCESS(0)
				+    "vst1.32     {d16[0]},  [%0], %4           \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d16[1]},  [%0], %4           \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d17[0]},  [%0], %4           \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d17[1]},  [%0], %4           \n"
				+
				+    "add         %0, %3, #4                    \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d20[0]}, [%0], %4            \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d20[1]}, [%0], %4            \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d21[0]}, [%0], %4            \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d21[1]}, [%0]                \n"
				+
				+    "mov         %0, %5                        \n"
				+
				+    // dst_b: bytes 0..3 of 4 rows, then bytes 4..7 of the same rows
				+    MEMACCESS(0)
				+    "vst1.32     {d18[0]}, [%0], %6            \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d18[1]}, [%0], %6            \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d19[0]}, [%0], %6            \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d19[1]}, [%0], %6            \n"
				+
				+    "add         %0, %5, #4                    \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d22[0]},  [%0], %6           \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d22[1]},  [%0], %6           \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d23[0]},  [%0], %6           \n"
				+    MEMACCESS(0)
				+    "vst1.32     {d23[1]},  [%0]               \n"
				+
				+    "add         %1, #4*2                      \n"  // src   += 4 * 2
				+    "add         %3, %3, %4, lsl #2            \n"  // dst_a += 4 * dst_stride_a
				+    "add         %5, %5, %6, lsl #2            \n"  // dst_b += 4 * dst_stride_b
				+    "subs        %7,  #4                       \n"  // w     -= 4
				+    "beq         4f                            \n"
				+
				+    // some residual, check to see if it includes a 2x8 block,
				+    // or less
				+    "cmp         %7, #2                        \n"
				+    "blt         3f                            \n"
				+
				+    // 2x8 block: 2 pairs from each of the 8 source rows
				+    "2:                                        \n"
				+    "mov         %0, %1                        \n"
				+    MEMACCESS(0)
				+    "vld2.16     {d0[0], d2[0]}, [%0], %2      \n"
				+    MEMACCESS(0)
				+    "vld2.16     {d1[0], d3[0]}, [%0], %2      \n"
				+    MEMACCESS(0)
				+    "vld2.16     {d0[1], d2[1]}, [%0], %2      \n"
				+    MEMACCESS(0)
				+    "vld2.16     {d1[1], d3[1]}, [%0], %2      \n"
				+    MEMACCESS(0)
				+    "vld2.16     {d0[2], d2[2]}, [%0], %2      \n"
				+    MEMACCESS(0)
				+    "vld2.16     {d1[2], d3[2]}, [%0], %2      \n"
				+    MEMACCESS(0)
				+    "vld2.16     {d0[3], d2[3]}, [%0], %2      \n"
				+    MEMACCESS(0)
				+    "vld2.16     {d1[3], d3[3]}, [%0]          \n"
				+
				+    "vtrn.8      d0, d1                        \n"
				+    "vtrn.8      d2, d3                        \n"
				+
				+    "mov         %0, %3                        \n"
				+
				+    MEMACCESS(0)
				+    "vst1.64     {d0}, [%0], %4                \n"
				+    MEMACCESS(0)
				+    "vst1.64     {d2}, [%0]                    \n"
				+
				+    "mov         %0, %5                        \n"
				+
				+    MEMACCESS(0)
				+    "vst1.64     {d1}, [%0], %6                \n"
				+    MEMACCESS(0)
				+    "vst1.64     {d3}, [%0]                    \n"
				+
				+    "add         %1, #2*2                      \n"  // src   += 2 * 2
				+    "add         %3, %3, %4, lsl #1            \n"  // dst_a += 2 * dst_stride_a
				+    "add         %5, %5, %6, lsl #1            \n"  // dst_b += 2 * dst_stride_b
				+    "subs        %7,  #2                       \n"  // w     -= 2
				+    "beq         4f                            \n"
				+
				+    // 1x8 block: one pair from each of the 8 source rows becomes one
				+    // 8-byte row in each destination
				+    "3:                                        \n"
				+    MEMACCESS(1)
				+    "vld2.8      {d0[0], d1[0]}, [%1], %2      \n"
				+    MEMACCESS(1)
				+    "vld2.8      {d0[1], d1[1]}, [%1], %2      \n"
				+    MEMACCESS(1)
				+    "vld2.8      {d0[2], d1[2]}, [%1], %2      \n"
				+    MEMACCESS(1)
				+    "vld2.8      {d0[3], d1[3]}, [%1], %2      \n"
				+    MEMACCESS(1)
				+    "vld2.8      {d0[4], d1[4]}, [%1], %2      \n"
				+    MEMACCESS(1)
				+    "vld2.8      {d0[5], d1[5]}, [%1], %2      \n"
				+    MEMACCESS(1)
				+    "vld2.8      {d0[6], d1[6]}, [%1], %2      \n"
				+    MEMACCESS(1)
				+    "vld2.8      {d0[7], d1[7]}, [%1]          \n"
				+
				+    MEMACCESS(3)
				+    "vst1.64     {d0}, [%3]                    \n"
				+    MEMACCESS(5)
				+    "vst1.64     {d1}, [%5]                    \n"
				+
				+    "4:                                        \n"
				+
				+    : "+r"(src_temp),            // %0
				+      "+r"(src),                 // %1
				+      "+r"(src_stride),          // %2
				+      "+r"(dst_a),               // %3
				+      "+r"(dst_stride_a),        // %4
				+      "+r"(dst_b),               // %5
				+      "+r"(dst_stride_b),        // %6
				+      "+r"(width)                // %7
				+    : "r"(&kVTbl4x4TransposeDi)  // %8
				+    // q15 holds the shuffle table in the 4x8 tail, so it must be listed
				+    // as clobbered alongside the data registers.
				+    : "memory", "cc",
				+      "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q15"
				+  );
				+}
			
 
				+#endif  // defined(__ARM_NEON__) && !defined(__aarch64__)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/rotate_neon64.cc
+++ b/src/jni/libyuv/source/rotate_neon64.cc
@@ -0,0 +1,543 @@
 
				+/*
			
 
				+ *  Copyright 2014 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/rotate_row.h"
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// This module is for GCC Neon armv8 64 bit.
			
 
				+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
			
 
				+
			
 
				+// tbl byte indices that gather every 4th byte (0,4,8,12 then 1,5,9,13 ...),
				+// i.e. a 4x4 byte transpose of a 16-byte group. The table is only read, so
				+// it is const.
				+static const uvec8 kVTbl4x4Transpose =
				+  { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
			
 
				+
			
 
				+void TransposeWx8_NEON(const uint8* src, int src_stride,
			
 
				+                       uint8* dst, int dst_stride, int width) {
			
 
				+  const uint8* src_temp = NULL;
			
 
				+  int64 width64 = (int64) width;  // Work around clang 3.4 warning.
			
 
				+  asm volatile (
			
 
				+    // loops are on blocks of 8. loop will stop when
			
 
				+    // counter gets to or below 0. starting the counter
			
 
				+    // at w-8 allow for this
			
 
				+    "sub         %3, %3, #8                      \n"
			
 
				+
			
 
				+    // handle 8x8 blocks. this should be the majority of the plane
			
 
				+    "1:                                          \n"
			
 
				+      "mov         %0, %1                        \n"
			
 
				+
			
 
				+      MEMACCESS(0)
			
 
				+      "ld1        {v0.8b}, [%0], %5              \n"
			
 
				+      MEMACCESS(0)
			
 
				+      "ld1        {v1.8b}, [%0], %5              \n"
			
 
				+      MEMACCESS(0)
			
 
				+      "ld1        {v2.8b}, [%0], %5              \n"
			
 
				+      MEMACCESS(0)
			
 
				+      "ld1        {v3.8b}, [%0], %5              \n"
			
 
				+      MEMACCESS(0)
			
 
				+      "ld1        {v4.8b}, [%0], %5              \n"
			
 
				+      MEMACCESS(0)
			
 
				+      "ld1        {v5.8b}, [%0], %5              \n"
			
 
				+      MEMACCESS(0)
			
 
				+      "ld1        {v6.8b}, [%0], %5              \n"
			
 
				+      MEMACCESS(0)
			
 
				+      "ld1        {v7.8b}, [%0]                  \n"
			
 
				+
			
 
				+      "trn2     v16.8b, v0.8b, v1.8b             \n"
			
 
				+      "trn1     v17.8b, v0.8b, v1.8b             \n"
			
 
				+      "trn2     v18.8b, v2.8b, v3.8b             \n"
			
 
				+      "trn1     v19.8b, v2.8b, v3.8b             \n"
			
 
				+      "trn2     v20.8b, v4.8b, v5.8b             \n"
			
 
				+      "trn1     v21.8b, v4.8b, v5.8b             \n"
			
 
				+      "trn2     v22.8b, v6.8b, v7.8b             \n"
			
 
				+      "trn1     v23.8b, v6.8b, v7.8b             \n"
			
 
				+
			
 
				+      "trn2     v3.4h, v17.4h, v19.4h            \n"
			
 
				+      "trn1     v1.4h, v17.4h, v19.4h            \n"
			
 
				+      "trn2     v2.4h, v16.4h, v18.4h            \n"
			
 
				+      "trn1     v0.4h, v16.4h, v18.4h            \n"
			
 
				+      "trn2     v7.4h, v21.4h, v23.4h            \n"
			
 
				+      "trn1     v5.4h, v21.4h, v23.4h            \n"
			
 
				+      "trn2     v6.4h, v20.4h, v22.4h            \n"
			
 
				+      "trn1     v4.4h, v20.4h, v22.4h            \n"
			
 
				+
			
 
				+      "trn2     v21.2s, v1.2s, v5.2s             \n"
			
 
				+      "trn1     v17.2s, v1.2s, v5.2s             \n"
			
 
				+      "trn2     v20.2s, v0.2s, v4.2s             \n"
			
 
				+      "trn1     v16.2s, v0.2s, v4.2s             \n"
			
 
				+      "trn2     v23.2s, v3.2s, v7.2s             \n"
			
 
				+      "trn1     v19.2s, v3.2s, v7.2s             \n"
			
 
				+      "trn2     v22.2s, v2.2s, v6.2s             \n"
			
 
				+      "trn1     v18.2s, v2.2s, v6.2s             \n"
			
 
				+
			
 
				+      "mov         %0, %2                        \n"
			
 
				+
			
 
				+    MEMACCESS(0)
			
 
				+      "st1      {v17.8b}, [%0], %6               \n"
			
 
				+    MEMACCESS(0)
			
 
				+      "st1      {v16.8b}, [%0], %6               \n"
			
 
				+    MEMACCESS(0)
			
 
				+      "st1      {v19.8b}, [%0], %6               \n"
			
 
				+    MEMACCESS(0)
			
 
				+      "st1      {v18.8b}, [%0], %6               \n"
			
 
				+    MEMACCESS(0)
			
 
				+      "st1      {v21.8b}, [%0], %6               \n"
			
 
				+    MEMACCESS(0)
			
 
				+      "st1      {v20.8b}, [%0], %6               \n"
			
 
				+    MEMACCESS(0)
			
 
				+      "st1      {v23.8b}, [%0], %6               \n"
			
 
				+    MEMACCESS(0)
			
 
				+      "st1      {v22.8b}, [%0]                   \n"
			
 
				+
			
 
				+      "add         %1, %1, #8                    \n"  // src += 8
			
 
				+      "add         %2, %2, %6, lsl #3            \n"  // dst += 8 * dst_stride
			
 
				+      "subs        %3, %3, #8                    \n"  // w   -= 8
			
 
				+      "b.ge        1b                            \n"
			
 
				+
			
 
				+    // add 8 back to counter. if the result is 0 there are
			
 
				+    // no residuals.
			
 
				+    "adds        %3, %3, #8                      \n"
			
 
				+    "b.eq        4f                              \n"
			
 
				+
			
 
				+    // some residual, so between 1 and 7 lines left to transpose
			
 
				+    "cmp         %3, #2                          \n"
			
 
				+    "b.lt        3f                              \n"
			
 
				+
			
 
				+    "cmp         %3, #4                          \n"
			
 
				+    "b.lt        2f                              \n"
			
 
				+
			
 
				+    // 4x8 block
			
 
				+    "mov         %0, %1                          \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v0.s}[0], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v0.s}[1], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v0.s}[2], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v0.s}[3], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v1.s}[0], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v1.s}[1], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v1.s}[2], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v1.s}[3], [%0]                     \n"
			
 
				+
			
 
				+    "mov         %0, %2                          \n"
			
 
				+
			
 
				+    MEMACCESS(4)
			
 
				+    "ld1      {v2.16b}, [%4]                     \n"
			
 
				+
			
 
				+    "tbl      v3.16b, {v0.16b}, v2.16b           \n"
			
 
				+    "tbl      v0.16b, {v1.16b}, v2.16b           \n"
			
 
				+
			
 
				+    // TODO(frkoenig): Rework shuffle above to
			
 
				+    // write out with 4 instead of 8 writes.
			
 
				+    MEMACCESS(0)
			
 
				+    "st1 {v3.s}[0], [%0], %6                     \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1 {v3.s}[1], [%0], %6                     \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1 {v3.s}[2], [%0], %6                     \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1 {v3.s}[3], [%0]                         \n"
			
 
				+
			
 
				+    "add         %0, %2, #4                      \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1 {v0.s}[0], [%0], %6                     \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1 {v0.s}[1], [%0], %6                     \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1 {v0.s}[2], [%0], %6                     \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1 {v0.s}[3], [%0]                         \n"
			
 
				+
			
 
				+    "add         %1, %1, #4                      \n"  // src += 4
			
 
				+    "add         %2, %2, %6, lsl #2              \n"  // dst += 4 * dst_stride
			
 
				+    "subs        %3, %3, #4                      \n"  // w   -= 4
			
 
				+    "b.eq        4f                              \n"
			
 
				+
			
 
				+    // some residual, check to see if it includes a 2x8 block,
			
 
				+    // or less
			
 
				+    "cmp         %3, #2                          \n"
			
 
				+    "b.lt        3f                              \n"
			
 
				+
			
 
				+    // 2x8 block
			
 
				+    "2:                                          \n"
			
 
				+    "mov         %0, %1                          \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v0.h}[0], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v1.h}[0], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v0.h}[1], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v1.h}[1], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v0.h}[2], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v1.h}[2], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v0.h}[3], [%0], %5                 \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1     {v1.h}[3], [%0]                     \n"
			
 
				+
			
 
				+    "trn2    v2.8b, v0.8b, v1.8b                 \n"
			
 
				+    "trn1    v3.8b, v0.8b, v1.8b                 \n"
			
 
				+
			
 
				+    "mov         %0, %2                          \n"
			
 
				+
			
 
				+    MEMACCESS(0)
			
 
				+    "st1     {v3.8b}, [%0], %6                   \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1     {v2.8b}, [%0]                       \n"
			
 
				+
			
 
				+    "add         %1, %1, #2                      \n"  // src += 2
			
 
				+    "add         %2, %2, %6, lsl #1              \n"  // dst += 2 * dst_stride
			
 
				+    "subs        %3, %3,  #2                     \n"  // w   -= 2
			
 
				+    "b.eq        4f                              \n"
			
 
				+
			
 
				+    // 1x8 block
			
 
				+    "3:                                          \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld1         {v0.b}[0], [%1], %5             \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld1         {v0.b}[1], [%1], %5             \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld1         {v0.b}[2], [%1], %5             \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld1         {v0.b}[3], [%1], %5             \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld1         {v0.b}[4], [%1], %5             \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld1         {v0.b}[5], [%1], %5             \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld1         {v0.b}[6], [%1], %5             \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld1         {v0.b}[7], [%1]                 \n"
			
 
				+
			
 
				+    MEMACCESS(2)
			
 
				+    "st1         {v0.8b}, [%2]                   \n"
			
 
				+
			
 
				+    "4:                                          \n"
			
 
				+
			
 
				+    : "+r"(src_temp),                             // %0
			
 
				+      "+r"(src),                                  // %1
			
 
				+      "+r"(dst),                                  // %2
			
 
				+      "+r"(width64)                               // %3
			
 
				+    : "r"(&kVTbl4x4Transpose),                    // %4
			
 
				+      "r"(static_cast<ptrdiff_t>(src_stride)),    // %5
			
 
				+      "r"(static_cast<ptrdiff_t>(dst_stride))     // %6
			
 
				+    : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
			
 
				+      "v17", "v18", "v19", "v20", "v21", "v22", "v23"
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// tbl byte indices used by the 4x8 tail of TransposeUVWx8_NEON. With a
			
 
				+// four-register table, index i selects byte (i % 16) of register (i / 16).
			
 
				+// The first 16 entries gather bytes 0, 2, 4, 6 of each of the four rows,
			
 
				+// the last 16 gather bytes 1, 3, 5, 7. The table is only read, hence const.
			
 
				+static const uint8 kVTbl4x4TransposeDi[32] =
			
 
				+  { 0,  16, 32, 48,  2, 18, 34, 50,  4, 20, 36, 52,  6, 22, 38, 54,
			
 
				+    1,  17, 33, 49,  3, 19, 35, 51,  5, 21, 37, 53,  7, 23, 39, 55};
			
 
				+
			
 
				+// Transposes an 8-row strip of byte-pair data (e.g. interleaved UV) and
			
 
				+// de-interleaves it: the first byte of every pair ends up in dst_a, the
			
 
				+// second in dst_b. |width| counts pairs per source row (src advances two
			
 
				+// bytes per unit of width); every pair column becomes one 8-byte row in
			
 
				+// dst_a and one in dst_b. The main loop handles 8 columns at a time, the
			
 
				+// tails handle the remaining 4, 2 and 1 columns.
			
 
				+void TransposeUVWx8_NEON(const uint8* src, int src_stride,
			
 
				+                         uint8* dst_a, int dst_stride_a,
			
 
				+                         uint8* dst_b, int dst_stride_b,
			
 
				+                         int width) {
			
 
				+  const uint8* src_temp = NULL;  // %0: scratch pointer that walks rows.
			
 
				+  int64 width64 = (int64) width;  // Work around clang 3.4 warning.
			
 
				+  asm volatile (
			
 
				+    // The main loop works on blocks of 8 columns. The counter is biased
			
 
				+    // by -8 up front so the loop can simply continue while the result
			
 
				+    // of its subtraction is still >= 0.
			
 
				+    "sub       %4, %4, #8                      \n"
			
 
				+
			
 
				+    // handle 8x8 blocks. this should be the majority of the plane
			
 
				+    "1:                                        \n"
			
 
				+    "mov       %0, %1                          \n"
			
 
				+
			
 
				+    // Load 16 bytes (8 pairs) from each of the 8 source rows.
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v0.16b}, [%0], %5              \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v1.16b}, [%0], %5              \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v2.16b}, [%0], %5              \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v3.16b}, [%0], %5              \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v4.16b}, [%0], %5              \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v5.16b}, [%0], %5              \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v6.16b}, [%0], %5              \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v7.16b}, [%0]                  \n"
			
 
				+
			
 
				+    // Three transpose rounds: bytes, then halfwords, then words.
			
 
				+    "trn1      v16.16b, v0.16b, v1.16b         \n"
			
 
				+    "trn2      v17.16b, v0.16b, v1.16b         \n"
			
 
				+    "trn1      v18.16b, v2.16b, v3.16b         \n"
			
 
				+    "trn2      v19.16b, v2.16b, v3.16b         \n"
			
 
				+    "trn1      v20.16b, v4.16b, v5.16b         \n"
			
 
				+    "trn2      v21.16b, v4.16b, v5.16b         \n"
			
 
				+    "trn1      v22.16b, v6.16b, v7.16b         \n"
			
 
				+    "trn2      v23.16b, v6.16b, v7.16b         \n"
			
 
				+
			
 
				+    "trn1      v0.8h, v16.8h, v18.8h           \n"
			
 
				+    "trn2      v1.8h, v16.8h, v18.8h           \n"
			
 
				+    "trn1      v2.8h, v20.8h, v22.8h           \n"
			
 
				+    "trn2      v3.8h, v20.8h, v22.8h           \n"
			
 
				+    "trn1      v4.8h, v17.8h, v19.8h           \n"
			
 
				+    "trn2      v5.8h, v17.8h, v19.8h           \n"
			
 
				+    "trn1      v6.8h, v21.8h, v23.8h           \n"
			
 
				+    "trn2      v7.8h, v21.8h, v23.8h           \n"
			
 
				+
			
 
				+    "trn1      v16.4s, v0.4s, v2.4s            \n"
			
 
				+    "trn2      v17.4s, v0.4s, v2.4s            \n"
			
 
				+    "trn1      v18.4s, v1.4s, v3.4s            \n"
			
 
				+    "trn2      v19.4s, v1.4s, v3.4s            \n"
			
 
				+    "trn1      v20.4s, v4.4s, v6.4s            \n"
			
 
				+    "trn2      v21.4s, v4.4s, v6.4s            \n"
			
 
				+    "trn1      v22.4s, v5.4s, v7.4s            \n"
			
 
				+    "trn2      v23.4s, v5.4s, v7.4s            \n"
			
 
				+
			
 
				+    // Write 8 rows of 8 bytes to dst_a.
			
 
				+    "mov       %0, %2                          \n"
			
 
				+
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v16.d}[0], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v18.d}[0], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v17.d}[0], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v19.d}[0], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v16.d}[1], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v18.d}[1], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v17.d}[1], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v19.d}[1], [%0]                \n"
			
 
				+
			
 
				+    // Write 8 rows of 8 bytes to dst_b.
			
 
				+    "mov       %0, %3                          \n"
			
 
				+
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v20.d}[0], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v22.d}[0], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v21.d}[0], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v23.d}[0], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v20.d}[1], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v22.d}[1], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v21.d}[1], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v23.d}[1], [%0]                \n"
			
 
				+
			
 
				+    "add       %1, %1, #16                     \n"  // src   += 8*2
			
 
				+    "add       %2, %2, %6, lsl #3              \n"  // dst_a += 8 * dst_stride_a
			
 
				+    "add       %3, %3, %7, lsl #3              \n"  // dst_b += 8 * dst_stride_b
			
 
				+    "subs      %4, %4,  #8                     \n"  // w     -= 8
			
 
				+    "b.ge      1b                              \n"
			
 
				+
			
 
				+    // add 8 back to counter. if the result is 0 there are
			
 
				+    // no residuals.
			
 
				+    "adds      %4, %4, #8                      \n"
			
 
				+    "b.eq      4f                              \n"
			
 
				+
			
 
				+    // some residual, so between 1 and 7 lines left to transpose
			
 
				+    "cmp       %4, #2                          \n"
			
 
				+    "b.lt      3f                              \n"
			
 
				+
			
 
				+    "cmp       %4, #4                          \n"
			
 
				+    "b.lt      2f                              \n"
			
 
				+
			
 
				+    // TODO(frkoenig): Clean this up
			
 
				+    // 4x8 block
			
 
				+    "mov       %0, %1                          \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v0.8b}, [%0], %5               \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v1.8b}, [%0], %5               \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v2.8b}, [%0], %5               \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v3.8b}, [%0], %5               \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v4.8b}, [%0], %5               \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v5.8b}, [%0], %5               \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v6.8b}, [%0], %5               \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld1       {v7.8b}, [%0]                   \n"
			
 
				+
			
 
				+    // Load both 16-byte halves of the index table with one two-register
			
 
				+    // ld1. No post-increment is used: %8 is an input-only operand, and
			
 
				+    // inline asm must leave input operands unmodified.
			
 
				+    MEMACCESS(8)
			
 
				+    "ld1       {v30.16b, v31.16b}, [%8]        \n"
			
 
				+
			
 
				+    // v30 gathers the first byte of each pair, v31 the second.
			
 
				+    "tbl       v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b  \n"
			
 
				+    "tbl       v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b  \n"
			
 
				+    "tbl       v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b  \n"
			
 
				+    "tbl       v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b  \n"
			
 
				+
			
 
				+    // dst_a: bytes 0-3 of each of the 4 rows come from source rows 0-3.
			
 
				+    "mov       %0, %2                          \n"
			
 
				+
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v16.s}[0],  [%0], %6           \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v16.s}[1],  [%0], %6           \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v16.s}[2],  [%0], %6           \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v16.s}[3],  [%0], %6           \n"
			
 
				+
			
 
				+    // dst_a: bytes 4-7 of each row come from source rows 4-7.
			
 
				+    "add       %0, %2, #4                      \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v18.s}[0], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v18.s}[1], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v18.s}[2], [%0], %6            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v18.s}[3], [%0]                \n"
			
 
				+
			
 
				+    // Same layout for dst_b.
			
 
				+    "mov       %0, %3                          \n"
			
 
				+
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v17.s}[0], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v17.s}[1], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v17.s}[2], [%0], %7            \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v17.s}[3], [%0], %7            \n"
			
 
				+
			
 
				+    "add       %0, %3, #4                      \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v19.s}[0],  [%0], %7           \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v19.s}[1],  [%0], %7           \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v19.s}[2],  [%0], %7           \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v19.s}[3],  [%0]               \n"
			
 
				+
			
 
				+    "add       %1, %1, #8                      \n"  // src   += 4 * 2
			
 
				+    "add       %2, %2, %6, lsl #2              \n"  // dst_a += 4 * dst_stride_a
			
 
				+    "add       %3, %3, %7, lsl #2              \n"  // dst_b += 4 * dst_stride_b
			
 
				+    "subs      %4,  %4,  #4                    \n"  // w     -= 4
			
 
				+    "b.eq      4f                              \n"
			
 
				+
			
 
				+    // some residual, check to see if it includes a 2x8 block,
			
 
				+    // or less
			
 
				+    "cmp       %4, #2                          \n"
			
 
				+    "b.lt      3f                              \n"
			
 
				+
			
 
				+    // 2x8 block
			
 
				+    "2:                                        \n"
			
 
				+    "mov       %0, %1                          \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld2       {v0.h, v1.h}[0], [%0], %5       \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld2       {v2.h, v3.h}[0], [%0], %5       \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld2       {v0.h, v1.h}[1], [%0], %5       \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld2       {v2.h, v3.h}[1], [%0], %5       \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld2       {v0.h, v1.h}[2], [%0], %5       \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld2       {v2.h, v3.h}[2], [%0], %5       \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld2       {v0.h, v1.h}[3], [%0], %5       \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "ld2       {v2.h, v3.h}[3], [%0]           \n"
			
 
				+
			
 
				+    "trn1      v4.8b, v0.8b, v2.8b             \n"
			
 
				+    "trn2      v5.8b, v0.8b, v2.8b             \n"
			
 
				+    "trn1      v6.8b, v1.8b, v3.8b             \n"
			
 
				+    "trn2      v7.8b, v1.8b, v3.8b             \n"
			
 
				+
			
 
				+    "mov       %0, %2                          \n"
			
 
				+
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v4.d}[0], [%0], %6             \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v6.d}[0], [%0]                 \n"
			
 
				+
			
 
				+    "mov       %0, %3                          \n"
			
 
				+
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v5.d}[0], [%0], %7             \n"
			
 
				+    MEMACCESS(0)
			
 
				+    "st1       {v7.d}[0], [%0]                 \n"
			
 
				+
			
 
				+    "add       %1, %1, #4                      \n"  // src   += 2 * 2
			
 
				+    "add       %2, %2, %6, lsl #1              \n"  // dst_a += 2 * dst_stride_a
			
 
				+    "add       %3, %3, %7, lsl #1              \n"  // dst_b += 2 * dst_stride_b
			
 
				+    "subs      %4,  %4,  #2                    \n"  // w     -= 2
			
 
				+    "b.eq      4f                              \n"
			
 
				+
			
 
				+    // 1x8 block: ld2 splits each pair, first byte to v0, second to v1.
			
 
				+    "3:                                        \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld2       {v0.b, v1.b}[0], [%1], %5       \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld2       {v0.b, v1.b}[1], [%1], %5       \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld2       {v0.b, v1.b}[2], [%1], %5       \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld2       {v0.b, v1.b}[3], [%1], %5       \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld2       {v0.b, v1.b}[4], [%1], %5       \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld2       {v0.b, v1.b}[5], [%1], %5       \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld2       {v0.b, v1.b}[6], [%1], %5       \n"
			
 
				+    MEMACCESS(1)
			
 
				+    "ld2       {v0.b, v1.b}[7], [%1]           \n"
			
 
				+
			
 
				+    MEMACCESS(2)
			
 
				+    "st1       {v0.d}[0], [%2]                 \n"
			
 
				+    MEMACCESS(3)
			
 
				+    "st1       {v1.d}[0], [%3]                 \n"
			
 
				+
			
 
				+    "4:                                        \n"
			
 
				+
			
 
				+    : "+r"(src_temp),                             // %0
			
 
				+      "+r"(src),                                  // %1
			
 
				+      "+r"(dst_a),                                // %2
			
 
				+      "+r"(dst_b),                                // %3
			
 
				+      "+r"(width64)                               // %4
			
 
				+    : "r"(static_cast<ptrdiff_t>(src_stride)),    // %5
			
 
				+      "r"(static_cast<ptrdiff_t>(dst_stride_a)),  // %6
			
 
				+      "r"(static_cast<ptrdiff_t>(dst_stride_b)),  // %7
			
 
				+      "r"(&kVTbl4x4TransposeDi)                   // %8
			
 
				+    : "memory", "cc",
			
 
				+      "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
			
 
				+      "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
			
 
				+      "v30", "v31"
			
 
				+  );
			
 
				+}
			
 
				+#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/rotate_win.cc
+++ b/src/jni/libyuv/source/rotate_win.cc
@@ -0,0 +1,247 @@
 
				+/*
			
 
				+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/rotate_row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// This module is for 32 bit Visual C x86 and clangcl
			
 
				+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
			
 
				+
			
 
				+__declspec(naked)  // Transposes 8 source rows into 8-byte destination rows, 8 columns per loop pass.
			
 
				+void TransposeWx8_SSSE3(const uint8* src, int src_stride,
			
 
				+                        uint8* dst, int dst_stride, int width) {
			
 
				+  __asm {  // Naked function: register saves, restores and ret are written by hand below.
			
 
				+    push      edi
			
 
				+    push      esi
			
 
				+    push      ebp  // 3 pushes = 12 bytes, so the arguments start at esp + 12 + 4.
			
 
				+    mov       eax, [esp + 12 + 4]   // src
			
 
				+    mov       edi, [esp + 12 + 8]   // src_stride
			
 
				+    mov       edx, [esp + 12 + 12]  // dst
			
 
				+    mov       esi, [esp + 12 + 16]  // dst_stride
			
 
				+    mov       ecx, [esp + 12 + 20]  // width
			
 
				+
			
 
				+    // Each pass reads 8 bytes from each of 8 consecutive source rows.
			
 
				+    // First round: interleave the bytes of adjacent row pairs (punpcklbw).
			
 
				+    align      4
			
 
				+ convertloop:
			
 
				+    movq      xmm0, qword ptr [eax]  // row 0
			
 
				+    lea       ebp, [eax + 8]  // ebp = src for the next pass (8 bytes further along row 0)
			
 
				+    movq      xmm1, qword ptr [eax + edi]  // row 1
			
 
				+    lea       eax, [eax + 2 * edi]
			
 
				+    punpcklbw xmm0, xmm1
			
 
				+    movq      xmm2, qword ptr [eax]  // row 2
			
 
				+    movdqa    xmm1, xmm0
			
 
				+    palignr   xmm1, xmm1, 8  // xmm1 = copy of xmm0 rotated by 8 bytes (upper half now in the low half)
			
 
				+    movq      xmm3, qword ptr [eax + edi]  // row 3
			
 
				+    lea       eax, [eax + 2 * edi]
			
 
				+    punpcklbw xmm2, xmm3
			
 
				+    movdqa    xmm3, xmm2
			
 
				+    movq      xmm4, qword ptr [eax]  // row 4
			
 
				+    palignr   xmm3, xmm3, 8
			
 
				+    movq      xmm5, qword ptr [eax + edi]  // row 5
			
 
				+    punpcklbw xmm4, xmm5
			
 
				+    lea       eax, [eax + 2 * edi]
			
 
				+    movdqa    xmm5, xmm4
			
 
				+    movq      xmm6, qword ptr [eax]  // row 6
			
 
				+    palignr   xmm5, xmm5, 8
			
 
				+    movq      xmm7, qword ptr [eax + edi]  // row 7
			
 
				+    punpcklbw xmm6, xmm7
			
 
				+    mov       eax, ebp  // src += 8
			
 
				+    movdqa    xmm7, xmm6
			
 
				+    palignr   xmm7, xmm7, 8
			
 
				+    // Second round: interleave 16-bit units (punpcklwd).
			
 
				+    punpcklwd xmm0, xmm2
			
 
				+    punpcklwd xmm1, xmm3
			
 
				+    movdqa    xmm2, xmm0
			
 
				+    movdqa    xmm3, xmm1
			
 
				+    palignr   xmm2, xmm2, 8
			
 
				+    palignr   xmm3, xmm3, 8
			
 
				+    punpcklwd xmm4, xmm6
			
 
				+    punpcklwd xmm5, xmm7
			
 
				+    movdqa    xmm6, xmm4
			
 
				+    movdqa    xmm7, xmm5
			
 
				+    palignr   xmm6, xmm6, 8
			
 
				+    palignr   xmm7, xmm7, 8
			
 
				+    // Third round: interleave 32-bit units (punpckldq), and
			
 
				+    // write 8 bytes to each of 8 consecutive destination rows.
			
 
				+    punpckldq xmm0, xmm4
			
 
				+    movq      qword ptr [edx], xmm0
			
 
				+    movdqa    xmm4, xmm0
			
 
				+    palignr   xmm4, xmm4, 8
			
 
				+    movq      qword ptr [edx + esi], xmm4
			
 
				+    lea       edx, [edx + 2 * esi]
			
 
				+    punpckldq xmm2, xmm6
			
 
				+    movdqa    xmm6, xmm2
			
 
				+    palignr   xmm6, xmm6, 8
			
 
				+    movq      qword ptr [edx], xmm2
			
 
				+    punpckldq xmm1, xmm5
			
 
				+    movq      qword ptr [edx + esi], xmm6
			
 
				+    lea       edx, [edx + 2 * esi]
			
 
				+    movdqa    xmm5, xmm1
			
 
				+    movq      qword ptr [edx], xmm1
			
 
				+    palignr   xmm5, xmm5, 8
			
 
				+    punpckldq xmm3, xmm7
			
 
				+    movq      qword ptr [edx + esi], xmm5
			
 
				+    lea       edx, [edx + 2 * esi]
			
 
				+    movq      qword ptr [edx], xmm3
			
 
				+    movdqa    xmm7, xmm3
			
 
				+    palignr   xmm7, xmm7, 8
			
 
				+    sub       ecx, 8  // width -= 8; sets the flags that jg tests below
			
 
				+    movq      qword ptr [edx + esi], xmm7  // movq and lea leave the flags untouched
			
 
				+    lea       edx, [edx + 2 * esi]  // dst has now advanced by 8 * dst_stride
			
 
				+    jg        convertloop  // loop while the remaining width is > 0
			
 
				+    // NOTE(review): a full 8-column block is always processed; presumably width is a multiple of 8 - confirm.
			
 
				+    pop       ebp
			
 
				+    pop       esi
			
 
				+    pop       edi
			
 
				+    ret
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+__declspec(naked)  // Transposes 8 rows of 16 bytes per pass; low result halves go to dst_a, high halves to dst_b.
			
 
				+void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
			
 
				+                         uint8* dst_a, int dst_stride_a,
			
 
				+                         uint8* dst_b, int dst_stride_b,
			
 
				+                         int w) {
			
 
				+  __asm {  // Naked function: register saves, stack alignment and ret are written by hand below.
			
 
				+    push      ebx
			
 
				+    push      esi
			
 
				+    push      edi
			
 
				+    push      ebp  // 4 pushes = 16 bytes, so the arguments start at esp + 16 + 4.
			
 
				+    mov       eax, [esp + 16 + 4]   // src
			
 
				+    mov       edi, [esp + 16 + 8]   // src_stride
			
 
				+    mov       edx, [esp + 16 + 12]  // dst_a
			
 
				+    mov       esi, [esp + 16 + 16]  // dst_stride_a
			
 
				+    mov       ebx, [esp + 16 + 20]  // dst_b
			
 
				+    mov       ebp, [esp + 16 + 24]  // dst_stride_b
			
 
				+    mov       ecx, esp  // remember the original esp
			
 
				+    sub       esp, 4 + 16  // room for the saved esp (4) and a 16-byte spill slot
			
 
				+    and       esp, ~15  // round esp down to a 16-byte boundary
			
 
				+    mov       [esp + 16], ecx  // saved esp sits just above the spill slot at [esp]
			
 
				+    mov       ecx, [ecx + 16 + 28]  // w
			
 
				+
			
 
				+    align      4
			
 
				+ convertloop:
			
 
				+    // Each pass reads 16 bytes from each of 8 consecutive source rows.
			
 
				+    // First round: interleave the bytes of adjacent row pairs (low and high halves).
			
 
				+    movdqu    xmm0, [eax]
			
 
				+    movdqu    xmm1, [eax + edi]
			
 
				+    lea       eax, [eax + 2 * edi]
			
 
				+    movdqa    xmm7, xmm0  // use xmm7 as temp register.
			
 
				+    punpcklbw xmm0, xmm1
			
 
				+    punpckhbw xmm7, xmm1
			
 
				+    movdqa    xmm1, xmm7
			
 
				+    movdqu    xmm2, [eax]
			
 
				+    movdqu    xmm3, [eax + edi]
			
 
				+    lea       eax, [eax + 2 * edi]
			
 
				+    movdqa    xmm7, xmm2
			
 
				+    punpcklbw xmm2, xmm3
			
 
				+    punpckhbw xmm7, xmm3
			
 
				+    movdqa    xmm3, xmm7
			
 
				+    movdqu    xmm4, [eax]
			
 
				+    movdqu    xmm5, [eax + edi]
			
 
				+    lea       eax, [eax + 2 * edi]
			
 
				+    movdqa    xmm7, xmm4
			
 
				+    punpcklbw xmm4, xmm5
			
 
				+    punpckhbw xmm7, xmm5
			
 
				+    movdqa    xmm5, xmm7
			
 
				+    movdqu    xmm6, [eax]
			
 
				+    movdqu    xmm7, [eax + edi]
			
 
				+    lea       eax, [eax + 2 * edi]  // eax = src + 8 * src_stride
			
 
				+    movdqu    [esp], xmm5  // backup xmm5: all 8 xmm registers are live, so spill to the stack slot
			
 
				+    neg       edi  // edi = -src_stride
			
 
				+    movdqa    xmm5, xmm6   // use xmm5 as temp register.
			
 
				+    punpcklbw xmm6, xmm7
			
 
				+    punpckhbw xmm5, xmm7
			
 
				+    movdqa    xmm7, xmm5
			
 
				+    lea       eax, [eax + 8 * edi + 16]  // back to row 0, 16 bytes further along: src += 16
			
 
				+    neg       edi  // restore edi = src_stride
			
 
				+    // Second round: interleave 16-bit units (punpcklwd / punpckhwd).
			
 
				+    movdqa    xmm5, xmm0
			
 
				+    punpcklwd xmm0, xmm2
			
 
				+    punpckhwd xmm5, xmm2
			
 
				+    movdqa    xmm2, xmm5
			
 
				+    movdqa    xmm5, xmm1
			
 
				+    punpcklwd xmm1, xmm3
			
 
				+    punpckhwd xmm5, xmm3
			
 
				+    movdqa    xmm3, xmm5
			
 
				+    movdqa    xmm5, xmm4
			
 
				+    punpcklwd xmm4, xmm6
			
 
				+    punpckhwd xmm5, xmm6
			
 
				+    movdqa    xmm6, xmm5
			
 
				+    movdqu    xmm5, [esp]  // restore xmm5
			
 
				+    movdqu    [esp], xmm6  // backup xmm6
			
 
				+    movdqa    xmm6, xmm5    // use xmm6 as temp register.
			
 
				+    punpcklwd xmm5, xmm7
			
 
				+    punpckhwd xmm6, xmm7
			
 
				+    movdqa    xmm7, xmm6
			
 
				+    // Third round: interleave 32-bit units (punpckldq / punpckhdq).
			
 
				+    // Each result register is split: low 8 bytes to dst_a (edx), high 8 bytes to dst_b (ebx).
			
 
				+    movdqa    xmm6, xmm0
			
 
				+    punpckldq xmm0, xmm4
			
 
				+    punpckhdq xmm6, xmm4
			
 
				+    movdqa    xmm4, xmm6
			
 
				+    movdqu    xmm6, [esp]  // restore xmm6
			
 
				+    movlpd    qword ptr [edx], xmm0
			
 
				+    movhpd    qword ptr [ebx], xmm0
			
 
				+    movlpd    qword ptr [edx + esi], xmm4
			
 
				+    lea       edx, [edx + 2 * esi]
			
 
				+    movhpd    qword ptr [ebx + ebp], xmm4
			
 
				+    lea       ebx, [ebx + 2 * ebp]
			
 
				+    movdqa    xmm0, xmm2   // use xmm0 as the temp register.
			
 
				+    punpckldq xmm2, xmm6
			
 
				+    movlpd    qword ptr [edx], xmm2
			
 
				+    movhpd    qword ptr [ebx], xmm2
			
 
				+    punpckhdq xmm0, xmm6
			
 
				+    movlpd    qword ptr [edx + esi], xmm0
			
 
				+    lea       edx, [edx + 2 * esi]
			
 
				+    movhpd    qword ptr [ebx + ebp], xmm0
			
 
				+    lea       ebx, [ebx + 2 * ebp]
			
 
				+    movdqa    xmm0, xmm1   // use xmm0 as the temp register.
			
 
				+    punpckldq xmm1, xmm5
			
 
				+    movlpd    qword ptr [edx], xmm1
			
 
				+    movhpd    qword ptr [ebx], xmm1
			
 
				+    punpckhdq xmm0, xmm5
			
 
				+    movlpd    qword ptr [edx + esi], xmm0
			
 
				+    lea       edx, [edx + 2 * esi]
			
 
				+    movhpd    qword ptr [ebx + ebp], xmm0
			
 
				+    lea       ebx, [ebx + 2 * ebp]
			
 
				+    movdqa    xmm0, xmm3   // use xmm0 as the temp register.
			
 
				+    punpckldq xmm3, xmm7
			
 
				+    movlpd    qword ptr [edx], xmm3
			
 
				+    movhpd    qword ptr [ebx], xmm3
			
 
				+    punpckhdq xmm0, xmm7
			
 
				+    sub       ecx, 8  // w -= 8; sets the flags that jg tests below
			
 
				+    movlpd    qword ptr [edx + esi], xmm0  // the stores and lea below leave the flags untouched
			
 
				+    lea       edx, [edx + 2 * esi]
			
 
				+    movhpd    qword ptr [ebx + ebp], xmm0
			
 
				+    lea       ebx, [ebx + 2 * ebp]
			
 
				+    jg        convertloop  // loop while the remaining w is > 0
			
 
				+    // NOTE(review): a full 8-column block is always processed; presumably w is a multiple of 8 - confirm.
			
 
				+    mov       esp, [esp + 16]  // restore the esp saved before alignment
			
 
				+    pop       ebp
			
 
				+    pop       edi
			
 
				+    pop       esi
			
 
				+    pop       ebx
			
 
				+    ret
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/row_any.cc
+++ b/src/jni/libyuv/source/row_any.cc
@@ -0,0 +1,818 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/row.h"
			
 
				+
			
 
				+#include <string.h>  // For memset.
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Subsampled source size: width shifted right by `shift`, rounded up (one extra sample when width is not a multiple of 1 << shift).
			
 
				+#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
			
 
				+
			
 
				+// Any 4 planes to 1 with yuvconstants. ANY_SIMD converts the first n pixels in place; the last r pixels of the y/u/v/a planes are staged in zero-filled 64-byte temp slots, converted as one MASK + 1 wide call into temp + 256, and only the valid output bytes are copied to dst_ptr.
			
 
				+#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)                \
			
 
				+    void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf,   \
			
 
				+                 const uint8* a_buf, uint8* dst_ptr,                           \
			
 
				+                 const struct YuvConstants* yuvconstants,  int width) {        \
			
 
				+      SIMD_ALIGNED(uint8 temp[64 * 5]);                                        \
			
 
				+      memset(temp, 0, 64 * 4);  /* for msan */                                 \
			
 
				+      int r = width & MASK;  /* leftover pixels */                             \
			
 
				+      int n = width & ~MASK;  /* multiple of MASK + 1 */                       \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n);        \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, y_buf + n, r);                                              \
			
 
				+      memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
			
 
				+      memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));              \
			
 
				+      memcpy(temp + 192, a_buf + n, r);                                        \
			
 
				+      ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256,            \
			
 
				+               yuvconstants, MASK + 1);                                        \
			
 
				+      memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256,                      \
			
 
				+             SS(r, DUVSHIFT) * BPP);                                           \
			
 
				+    }
			
 
				+
			
 
				+#ifdef HAS_I422ALPHATOARGBROW_SSSE3
			
 
				+ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_I422ALPHATOARGBROW_AVX2
			
 
				+ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_I422ALPHATOARGBROW_NEON
			
 
				+ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
			
 
				+#endif
			
 
				+#undef ANY41C
			
 
				+
			
 
				+// Any 3 planes to 1. ANY_SIMD converts the first n pixels in place; the last r pixels of the y/u/v planes are staged in zero-filled 64-byte temp slots, converted as one MASK + 1 wide call into temp + 192, and only the valid output bytes are copied to dst_ptr.
			
 
				+#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)                 \
			
 
				+    void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf,   \
			
 
				+                 uint8* dst_ptr, int width) {                                  \
			
 
				+      SIMD_ALIGNED(uint8 temp[64 * 4]);                                        \
			
 
				+      memset(temp, 0, 64 * 3);  /* for YUY2 and msan */                        \
			
 
				+      int r = width & MASK;  /* leftover pixels */                             \
			
 
				+      int n = width & ~MASK;  /* multiple of MASK + 1 */                       \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n);                             \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, y_buf + n, r);                                              \
			
 
				+      memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
			
 
				+      memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));              \
			
 
				+      ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1);             \
			
 
				+      memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192,                      \
			
 
				+             SS(r, DUVSHIFT) * BPP);                                           \
			
 
				+    }
			
 
				+#ifdef HAS_I422TOYUY2ROW_SSE2
			
 
				+ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
			
 
				+ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_I422TOYUY2ROW_NEON
			
 
				+ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_I422TOUYVYROW_NEON
			
 
				+ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_BLENDPLANEROW_AVX2
			
 
				+ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_BLENDPLANEROW_SSSE3
			
 
				+ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
			
 
				+#endif
			
 
				+#undef ANY31
			
 
				+
			
 
				+// Note that odd width replication includes 444 due to implementation
			
 
				+// on arm that subsamples 444 to 422 internally.
			
 
				+// Any 3 planes to 1 with yuvconstants. ANY_SIMD converts the first n pixels in place; the last r pixels of the y/u/v planes are staged in zero-filled 64-byte temp slots (for odd width the last staged U and V byte is duplicated into the next slot), converted as one MASK + 1 wide call into temp + 192, and only the valid output bytes are copied to dst_ptr.
			
 
				+#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK)                \
			
 
				+    void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf,   \
			
 
				+                 uint8* dst_ptr, const struct YuvConstants* yuvconstants,      \
			
 
				+                 int width) {                                                  \
			
 
				+      SIMD_ALIGNED(uint8 temp[64 * 4]);                                        \
			
 
				+      memset(temp, 0, 64 * 3);  /* for YUY2 and msan */                        \
			
 
				+      int r = width & MASK;  /* leftover pixels */                             \
			
 
				+      int n = width & ~MASK;  /* multiple of MASK + 1 */                       \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n);               \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, y_buf + n, r);                                              \
			
 
				+      memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT));               \
			
 
				+      memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT));              \
			
 
				+      if (width & 1) {  /* odd width: repeat last U/V */                       \
			
 
				+        temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1];             \
			
 
				+        temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1];           \
			
 
				+      }                                                                        \
			
 
				+      ANY_SIMD(temp, temp + 64, temp + 128, temp + 192,                        \
			
 
				+               yuvconstants, MASK + 1);                                        \
			
 
				+      memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192,                      \
			
 
				+             SS(r, DUVSHIFT) * BPP);                                           \
			
 
				+    }
			
 
				+
			
 
				+#ifdef HAS_I422TOARGBROW_SSSE3
			
 
				+ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_I411TOARGBROW_SSSE3
			
 
				+ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_I444TOARGBROW_SSSE3
			
 
				+ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
			
 
				+ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
			
 
				+ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
			
 
				+ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
			
 
				+ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
			
 
				+ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
			
 
				+#endif  // HAS_I444TOARGBROW_SSSE3
			
 
				+#ifdef HAS_I422TORGB24ROW_AVX2
			
 
				+ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_I422TOARGBROW_AVX2
			
 
				+ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_I422TORGBAROW_AVX2
			
 
				+ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_I444TOARGBROW_AVX2
			
 
				+ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_I411TOARGBROW_AVX2
			
 
				+ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_I422TOARGB4444ROW_AVX2
			
 
				+ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_I422TOARGB1555ROW_AVX2
			
 
				+ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_I422TORGB565ROW_AVX2
			
 
				+ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_I422TOARGBROW_NEON
			
 
				+ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
			
 
				+ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
			
 
				+ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
			
 
				+ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
			
 
				+ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
			
 
				+ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
			
 
				+ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
			
 
				+ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
			
 
				+#endif
			
 
				+#undef ANY31C
			
 
				+
			
 
				+// Any 2 planes to 1. ANY_SIMD converts the first n pixels in place; the last r pixels of both sources (SBPP / SBPP2 bytes per pixel) are staged in zero-filled 64-byte temp slots, converted as one MASK + 1 wide call into temp + 128, and r * BPP output bytes are copied to dst_ptr.
			
 
				+#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)              \
			
 
				+    void NAMEANY(const uint8* y_buf, const uint8* uv_buf,                      \
			
 
				+                 uint8* dst_ptr, int width) {                                  \
			
 
				+      SIMD_ALIGNED(uint8 temp[64 * 3]);                                        \
			
 
				+      memset(temp, 0, 64 * 2);  /* for msan */                                 \
			
 
				+      int r = width & MASK;  /* leftover pixels */                             \
			
 
				+      int n = width & ~MASK;  /* multiple of MASK + 1 */                       \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(y_buf, uv_buf, dst_ptr, n);                                   \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, y_buf + n * SBPP, r * SBPP);                                \
			
 
				+      memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2,                       \
			
 
				+             SS(r, UVSHIFT) * SBPP2);                                          \
			
 
				+      ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1);                         \
			
 
				+      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \
			
 
				+    }
			
 
				+
			
 
				+// Merge functions.
			
 
				+#ifdef HAS_MERGEUVROW_SSE2
			
 
				+ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_MERGEUVROW_AVX2
			
 
				+ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_MERGEUVROW_NEON
			
 
				+ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
			
 
				+#endif
			
 
				+
			
 
				+// Math functions.
			
 
				+#ifdef HAS_ARGBMULTIPLYROW_SSE2
			
 
				+ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBADDROW_SSE2
			
 
				+ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBSUBTRACTROW_SSE2
			
 
				+ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBMULTIPLYROW_AVX2
			
 
				+ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBADDROW_AVX2
			
 
				+ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBSUBTRACTROW_AVX2
			
 
				+ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBMULTIPLYROW_NEON
			
 
				+ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBADDROW_NEON
			
 
				+ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBSUBTRACTROW_NEON
			
 
				+ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_SOBELROW_SSE2
			
 
				+ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_SOBELROW_NEON
			
 
				+ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_SOBELTOPLANEROW_SSE2
			
 
				+ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_SOBELTOPLANEROW_NEON
			
 
				+ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_SOBELXYROW_SSE2
			
 
				+ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_SOBELXYROW_NEON
			
 
				+ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
			
 
				+#endif
			
 
				+#undef ANY21
			
 
				+
			
 
				+// Any 2 planes to 1 with yuvconstants. ANY_SIMD converts the first n pixels in place; the last r pixels of both sources (SBPP / SBPP2 bytes per pixel) are staged in zero-filled 64-byte temp slots, converted as one MASK + 1 wide call into temp + 128, and r * BPP output bytes are copied to dst_ptr.
			
 
				+#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK)             \
			
 
				+    void NAMEANY(const uint8* y_buf, const uint8* uv_buf,                      \
			
 
				+                 uint8* dst_ptr, const struct YuvConstants* yuvconstants,      \
			
 
				+                 int width) {                                                  \
			
 
				+      SIMD_ALIGNED(uint8 temp[64 * 3]);                                        \
			
 
				+      memset(temp, 0, 64 * 2);  /* for msan */                                 \
			
 
				+      int r = width & MASK;  /* leftover pixels */                             \
			
 
				+      int n = width & ~MASK;  /* multiple of MASK + 1 */                       \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n);                     \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, y_buf + n * SBPP, r * SBPP);                                \
			
 
				+      memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2,                       \
			
 
				+             SS(r, UVSHIFT) * SBPP2);                                          \
			
 
				+      ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1);           \
			
 
				+      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \
			
 
				+    }
			
 
				+
			
 
				+// Biplanar to RGB.
			
 
				+#ifdef HAS_NV12TOARGBROW_SSSE3
			
 
				+ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_NV12TOARGBROW_AVX2
			
 
				+ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_NV12TOARGBROW_NEON
			
 
				+ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_NV21TOARGBROW_SSSE3
			
 
				+ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_NV21TOARGBROW_AVX2
			
 
				+ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_NV21TOARGBROW_NEON
			
 
				+ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_NV12TORGB565ROW_SSSE3
			
 
				+ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_NV12TORGB565ROW_AVX2
			
 
				+ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_NV12TORGB565ROW_NEON
			
 
				+ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
			
 
				+#endif
			
 
				+#undef ANY21C
			
 
				+
			
 
				+// Any 1 to 1. ANY_SIMD converts the first n pixels in place; the source bytes for the last r pixels are staged in the zero-filled first 128 bytes of temp, converted as one MASK + 1 wide call into temp + 128, and r * BPP output bytes are copied to dst_ptr.
			
 
				+#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                     \
			
 
				+    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {            \
			
 
				+      SIMD_ALIGNED(uint8 temp[128 * 2]);                                       \
			
 
				+      memset(temp, 0, 128);  /* for YUY2 and msan */                           \
			
 
				+      int r = width & MASK;  /* leftover pixels */                             \
			
 
				+      int n = width & ~MASK;  /* multiple of MASK + 1 */                       \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(src_ptr, dst_ptr, n);                                         \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP);    \
			
 
				+      ANY_SIMD(temp, temp + 128, MASK + 1);                                    \
			
 
				+      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \
			
 
				+    }
			
 
				+
			
 
				+#ifdef HAS_COPYROW_AVX
			
 
				+ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
			
 
				+#endif
			
 
				+#ifdef HAS_COPYROW_SSE2
			
 
				+ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_COPYROW_NEON
			
 
				+ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
			
 
				+#endif
			
 
				+#if defined(HAS_ARGBTORGB24ROW_SSSE3)
			
 
				+ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
			
 
				+ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
			
 
				+ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
			
 
				+ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
			
 
				+ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
			
 
				+#endif
			
 
				+#if defined(HAS_ARGBTORGB565ROW_AVX2)
			
 
				+ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
			
 
				+#endif
			
 
				+#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
			
 
				+ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
			
 
				+ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
			
 
				+#endif
			
 
				+#if defined(HAS_J400TOARGBROW_SSE2)
			
 
				+ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
			
 
				+#endif
			
 
				+#if defined(HAS_J400TOARGBROW_AVX2)
			
 
				+ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
			
 
				+#endif
			
 
				+#if defined(HAS_I400TOARGBROW_SSE2)
			
 
				+ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
			
 
				+#endif
			
 
				+#if defined(HAS_I400TOARGBROW_AVX2)
			
 
				+ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
			
 
				+#endif
			
 
				+#if defined(HAS_RGB24TOARGBROW_SSSE3)
			
 
				+ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
			
 
				+ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
			
 
				+ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
			
 
				+ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
			
 
				+ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
			
 
				+#endif
			
 
				+#if defined(HAS_RAWTORGB24ROW_SSSE3)
			
 
				+ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
			
 
				+#endif
			
 
				+#if defined(HAS_RGB565TOARGBROW_AVX2)
			
 
				+ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
			
 
				+#endif
			
 
				+#if defined(HAS_ARGB1555TOARGBROW_AVX2)
			
 
				+ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
			
 
				+#endif
			
 
				+#if defined(HAS_ARGB4444TOARGBROW_AVX2)
			
 
				+ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
			
 
				+#endif
			
 
				+#if defined(HAS_ARGBTORGB24ROW_NEON)
			
 
				+ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
			
 
				+ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
			
 
				+ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
			
 
				+ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
			
 
				+ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
			
 
				+ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
			
 
				+ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
			
 
				+#endif
			
 
				+#if defined(HAS_RAWTORGB24ROW_NEON)
			
 
				+ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOYROW_AVX2
			
 
				+ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOYJROW_AVX2
			
 
				+ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_UYVYTOYROW_AVX2
			
 
				+ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_YUY2TOYROW_AVX2
			
 
				+ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOYROW_SSSE3
			
 
				+ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_BGRATOYROW_SSSE3
			
 
				+ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
			
 
				+ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
			
 
				+ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
			
 
				+ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
			
 
				+ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOYJROW_SSSE3
			
 
				+ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOYROW_NEON
			
 
				+ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOYJROW_NEON
			
 
				+ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_BGRATOYROW_NEON
			
 
				+ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ABGRTOYROW_NEON
			
 
				+ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_RGBATOYROW_NEON
			
 
				+ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_RGB24TOYROW_NEON
			
 
				+ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_RAWTOYROW_NEON
			
 
				+ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_RGB565TOYROW_NEON
			
 
				+ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGB1555TOYROW_NEON
			
 
				+ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGB4444TOYROW_NEON
			
 
				+ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_YUY2TOYROW_NEON
			
 
				+ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_UYVYTOYROW_NEON
			
 
				+ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 0, 2, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_RGB24TOARGBROW_NEON
			
 
				+ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_RAWTOARGBROW_NEON
			
 
				+ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_RGB565TOARGBROW_NEON
			
 
				+ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGB1555TOARGBROW_NEON
			
 
				+ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGB4444TOARGBROW_NEON
			
 
				+ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBATTENUATEROW_SSSE3
			
 
				+ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBUNATTENUATEROW_SSE2
			
 
				+ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBATTENUATEROW_AVX2
			
 
				+ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBUNATTENUATEROW_AVX2
			
 
				+ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBATTENUATEROW_NEON
			
 
				+ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
			
 
				+#endif
			
 
				+#undef ANY11
			
 
				+
			
 
				+// Any 1 to 1 with yuvconstants. ANY_SIMD converts the first n pixels in place; the source bytes for the last r pixels are staged in the zero-filled first 128 bytes of temp, converted as one MASK + 1 wide call into temp + 128, and r * BPP output bytes are copied to dst_ptr.
			
 
				+#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                    \
			
 
				+    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr,                         \
			
 
				+                 const struct YuvConstants* yuvconstants, int width) {         \
			
 
				+      SIMD_ALIGNED(uint8 temp[128 * 2]);                                       \
			
 
				+      memset(temp, 0, 128);  /* for YUY2 and msan */                           \
			
 
				+      int r = width & MASK;  /* leftover pixels */                             \
			
 
				+      int n = width & ~MASK;  /* multiple of MASK + 1 */                       \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n);                           \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP);    \
			
 
				+      ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1);                      \
			
 
				+      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \
			
 
				+    }
			
 
				+#if defined(HAS_YUY2TOARGBROW_SSSE3)
			
 
				+ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
			
 
				+ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
			
 
				+#endif
			
 
				+#if defined(HAS_YUY2TOARGBROW_AVX2)
			
 
				+ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
			
 
				+ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
			
 
				+#endif
			
 
				+#if defined(HAS_YUY2TOARGBROW_NEON)
			
 
				+ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
			
 
				+ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
			
 
				+#endif
			
 
				+#undef ANY11C
			
 
				+
			
 
				+// Any 1 to 1 blended. ANY_SIMD processes the first n pixels in place; for the last r pixels both the source bytes (temp) and the existing destination bytes (temp + 128) are staged in a fully zeroed temp, so the MASK + 1 wide call sees the current dst contents before r * BPP bytes are copied back to dst_ptr.
			
 
				+#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK)                    \
			
 
				+    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {            \
			
 
				+      SIMD_ALIGNED(uint8 temp[128 * 2]);                                       \
			
 
				+      memset(temp, 0, 128 * 2);  /* for YUY2 and msan */                       \
			
 
				+      int r = width & MASK;  /* leftover pixels */                             \
			
 
				+      int n = width & ~MASK;  /* multiple of MASK + 1 */                       \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(src_ptr, dst_ptr, n);                                         \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP);    \
			
 
				+      memcpy(temp + 128, dst_ptr + n * BPP, r * BPP);                          \
			
 
				+      ANY_SIMD(temp, temp + 128, MASK + 1);                                    \
			
 
				+      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \
			
 
				+    }
			
 
				+
			
 
				+#ifdef HAS_ARGBCOPYALPHAROW_AVX2
			
 
				+ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
			
 
				+#endif
			
 
				+// Guard with the feature macro of the function being wrapped (ARGBCopyAlphaRow_SSE2), not the CopyYToAlpha one, so the wrapper is emitted exactly when that row function is available.
			
 
				+#ifdef HAS_ARGBCOPYALPHAROW_SSE2
			
 
				+ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
			
 
				+ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
			
 
				+ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
			
 
				+#endif
			
 
				+#undef ANY11B
			
 
				+
			
 
				+// Any 1 to 1 with parameter. The extra argument (type T, named shuffler) is forwarded unchanged to ANY_SIMD. ANY_SIMD converts the first n pixels in place; the last r pixels are staged in the zero-filled first 64 bytes of temp, converted as one MASK + 1 wide call into temp + 64, and r * BPP output bytes are copied to dst_ptr.
			
 
				+#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK)                          \
			
 
				+    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr,                         \
			
 
				+                 T shuffler, int width) {                                      \
			
 
				+      SIMD_ALIGNED(uint8 temp[64 * 2]);                                        \
			
 
				+      memset(temp, 0, 64);  /* for msan */                                     \
			
 
				+      int r = width & MASK;  /* leftover pixels */                             \
			
 
				+      int n = width & ~MASK;  /* multiple of MASK + 1 */                       \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(src_ptr, dst_ptr, shuffler, n);                               \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, src_ptr + n * SBPP, r * SBPP);                              \
			
 
				+      ANY_SIMD(temp, temp + 64, shuffler, MASK + 1);                           \
			
 
				+      memcpy(dst_ptr + n * BPP, temp + 64, r * BPP);                           \
			
 
				+    }
			
 
				+
			
 
				+// ANY11P instantiations.  Arguments after the kernel name are
			
 
				+// T (type of the extra parameter), SBPP, BPP and MASK; the kernel is only
			
 
				+// ever called with a pixel count that is a multiple of MASK + 1.
			
 
				+#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
			
 
				+ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
			
 
				+       const uint32, 4, 2, 3)
			
 
				+#endif
			
 
				+#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
			
 
				+ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
			
 
				+       const uint32, 4, 2, 7)
			
 
				+#endif
			
 
				+#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
			
 
				+ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON,
			
 
				+       const uint32, 4, 2, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBSHUFFLEROW_SSE2
			
 
				+ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBSHUFFLEROW_SSSE3
			
 
				+ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBSHUFFLEROW_AVX2
			
 
				+ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8*, 4, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBSHUFFLEROW_NEON
			
 
				+ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3)
			
 
				+#endif
			
 
				+#undef ANY11P
			
 
				+
			
 
				+// Any 1 to 1 interpolate.  Takes 2 rows of source via stride.
			
 
				+// Defines NAMEANY(dst_ptr, src_ptr, src_stride_ptr, width,
			
 
				+// source_y_fraction).  The first n = width & ~MASK pixels are passed to
			
 
				+// ANY_SIMD unchanged.  For the r = width & MASK leftover pixels, the tail of
			
 
				+// the first row is copied to temp[0..63] and the tail of the second row
			
 
				+// (src_ptr + src_stride_ptr) to temp[64..127]; ANY_SIMD is then run on
			
 
				+// MASK + 1 pixels with a stride of 64 so it sees the two scratch rows, and
			
 
				+// writes to temp[128..191], from which r * BPP bytes are copied out.
			
 
				+// Only the two input rows of temp are zeroed; the scratch pass always runs.
			
 
				+// NOTE(review): assumes (MASK + 1) * max(SBPP, BPP) <= 64 -- confirm for
			
 
				+// any new instantiation.
			
 
				+#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                             \
			
 
				+    void NAMEANY(uint8* dst_ptr, const uint8* src_ptr,                         \
			
 
				+                 ptrdiff_t src_stride_ptr, int width,                          \
			
 
				+                 int source_y_fraction) {                                      \
			
 
				+      SIMD_ALIGNED(uint8 temp[64 * 3]);                                        \
			
 
				+      memset(temp, 0, 64 * 2);  /* for msan */                                 \
			
 
				+      int r = width & MASK;                                                    \
			
 
				+      int n = width & ~MASK;                                                   \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction);      \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, src_ptr + n * SBPP, r * SBPP);                              \
			
 
				+      memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP);        \
			
 
				+      ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction);             \
			
 
				+      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP);                          \
			
 
				+    }
			
 
				+
			
 
				+// ANY11T instantiations.  Arguments after the kernel name are SBPP, BPP and
			
 
				+// MASK; all of them use 1 byte per element on both sides.
			
 
				+#ifdef HAS_INTERPOLATEROW_AVX2
			
 
				+ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_INTERPOLATEROW_SSSE3
			
 
				+ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_INTERPOLATEROW_NEON
			
 
				+ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_INTERPOLATEROW_DSPR2
			
 
				+ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3)
			
 
				+#endif
			
 
				+#undef ANY11T
			
 
				+
			
 
				+// Any 1 to 1 mirror.
			
 
				+// Defines NAMEANY(src_ptr, dst_ptr, width).  Unlike the other wrappers the
			
 
				+// bulk call skips the FIRST r = width & MASK source pixels: ANY_SIMD reads
			
 
				+// n = width & ~MASK pixels starting at src_ptr + r * BPP and writes them at
			
 
				+// the start of dst_ptr.  The skipped leading r pixels are copied into
			
 
				+// temp[0..63] (zeroed first), ANY_SIMD is run on MASK + 1 pixels into
			
 
				+// temp[64..127], and the LAST r pixels of that output (offset
			
 
				+// (MASK + 1 - r) * BPP) are copied to dst_ptr + n * BPP.
			
 
				+// NOTE(review): this relies on ANY_SIMD reversing pixel order, so that the
			
 
				+// r real pixels end up at the tail of the scratch output -- implied by the
			
 
				+// kernel names, not visible in this section.
			
 
				+#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK)                                   \
			
 
				+    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) {            \
			
 
				+      SIMD_ALIGNED(uint8 temp[64 * 2]);                                        \
			
 
				+      memset(temp, 0, 64);  /* for msan */                                     \
			
 
				+      int r = width & MASK;                                                    \
			
 
				+      int n = width & ~MASK;                                                   \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(src_ptr + r * BPP, dst_ptr, n);                               \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, src_ptr, r * BPP);                                          \
			
 
				+      ANY_SIMD(temp, temp + 64, MASK + 1);                                     \
			
 
				+      memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP);    \
			
 
				+    }
			
 
				+
			
 
				+// ANY11M instantiations.  Arguments after the kernel name are BPP (bytes per
			
 
				+// pixel) and MASK (kernel pixel step - 1).
			
 
				+#ifdef HAS_MIRRORROW_AVX2
			
 
				+ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_MIRRORROW_SSSE3
			
 
				+ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_MIRRORROW_NEON
			
 
				+ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBMIRRORROW_AVX2
			
 
				+ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBMIRRORROW_SSE2
			
 
				+ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBMIRRORROW_NEON
			
 
				+ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
			
 
				+#endif
			
 
				+#undef ANY11M
			
 
				+
			
 
				+// Any 1 plane. (memset)
			
 
				+// Defines NAMEANY(dst_ptr, v32, width), a fill wrapper with no source row.
			
 
				+// ANY_SIMD fills the first n = width & ~MASK elements of dst_ptr directly
			
 
				+// (if n > 0); it is then run once more on a 64-byte scratch buffer for
			
 
				+// MASK + 1 elements, and r * BPP bytes (r = width & MASK) of the scratch are
			
 
				+// copied to dst_ptr + n * BPP.  T is the type of the fill value and BPP the
			
 
				+// bytes per destination element.
			
 
				+// temp is not zeroed beforehand; the copy only reads bytes that the scratch
			
 
				+// call is presumed to have written -- NOTE(review): confirm ANY_SIMD writes
			
 
				+// (MASK + 1) * BPP bytes and that this fits in 64.
			
 
				+#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK)                                  \
			
 
				+    void NAMEANY(uint8* dst_ptr, T v32, int width) {                           \
			
 
				+      SIMD_ALIGNED(uint8 temp[64]);                                            \
			
 
				+      int r = width & MASK;                                                    \
			
 
				+      int n = width & ~MASK;                                                   \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(dst_ptr, v32, n);                                             \
			
 
				+      }                                                                        \
			
 
				+      ANY_SIMD(temp, v32, MASK + 1);                                           \
			
 
				+      memcpy(dst_ptr + n * BPP, temp, r * BPP);                                \
			
 
				+    }
			
 
				+
			
 
				+// ANY1 instantiations.  Arguments after the kernel name are T (fill value
			
 
				+// type), BPP and MASK.
			
 
				+#ifdef HAS_SETROW_X86
			
 
				+ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3)
			
 
				+#endif
			
 
				+#ifdef HAS_SETROW_NEON
			
 
				+ANY1(SetRow_Any_NEON, SetRow_NEON, uint8, 1, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBSETROW_NEON
			
 
				+ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
			
 
				+#endif
			
 
				+#undef ANY1
			
 
				+
			
 
				+// Any 1 to 2.  Outputs UV planes.
			
 
				+// Defines NAMEANY(src_ptr, dst_u, dst_v, width).  ANY_SIMD handles the first
			
 
				+// n = width & ~MASK pixels directly; the r = width & MASK leftovers go
			
 
				+// through a scratch buffer laid out as
			
 
				+//   temp[0..127]   source (zeroed first),
			
 
				+//   temp[128..255] U output,
			
 
				+//   temp[256..383] V output.
			
 
				+// UVSHIFT scales the pixel count into source units of BPP bytes; DUVSHIFT
			
 
				+// scales it into output bytes per plane.
			
 
				+// For BPP == 4 with DUVSHIFT 1 or 2, a width that is not a multiple of the
			
 
				+// subsampling factor gets its scratch source padded by repeating the
			
 
				+// trailing source bytes (4 to 12 bytes) so the kernel subsamples real data
			
 
				+// rather than zeros.
			
 
				+// NOTE(review): SS() is defined outside this section; presumably it is a
			
 
				+// round-up shift of its first argument -- confirm.
			
 
				+#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK)                 \
			
 
				+    void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) {\
			
 
				+      SIMD_ALIGNED(uint8 temp[128 * 3]);                                       \
			
 
				+      memset(temp, 0, 128);  /* for msan */                                    \
			
 
				+      int r = width & MASK;                                                    \
			
 
				+      int n = width & ~MASK;                                                   \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(src_ptr, dst_u, dst_v, n);                                    \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, src_ptr  + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);     \
			
 
				+      /* repeat last 4 bytes for 422 subsampler */                             \
			
 
				+      if ((width & 1) && BPP == 4 && DUVSHIFT == 1) {                          \
			
 
				+        memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \
			
 
				+               temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \
			
 
				+      }                                                                        \
			
 
				+      /* repeat last 4 - 12 bytes for 411 subsampler */                        \
			
 
				+      if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) {                   \
			
 
				+        memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \
			
 
				+               temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \
			
 
				+        memcpy(temp + SS(r, UVSHIFT) * BPP + BPP,                              \
			
 
				+               temp + SS(r, UVSHIFT) * BPP - BPP, BPP * 2);                    \
			
 
				+      }                                                                        \
			
 
				+      if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) {                   \
			
 
				+        memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \
			
 
				+               temp + SS(r, UVSHIFT) * BPP - BPP * 2, BPP * 2);                \
			
 
				+      }                                                                        \
			
 
				+      if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) {                   \
			
 
				+        memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \
			
 
				+               temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \
			
 
				+      }                                                                        \
			
 
				+      ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1);                        \
			
 
				+      memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT));            \
			
 
				+      memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT));            \
			
 
				+    }
			
 
				+
			
 
				+// ANY12 instantiations.  Arguments after the kernel name are UVSHIFT, BPP,
			
 
				+// DUVSHIFT and MASK.
			
 
				+#ifdef HAS_SPLITUVROW_SSE2
			
 
				+ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_SPLITUVROW_AVX2
			
 
				+ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_SPLITUVROW_NEON
			
 
				+ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_SPLITUVROW_DSPR2
			
 
				+ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOUV444ROW_SSSE3
			
 
				+ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_YUY2TOUV422ROW_AVX2
			
 
				+ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
			
 
				+ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_YUY2TOUV422ROW_SSE2
			
 
				+ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
			
 
				+ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
			
 
				+#endif
			
 
				+// NOTE(review): the ARGBToUV444 and ARGBToUV411 NEON wrappers below are
			
 
				+// guarded by the YUY2ToUV422 NEON feature macro rather than one of their
			
 
				+// own -- confirm this is intended.
			
 
				+#ifdef HAS_YUY2TOUV422ROW_NEON
			
 
				+ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
			
 
				+ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31)
			
 
				+ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
			
 
				+ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
			
 
				+#endif
			
 
				+#undef ANY12
			
 
				+
			
 
				+// Any 1 to 2 with source stride (2 rows of source).  Outputs UV planes.
			
 
				+// 128 byte row allows for 32 avx ARGB pixels.
			
 
				+// Defines NAMEANY(src_ptr, src_stride_ptr, dst_u, dst_v, width).  ANY_SIMD
			
 
				+// handles the first n = width & ~MASK pixels directly; the r = width & MASK
			
 
				+// leftovers go through a scratch buffer laid out as
			
 
				+//   temp[0..127]   first source row  (zeroed first),
			
 
				+//   temp[128..255] second source row (zeroed first),
			
 
				+//   temp[256..383] U output,
			
 
				+//   temp[384..511] V output,
			
 
				+// and the scratch call passes 128 as the stride so it reads both rows.
			
 
				+// When UVSHIFT == 0 and width is odd, the last BPP bytes of each scratch row
			
 
				+// are repeated once so the kernel subsamples real data rather than zeros.
			
 
				+// Output is always written at n >> 1 with SS(r, 1) bytes per plane.
			
 
				+// NOTE(review): SS() is defined outside this section; presumably it is a
			
 
				+// round-up shift of its first argument -- confirm.
			
 
				+#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                          \
			
 
				+    void NAMEANY(const uint8* src_ptr, int src_stride_ptr,                     \
			
 
				+                 uint8* dst_u, uint8* dst_v, int width) {                      \
			
 
				+      SIMD_ALIGNED(uint8 temp[128 * 4]);                                       \
			
 
				+      memset(temp, 0, 128 * 2);  /* for msan */                                \
			
 
				+      int r = width & MASK;                                                    \
			
 
				+      int n = width & ~MASK;                                                   \
			
 
				+      if (n > 0) {                                                             \
			
 
				+        ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n);                    \
			
 
				+      }                                                                        \
			
 
				+      memcpy(temp, src_ptr  + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);     \
			
 
				+      memcpy(temp + 128, src_ptr  + src_stride_ptr + (n >> UVSHIFT) * BPP,     \
			
 
				+             SS(r, UVSHIFT) * BPP);                                            \
			
 
				+      if ((width & 1) && UVSHIFT == 0) {  /* repeat last pixel for subsample */\
			
 
				+        memcpy(temp + SS(r, UVSHIFT) * BPP,                                    \
			
 
				+               temp + SS(r, UVSHIFT) * BPP - BPP, BPP);                        \
			
 
				+        memcpy(temp + 128 + SS(r, UVSHIFT) * BPP,                              \
			
 
				+               temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP);                  \
			
 
				+      }                                                                        \
			
 
				+      ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1);                   \
			
 
				+      memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1));                          \
			
 
				+      memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1));                          \
			
 
				+    }
			
 
				+
			
 
				+// ANY12S instantiations.  Arguments after the kernel name are UVSHIFT, BPP
			
 
				+// and MASK.
			
 
				+#ifdef HAS_ARGBTOUVROW_AVX2
			
 
				+ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOUVJROW_AVX2
			
 
				+ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOUVROW_SSSE3
			
 
				+ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
			
 
				+ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
			
 
				+ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
			
 
				+ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
			
 
				+ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_YUY2TOUVROW_AVX2
			
 
				+ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
			
 
				+ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
			
 
				+#endif
			
 
				+#ifdef HAS_YUY2TOUVROW_SSE2
			
 
				+ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
			
 
				+ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOUVROW_NEON
			
 
				+ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGBTOUVJROW_NEON
			
 
				+ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_BGRATOUVROW_NEON
			
 
				+ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ABGRTOUVROW_NEON
			
 
				+ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_RGBATOUVROW_NEON
			
 
				+ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_RGB24TOUVROW_NEON
			
 
				+ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_RAWTOUVROW_NEON
			
 
				+ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_RGB565TOUVROW_NEON
			
 
				+ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGB1555TOUVROW_NEON
			
 
				+ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_ARGB4444TOUVROW_NEON
			
 
				+ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_YUY2TOUVROW_NEON
			
 
				+ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
			
 
				+#endif
			
 
				+#ifdef HAS_UYVYTOUVROW_NEON
			
 
				+ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
			
 
				+#endif
			
 
				+#undef ANY12S
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/row_common.cc
+++ b/src/jni/libyuv/source/row_common.cc
--- a/src/jni/libyuv/source/row_gcc.cc
+++ b/src/jni/libyuv/source/row_gcc.cc
--- a/src/jni/libyuv/source/row_mips.cc
+++ b/src/jni/libyuv/source/row_mips.cc
@@ -0,0 +1,782 @@
 
				+/*
			
 
				+ *  Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// The following are available on Mips platforms:
			
 
				+#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
			
 
				+    (_MIPS_SIM == _MIPS_SIM_ABI32)
			
 
				+
			
 
				+#ifdef HAS_COPYROW_MIPS
			
 
				+void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
			
 
				+  __asm__ __volatile__ (
			
 
				+    ".set      noreorder                         \n"
			
 
				+    ".set      noat                              \n"
			
 
				+    "slti      $at, %[count], 8                  \n"
			
 
				+    "bne       $at ,$zero, $last8                \n"
			
 
				+    "xor       $t8, %[src], %[dst]               \n"
			
 
				+    "andi      $t8, $t8, 0x3                     \n"
			
 
				+
			
 
				+    "bne       $t8, $zero, unaligned             \n"
			
 
				+    "negu      $a3, %[dst]                       \n"
			
 
				+    // make dst/src aligned
			
 
				+    "andi      $a3, $a3, 0x3                     \n"
			
 
				+    "beq       $a3, $zero, $chk16w               \n"
			
 
				+    // word-aligned now count is the remaining bytes count
			
 
				+    "subu     %[count], %[count], $a3            \n"
			
 
				+
			
 
				+    "lwr       $t8, 0(%[src])                    \n"
			
 
				+    "addu      %[src], %[src], $a3               \n"
			
 
				+    "swr       $t8, 0(%[dst])                    \n"
			
 
				+    "addu      %[dst], %[dst], $a3               \n"
			
 
				+
			
 
				+    // Now the dst/src are mutually word-aligned with word-aligned addresses
			
 
				+    "$chk16w:                                    \n"
			
 
				+    "andi      $t8, %[count], 0x3f               \n"  // whole 64-B chunks?
			
 
				+    // t8 is the byte count after 64-byte chunks
			
 
				+    "beq       %[count], $t8, chk8w              \n"
			
 
				+    // There will be at most 1 32-byte chunk after it
			
 
				+    "subu      $a3, %[count], $t8                \n"  // the reminder
			
 
				+    // Here a3 counts bytes in 16w chunks
			
 
				+    "addu      $a3, %[dst], $a3                  \n"
			
 
				+    // Now a3 is the final dst after 64-byte chunks
			
 
				+    "addu      $t0, %[dst], %[count]             \n"
			
 
				+    // t0 is the "past the end" address
			
 
				+
			
 
				+    // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past
			
 
				+    // the "t0-32" address
			
 
				+    // This means: for x=128 the last "safe" a1 address is "t0-160"
			
 
				+    // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
			
 
				+    // we will use "pref 30,128(a1)", so "t0-160" is the limit
			
 
				+    "subu      $t9, $t0, 160                     \n"
			
 
				+    // t9 is the "last safe pref 30,128(a1)" address
			
 
				+    "pref      0, 0(%[src])                      \n"  // first line of src
			
 
				+    "pref      0, 32(%[src])                     \n"  // second line of src
			
 
				+    "pref      0, 64(%[src])                     \n"
			
 
				+    "pref      30, 32(%[dst])                    \n"
			
 
				+    // In case the a1 > t9 don't use "pref 30" at all
			
 
				+    "sgtu      $v1, %[dst], $t9                  \n"
			
 
				+    "bgtz      $v1, $loop16w                     \n"
			
 
				+    "nop                                         \n"
			
 
				+    // otherwise, start with using pref30
			
 
				+    "pref      30, 64(%[dst])                    \n"
			
 
				+    "$loop16w:                                    \n"
			
 
				+    "pref      0, 96(%[src])                     \n"
			
 
				+    "lw        $t0, 0(%[src])                    \n"
			
 
				+    "bgtz      $v1, $skip_pref30_96              \n"  // skip
			
 
				+    "lw        $t1, 4(%[src])                    \n"
			
 
				+    "pref      30, 96(%[dst])                    \n"  // continue
			
 
				+    "$skip_pref30_96:                            \n"
			
 
				+    "lw        $t2, 8(%[src])                    \n"
			
 
				+    "lw        $t3, 12(%[src])                   \n"
			
 
				+    "lw        $t4, 16(%[src])                   \n"
			
 
				+    "lw        $t5, 20(%[src])                   \n"
			
 
				+    "lw        $t6, 24(%[src])                   \n"
			
 
				+    "lw        $t7, 28(%[src])                   \n"
			
 
				+    "pref      0, 128(%[src])                    \n"
			
 
				+    //  bring the next lines of src, addr 128
			
 
				+    "sw        $t0, 0(%[dst])                    \n"
			
 
				+    "sw        $t1, 4(%[dst])                    \n"
			
 
				+    "sw        $t2, 8(%[dst])                    \n"
			
 
				+    "sw        $t3, 12(%[dst])                   \n"
			
 
				+    "sw        $t4, 16(%[dst])                   \n"
			
 
				+    "sw        $t5, 20(%[dst])                   \n"
			
 
				+    "sw        $t6, 24(%[dst])                   \n"
			
 
				+    "sw        $t7, 28(%[dst])                   \n"
			
 
				+    "lw        $t0, 32(%[src])                   \n"
			
 
				+    "bgtz      $v1, $skip_pref30_128             \n"  // skip pref 30,128(a1)
			
 
				+    "lw        $t1, 36(%[src])                   \n"
			
 
				+    "pref      30, 128(%[dst])                   \n"  // set dest, addr 128
			
 
				+    "$skip_pref30_128:                           \n"
			
 
				+    "lw        $t2, 40(%[src])                   \n"
			
 
				+    "lw        $t3, 44(%[src])                   \n"
			
 
				+    "lw        $t4, 48(%[src])                   \n"
			
 
				+    "lw        $t5, 52(%[src])                   \n"
			
 
				+    "lw        $t6, 56(%[src])                   \n"
			
 
				+    "lw        $t7, 60(%[src])                   \n"
			
 
				+    "pref      0, 160(%[src])                    \n"
			
 
				+    // bring the next lines of src, addr 160
			
 
				+    "sw        $t0, 32(%[dst])                   \n"
			
 
				+    "sw        $t1, 36(%[dst])                   \n"
			
 
				+    "sw        $t2, 40(%[dst])                   \n"
			
 
				+    "sw        $t3, 44(%[dst])                   \n"
			
 
				+    "sw        $t4, 48(%[dst])                   \n"
			
 
				+    "sw        $t5, 52(%[dst])                   \n"
			
 
				+    "sw        $t6, 56(%[dst])                   \n"
			
 
				+    "sw        $t7, 60(%[dst])                   \n"
			
 
				+
			
 
				+    "addiu     %[dst], %[dst], 64                \n"  // adding 64 to dest
			
 
				+    "sgtu      $v1, %[dst], $t9                  \n"
			
 
				+    "bne       %[dst], $a3, $loop16w             \n"
			
 
				+    " addiu    %[src], %[src], 64                \n"  // adding 64 to src
			
 
				+    "move      %[count], $t8                     \n"
			
 
				+
			
 
				+    // Here we have src and dest word-aligned but less than 64-bytes to go
			
 
				+
			
 
				+    "chk8w:                                      \n"
			
 
				+    "pref      0, 0x0(%[src])                    \n"
			
 
				+    "andi      $t8, %[count], 0x1f               \n"  // 32-byte chunk?
			
 
				+    // the t8 is the remainder count past 32-bytes
			
 
				+    "beq       %[count], $t8, chk1w              \n"
			
 
				+    // count=t8,no 32-byte chunk
			
 
				+    " nop                                        \n"
			
 
				+
			
 
				+    "lw        $t0, 0(%[src])                    \n"
			
 
				+    "lw        $t1, 4(%[src])                    \n"
			
 
				+    "lw        $t2, 8(%[src])                    \n"
			
 
				+    "lw        $t3, 12(%[src])                   \n"
			
 
				+    "lw        $t4, 16(%[src])                   \n"
			
 
				+    "lw        $t5, 20(%[src])                   \n"
			
 
				+    "lw        $t6, 24(%[src])                   \n"
			
 
				+    "lw        $t7, 28(%[src])                   \n"
			
 
				+    "addiu     %[src], %[src], 32                \n"
			
 
				+
			
 
				+    "sw        $t0, 0(%[dst])                    \n"
			
 
				+    "sw        $t1, 4(%[dst])                    \n"
			
 
				+    "sw        $t2, 8(%[dst])                    \n"
			
 
				+    "sw        $t3, 12(%[dst])                   \n"
			
 
				+    "sw        $t4, 16(%[dst])                   \n"
			
 
				+    "sw        $t5, 20(%[dst])                   \n"
			
 
				+    "sw        $t6, 24(%[dst])                   \n"
			
 
				+    "sw        $t7, 28(%[dst])                   \n"
			
 
				+    "addiu     %[dst], %[dst], 32                \n"
			
 
				+
			
 
				+    "chk1w:                                      \n"
			
 
				+    "andi      %[count], $t8, 0x3                \n"
			
 
				+    // now count is the remainder past 1w chunks
			
 
				+    "beq       %[count], $t8, $last8             \n"
			
 
				+    " subu     $a3, $t8, %[count]                \n"
			
 
				+    // a3 is count of bytes in 1w chunks
			
 
				+    "addu      $a3, %[dst], $a3                  \n"
			
 
				+    // now a3 is the dst address past the 1w chunks
			
 
				+    // copying in words (4-byte chunks)
			
 
				+    "$wordCopy_loop:                             \n"
			
 
				+    "lw        $t3, 0(%[src])                    \n"
			
 
				+    // the first t3 may be equal t0 ... optimize?
			
 
				+    "addiu     %[src], %[src],4                  \n"
			
 
				+    "addiu     %[dst], %[dst],4                  \n"
			
 
				+    "bne       %[dst], $a3,$wordCopy_loop        \n"
			
 
				+    " sw       $t3, -4(%[dst])                   \n"
			
 
				+
			
 
				+    // For the last (<8) bytes
			
 
				+    "$last8:                                     \n"
			
 
				+    "blez      %[count], leave                   \n"
			
 
				+    " addu     $a3, %[dst], %[count]             \n"  // a3 -last dst address
			
 
				+    "$last8loop:                                 \n"
			
 
				+    "lb        $v1, 0(%[src])                    \n"
			
 
				+    "addiu     %[src], %[src], 1                 \n"
			
 
				+    "addiu     %[dst], %[dst], 1                 \n"
			
 
				+    "bne       %[dst], $a3, $last8loop           \n"
			
 
				+    " sb       $v1, -1(%[dst])                   \n"
			
 
				+
			
 
				+    "leave:                                      \n"
			
 
				+    "  j       $ra                               \n"
			
 
				+    "  nop                                       \n"
			
 
				+
			
 
				+    //
			
 
				+    // UNALIGNED case
			
 
				+    //
			
 
				+
			
 
				+    "unaligned:                                  \n"
			
 
				+    // got here with a3="negu a1"
			
 
				+    "andi      $a3, $a3, 0x3                     \n"  // a1 is word aligned?
			
 
				+    "beqz      $a3, $ua_chk16w                   \n"
			
 
				+    " subu     %[count], %[count], $a3           \n"
			
 
				+    // bytes left after initial a3 bytes
			
 
				+    "lwr       $v1, 0(%[src])                    \n"
			
 
				+    "lwl       $v1, 3(%[src])                    \n"
			
 
				+    "addu      %[src], %[src], $a3               \n"  // a3 may be 1, 2 or 3
			
 
				+    "swr       $v1, 0(%[dst])                    \n"
			
 
				+    "addu      %[dst], %[dst], $a3               \n"
			
 
				+    // below the dst will be word aligned (NOTE1)
			
 
				+    "$ua_chk16w:                                 \n"
			
 
				+    "andi      $t8, %[count], 0x3f               \n"  // whole 64-B chunks?
			
 
				+    // t8 is the byte count after 64-byte chunks
			
 
				+    "beq       %[count], $t8, ua_chk8w           \n"
			
 
				+    // if a2==t8, no 64-byte chunks
			
 
				+    // There will be at most 1 32-byte chunk after it
			
 
				+    "subu      $a3, %[count], $t8                \n"  // the reminder
			
 
				+    // Here a3 counts bytes in 16w chunks
			
 
				+    "addu      $a3, %[dst], $a3                  \n"
			
 
				+    // Now a3 is the final dst after 64-byte chunks
			
 
				+    "addu      $t0, %[dst], %[count]             \n"  // t0 "past the end"
			
 
				+    "subu      $t9, $t0, 160                     \n"
			
 
				+    // t9 is the "last safe pref 30,128(a1)" address
			
 
				+    "pref      0, 0(%[src])                      \n"  // first line of src
			
 
				+    "pref      0, 32(%[src])                     \n"  // second line  addr 32
			
 
				+    "pref      0, 64(%[src])                     \n"
			
 
				+    "pref      30, 32(%[dst])                    \n"
			
 
				+    // safe, as we have at least 64 bytes ahead
			
 
				+    // In case the a1 > t9 don't use "pref 30" at all
			
 
				+    "sgtu      $v1, %[dst], $t9                  \n"
			
 
				+    "bgtz      $v1, $ua_loop16w                  \n"
			
 
				+    // skip "pref 30,64(a1)" for too short arrays
			
 
				+    " nop                                        \n"
			
 
				+    // otherwise, start with using pref30
			
 
				+    "pref      30, 64(%[dst])                    \n"
			
 
				+    "$ua_loop16w:                                \n"
			
 
				+    "pref      0, 96(%[src])                     \n"
			
 
				+    "lwr       $t0, 0(%[src])                    \n"
			
 
				+    "lwl       $t0, 3(%[src])                    \n"
			
 
				+    "lwr       $t1, 4(%[src])                    \n"
			
 
				+    "bgtz      $v1, $ua_skip_pref30_96           \n"
			
 
				+    " lwl      $t1, 7(%[src])                    \n"
			
 
				+    "pref      30, 96(%[dst])                    \n"
			
 
				+    // continue setting up the dest, addr 96
			
 
				+    "$ua_skip_pref30_96:                         \n"
			
 
				+    "lwr       $t2, 8(%[src])                    \n"
			
 
				+    "lwl       $t2, 11(%[src])                   \n"
			
 
				+    "lwr       $t3, 12(%[src])                   \n"
			
 
				+    "lwl       $t3, 15(%[src])                   \n"
			
 
				+    "lwr       $t4, 16(%[src])                   \n"
			
 
				+    "lwl       $t4, 19(%[src])                   \n"
			
 
				+    "lwr       $t5, 20(%[src])                   \n"
			
 
				+    "lwl       $t5, 23(%[src])                   \n"
			
 
				+    "lwr       $t6, 24(%[src])                   \n"
			
 
				+    "lwl       $t6, 27(%[src])                   \n"
			
 
				+    "lwr       $t7, 28(%[src])                   \n"
			
 
				+    "lwl       $t7, 31(%[src])                   \n"
			
 
				+    "pref      0, 128(%[src])                    \n"
			
 
				+    // bring the next lines of src, addr 128
			
 
				+    "sw        $t0, 0(%[dst])                    \n"
			
 
				+    "sw        $t1, 4(%[dst])                    \n"
			
 
				+    "sw        $t2, 8(%[dst])                    \n"
			
 
				+    "sw        $t3, 12(%[dst])                   \n"
			
 
				+    "sw        $t4, 16(%[dst])                   \n"
			
 
				+    "sw        $t5, 20(%[dst])                   \n"
			
 
				+    "sw        $t6, 24(%[dst])                   \n"
			
 
				+    "sw        $t7, 28(%[dst])                   \n"
			
 
				+    "lwr       $t0, 32(%[src])                   \n"
			
 
				+    "lwl       $t0, 35(%[src])                   \n"
			
 
				+    "lwr       $t1, 36(%[src])                   \n"
			
 
				+    "bgtz      $v1, ua_skip_pref30_128           \n"
			
 
				+    " lwl      $t1, 39(%[src])                   \n"
			
 
				+    "pref      30, 128(%[dst])                   \n"
			
 
				+    // continue setting up the dest, addr 128
			
 
				+    "ua_skip_pref30_128:                         \n"
			
 
				+
			
 
				+    "lwr       $t2, 40(%[src])                   \n"
			
 
				+    "lwl       $t2, 43(%[src])                   \n"
			
 
				+    "lwr       $t3, 44(%[src])                   \n"
			
 
				+    "lwl       $t3, 47(%[src])                   \n"
			
 
				+    "lwr       $t4, 48(%[src])                   \n"
			
 
				+    "lwl       $t4, 51(%[src])                   \n"
			
 
				+    "lwr       $t5, 52(%[src])                   \n"
			
 
				+    "lwl       $t5, 55(%[src])                   \n"
			
 
				+    "lwr       $t6, 56(%[src])                   \n"
			
 
				+    "lwl       $t6, 59(%[src])                   \n"
			
 
				+    "lwr       $t7, 60(%[src])                   \n"
			
 
				+    "lwl       $t7, 63(%[src])                   \n"
			
 
				+    "pref      0, 160(%[src])                    \n"
			
 
				+    // bring the next lines of src, addr 160
			
 
				+    "sw        $t0, 32(%[dst])                   \n"
			
 
				+    "sw        $t1, 36(%[dst])                   \n"
			
 
				+    "sw        $t2, 40(%[dst])                   \n"
			
 
				+    "sw        $t3, 44(%[dst])                   \n"
			
 
				+    "sw        $t4, 48(%[dst])                   \n"
			
 
				+    "sw        $t5, 52(%[dst])                   \n"
			
 
				+    "sw        $t6, 56(%[dst])                   \n"
			
 
				+    "sw        $t7, 60(%[dst])                   \n"
			
 
				+
			
 
				+    "addiu     %[dst],%[dst],64                  \n"  // adding 64 to dest
			
 
				+    "sgtu      $v1,%[dst],$t9                    \n"
			
 
				+    "bne       %[dst],$a3,$ua_loop16w            \n"
			
 
				+    " addiu    %[src],%[src],64                  \n"  // adding 64 to src
			
 
				+    "move      %[count],$t8                      \n"
			
 
				+
			
 
				+    // Here we have src and dest word-aligned but less than 64-bytes to go
			
 
				+
			
 
				+    "ua_chk8w:                                   \n"
			
 
				+    "pref      0, 0x0(%[src])                    \n"
			
 
				+    "andi      $t8, %[count], 0x1f               \n"  // 32-byte chunk?
			
 
				+    // t8 is the remainder count
			
 
				+    "beq       %[count], $t8, $ua_chk1w          \n"
			
 
				+    // when count==t8, no 32-byte chunk
			
 
				+
			
 
				+    "lwr       $t0, 0(%[src])                    \n"
			
 
				+    "lwl       $t0, 3(%[src])                    \n"
			
 
				+    "lwr       $t1, 4(%[src])                    \n"
			
 
				+    "lwl       $t1, 7(%[src])                    \n"
			
 
				+    "lwr       $t2, 8(%[src])                    \n"
			
 
				+    "lwl       $t2, 11(%[src])                   \n"
			
 
				+    "lwr       $t3, 12(%[src])                   \n"
			
 
				+    "lwl       $t3, 15(%[src])                   \n"
			
 
				+    "lwr       $t4, 16(%[src])                   \n"
			
 
				+    "lwl       $t4, 19(%[src])                   \n"
			
 
				+    "lwr       $t5, 20(%[src])                   \n"
			
 
				+    "lwl       $t5, 23(%[src])                   \n"
			
 
				+    "lwr       $t6, 24(%[src])                   \n"
			
 
				+    "lwl       $t6, 27(%[src])                   \n"
			
 
				+    "lwr       $t7, 28(%[src])                   \n"
			
 
				+    "lwl       $t7, 31(%[src])                   \n"
			
 
				+    "addiu     %[src], %[src], 32                \n"
			
 
				+
			
 
				+    "sw        $t0, 0(%[dst])                    \n"
			
 
				+    "sw        $t1, 4(%[dst])                    \n"
			
 
				+    "sw        $t2, 8(%[dst])                    \n"
			
 
				+    "sw        $t3, 12(%[dst])                   \n"
			
 
				+    "sw        $t4, 16(%[dst])                   \n"
			
 
				+    "sw        $t5, 20(%[dst])                   \n"
			
 
				+    "sw        $t6, 24(%[dst])                   \n"
			
 
				+    "sw        $t7, 28(%[dst])                   \n"
			
 
				+    "addiu     %[dst], %[dst], 32                \n"
			
 
				+
			
 
				+    "$ua_chk1w:                                  \n"
			
 
				+    "andi      %[count], $t8, 0x3                \n"
			
 
				+    // now count is the remainder past 1w chunks
			
 
				+    "beq       %[count], $t8, ua_smallCopy       \n"
			
 
				+    "subu      $a3, $t8, %[count]                \n"
			
 
				+    // a3 is count of bytes in 1w chunks
			
 
				+    "addu      $a3, %[dst], $a3                  \n"
			
 
				+    // now a3 is the dst address past the 1w chunks
			
 
				+
			
 
				+    // copying in words (4-byte chunks)
			
 
				+    "$ua_wordCopy_loop:                          \n"
			
 
				+    "lwr       $v1, 0(%[src])                    \n"
			
 
				+    "lwl       $v1, 3(%[src])                    \n"
			
 
				+    "addiu     %[src], %[src], 4                 \n"
			
 
				+    "addiu     %[dst], %[dst], 4                 \n"
			
 
				+    // note: dst=a1 is word aligned here, see NOTE1
			
 
				+    "bne       %[dst], $a3, $ua_wordCopy_loop    \n"
			
 
				+    " sw       $v1,-4(%[dst])                    \n"
			
 
				+
			
 
				+    // Now less than 4 bytes (value in count) left to copy
			
 
				+    "ua_smallCopy:                               \n"
			
 
				+    "beqz      %[count], leave                   \n"
			
 
				+    " addu     $a3, %[dst], %[count]             \n" // a3 = last dst address
			
 
				+    "$ua_smallCopy_loop:                         \n"
			
 
				+    "lb        $v1, 0(%[src])                    \n"
			
 
				+    "addiu     %[src], %[src], 1                 \n"
			
 
				+    "addiu     %[dst], %[dst], 1                 \n"
			
 
				+    "bne       %[dst],$a3,$ua_smallCopy_loop     \n"
			
 
				+    " sb       $v1, -1(%[dst])                   \n"
			
 
				+
			
 
				+    "j         $ra                               \n"
			
 
				+    " nop                                        \n"
			
 
				+    ".set      at                                \n"
			
 
				+    ".set      reorder                           \n"
			
 
				+       : [dst] "+r" (dst), [src] "+r" (src)
			
 
				+       : [count] "r" (count)
			
 
				+       : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
			
 
				+       "t8", "t9", "a3", "v1", "at"
			
 
				+  );
			
 
				+}
			
 
				+#endif  // HAS_COPYROW_MIPS
			
 
				+
			
 
				+// DSPR2 functions
			
 
				+#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
			
 
				+    (__mips_dsp_rev >= 2) && \
			
 
				+    (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
			
 
				+
			
 
				+// Splits a row of interleaved UV samples into separate U and V rows.
			
 
				+//   src_uv : interleaved input; byte 0 of every pair goes to dst_u and
			
 
				+//            byte 1 goes to dst_v (2 * width bytes are read).
			
 
				+//   dst_u, dst_v : outputs, width bytes are written to each.
			
 
				+//   width  : number of UV pairs to process; zero is a no-op.
			
 
				+// The main loop handles 16 pairs per iteration with word loads/stores, the
			
 
				+// remaining (width & 15) pairs are copied one pair at a time.
			
 
				+// NOTE(review): lw/sw are used in the main loop, so all three pointers
			
 
				+// presumably need 4-byte alignment when width >= 16 - confirm with caller.
			
 
				+void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
			
 
				+                           int width) {
			
 
				+  __asm__ __volatile__ (
			
 
				+    ".set push                                     \n"
			
 
				+    ".set noreorder                                \n"
			
 
				+    "srl             $t4, %[width], 4              \n"  // multiples of 16
			
 
				+    // No full 16-pair block: go to the residual check (label 4) rather than
			
 
				+    // straight into the do-while residual loop, so width == 0 copies nothing.
			
 
				+    "blez            $t4, 4f                       \n"
			
 
				+    " andi           %[width], %[width], 0xf       \n"  // residual (delay slot)
			
 
				+
			
 
				+  "1:                                              \n"
			
 
				+    "addiu           $t4, $t4, -1                  \n"
			
 
				+    "lw              $t0, 0(%[src_uv])             \n"  // V1 | U1 | V0 | U0
			
 
				+    "lw              $t1, 4(%[src_uv])             \n"  // V3 | U3 | V2 | U2
			
 
				+    "lw              $t2, 8(%[src_uv])             \n"  // V5 | U5 | V4 | U4
			
 
				+    "lw              $t3, 12(%[src_uv])            \n"  // V7 | U7 | V6 | U6
			
 
				+    "lw              $t5, 16(%[src_uv])            \n"  // V9 | U9 | V8 | U8
			
 
				+    "lw              $t6, 20(%[src_uv])            \n"  // V11 | U11 | V10 | U10
			
 
				+    "lw              $t7, 24(%[src_uv])            \n"  // V13 | U13 | V12 | U12
			
 
				+    "lw              $t8, 28(%[src_uv])            \n"  // V15 | U15 | V14 | U14
			
 
				+    "addiu           %[src_uv], %[src_uv], 32      \n"
			
 
				+    "precrq.qb.ph    $t9, $t1, $t0                 \n"  // V3 | V2 | V1 | V0
			
 
				+    "precr.qb.ph     $t0, $t1, $t0                 \n"  // U3 | U2 | U1 | U0
			
 
				+    "precrq.qb.ph    $t1, $t3, $t2                 \n"  // V7 | V6 | V5 | V4
			
 
				+    "precr.qb.ph     $t2, $t3, $t2                 \n"  // U7 | U6 | U5 | U4
			
 
				+    "precrq.qb.ph    $t3, $t6, $t5                 \n"  // V11 | V10 | V9 | V8
			
 
				+    "precr.qb.ph     $t5, $t6, $t5                 \n"  // U11 | U10 | U9 | U8
			
 
				+    "precrq.qb.ph    $t6, $t8, $t7                 \n"  // V15 | V14 | V13 | V12
			
 
				+    "precr.qb.ph     $t7, $t8, $t7                 \n"  // U15 | U14 | U13 | U12
			
 
				+    "sw              $t9, 0(%[dst_v])              \n"
			
 
				+    "sw              $t0, 0(%[dst_u])              \n"
			
 
				+    "sw              $t1, 4(%[dst_v])              \n"
			
 
				+    "sw              $t2, 4(%[dst_u])              \n"
			
 
				+    "sw              $t3, 8(%[dst_v])              \n"
			
 
				+    "sw              $t5, 8(%[dst_u])              \n"
			
 
				+    "sw              $t6, 12(%[dst_v])             \n"
			
 
				+    "sw              $t7, 12(%[dst_u])             \n"
			
 
				+    "addiu           %[dst_v], %[dst_v], 16        \n"
			
 
				+    "bgtz            $t4, 1b                       \n"
			
 
				+    " addiu          %[dst_u], %[dst_u], 16        \n"  // delay slot
			
 
				+
			
 
				+  // Residual check, reached from the end of the main loop and from the
			
 
				+  // "no full block" branch above.
			
 
				+  "4:                                              \n"
			
 
				+    "beqz            %[width], 3f                  \n"
			
 
				+    " nop                                          \n"
			
 
				+
			
 
				+  // Residual loop: one UV pair per iteration.
			
 
				+  "2:                                              \n"
			
 
				+    "lbu             $t0, 0(%[src_uv])             \n"
			
 
				+    "lbu             $t1, 1(%[src_uv])             \n"
			
 
				+    "addiu           %[src_uv], %[src_uv], 2       \n"
			
 
				+    "addiu           %[width], %[width], -1        \n"
			
 
				+    "sb              $t0, 0(%[dst_u])              \n"
			
 
				+    "sb              $t1, 0(%[dst_v])              \n"
			
 
				+    "addiu           %[dst_u], %[dst_u], 1         \n"
			
 
				+    "bgtz            %[width], 2b                  \n"
			
 
				+    " addiu          %[dst_v], %[dst_v], 1         \n"  // delay slot
			
 
				+
			
 
				+  "3:                                              \n"
			
 
				+    ".set pop                                      \n"
			
 
				+     : [src_uv] "+r" (src_uv),
			
 
				+       [width] "+r" (width),
			
 
				+       [dst_u] "+r" (dst_u),
			
 
				+       [dst_v] "+r" (dst_v)
			
 
				+     :
			
 
				+     : "t0", "t1", "t2", "t3",
			
 
				+     "t4", "t5", "t6", "t7", "t8", "t9"
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Reverses a row of bytes: dst[i] = src[width - 1 - i] for 0 <= i < width.
			
 
				+// Full 16-byte blocks are reversed a word at a time (wsbh + rotr 16 reverse
			
 
				+// the bytes inside each word, and the four words are stored in swapped
			
 
				+// order); the remaining (width & 15) bytes are copied one by one.
			
 
				+// A width of zero is a no-op.
			
 
				+// NOTE(review): the block loop uses lw/sw, so src + width and dst presumably
			
 
				+// need 4-byte alignment when width >= 16 - confirm with the caller.
			
 
				+void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) {
			
 
				+  __asm__ __volatile__ (
			
 
				+    ".set push                             \n"
			
 
				+    ".set noreorder                        \n"
			
 
				+
			
 
				+    "srl       $t4, %[width], 4            \n"  // multiples of 16
			
 
				+    "andi      $t5, %[width], 0xf          \n"  // residual byte count
			
 
				+    // No full block: go to the residual check (label 4) so that an empty
			
 
				+    // row does not enter the do-while byte loop.
			
 
				+    "blez      $t4, 4f                     \n"
			
 
				+    " addu     %[src], %[src], %[width]    \n"  // src += width (delay slot)
			
 
				+
			
 
				+   "1:                                     \n"
			
 
				+    "lw        $t0, -16(%[src])            \n"  // |3|2|1|0|
			
 
				+    "lw        $t1, -12(%[src])            \n"  // |7|6|5|4|
			
 
				+    "lw        $t2, -8(%[src])             \n"  // |11|10|9|8|
			
 
				+    "lw        $t3, -4(%[src])             \n"  // |15|14|13|12|
			
 
				+    "wsbh      $t0, $t0                    \n"  // |2|3|0|1|
			
 
				+    "wsbh      $t1, $t1                    \n"  // |6|7|4|5|
			
 
				+    "wsbh      $t2, $t2                    \n"  // |10|11|8|9|
			
 
				+    "wsbh      $t3, $t3                    \n"  // |14|15|12|13|
			
 
				+    "rotr      $t0, $t0, 16                \n"  // |0|1|2|3|
			
 
				+    "rotr      $t1, $t1, 16                \n"  // |4|5|6|7|
			
 
				+    "rotr      $t2, $t2, 16                \n"  // |8|9|10|11|
			
 
				+    "rotr      $t3, $t3, 16                \n"  // |12|13|14|15|
			
 
				+    "addiu     %[src], %[src], -16         \n"
			
 
				+    "addiu     $t4, $t4, -1                \n"
			
 
				+    "sw        $t3, 0(%[dst])              \n"  // |15|14|13|12|
			
 
				+    "sw        $t2, 4(%[dst])              \n"  // |11|10|9|8|
			
 
				+    "sw        $t1, 8(%[dst])              \n"  // |7|6|5|4|
			
 
				+    "sw        $t0, 12(%[dst])             \n"  // |3|2|1|0|
			
 
				+    "bgtz      $t4, 1b                     \n"
			
 
				+    " addiu    %[dst], %[dst], 16          \n"  // delay slot
			
 
				+
			
 
				+   // Residual check, reached from the end of the block loop and from the
			
 
				+   // "no full block" branch above.
			
 
				+   "4:                                     \n"
			
 
				+    "beqz      $t5, 3f                     \n"
			
 
				+    " nop                                  \n"
			
 
				+
			
 
				+   // Residual loop: one byte per iteration, $t5 bytes in total.
			
 
				+   "2:                                     \n"
			
 
				+    "lbu       $t0, -1(%[src])             \n"
			
 
				+    "addiu     $t5, $t5, -1                \n"
			
 
				+    "addiu     %[src], %[src], -1          \n"
			
 
				+    "sb        $t0, 0(%[dst])              \n"
			
 
				+    // $t5 was already decremented, so loop only while it is still > 0.
			
 
				+    // (Testing >= 0 here would copy one byte too many: a read before src
			
 
				+    // and a write past dst + width.)
			
 
				+    "bgtz      $t5, 2b                     \n"
			
 
				+    " addiu    %[dst], %[dst], 1           \n"  // delay slot
			
 
				+
			
 
				+   "3:                                     \n"
			
 
				+    ".set pop                              \n"
			
 
				+      : [src] "+r" (src), [dst] "+r" (dst)
			
 
				+      : [width] "r" (width)
			
 
				+      : "t0", "t1", "t2", "t3", "t4", "t5"
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Mirrors a row of interleaved UV pairs and splits it into U and V rows:
			
 
				+//   dst_u[i] = src_uv[2 * (width - 1 - i)]
			
 
				+//   dst_v[i] = src_uv[2 * (width - 1 - i) + 1]
			
 
				+// for 0 <= i < width. Blocks of 16 pairs (32 source bytes) are handled in
			
 
				+// the main loop; the remaining (width & 15) pairs are copied pair by pair.
			
 
				+// A width of zero is a no-op.
			
 
				+// The destinations are written with swr/swl pairs (unaligned word stores).
			
 
				+// NOTE(review): the source is read with lw, so src_uv + 2 * width presumably
			
 
				+// needs 4-byte alignment when width >= 16 - confirm with the caller.
			
 
				+void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
			
 
				+                            int width) {
			
 
				+  int x = 0;  // number of 16-pair blocks left (set inside the asm)
			
 
				+  int y = 0;  // number of residual pairs left (set inside the asm)
			
 
				+  __asm__ __volatile__ (
			
 
				+    ".set push                                    \n"
			
 
				+    ".set noreorder                               \n"
			
 
				+
			
 
				+    "addu            $t4, %[width], %[width]      \n"  // bytes in the row
			
 
				+    "srl             %[x], %[width], 4            \n"
			
 
				+    "andi            %[y], %[width], 0xf          \n"
			
 
				+    // No full block: go to the residual check (label 4) so that an empty
			
 
				+    // row does not enter the do-while pair loop.
			
 
				+    "blez            %[x], 4f                     \n"
			
 
				+    " addu           %[src_uv], %[src_uv], $t4    \n"  // delay slot: end of row
			
 
				+
			
 
				+   "1:                                            \n"
			
 
				+    "lw              $t0, -32(%[src_uv])          \n"  // |3|2|1|0|
			
 
				+    "lw              $t1, -28(%[src_uv])          \n"  // |7|6|5|4|
			
 
				+    "lw              $t2, -24(%[src_uv])          \n"  // |11|10|9|8|
			
 
				+    "lw              $t3, -20(%[src_uv])          \n"  // |15|14|13|12|
			
 
				+    "lw              $t4, -16(%[src_uv])          \n"  // |19|18|17|16|
			
 
				+    "lw              $t6, -12(%[src_uv])          \n"  // |23|22|21|20|
			
 
				+    "lw              $t7, -8(%[src_uv])           \n"  // |27|26|25|24|
			
 
				+    "lw              $t8, -4(%[src_uv])           \n"  // |31|30|29|28|
			
 
				+
			
 
				+    "rotr            $t0, $t0, 16                 \n"  // |1|0|3|2|
			
 
				+    "rotr            $t1, $t1, 16                 \n"  // |5|4|7|6|
			
 
				+    "rotr            $t2, $t2, 16                 \n"  // |9|8|11|10|
			
 
				+    "rotr            $t3, $t3, 16                 \n"  // |13|12|15|14|
			
 
				+    "rotr            $t4, $t4, 16                 \n"  // |17|16|19|18|
			
 
				+    "rotr            $t6, $t6, 16                 \n"  // |21|20|23|22|
			
 
				+    "rotr            $t7, $t7, 16                 \n"  // |25|24|27|26|
			
 
				+    "rotr            $t8, $t8, 16                 \n"  // |29|28|31|30|
			
 
				+    "precr.qb.ph     $t9, $t0, $t1                \n"  // |0|2|4|6|
			
 
				+    "precrq.qb.ph    $t5, $t0, $t1                \n"  // |1|3|5|7|
			
 
				+    "precr.qb.ph     $t0, $t2, $t3                \n"  // |8|10|12|14|
			
 
				+    "precrq.qb.ph    $t1, $t2, $t3                \n"  // |9|11|13|15|
			
 
				+    "precr.qb.ph     $t2, $t4, $t6                \n"  // |16|18|20|22|
			
 
				+    "precrq.qb.ph    $t3, $t4, $t6                \n"  // |17|19|21|23|
			
 
				+    "precr.qb.ph     $t4, $t7, $t8                \n"  // |24|26|28|30|
			
 
				+    "precrq.qb.ph    $t6, $t7, $t8                \n"  // |25|27|29|31|
			
 
				+    "addiu           %[src_uv], %[src_uv], -32    \n"
			
 
				+    "addiu           %[x], %[x], -1               \n"
			
 
				+    "swr             $t4, 0(%[dst_u])             \n"
			
 
				+    "swl             $t4, 3(%[dst_u])             \n"  // |30|28|26|24|
			
 
				+    "swr             $t6, 0(%[dst_v])             \n"
			
 
				+    "swl             $t6, 3(%[dst_v])             \n"  // |31|29|27|25|
			
 
				+    "swr             $t2, 4(%[dst_u])             \n"
			
 
				+    "swl             $t2, 7(%[dst_u])             \n"  // |22|20|18|16|
			
 
				+    "swr             $t3, 4(%[dst_v])             \n"
			
 
				+    "swl             $t3, 7(%[dst_v])             \n"  // |23|21|19|17|
			
 
				+    "swr             $t0, 8(%[dst_u])             \n"
			
 
				+    "swl             $t0, 11(%[dst_u])            \n"  // |14|12|10|8|
			
 
				+    "swr             $t1, 8(%[dst_v])             \n"
			
 
				+    "swl             $t1, 11(%[dst_v])            \n"  // |15|13|11|9|
			
 
				+    "swr             $t9, 12(%[dst_u])            \n"
			
 
				+    "swl             $t9, 15(%[dst_u])            \n"  // |6|4|2|0|
			
 
				+    "swr             $t5, 12(%[dst_v])            \n"
			
 
				+    "swl             $t5, 15(%[dst_v])            \n"  // |7|5|3|1|
			
 
				+    "addiu           %[dst_v], %[dst_v], 16       \n"
			
 
				+    "bgtz            %[x], 1b                     \n"
			
 
				+    " addiu          %[dst_u], %[dst_u], 16       \n"  // delay slot
			
 
				+
			
 
				+   // Residual check, reached from the end of the block loop and from the
			
 
				+   // "no full block" branch above; falls through into the pair loop.
			
 
				+   "4:                                            \n"
			
 
				+    "beqz            %[y], 3f                     \n"
			
 
				+    " nop                                         \n"
			
 
				+
			
 
				+   // Residual loop: one UV pair per iteration.
			
 
				+   "2:                                            \n"
			
 
				+    "lbu             $t0, -2(%[src_uv])           \n"
			
 
				+    "lbu             $t1, -1(%[src_uv])           \n"
			
 
				+    "addiu           %[src_uv], %[src_uv], -2     \n"
			
 
				+    "addiu           %[y], %[y], -1               \n"
			
 
				+    "sb              $t0, 0(%[dst_u])             \n"
			
 
				+    "sb              $t1, 0(%[dst_v])             \n"
			
 
				+    "addiu           %[dst_u], %[dst_u], 1        \n"
			
 
				+    "bgtz            %[y], 2b                     \n"
			
 
				+    " addiu          %[dst_v], %[dst_v], 1        \n"  // delay slot
			
 
				+
			
 
				+   "3:                                            \n"
			
 
				+    ".set pop                                     \n"
			
 
				+      : [src_uv] "+r" (src_uv),
			
 
				+        [dst_u] "+r" (dst_u),
			
 
				+        [dst_v] "+r" (dst_v),
			
 
				+        [x] "=&r" (x),
			
 
				+        [y] "+r" (y)
			
 
				+      : [width] "r" (width)
			
 
				+      // $t6 is written by the block loop, so it must be listed here too.
			
 
				+      : "t0", "t1", "t2", "t3", "t4",
			
 
				+      "t5", "t6", "t7", "t8", "t9"
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Convert (4 Y and 2 VU) I422 and arrange RGB values into
			
 
				+// t5 = | 0 | B0 | 0 | b0 |
			
 
				+// t4 = | 0 | B1 | 0 | b1 |
			
 
				+// t9 = | 0 | G0 | 0 | g0 |
			
 
				+// t8 = | 0 | G1 | 0 | g1 |
			
 
				+// t2 = | 0 | R0 | 0 | r0 |
			
 
				+// t1 = | 0 | R1 | 0 | r1 |
			
 
				+//
			
 
				+// YUVTORGB is a fragment of assembly text meant to be pasted into an
			
 
				+// __asm__ template. It reads 4 bytes through %[y_buf] and 2 bytes each
			
 
				+// through %[u_buf] and %[v_buf], and advances %[u_buf] and %[v_buf] by 2;
			
 
				+// %[y_buf] is not advanced here and is left to the user of the macro.
			
 
				+// It reads $s0..$s5, which the user must load beforehand, and overwrites
			
 
				+// $t0..$t6, $t8 and $t9.
			
 
				+// NOTE(review): the trailing subtract-$s5 / saturating shift-left 8 /
			
 
				+// shift-right 8 / add-$s5 sequence looks like a clamp of every channel to
			
 
				+// 0..255 (with $s5 = 128) - confirm against the DSP ASE manual.
			
 
				+#define YUVTORGB                                                               \
			
 
				+      "lw                $t0, 0(%[y_buf])       \n"                            \
			
 
				+      "lhu               $t1, 0(%[u_buf])       \n"                            \
			
 
				+      "lhu               $t2, 0(%[v_buf])       \n"                            \
			
 
				+      "preceu.ph.qbr     $t1, $t1               \n"                            \
			
 
				+      "preceu.ph.qbr     $t2, $t2               \n"                            \
			
 
				+      "preceu.ph.qbra    $t3, $t0               \n"                            \
			
 
				+      "preceu.ph.qbla    $t0, $t0               \n"                            \
			
 
				+      "subu.ph           $t1, $t1, $s5          \n"                            \
			
 
				+      "subu.ph           $t2, $t2, $s5          \n"                            \
			
 
				+      "subu.ph           $t3, $t3, $s4          \n"                            \
			
 
				+      "subu.ph           $t0, $t0, $s4          \n"                            \
			
 
				+      "mul.ph            $t3, $t3, $s0          \n"                            \
			
 
				+      "mul.ph            $t0, $t0, $s0          \n"                            \
			
 
				+      "shll.ph           $t4, $t1, 0x7          \n"                            \
			
 
				+      "subu.ph           $t4, $t4, $t1          \n"                            \
			
 
				+      "mul.ph            $t6, $t1, $s1          \n"                            \
			
 
				+      "mul.ph            $t1, $t2, $s2          \n"                            \
			
 
				+      "addq_s.ph         $t5, $t4, $t3          \n"                            \
			
 
				+      "addq_s.ph         $t4, $t4, $t0          \n"                            \
			
 
				+      "shra.ph           $t5, $t5, 6            \n"                            \
			
 
				+      "shra.ph           $t4, $t4, 6            \n"                            \
			
 
				+      "addiu             %[u_buf], 2            \n"                            \
			
 
				+      "addiu             %[v_buf], 2            \n"                            \
			
 
				+      "addu.ph           $t6, $t6, $t1          \n"                            \
			
 
				+      "mul.ph            $t1, $t2, $s3          \n"                            \
			
 
				+      "addu.ph           $t9, $t6, $t3          \n"                            \
			
 
				+      "addu.ph           $t8, $t6, $t0          \n"                            \
			
 
				+      "shra.ph           $t9, $t9, 6            \n"                            \
			
 
				+      "shra.ph           $t8, $t8, 6            \n"                            \
			
 
				+      "addu.ph           $t2, $t1, $t3          \n"                            \
			
 
				+      "addu.ph           $t1, $t1, $t0          \n"                            \
			
 
				+      "shra.ph           $t2, $t2, 6            \n"                            \
			
 
				+      "shra.ph           $t1, $t1, 6            \n"                            \
			
 
				+      "subu.ph           $t5, $t5, $s5          \n"                            \
			
 
				+      "subu.ph           $t4, $t4, $s5          \n"                            \
			
 
				+      "subu.ph           $t9, $t9, $s5          \n"                            \
			
 
				+      "subu.ph           $t8, $t8, $s5          \n"                            \
			
 
				+      "subu.ph           $t2, $t2, $s5          \n"                            \
			
 
				+      "subu.ph           $t1, $t1, $s5          \n"                            \
			
 
				+      "shll_s.ph         $t5, $t5, 8            \n"                            \
			
 
				+      "shll_s.ph         $t4, $t4, 8            \n"                            \
			
 
				+      "shll_s.ph         $t9, $t9, 8            \n"                            \
			
 
				+      "shll_s.ph         $t8, $t8, 8            \n"                            \
			
 
				+      "shll_s.ph         $t2, $t2, 8            \n"                            \
			
 
				+      "shll_s.ph         $t1, $t1, 8            \n"                            \
			
 
				+      "shra.ph           $t5, $t5, 8            \n"                            \
			
 
				+      "shra.ph           $t4, $t4, 8            \n"                            \
			
 
				+      "shra.ph           $t9, $t9, 8            \n"                            \
			
 
				+      "shra.ph           $t8, $t8, 8            \n"                            \
			
 
				+      "shra.ph           $t2, $t2, 8            \n"                            \
			
 
				+      "shra.ph           $t1, $t1, 8            \n"                            \
			
 
				+      "addu.ph           $t5, $t5, $s5          \n"                            \
			
 
				+      "addu.ph           $t4, $t4, $s5          \n"                            \
			
 
				+      "addu.ph           $t9, $t9, $s5          \n"                            \
			
 
				+      "addu.ph           $t8, $t8, $s5          \n"                            \
			
 
				+      "addu.ph           $t2, $t2, $s5          \n"                            \
			
 
				+      "addu.ph           $t1, $t1, $s5          \n"
			
 
				+
			
 
				+// TODO(fbarchard): accept yuv conversion constants.
			
 
				+// Converts a row of I422 samples to 32-bit pixels, 4 pixels per iteration:
			
 
				+// 4 bytes are consumed from y_buf, 2 each from u_buf and v_buf, and 16
			
 
				+// bytes are stored to rgb_buf. Per the register comments below, every
			
 
				+// stored word is laid out as |ff|R|G|B|.
			
 
				+// The loop subtracts 4 from width and repeats until width is exactly zero,
			
 
				+// so width has to be a multiple of 4; width == 0 stores nothing.
			
 
				+// yuvconstants is not referenced by the assembly: the coefficients are the
			
 
				+// immediates loaded into $s0..$s5 below.
			
 
				+void I422ToARGBRow_DSPR2(const uint8* y_buf,
			
 
				+                              const uint8* u_buf,
			
 
				+                              const uint8* v_buf,
			
 
				+                              uint8* rgb_buf,
			
 
				+                              const struct YuvConstants* yuvconstants,
			
 
				+                              int width) {
			
 
				+  __asm__ __volatile__ (
			
 
				+    ".set push                                \n"
			
 
				+    ".set noreorder                           \n"
			
 
				+    "beqz              %[width], 2f           \n"
			
 
				+    " repl.ph          $s0, 74                \n"  // |YG|YG| = |74|74| (delay slot)
			
 
				+    "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25|
			
 
				+    "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52|
			
 
				+    "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102|
			
 
				+    "repl.ph           $s4, 16                \n"  // |0|16|0|16|
			
 
				+    "repl.ph           $s5, 128               \n"  // |128|128| // clipping
			
 
				+    "lui               $s6, 0xff00            \n"
			
 
				+    "ori               $s6, 0xff00            \n"  // |ff|00|ff|00|
			
 
				+
			
 
				+   "1:                                        \n"
			
 
				+      YUVTORGB
			
 
				+// Arranging into argb format
			
 
				+    "precr.qb.ph       $t4, $t8, $t4          \n"  // |G1|g1|B1|b1|
			
 
				+    "precr.qb.ph       $t5, $t9, $t5          \n"  // |G0|g0|B0|b0|
			
 
				+    "addiu             %[width], -4           \n"
			
 
				+    "precrq.qb.ph      $t8, $t4, $t5          \n"  // |G1|B1|G0|B0|
			
 
				+    "precr.qb.ph       $t9, $t4, $t5          \n"  // |g1|b1|g0|b0|
			
 
				+    "precr.qb.ph       $t2, $t1, $t2          \n"  // |R1|r1|R0|r0|
			
 
				+
			
 
				+    "addiu             %[y_buf], 4            \n"
			
 
				+    "preceu.ph.qbla    $t1, $t2               \n"  // |0 |R1|0 |R0|
			
 
				+    "preceu.ph.qbra    $t2, $t2               \n"  // |0 |r1|0 |r0|
			
 
				+    "or                $t1, $t1, $s6          \n"  // |ff|R1|ff|R0|
			
 
				+    "or                $t2, $t2, $s6          \n"  // |ff|r1|ff|r0|
			
 
				+    "precrq.ph.w       $t0, $t2, $t9          \n"  // |ff|r1|g1|b1|
			
 
				+    "precrq.ph.w       $t3, $t1, $t8          \n"  // |ff|R1|G1|B1|
			
 
				+    "sll               $t9, $t9, 16           \n"
			
 
				+    "sll               $t8, $t8, 16           \n"
			
 
				+    "packrl.ph         $t2, $t2, $t9          \n"  // |ff|r0|g0|b0|
			
 
				+    "packrl.ph         $t1, $t1, $t8          \n"  // |ff|R0|G0|B0|
			
 
				+// Store results.
			
 
				+    "sw                $t2, 0(%[rgb_buf])     \n"
			
 
				+    "sw                $t0, 4(%[rgb_buf])     \n"
			
 
				+    "sw                $t1, 8(%[rgb_buf])     \n"
			
 
				+    "sw                $t3, 12(%[rgb_buf])    \n"
			
 
				+    "bnez              %[width], 1b           \n"
			
 
				+    " addiu            %[rgb_buf], 16         \n"  // delay slot
			
 
				+   "2:                                        \n"
			
 
				+    ".set pop                                 \n"
			
 
				+      :[y_buf] "+r" (y_buf),
			
 
				+       [u_buf] "+r" (u_buf),
			
 
				+       [v_buf] "+r" (v_buf),
			
 
				+       [width] "+r" (width),
			
 
				+       [rgb_buf] "+r" (rgb_buf)
			
 
				+      :
			
 
				+      : "t0", "t1",  "t2", "t3",  "t4", "t5",
			
 
				+      "t6", "t7", "t8", "t9",
			
 
				+      "s0", "s1", "s2", "s3",
			
 
				+      "s4", "s5", "s6"
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Bilinear filter 8x2 -> 8x1
			
 
				+// Blends two source rows into one destination row, byte by byte:
			
 
				+//   dst = (row0 * (256 - source_y_fraction) + row1 * source_y_fraction) >> 8
			
 
				+// (low 8 bits kept), where row0 starts at src_ptr and row1 at
			
 
				+// src_ptr + src_stride.
			
 
				+// 8 bytes are produced per iteration and the loop condition is tested after
			
 
				+// the body, so at least 8 bytes are always written and dst_width is in
			
 
				+// effect rounded up to a multiple of 8.
			
 
				+// NOTE(review): lw/sw are used, so dst_ptr and both source rows presumably
			
 
				+// need 4-byte alignment - confirm with the caller.
			
 
				+void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
			
 
				+                               ptrdiff_t src_stride, int dst_width,
			
 
				+                               int source_y_fraction) {
			
 
				+    // Weight of the first row; the two weights add up to 256.
			
 
				+    int y0_fraction = 256 - source_y_fraction;
			
 
				+    // Start of the second source row.
			
 
				+    const uint8* src_ptr1 = src_ptr + src_stride;
			
 
				+
			
 
				+  __asm__ __volatile__ (
			
 
				+     ".set push                                           \n"
			
 
				+     ".set noreorder                                      \n"
			
 
				+
			
 
				+     // Replicate each weight into both halfwords of a register.
			
 
				+     "replv.ph          $t0, %[y0_fraction]               \n"
			
 
				+     "replv.ph          $t1, %[source_y_fraction]         \n"
			
 
				+
			
 
				+   "1:                                                    \n"
			
 
				+     "lw                $t2, 0(%[src_ptr])                \n"
			
 
				+     "lw                $t3, 0(%[src_ptr1])               \n"
			
 
				+     "lw                $t4, 4(%[src_ptr])                \n"
			
 
				+     "lw                $t5, 4(%[src_ptr1])               \n"
			
 
				+     // Multiply the upper (qbl) and lower (qbr) byte pairs by the weights.
			
 
				+     "muleu_s.ph.qbl    $t6, $t2, $t0                     \n"
			
 
				+     "muleu_s.ph.qbr    $t7, $t2, $t0                     \n"
			
 
				+     "muleu_s.ph.qbl    $t8, $t3, $t1                     \n"
			
 
				+     "muleu_s.ph.qbr    $t9, $t3, $t1                     \n"
			
 
				+     "muleu_s.ph.qbl    $t2, $t4, $t0                     \n"
			
 
				+     "muleu_s.ph.qbr    $t3, $t4, $t0                     \n"
			
 
				+     "muleu_s.ph.qbl    $t4, $t5, $t1                     \n"
			
 
				+     "muleu_s.ph.qbr    $t5, $t5, $t1                     \n"
			
 
				+     // Sum the weighted rows, then scale back down by 256.
			
 
				+     "addq.ph           $t6, $t6, $t8                     \n"
			
 
				+     "addq.ph           $t7, $t7, $t9                     \n"
			
 
				+     "addq.ph           $t2, $t2, $t4                     \n"
			
 
				+     "addq.ph           $t3, $t3, $t5                     \n"
			
 
				+     "shra.ph           $t6, $t6, 8                       \n"
			
 
				+     "shra.ph           $t7, $t7, 8                       \n"
			
 
				+     "shra.ph           $t2, $t2, 8                       \n"
			
 
				+     "shra.ph           $t3, $t3, 8                       \n"
			
 
				+     // Pack the low byte of every halfword back into two words.
			
 
				+     "precr.qb.ph       $t6, $t6, $t7                     \n"
			
 
				+     "precr.qb.ph       $t2, $t2, $t3                     \n"
			
 
				+     "addiu             %[src_ptr], %[src_ptr], 8         \n"
			
 
				+     "addiu             %[src_ptr1], %[src_ptr1], 8       \n"
			
 
				+     "addiu             %[dst_width], %[dst_width], -8    \n"
			
 
				+     "sw                $t6, 0(%[dst_ptr])                \n"
			
 
				+     "sw                $t2, 4(%[dst_ptr])                \n"
			
 
				+     "bgtz              %[dst_width], 1b                  \n"
			
 
				+     " addiu            %[dst_ptr], %[dst_ptr], 8         \n"  // delay slot
			
 
				+
			
 
				+     ".set pop                                            \n"
			
 
				+  : [dst_ptr] "+r" (dst_ptr),
			
 
				+    [src_ptr1] "+r" (src_ptr1),
			
 
				+    [src_ptr] "+r" (src_ptr),
			
 
				+    [dst_width] "+r" (dst_width)
			
 
				+  // src_stride is passed as an operand but never used in the template.
			
 
				+  : [source_y_fraction] "r" (source_y_fraction),
			
 
				+    [y0_fraction] "r" (y0_fraction),
			
 
				+    [src_stride] "r" (src_stride)
			
 
				+  : "t0", "t1", "t2", "t3", "t4", "t5",
			
 
				+    "t6", "t7", "t8", "t9"
			
 
				+  );
			
 
				+}
			
 
				+#endif  // __mips_dsp_rev >= 2
			
 
				+
			
 
				+#endif  // defined(__mips__)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/row_neon.cc
+++ b/src/jni/libyuv/source/row_neon.cc
--- a/src/jni/libyuv/source/row_neon64.cc
+++ b/src/jni/libyuv/source/row_neon64.cc
--- a/src/jni/libyuv/source/row_win.cc
+++ b/src/jni/libyuv/source/row_win.cc
--- a/src/jni/libyuv/source/scale.cc
+++ b/src/jni/libyuv/source/scale.cc
--- a/src/jni/libyuv/source/scale_any.cc
+++ b/src/jni/libyuv/source/scale_any.cc
@@ -0,0 +1,221 @@
 
				+/*
			
 
				+ *  Copyright 2015 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/scale.h"
			
 
				+#include "libyuv/scale_row.h"
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				// Column scalers for any width: ScaleFilterCols, ScaleARGBCols and
				// ScaleARGBFilterCols.
				// CANY defines NAMEANY, which hands the leading (dst_width & ~MASK) pixels to
				// TERP_SIMD and the trailing (dst_width & MASK) pixels to TERP_C.  BPP is the
				// number of bytes per destination pixel.  The tail starts at source position
				// x + simd_width * dx.
				#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK)                         \
				    void NAMEANY(uint8* dst_ptr, const uint8* src_ptr,                      \
				                 int dst_width, int x, int dx) {                            \
				      const int tail_width = dst_width & MASK;                              \
				      const int simd_width = dst_width & ~MASK;                             \
				      if (simd_width > 0) {                                                 \
				        TERP_SIMD(dst_ptr, src_ptr, simd_width, x, dx);                     \
				      }                                                                     \
				      /* TERP_C is always called, even when tail_width is 0. */             \
				      TERP_C(dst_ptr + simd_width * BPP, src_ptr,                           \
				             tail_width, x + simd_width * dx, dx);                          \
				    }
			
 
				+
			
 
				// Arguments: NAMEANY, TERP_SIMD, TERP_C, BPP, MASK.

				// 1 byte per pixel; the SIMD part is a multiple of 8 pixels.
				#if defined(HAS_SCALEFILTERCOLS_NEON)
				CANY(ScaleFilterCols_Any_NEON,
				     ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
				#endif

				// 4 bytes per pixel; the SIMD part is a multiple of 8 pixels.
				#if defined(HAS_SCALEARGBCOLS_NEON)
				CANY(ScaleARGBCols_Any_NEON,
				     ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
				#endif

				// 4 bytes per pixel; the SIMD part is a multiple of 4 pixels.
				#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
				CANY(ScaleARGBFilterCols_Any_NEON,
				     ScaleARGBFilterCols_NEON, ScaleARGBFilterCols_C, 4, 3)
				#endif

				#undef CANY
			
 
				+
			
 
				// Fixed scale down for any width.
				// SDANY defines NAMEANY, which gives the largest multiple of (MASK + 1)
				// destination pixels to SCALEROWDOWN_SIMD and the remaining pixels to
				// SCALEROWDOWN_C.  BPP is bytes per pixel.  FACTOR is the source/destination
				// pixel ratio and is substituted textually into (simd_width * FACTOR): a
				// ratio such as 4 / 3 must be passed without parentheses so that it is
				// evaluated as (simd_width * 4) / 3 rather than simd_width * 1.
				#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
				    void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride,                 \
				                 uint8* dst_ptr, int dst_width) {                            \
				      const int tail_width =                                                 \
				          (int)((unsigned int)dst_width % (MASK + 1));                       \
				      const int simd_width = dst_width - tail_width;                         \
				      if (simd_width > 0) {                                                  \
				        SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, simd_width);         \
				      }                                                                      \
				      /* The C row function is always called, even for a 0 pixel tail. */    \
				      SCALEROWDOWN_C(src_ptr + (simd_width * FACTOR) * BPP, src_stride,      \
				                     dst_ptr + simd_width * BPP, tail_width);                \
				    }
			
 
				+
			
 
				// Fixed scale down for odd source width.  Used by I420Blend subsampling.
				// Since dst_width is (width + 1) / 2, the last destination pixel has only a
				// single source column.  NAMEANY therefore splits the first dst_width - 1
				// pixels into a multiple of (MASK + 1) pixels for SCALEROWDOWN_SIMD plus a
				// remainder r, and passes r + 1 pixels (the remainder and the odd last pixel)
				// to SCALEROWDOWN_C.
				// NOTE(review): SCALEROWDOWN_C is assumed to be an odd-width C variant (e.g.
				// ScaleRowDown2Box_Odd_C) that treats the last of its pixels as the
				// single-column one -- confirm against its definition.
				// dst_width must be at least 1.
				#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
				    void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride,                 \
				                 uint8* dst_ptr, int dst_width) {                            \
				      int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1));             \
				      /* Exclude the odd last pixel so that n stays a multiple of */         \
				      /* MASK + 1 and the SIMD code never touches the missing column. */     \
				      int n = (dst_width - 1) - r;                                           \
				      if (n > 0) {                                                           \
				        SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n);                  \
				      }                                                                      \
				      /* r full pixels plus the final single-column pixel. */                \
				      SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride,               \
				                     dst_ptr + n * BPP, r + 1);                              \
				    }
			
 
				+
			
 
				// Arguments: NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK.
				// FACTOR ratios such as 4 / 3 are deliberately left unparenthesized; they are
				// substituted textually into a multiplication inside the macro.

				// 1/2 scale, 1 byte per pixel.
				#if defined(HAS_SCALEROWDOWN2_SSSE3)
				SDANY(ScaleRowDown2_Any_SSSE3,
				      ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
				SDANY(ScaleRowDown2Linear_Any_SSSE3,
				      ScaleRowDown2Linear_SSSE3, ScaleRowDown2Linear_C, 2, 1, 15)
				SDANY(ScaleRowDown2Box_Any_SSSE3,
				      ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C, 2, 1, 15)
				SDODD(ScaleRowDown2Box_Odd_SSSE3,
				      ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_Odd_C, 2, 1, 15)
				#endif
				#if defined(HAS_SCALEROWDOWN2_AVX2)
				SDANY(ScaleRowDown2_Any_AVX2,
				      ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
				SDANY(ScaleRowDown2Linear_Any_AVX2,
				      ScaleRowDown2Linear_AVX2, ScaleRowDown2Linear_C, 2, 1, 31)
				SDANY(ScaleRowDown2Box_Any_AVX2,
				      ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C, 2, 1, 31)
				SDODD(ScaleRowDown2Box_Odd_AVX2,
				      ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C, 2, 1, 31)
				#endif
				#if defined(HAS_SCALEROWDOWN2_NEON)
				SDANY(ScaleRowDown2_Any_NEON,
				      ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
				SDANY(ScaleRowDown2Linear_Any_NEON,
				      ScaleRowDown2Linear_NEON, ScaleRowDown2Linear_C, 2, 1, 15)
				SDANY(ScaleRowDown2Box_Any_NEON,
				      ScaleRowDown2Box_NEON, ScaleRowDown2Box_C, 2, 1, 15)
				SDODD(ScaleRowDown2Box_Odd_NEON,
				      ScaleRowDown2Box_NEON, ScaleRowDown2Box_Odd_C, 2, 1, 15)
				#endif

				// 1/4 scale, 1 byte per pixel.
				#if defined(HAS_SCALEROWDOWN4_SSSE3)
				SDANY(ScaleRowDown4_Any_SSSE3,
				      ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
				SDANY(ScaleRowDown4Box_Any_SSSE3,
				      ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C, 4, 1, 7)
				#endif
				#if defined(HAS_SCALEROWDOWN4_AVX2)
				SDANY(ScaleRowDown4_Any_AVX2,
				      ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
				SDANY(ScaleRowDown4Box_Any_AVX2,
				      ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C, 4, 1, 15)
				#endif
				#if defined(HAS_SCALEROWDOWN4_NEON)
				SDANY(ScaleRowDown4_Any_NEON,
				      ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
				SDANY(ScaleRowDown4Box_Any_NEON,
				      ScaleRowDown4Box_NEON, ScaleRowDown4Box_C, 4, 1, 7)
				#endif

				// 3/4 scale, 1 byte per pixel; the SIMD part is a multiple of 24 pixels.
				#if defined(HAS_SCALEROWDOWN34_SSSE3)
				SDANY(ScaleRowDown34_Any_SSSE3,
				      ScaleRowDown34_SSSE3, ScaleRowDown34_C, 4 / 3, 1, 23)
				SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
				      ScaleRowDown34_0_Box_SSSE3, ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
				SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
				      ScaleRowDown34_1_Box_SSSE3, ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
				#endif
				#if defined(HAS_SCALEROWDOWN34_NEON)
				SDANY(ScaleRowDown34_Any_NEON,
				      ScaleRowDown34_NEON, ScaleRowDown34_C, 4 / 3, 1, 23)
				SDANY(ScaleRowDown34_0_Box_Any_NEON,
				      ScaleRowDown34_0_Box_NEON, ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
				SDANY(ScaleRowDown34_1_Box_Any_NEON,
				      ScaleRowDown34_1_Box_NEON, ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
				#endif

				// 3/8 scale, 1 byte per pixel.  The SSSE3 box variants use a 6 pixel step
				// (MASK 5); all other variants use a 12 pixel step (MASK 11).
				#if defined(HAS_SCALEROWDOWN38_SSSE3)
				SDANY(ScaleRowDown38_Any_SSSE3,
				      ScaleRowDown38_SSSE3, ScaleRowDown38_C, 8 / 3, 1, 11)
				SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
				      ScaleRowDown38_3_Box_SSSE3, ScaleRowDown38_3_Box_C, 8 / 3, 1, 5)
				SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
				      ScaleRowDown38_2_Box_SSSE3, ScaleRowDown38_2_Box_C, 8 / 3, 1, 5)
				#endif
				#if defined(HAS_SCALEROWDOWN38_NEON)
				SDANY(ScaleRowDown38_Any_NEON,
				      ScaleRowDown38_NEON, ScaleRowDown38_C, 8 / 3, 1, 11)
				SDANY(ScaleRowDown38_3_Box_Any_NEON,
				      ScaleRowDown38_3_Box_NEON, ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
				SDANY(ScaleRowDown38_2_Box_Any_NEON,
				      ScaleRowDown38_2_Box_NEON, ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
				#endif

				// ARGB 1/2 scale, 4 bytes per pixel.
				#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
				SDANY(ScaleARGBRowDown2_Any_SSE2,
				      ScaleARGBRowDown2_SSE2, ScaleARGBRowDown2_C, 2, 4, 3)
				SDANY(ScaleARGBRowDown2Linear_Any_SSE2,
				      ScaleARGBRowDown2Linear_SSE2, ScaleARGBRowDown2Linear_C, 2, 4, 3)
				SDANY(ScaleARGBRowDown2Box_Any_SSE2,
				      ScaleARGBRowDown2Box_SSE2, ScaleARGBRowDown2Box_C, 2, 4, 3)
				#endif
				#if defined(HAS_SCALEARGBROWDOWN2_NEON)
				SDANY(ScaleARGBRowDown2_Any_NEON,
				      ScaleARGBRowDown2_NEON, ScaleARGBRowDown2_C, 2, 4, 7)
				SDANY(ScaleARGBRowDown2Linear_Any_NEON,
				      ScaleARGBRowDown2Linear_NEON, ScaleARGBRowDown2Linear_C, 2, 4, 7)
				SDANY(ScaleARGBRowDown2Box_Any_NEON,
				      ScaleARGBRowDown2Box_NEON, ScaleARGBRowDown2Box_C, 2, 4, 7)
				#endif

				// Both generator macros are finished with; do not leak them past this point.
				#undef SDANY
				#undef SDODD
			
 
				+
			
 
				// Scale down by even scale factor, for any width.
				// SDAANY defines NAMEANY, which gives the largest multiple of (MASK + 1)
				// destination pixels to SCALEROWDOWN_SIMD and the remaining pixels to
				// SCALEROWDOWN_C.  src_stepx is the number of source pixels advanced per
				// destination pixel and BPP is the number of bytes per pixel.
				#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK)        \
				    void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx,  \
				                 uint8* dst_ptr, int dst_width) {                            \
				      const int tail_width =                                                 \
				          (int)((unsigned int)dst_width % (MASK + 1));                       \
				      const int simd_width = dst_width - tail_width;                         \
				      if (simd_width > 0) {                                                  \
				        SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx,                    \
				                          dst_ptr, simd_width);                              \
				      }                                                                      \
				      /* The C row function is always called, even for a 0 pixel tail. */    \
				      SCALEROWDOWN_C(src_ptr + (simd_width * src_stepx) * BPP, src_stride,   \
				                     src_stepx, dst_ptr + simd_width * BPP, tail_width);     \
				    }
			
 
				+
			
 
				// Arguments: NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK.
				// ARGB (4 bytes per pixel); the SIMD part is a multiple of 4 pixels.
				#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
				SDAANY(ScaleARGBRowDownEven_Any_SSE2,
				       ScaleARGBRowDownEven_SSE2, ScaleARGBRowDownEven_C, 4, 3)
				SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2,
				       ScaleARGBRowDownEvenBox_SSE2, ScaleARGBRowDownEvenBox_C, 4, 3)
				#endif
				#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
				SDAANY(ScaleARGBRowDownEven_Any_NEON,
				       ScaleARGBRowDownEven_NEON, ScaleARGBRowDownEven_C, 4, 3)
				SDAANY(ScaleARGBRowDownEvenBox_Any_NEON,
				       ScaleARGBRowDownEvenBox_NEON, ScaleARGBRowDownEvenBox_C, 4, 3)
				#endif

				// Matches the other generator macros in this file, which are undefined once
				// their instantiations are done.
				#undef SDAANY
			
 
				+
			
 
				// Add rows box filter scale down, for any width.
				// SAANY defines NAMEANY, which passes the leading (src_width & ~MASK) elements
				// to SCALEADDROW_SIMD and the trailing (src_width & MASK) elements to
				// SCALEADDROW_C.  Source (uint8) and destination (uint16) advance by the same
				// element count.
				#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK)                \
				  void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) {       \
				      const int tail_width = src_width & MASK;                               \
				      const int simd_width = src_width & ~MASK;                              \
				      if (simd_width > 0) {                                                  \
				        SCALEADDROW_SIMD(src_ptr, dst_ptr, simd_width);                      \
				      }                                                                      \
				      /* SCALEADDROW_C is always called, even when tail_width is 0. */       \
				      SCALEADDROW_C(src_ptr + simd_width, dst_ptr + simd_width,              \
				                    tail_width);                                             \
				    }
			
 
				+
			
 
				// Arguments: NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK.
				#if defined(HAS_SCALEADDROW_SSE2)
				SAANY(ScaleAddRow_Any_SSE2,
				      ScaleAddRow_SSE2, ScaleAddRow_C, 15)
				#endif
				#if defined(HAS_SCALEADDROW_AVX2)
				SAANY(ScaleAddRow_Any_AVX2,
				      ScaleAddRow_AVX2, ScaleAddRow_C, 31)
				#endif
				#if defined(HAS_SCALEADDROW_NEON)
				SAANY(ScaleAddRow_Any_NEON,
				      ScaleAddRow_NEON, ScaleAddRow_C, 15)
				#endif

				#undef SAANY
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/src/jni/libyuv/source/scale_argb.cc
+++ b/src/jni/libyuv/source/scale_argb.cc
@@ -0,0 +1,859 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/scale.h"
			
 
				+
			
 
				+#include <assert.h>
			
 
				+#include <string.h>
			
 
				+
			
 
				+#include "libyuv/cpu_id.h"
			
 
				+#include "libyuv/planar_functions.h"  // For CopyARGB
			
 
				+#include "libyuv/row.h"
			
 
				+#include "libyuv/scale_row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				// Returns the magnitude of v: v itself when it is non-negative, -v otherwise.
				static __inline int Abs(int v) {
				  if (v < 0) {
				    return -v;
				  }
				  return v;
				}
			
 
				+
			
 
				+// ScaleARGB ARGB, 1/2
			
 
				+// This is an optimized version for scaling down a ARGB to 1/2 of
			
 
				+// its original size.
			
 
				+static void ScaleARGBDown2(int src_width, int src_height,
			
 
				+                           int dst_width, int dst_height,
			
 
				+                           int src_stride, int dst_stride,
			
 
				+                           const uint8* src_argb, uint8* dst_argb,
			
 
				+                           int x, int dx, int y, int dy,
			
 
				+                           enum FilterMode filtering) {
			
 
				+  int j;
			
 
				+  int row_stride = src_stride * (dy >> 16);
			
 
				+  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                            uint8* dst_argb, int dst_width) =
			
 
				+    filtering == kFilterNone ? ScaleARGBRowDown2_C :
			
 
				+        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
			
 
				+        ScaleARGBRowDown2Box_C);
			
 
				+  assert(dx == 65536 * 2);  // Test scale factor of 2.
			
 
				+  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
			
 
				+  // Advance to odd row, even column.
			
 
				+  if (filtering == kFilterBilinear) {
			
 
				+    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
			
 
				+  } else {
			
 
				+    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
			
 
				+  }
			
 
				+
			
 
				+#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
			
 
				+  if (TestCpuFlag(kCpuHasSSE2)) {
			
 
				+    ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
			
 
				+        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
			
 
				+        ScaleARGBRowDown2Box_Any_SSE2);
			
 
				+    if (IS_ALIGNED(dst_width, 4)) {
			
 
				+      ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
			
 
				+          (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
			
 
				+          ScaleARGBRowDown2Box_SSE2);
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_SCALEARGBROWDOWN2_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
			
 
				+        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
			
 
				+        ScaleARGBRowDown2Box_Any_NEON);
			
 
				+    if (IS_ALIGNED(dst_width, 8)) {
			
 
				+      ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
			
 
				+          (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
			
 
				+          ScaleARGBRowDown2Box_NEON);
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+
			
 
				+  if (filtering == kFilterLinear) {
			
 
				+    src_stride = 0;
			
 
				+  }
			
 
				+  for (j = 0; j < dst_height; ++j) {
			
 
				+    ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
			
 
				+    src_argb += row_stride;
			
 
				+    dst_argb += dst_stride;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// ScaleARGB ARGB, 1/4
			
 
				+// This is an optimized version for scaling down a ARGB to 1/4 of
			
 
				+// its original size.
			
 
				+static void ScaleARGBDown4Box(int src_width, int src_height,
			
 
				+                              int dst_width, int dst_height,
			
 
				+                              int src_stride, int dst_stride,
			
 
				+                              const uint8* src_argb, uint8* dst_argb,
			
 
				+                              int x, int dx, int y, int dy) {
			
 
				+  int j;
			
 
				+  // Allocate 2 rows of ARGB.
			
 
				+  const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
			
 
				+  align_buffer_64(row, kRowSize * 2);
			
 
				+  int row_stride = src_stride * (dy >> 16);
			
 
				+  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+    uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
			
 
				+  // Advance to odd row, even column.
			
 
				+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
			
 
				+  assert(dx == 65536 * 4);  // Test scale factor of 4.
			
 
				+  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
			
 
				+#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
			
 
				+  if (TestCpuFlag(kCpuHasSSE2)) {
			
 
				+    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
			
 
				+    if (IS_ALIGNED(dst_width, 4)) {
			
 
				+      ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_SCALEARGBROWDOWN2_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
			
 
				+    if (IS_ALIGNED(dst_width, 8)) {
			
 
				+      ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+
			
 
				+  for (j = 0; j < dst_height; ++j) {
			
 
				+    ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
			
 
				+    ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
			
 
				+                      row + kRowSize, dst_width * 2);
			
 
				+    ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
			
 
				+    src_argb += row_stride;
			
 
				+    dst_argb += dst_stride;
			
 
				+  }
			
 
				+  free_aligned_buffer_64(row);
			
 
				+}
			
 
				+
			
 
				+// ScaleARGB ARGB Even
			
 
				+// This is an optimized version for scaling down a ARGB to even
			
 
				+// multiple of its original size.
			
 
				+static void ScaleARGBDownEven(int src_width, int src_height,
			
 
				+                              int dst_width, int dst_height,
			
 
				+                              int src_stride, int dst_stride,
			
 
				+                              const uint8* src_argb, uint8* dst_argb,
			
 
				+                              int x, int dx, int y, int dy,
			
 
				+                              enum FilterMode filtering) {
			
 
				+  int j;
			
 
				+  int col_step = dx >> 16;
			
 
				+  int row_stride = (dy >> 16) * src_stride;
			
 
				+  void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
			
 
				+                               int src_step, uint8* dst_argb, int dst_width) =
			
 
				+      filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
			
 
				+  assert(IS_ALIGNED(src_width, 2));
			
 
				+  assert(IS_ALIGNED(src_height, 2));
			
 
				+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
			
 
				+#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
			
 
				+  if (TestCpuFlag(kCpuHasSSE2)) {
			
 
				+    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
			
 
				+        ScaleARGBRowDownEven_Any_SSE2;
			
 
				+    if (IS_ALIGNED(dst_width, 4)) {
			
 
				+      ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
			
 
				+          ScaleARGBRowDownEven_SSE2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
			
 
				+        ScaleARGBRowDownEven_Any_NEON;
			
 
				+    if (IS_ALIGNED(dst_width, 4)) {
			
 
				+      ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
			
 
				+          ScaleARGBRowDownEven_NEON;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+
			
 
				+  if (filtering == kFilterLinear) {
			
 
				+    src_stride = 0;
			
 
				+  }
			
 
				+  for (j = 0; j < dst_height; ++j) {
			
 
				+    ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
			
 
				+    src_argb += row_stride;
			
 
				+    dst_argb += dst_stride;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// Scale ARGB down with bilinear interpolation.
			
 
				+static void ScaleARGBBilinearDown(int src_width, int src_height,
			
 
				+                                  int dst_width, int dst_height,
			
 
				+                                  int src_stride, int dst_stride,
			
 
				+                                  const uint8* src_argb, uint8* dst_argb,
			
 
				+                                  int x, int dx, int y, int dy,
			
 
				+                                  enum FilterMode filtering) {
			
 
				+  int j;
			
 
				+  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
			
 
				+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
			
 
				+      InterpolateRow_C;
			
 
				+  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
			
 
				+      int dst_width, int x, int dx) =
			
 
				+      (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
			
 
				+  int64 xlast = x + (int64)(dst_width - 1) * dx;
			
 
				+  int64 xl = (dx >= 0) ? x : xlast;
			
 
				+  int64 xr = (dx >= 0) ? xlast : x;
			
 
				+  int clip_src_width;
			
 
				+  xl = (xl >> 16) & ~3;  // Left edge aligned.
			
 
				+  xr = (xr >> 16) + 1;  // Right most pixel used.  Bilinear uses 2 pixels.
			
 
				+  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
			
 
				+  if (xr > src_width) {
			
 
				+    xr = src_width;
			
 
				+  }
			
 
				+  clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
			
 
				+  src_argb += xl * 4;
			
 
				+  x -= (int)(xl << 16);
			
 
				+#if defined(HAS_INTERPOLATEROW_SSSE3)
			
 
				+  if (TestCpuFlag(kCpuHasSSSE3)) {
			
 
				+    InterpolateRow = InterpolateRow_Any_SSSE3;
			
 
				+    if (IS_ALIGNED(clip_src_width, 16)) {
			
 
				+      InterpolateRow = InterpolateRow_SSSE3;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_INTERPOLATEROW_AVX2)
			
 
				+  if (TestCpuFlag(kCpuHasAVX2)) {
			
 
				+    InterpolateRow = InterpolateRow_Any_AVX2;
			
 
				+    if (IS_ALIGNED(clip_src_width, 32)) {
			
 
				+      InterpolateRow = InterpolateRow_AVX2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_INTERPOLATEROW_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    InterpolateRow = InterpolateRow_Any_NEON;
			
 
				+    if (IS_ALIGNED(clip_src_width, 16)) {
			
 
				+      InterpolateRow = InterpolateRow_NEON;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_INTERPOLATEROW_DSPR2)
			
 
				+  if (TestCpuFlag(kCpuHasDSPR2) &&
			
 
				+      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
			
 
				+    InterpolateRow = InterpolateRow_Any_DSPR2;
			
 
				+    if (IS_ALIGNED(clip_src_width, 4)) {
			
 
				+      InterpolateRow = InterpolateRow_DSPR2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
			
 
				+  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
			
 
				+    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
			
 
				+    if (IS_ALIGNED(dst_width, 4)) {
			
 
				+      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
			
 
				+  // Allocate a row of ARGB.
			
 
				+  {
			
 
				+    align_buffer_64(row, clip_src_width * 4);
			
 
				+
			
 
				+    const int max_y = (src_height - 1) << 16;
			
 
				+    if (y > max_y) {
			
 
				+      y = max_y;
			
 
				+    }
			
 
				+    for (j = 0; j < dst_height; ++j) {
			
 
				+      int yi = y >> 16;
			
 
				+      const uint8* src = src_argb + yi * src_stride;
			
 
				+      if (filtering == kFilterLinear) {
			
 
				+        ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
			
 
				+      } else {
			
 
				+        int yf = (y >> 8) & 255;
			
 
				+        InterpolateRow(row, src, src_stride, clip_src_width, yf);
			
 
				+        ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
			
 
				+      }
			
 
				+      dst_argb += dst_stride;
			
 
				+      y += dy;
			
 
				+      if (y > max_y) {
			
 
				+        y = max_y;
			
 
				+      }
			
 
				+    }
			
 
				+    free_aligned_buffer_64(row);
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// Scale ARGB up with bilinear interpolation.
			
 
				+static void ScaleARGBBilinearUp(int src_width, int src_height,
			
 
				+                                int dst_width, int dst_height,
			
 
				+                                int src_stride, int dst_stride,
			
 
				+                                const uint8* src_argb, uint8* dst_argb,
			
 
				+                                int x, int dx, int y, int dy,
			
 
				+                                enum FilterMode filtering) {
			
 
				+  int j;
			
 
				+  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
			
 
				+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
			
 
				+      InterpolateRow_C;
			
 
				+  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
			
 
				+      int dst_width, int x, int dx) =
			
 
				+      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
			
 
				+  const int max_y = (src_height - 1) << 16;
			
 
				+#if defined(HAS_INTERPOLATEROW_SSSE3)
			
 
				+  if (TestCpuFlag(kCpuHasSSSE3)) {
			
 
				+    InterpolateRow = InterpolateRow_Any_SSSE3;
			
 
				+    if (IS_ALIGNED(dst_width, 4)) {
			
 
				+      InterpolateRow = InterpolateRow_SSSE3;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_INTERPOLATEROW_AVX2)
			
 
				+  if (TestCpuFlag(kCpuHasAVX2)) {
			
 
				+    InterpolateRow = InterpolateRow_Any_AVX2;
			
 
				+    if (IS_ALIGNED(dst_width, 8)) {
			
 
				+      InterpolateRow = InterpolateRow_AVX2;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_INTERPOLATEROW_NEON)
			
 
				+  if (TestCpuFlag(kCpuHasNEON)) {
			
 
				+    InterpolateRow = InterpolateRow_Any_NEON;
			
 
				+    if (IS_ALIGNED(dst_width, 4)) {
			
 
				+      InterpolateRow = InterpolateRow_NEON;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_INTERPOLATEROW_DSPR2)
			
 
				+  if (TestCpuFlag(kCpuHasDSPR2) &&
			
 
				+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
			
 
				+    InterpolateRow = InterpolateRow_DSPR2;
			
 
				+  }
			
 
				+#endif
			
 
				+  if (src_width >= 32768) {
			
 
				+    ScaleARGBFilterCols = filtering ?
			
 
				+        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
			
 
				+  }
			
 
				+#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
			
 
				+  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
			
 
				+    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
			
 
				+  if (filtering && TestCpuFlag(kCpuHasNEON)) {
			
 
				+    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
			
 
				+    if (IS_ALIGNED(dst_width, 4)) {
			
 
				+      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_SCALEARGBCOLS_SSE2)
			
 
				+  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
			
 
				+    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
			
 
				+  }
			
 
				+#endif
			
 
				+#if defined(HAS_SCALEARGBCOLS_NEON)
			
 
				+  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
			
 
				+    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
			
 
				+    if (IS_ALIGNED(dst_width, 8)) {
			
 
				+      ScaleARGBFilterCols = ScaleARGBCols_NEON;
			
 
				+    }
			
 
				+  }
			
 
				+#endif
			
 
				+  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
			
 
				+    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
			
 
				+#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
			
 
				+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
			
 
				+      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
			
 
				+    }
			
 
				+#endif
			
 
				+  }
			
 
				+
			
 
				+  if (y > max_y) {
			
 
				+    y = max_y;
			
 
				+  }
			
 
				+
			
 
				+  {
			
 
				+    int yi = y >> 16;
			
 
				+    const uint8* src = src_argb + yi * src_stride;
			
 
				+
			
 
				+    // Allocate 2 rows of ARGB.
			
 
				+    const int kRowSize = (dst_width * 4 + 31) & ~31;
			
 
				+    align_buffer_64(row, kRowSize * 2);
			
 
				+
			
 
				+    uint8* rowptr = row;
			
 
				+    int rowstride = kRowSize;
			
 
				+    int lasty = yi;
			
 
				+
			
 
				+    ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
			
 
				+    if (src_height > 1) {
			
 
				+      src += src_stride;
			
 
				+    }
			
 
				+    ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
			
 
				+    src += src_stride;
			
 
				+
			
 
				+    for (j = 0; j < dst_height; ++j) {
			
 
				+      yi = y >> 16;
			
 
				+      if (yi != lasty) {
			
 
				+        if (y > max_y) {
			
 
				+          y = max_y;
			
 
				+          yi = y >> 16;
			
 
				+          src = src_argb + yi * src_stride;
			
 
				+        }
			
 
				+        if (yi != lasty) {
			
 
				+          ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
			
 
				+          rowptr += rowstride;
			
 
				+          rowstride = -rowstride;
			
 
				+          lasty = yi;
			
 
				+          src += src_stride;
			
 
				+        }
			
 
				+      }
			
 
				+      if (filtering == kFilterLinear) {
			
 
				+        InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
			
 
				+      } else {
			
 
				+        int yf = (y >> 8) & 255;
			
 
				+        InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
			
 
				+      }
			
 
				+      dst_argb += dst_stride;
			
 
				+      y += dy;
			
 
				+    }
			
 
				+    free_aligned_buffer_64(row);
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+#ifdef YUVSCALEUP
			
 
				+// Scale YUV to ARGB up with bilinear interpolation.
				+//
				+// Every source row that is needed is converted from planar YUV to ARGB
				+// (I422ToARGBRow into argb_row), scaled horizontally into one of two cached
				+// ARGB rows, and the cached pair is blended vertically into dst_argb.
				+// x, dx, y and dy are 16.16 fixed point source positions and steps; chroma
				+// rows are addressed at half the luma row index (kYShift).
				+static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
				+                                     int dst_width, int dst_height,
				+                                     int src_stride_y,
				+                                     int src_stride_u,
				+                                     int src_stride_v,
				+                                     int dst_stride_argb,
				+                                     const uint8* src_y,
				+                                     const uint8* src_u,
				+                                     const uint8* src_v,
				+                                     uint8* dst_argb,
				+                                     int x, int dx, int y, int dy,
				+                                     enum FilterMode filtering) {
				+  int j;
				+  // YUV to ARGB row converter: C version unless a SIMD variant applies.
				+  void (*I422ToARGBRow)(const uint8* y_buf,
				+                        const uint8* u_buf,
				+                        const uint8* v_buf,
				+                        uint8* rgb_buf,
				+                        int width) = I422ToARGBRow_C;
				+#if defined(HAS_I422TOARGBROW_SSSE3)
				+  if (TestCpuFlag(kCpuHasSSSE3)) {
				+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
				+    if (IS_ALIGNED(src_width, 8)) {
				+      I422ToARGBRow = I422ToARGBRow_SSSE3;
				+    }
				+  }
				+#endif
				+#if defined(HAS_I422TOARGBROW_AVX2)
				+  if (TestCpuFlag(kCpuHasAVX2)) {
				+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
				+    if (IS_ALIGNED(src_width, 16)) {
				+      I422ToARGBRow = I422ToARGBRow_AVX2;
				+    }
				+  }
				+#endif
				+#if defined(HAS_I422TOARGBROW_NEON)
				+  if (TestCpuFlag(kCpuHasNEON)) {
				+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
				+    if (IS_ALIGNED(src_width, 8)) {
				+      I422ToARGBRow = I422ToARGBRow_NEON;
				+    }
				+  }
				+#endif
				+#if defined(HAS_I422TOARGBROW_DSPR2)
				+  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
				+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
				+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
				+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
				+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
				+    I422ToARGBRow = I422ToARGBRow_DSPR2;
				+  }
				+#endif
				+
				+  // Vertical blend of the two cached rows into one destination row.
				+  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
				+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
				+      InterpolateRow_C;
				+#if defined(HAS_INTERPOLATEROW_SSSE3)
				+  if (TestCpuFlag(kCpuHasSSSE3)) {
				+    InterpolateRow = InterpolateRow_Any_SSSE3;
				+    if (IS_ALIGNED(dst_width, 4)) {
				+      InterpolateRow = InterpolateRow_SSSE3;
				+    }
				+  }
				+#endif
				+#if defined(HAS_INTERPOLATEROW_AVX2)
				+  if (TestCpuFlag(kCpuHasAVX2)) {
				+    InterpolateRow = InterpolateRow_Any_AVX2;
				+    if (IS_ALIGNED(dst_width, 8)) {
				+      InterpolateRow = InterpolateRow_AVX2;
				+    }
				+  }
				+#endif
				+#if defined(HAS_INTERPOLATEROW_NEON)
				+  if (TestCpuFlag(kCpuHasNEON)) {
				+    InterpolateRow = InterpolateRow_Any_NEON;
				+    if (IS_ALIGNED(dst_width, 4)) {
				+      InterpolateRow = InterpolateRow_NEON;
				+    }
				+  }
				+#endif
				+#if defined(HAS_INTERPOLATEROW_DSPR2)
				+  if (TestCpuFlag(kCpuHasDSPR2) &&
				+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
				+    InterpolateRow = InterpolateRow_DSPR2;
				+  }
				+#endif
				+
				+  // Horizontal scaler: filtering or point sampling, with 64 bit stepping
				+  // variants for sources that are 32768 pixels wide or more.
				+  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
				+      int dst_width, int x, int dx) =
				+      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
				+  if (src_width >= 32768) {
				+    ScaleARGBFilterCols = filtering ?
				+        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
				+  }
				+#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
				+  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
				+    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
				+  }
				+#endif
				+#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
				+  if (filtering && TestCpuFlag(kCpuHasNEON)) {
				+    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
				+    if (IS_ALIGNED(dst_width, 4)) {
				+      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
				+    }
				+  }
				+#endif
				+#if defined(HAS_SCALEARGBCOLS_SSE2)
				+  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
				+    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
				+  }
				+#endif
				+#if defined(HAS_SCALEARGBCOLS_NEON)
				+  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
				+    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
				+    if (IS_ALIGNED(dst_width, 8)) {
				+      ScaleARGBFilterCols = ScaleARGBCols_NEON;
				+    }
				+  }
				+#endif
				+  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
				+    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
				+#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
				+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
				+      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
				+    }
				+#endif
				+  }
				+
				+  // Never start below the last source row.
				+  const int max_y = (src_height - 1) << 16;
				+  if (y > max_y) {
				+    y = max_y;
				+  }
				+  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
				+  int yi = y >> 16;
				+  int uv_yi = yi >> kYShift;
				+  const uint8* src_row_y = src_y + yi * src_stride_y;
				+  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
				+  const uint8* src_row_v = src_v + uv_yi * src_stride_v;
				+
				+  // Allocate 2 rows of ARGB.
				+  const int kRowSize = (dst_width * 4 + 31) & ~31;
				+  align_buffer_64(row, kRowSize * 2);
				+
				+  // Allocate 1 row of ARGB for source conversion.
				+  align_buffer_64(argb_row, src_width * 4);
				+
				+  // rowptr/rowstride address the cached pair; negating rowstride after
				+  // each refill swaps which of the two rows is treated as the upper one.
				+  uint8* rowptr = row;
				+  int rowstride = kRowSize;
				+  int lasty = yi;
				+
				+  // Prime the cache with the first two source rows. Each row is converted
				+  // to ARGB first: the column scaler expects ARGB input, not the Y plane.
				+  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
				+  ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
				+  if (src_height > 1) {
				+    src_row_y += src_stride_y;
				+    if (yi & 1) {
				+      src_row_u += src_stride_u;
				+      src_row_v += src_stride_v;
				+    }
				+  }
				+  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
				+  ScaleARGBFilterCols(rowptr + rowstride, argb_row, dst_width, x, dx);
				+  if (src_height > 2) {
				+    src_row_y += src_stride_y;
				+    if (!(yi & 1)) {
				+      src_row_u += src_stride_u;
				+      src_row_v += src_stride_v;
				+    }
				+  }
				+
				+  for (j = 0; j < dst_height; ++j) {
				+    yi = y >> 16;
				+    if (yi != lasty) {
				+      // Clamp to the last source row and re-derive the plane pointers.
				+      if (y > max_y) {
				+        y = max_y;
				+        yi = y >> 16;
				+        uv_yi = yi >> kYShift;
				+        src_row_y = src_y + yi * src_stride_y;
				+        src_row_u = src_u + uv_yi * src_stride_u;
				+        src_row_v = src_v + uv_yi * src_stride_v;
				+      }
				+      if (yi != lasty) {
				+        // TODO(fbarchard): Convert the clipped region of row.
				+        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
				+        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
				+        rowptr += rowstride;
				+        rowstride = -rowstride;
				+        lasty = yi;
				+        src_row_y += src_stride_y;
				+        if (yi & 1) {
				+          src_row_u += src_stride_u;
				+          src_row_v += src_stride_v;
				+        }
				+      }
				+    }
				+    if (filtering == kFilterLinear) {
				+      // Linear filtering is horizontal only: copy the current row.
				+      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
				+    } else {
				+      // Top 8 bits of the fractional row position weight the blend.
				+      int yf = (y >> 8) & 255;
				+      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
				+    }
				+    dst_argb += dst_stride_argb;
				+    y += dy;
				+  }
				+  free_aligned_buffer_64(row);
				+  // Release the conversion row under the name it was allocated with.
				+  free_aligned_buffer_64(argb_row);
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Scale ARGB to/from any dimensions, without interpolation.
			
 
				+// Fixed point math is used for performance: The upper 16 bits
			
 
				+// of x and dx are the integer part of the source position and
			
 
				+// the lower 16 bits are the fixed decimal part.
			
 
				+
			
 
				+static void ScaleARGBSimple(int src_width, int src_height,
				+                            int dst_width, int dst_height,
				+                            int src_stride, int dst_stride,
				+                            const uint8* src_argb, uint8* dst_argb,
				+                            int x, int dx, int y, int dy) {
				+  // Column scaler applied to every output row. Start with the portable C
				+  // version and upgrade to a faster variant where one is usable.
				+  void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
				+      int dst_width, int x, int dx) = ScaleARGBCols_C;
				+  int row;
				+  if (src_width >= 32768) {
				+    // Very wide sources need the 64 bit stepping variant.
				+    ScaleARGBCols = ScaleARGBCols64_C;
				+  }
				+#if defined(HAS_SCALEARGBCOLS_SSE2)
				+  if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
				+    ScaleARGBCols = ScaleARGBCols_SSE2;
				+  }
				+#endif
				+#if defined(HAS_SCALEARGBCOLS_NEON)
				+  if (TestCpuFlag(kCpuHasNEON)) {
				+    ScaleARGBCols = IS_ALIGNED(dst_width, 8) ? ScaleARGBCols_NEON
				+                                             : ScaleARGBCols_Any_NEON;
				+  }
				+#endif
				+  // Exact 2x horizontal upsample has a dedicated pixel-doubling routine.
				+  if (src_width * 2 == dst_width && x < 0x8000) {
				+    ScaleARGBCols = ScaleARGBColsUp2_C;
				+#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
				+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
				+      ScaleARGBCols = ScaleARGBColsUp2_SSE2;
				+    }
				+#endif
				+  }
				+
				+  // The integer part of y picks the source row for each output row.
				+  for (row = 0; row < dst_height; ++row, dst_argb += dst_stride, y += dy) {
				+    const uint8* src_row = src_argb + (y >> 16) * src_stride;
				+    ScaleARGBCols(dst_argb, src_row, dst_width, x, dx);
				+  }
				+}
			
 
				+
			
 
				+// Scale an ARGB image (internal dispatcher).
				+// Works out the fixed point stepping for the requested sizes, applies the
				+// clip offset, then hands off to the most specialized scaling function
				+// that can handle the resulting step values.
				+static void ScaleARGB(const uint8* src, int src_stride,
				+                      int src_width, int src_height,
				+                      uint8* dst, int dst_stride,
				+                      int dst_width, int dst_height,
				+                      int clip_x, int clip_y, int clip_width, int clip_height,
				+                      enum FilterMode filtering) {
				+  // Source start position and per-pixel step, all 16.16 fixed point.
				+  int x = 0;
				+  int y = 0;
				+  int dx = 0;
				+  int dy = 0;
				+
				+  // Callers may pass a box filter even though ARGB does not support it
				+  // yet; reduce the filter to the simplest one that suits the sizes.
				+  filtering = ScaleFilterReduce(src_width, src_height,
				+                                dst_width, dst_height,
				+                                filtering);
				+
				+  // A negative src_height means invert the image: start at the last row
				+  // and walk backwards through the source.
				+  if (src_height < 0) {
				+    src_height = -src_height;
				+    src += (src_height - 1) * src_stride;
				+    src_stride = -src_stride;
				+  }
				+  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
				+             &x, &y, &dx, &dy);
				+  src_width = Abs(src_width);
				+
				+  // Move the start position to the top-left corner of the clip rectangle.
				+  if (clip_x) {
				+    const int64 x_skip = (int64)(clip_x) * dx;
				+    x += (x_skip & 0xffff);
				+    src += (x_skip >> 16) * 4;
				+    dst += clip_x * 4;
				+  }
				+  if (clip_y) {
				+    const int64 y_skip = (int64)(clip_y) * dy;
				+    y += (y_skip & 0xffff);
				+    src += (y_skip >> 16) * src_stride;
				+    dst += clip_y * dst_stride;
				+  }
				+
				+  // Special cases for whole-pixel (integer) step values.
				+  if (((dx | dy) & 0xffff) == 0) {
				+    const int dx_odd = dx & 0x10000;
				+    const int dy_odd = dy & 0x10000;
				+    if (dx == 0 || dy == 0) {
				+      // 1 pixel wide and/or tall.
				+      filtering = kFilterNone;
				+    } else if (!dx_odd && !dy_odd) {
				+      // Optimized even scale down. ie 2, 4, 6, 8, 10x.
				+      if (dx == 0x20000) {
				+        // Optimized 1/2 downsample.
				+        ScaleARGBDown2(src_width, src_height,
				+                       clip_width, clip_height,
				+                       src_stride, dst_stride, src, dst,
				+                       x, dx, y, dy, filtering);
				+      } else if (dx == 0x40000 && filtering == kFilterBox) {
				+        // Optimized 1/4 box downsample.
				+        ScaleARGBDown4Box(src_width, src_height,
				+                          clip_width, clip_height,
				+                          src_stride, dst_stride, src, dst,
				+                          x, dx, y, dy);
				+      } else {
				+        ScaleARGBDownEven(src_width, src_height,
				+                          clip_width, clip_height,
				+                          src_stride, dst_stride, src, dst,
				+                          x, dx, y, dy, filtering);
				+      }
				+      return;
				+    } else if (dx_odd && dy_odd) {
				+      // Optimized odd scale down. ie 3, 5, 7, 9x.
				+      filtering = kFilterNone;
				+      if (dx == 0x10000 && dy == 0x10000) {
				+        // Straight copy.
				+        ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
				+                 dst, dst_stride, clip_width, clip_height);
				+        return;
				+      }
				+    }
				+  }
				+
				+  // Arbitrary scale vertically, but unscaled horizontally.
				+  if (dx == 0x10000 && (x & 0xffff) == 0) {
				+    ScalePlaneVertical(src_height,
				+                       clip_width, clip_height,
				+                       src_stride, dst_stride, src, dst,
				+                       x, y, dy, 4, filtering);
				+    return;
				+  }
				+
				+  // Filtered paths: a vertical step below one source row scales up.
				+  if (filtering) {
				+    if (dy < 65536) {
				+      ScaleARGBBilinearUp(src_width, src_height,
				+                          clip_width, clip_height,
				+                          src_stride, dst_stride, src, dst,
				+                          x, dx, y, dy, filtering);
				+    } else {
				+      ScaleARGBBilinearDown(src_width, src_height,
				+                            clip_width, clip_height,
				+                            src_stride, dst_stride, src, dst,
				+                            x, dx, y, dy, filtering);
				+    }
				+    return;
				+  }
				+
				+  // Unfiltered fallback for any remaining scale factor.
				+  ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
				+                  src_stride, dst_stride, src, dst,
				+                  x, dx, y, dy);
				+}
			
 
				+
			
 
				+// Scale an ARGB image, writing only the clip rectangle of the destination.
				+// Returns 0 on success, or -1 when a pointer is null, a size is zero or
				+// out of range, or the clip rectangle does not fit inside the destination.
				+LIBYUV_API
				+int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
				+                  int src_width, int src_height,
				+                  uint8* dst_argb, int dst_stride_argb,
				+                  int dst_width, int dst_height,
				+                  int clip_x, int clip_y, int clip_width, int clip_height,
				+                  enum FilterMode filtering) {
				+  // Negative clip sizes are rejected so they cannot reach the row buffer
				+  // allocations. The fit test subtracts instead of adding, because
				+  // clip_x + clip_width could overflow a signed int; clip_x >= 0 and
				+  // dst_width > 0 are already established when it is evaluated.
				+  if (!src_argb || src_width == 0 || src_height == 0 ||
				+      !dst_argb || dst_width <= 0 || dst_height <= 0 ||
				+      clip_x < 0 || clip_y < 0 ||
				+      clip_width < 0 || clip_height < 0 ||
				+      clip_width > 32768 || clip_height > 32768 ||
				+      clip_width > (dst_width - clip_x) ||
				+      clip_height > (dst_height - clip_y)) {
				+    return -1;
				+  }
				+  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
				+            dst_argb, dst_stride_argb, dst_width, dst_height,
				+            clip_x, clip_y, clip_width, clip_height, filtering);
				+  return 0;
				+}
			
 
				+
			
 
				+// Scale an ARGB image so that it fills the whole destination.
				+// Returns 0 on success, or -1 when a pointer is null or a size is zero
				+// or out of range.
				+LIBYUV_API
				+int ARGBScale(const uint8* src_argb, int src_stride_argb,
				+              int src_width, int src_height,
				+              uint8* dst_argb, int dst_stride_argb,
				+              int dst_width, int dst_height,
				+              enum FilterMode filtering) {
				+  const int bad_source = !src_argb || src_width == 0 || src_height == 0 ||
				+                         src_width > 32768 || src_height > 32768;
				+  const int bad_dest = !dst_argb || dst_width <= 0 || dst_height <= 0;
				+  if (bad_source || bad_dest) {
				+    return -1;
				+  }
				+  // The clip rectangle is the full destination.
				+  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
				+            dst_argb, dst_stride_argb, dst_width, dst_height,
				+            0, 0, dst_width, dst_height, filtering);
				+  return 0;
				+}
			
 
				+
			
 
				+// Scale with YUV conversion to ARGB and clipping.
				+// The source planes are converted with I420ToARGB into a temporary ARGB
				+// image, which is then scaled into the clip rectangle by ARGBScaleClip.
				+// src_fourcc and dst_fourcc are currently not used.
				+// Returns -1 when the source size is unusable or the temporary image
				+// cannot be allocated; otherwise returns the ARGBScaleClip result.
				+LIBYUV_API
				+int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
				+                       const uint8* src_u, int src_stride_u,
				+                       const uint8* src_v, int src_stride_v,
				+                       uint32 src_fourcc,
				+                       int src_width, int src_height,
				+                       uint8* dst_argb, int dst_stride_argb,
				+                       uint32 dst_fourcc,
				+                       int dst_width, int dst_height,
				+                       int clip_x, int clip_y, int clip_width, int clip_height,
				+                       enum FilterMode filtering) {
				+  uint8* argb_buffer;
				+  int64 rows;
				+  int64 buffer_bytes;
				+  int r;
				+  (void)src_fourcc;
				+  (void)dst_fourcc;
				+
				+  // src_width * 4 is passed on as an int stride, so it must not overflow
				+  // (the limit assumes a 32 bit int).
				+  if (src_width <= 0 || src_width > 0x1fffffff || src_height == 0) {
				+    return -1;
				+  }
				+  // A negative src_height is passed through unchanged, but the buffer
				+  // still has to hold |src_height| rows.
				+  rows = src_height;
				+  if (rows < 0) {
				+    rows = -rows;
				+  }
				+  buffer_bytes = (int64)src_width * 4 * rows;
				+  // Reject sizes that do not survive the conversion to size_t.
				+  if ((int64)(size_t)buffer_bytes != buffer_bytes) {
				+    return -1;
				+  }
				+  argb_buffer = (uint8*)malloc((size_t)buffer_bytes);
				+  if (!argb_buffer) {
				+    return -1;  // Out of memory.
				+  }
				+
				+  I420ToARGB(src_y, src_stride_y,
				+             src_u, src_stride_u,
				+             src_v, src_stride_v,
				+             argb_buffer, src_width * 4,
				+             src_width, src_height);
				+
				+  r = ARGBScaleClip(argb_buffer, src_width * 4,
				+                    src_width, src_height,
				+                    dst_argb, dst_stride_argb,
				+                    dst_width, dst_height,
				+                    clip_x, clip_y, clip_width, clip_height,
				+                    filtering);
				+  free(argb_buffer);
				+  return r;
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
--- a/src/jni/libyuv/source/scale_common.cc
+++ b/src/jni/libyuv/source/scale_common.cc
--- a/src/jni/libyuv/source/scale_gcc.cc
+++ b/src/jni/libyuv/source/scale_gcc.cc
--- a/src/jni/libyuv/source/scale_mips.cc
+++ b/src/jni/libyuv/source/scale_mips.cc
@@ -0,0 +1,644 @@
 
				+/*
			
 
				+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+#include "libyuv/basic_types.h"
			
 
				+#include "libyuv/row.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// This module is for GCC MIPS DSPR2
			
 
				+#if !defined(LIBYUV_DISABLE_MIPS) && \
			
 
				+    defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
			
 
				+    (_MIPS_SIM == _MIPS_SIM_ABI32)
			
 
				+// Point-sampling 1/2 downsample of one row: keeps the even source bytes. src_stride is not used.
			
 
				+void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                         uint8* dst, int dst_width) {
			
 
				+  __asm__ __volatile__(
			
 
				+    ".set push                                     \n"
			
 
				+    ".set noreorder                                \n"  // delay slots are filled by hand
			
 
				+
			
 
				+    "srl            $t9, %[dst_width], 4           \n"  // iterations -> by 16
			
 
				+    "beqz           $t9, 2f                        \n"  // fewer than 16 outputs: residue only
			
 
				+    " nop                                          \n"  // branch delay slot
			
 
				+
			
 
				+  "1:                                              \n"  // unrolled pass: 32 source bytes in, 16 out
			
 
				+    "lw             $t0, 0(%[src_ptr])             \n"  // |3|2|1|0|
			
 
				+    "lw             $t1, 4(%[src_ptr])             \n"  // |7|6|5|4|
			
 
				+    "lw             $t2, 8(%[src_ptr])             \n"  // |11|10|9|8|
			
 
				+    "lw             $t3, 12(%[src_ptr])            \n"  // |15|14|13|12|
			
 
				+    "lw             $t4, 16(%[src_ptr])            \n"  // |19|18|17|16|
			
 
				+    "lw             $t5, 20(%[src_ptr])            \n"  // |23|22|21|20|
			
 
				+    "lw             $t6, 24(%[src_ptr])            \n"  // |27|26|25|24|
			
 
				+    "lw             $t7, 28(%[src_ptr])            \n"  // |31|30|29|28|
			
 
				+    // TODO(fbarchard): Use odd pixels instead of even.
			
 
				+    "precr.qb.ph    $t8, $t1, $t0                  \n"  // |6|4|2|0|
			
 
				+    "precr.qb.ph    $t0, $t3, $t2                  \n"  // |14|12|10|8|
			
 
				+    "precr.qb.ph    $t1, $t5, $t4                  \n"  // |22|20|18|16|
			
 
				+    "precr.qb.ph    $t2, $t7, $t6                  \n"  // |30|28|26|24|
			
 
				+    "addiu          %[src_ptr], %[src_ptr], 32     \n"
			
 
				+    "addiu          $t9, $t9, -1                   \n"
			
 
				+    "sw             $t8, 0(%[dst])                 \n"
			
 
				+    "sw             $t0, 4(%[dst])                 \n"
			
 
				+    "sw             $t1, 8(%[dst])                 \n"
			
 
				+    "sw             $t2, 12(%[dst])                \n"
			
 
				+    "bgtz           $t9, 1b                        \n"
			
 
				+    " addiu         %[dst], %[dst], 16             \n"  // delay slot: advance dst on every pass
			
 
				+
			
 
				+  "2:                                              \n"
			
 
				+    "andi           $t9, %[dst_width], 0xf         \n"  // residue
			
 
				+    "beqz           $t9, 3f                        \n"
			
 
				+    " nop                                          \n"
			
 
				+
			
 
				+  "21:                                             \n"  // residue: one output byte per pass
			
 
				+    "lbu            $t0, 0(%[src_ptr])             \n"
			
 
				+    "addiu          %[src_ptr], %[src_ptr], 2      \n"  // skip the odd source byte
			
 
				+    "addiu          $t9, $t9, -1                   \n"
			
 
				+    "sb             $t0, 0(%[dst])                 \n"
			
 
				+    "bgtz           $t9, 21b                       \n"
			
 
				+    " addiu         %[dst], %[dst], 1              \n"
			
 
				+
			
 
				+  "3:                                              \n"
			
 
				+    ".set pop                                      \n"
			
 
				+  : [src_ptr] "+r" (src_ptr),
			
 
				+    [dst] "+r" (dst)
			
 
				+  : [dst_width] "r" (dst_width)
			
 
				+  : "t0", "t1", "t2", "t3", "t4", "t5",
			
 
				+    "t6", "t7", "t8", "t9"
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// 1/2 box downsample of a row pair: each output byte is the rounded
				+// average of a 2x2 block of bytes taken from src_ptr and from the row
				+// src_stride bytes after it.
				+void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
				+                            uint8* dst, int dst_width) {
				+  const uint8* t = src_ptr + src_stride;  // Second source row.
				+
				+  __asm__ __volatile__ (
				+    ".set push                                    \n"
				+    ".set noreorder                               \n"
				+
				+    "srl            $t9, %[dst_width], 3          \n"  // iterations -> step 8
				+    // Skip the unrolled loop when dst_width < 8. The count produced by
				+    // srl is never negative, so the test has to be for zero; otherwise
				+    // the loop below would always run once and process 8 pixels.
				+    "beqz           $t9, 2f                       \n"
				+    " nop                                         \n"
				+
				+  "1:                                             \n"
				+    "lw             $t0, 0(%[src_ptr])            \n"  // |3|2|1|0|
				+    "lw             $t1, 4(%[src_ptr])            \n"  // |7|6|5|4|
				+    "lw             $t2, 8(%[src_ptr])            \n"  // |11|10|9|8|
				+    "lw             $t3, 12(%[src_ptr])           \n"  // |15|14|13|12|
				+    "lw             $t4, 0(%[t])                  \n"  // |19|18|17|16|
				+    "lw             $t5, 4(%[t])                  \n"  // |23|22|21|20|
				+    "lw             $t6, 8(%[t])                  \n"  // |27|26|25|24|
				+    "lw             $t7, 12(%[t])                 \n"  // |31|30|29|28|
				+    "addiu          $t9, $t9, -1                  \n"
				+    "srl            $t8, $t0, 16                  \n"  // |X|X|3|2|
				+    "ins            $t0, $t4, 16, 16              \n"  // |17|16|1|0|
				+    "ins            $t4, $t8, 0, 16               \n"  // |19|18|3|2|
				+    "raddu.w.qb     $t0, $t0                      \n"  // |17+16+1+0|
				+    "raddu.w.qb     $t4, $t4                      \n"  // |19+18+3+2|
				+    "shra_r.w       $t0, $t0, 2                   \n"  // |t0+2|>>2
				+    "shra_r.w       $t4, $t4, 2                   \n"  // |t4+2|>>2
				+    "srl            $t8, $t1, 16                  \n"  // |X|X|7|6|
				+    "ins            $t1, $t5, 16, 16              \n"  // |21|20|5|4|
				+    "ins            $t5, $t8, 0, 16               \n"  // |23|22|7|6|
				+    "raddu.w.qb     $t1, $t1                      \n"  // |21+20+5+4|
				+    "raddu.w.qb     $t5, $t5                      \n"  // |23+22+7+6|
				+    "shra_r.w       $t1, $t1, 2                   \n"  // |t1+2|>>2
				+    "shra_r.w       $t5, $t5, 2                   \n"  // |t5+2|>>2
				+    "srl            $t8, $t2, 16                  \n"  // |X|X|11|10|
				+    "ins            $t2, $t6, 16, 16              \n"  // |25|24|9|8|
				+    "ins            $t6, $t8, 0, 16               \n"  // |27|26|11|10|
				+    "raddu.w.qb     $t2, $t2                      \n"  // |25+24+9+8|
				+    "raddu.w.qb     $t6, $t6                      \n"  // |27+26+11+10|
				+    "shra_r.w       $t2, $t2, 2                   \n"  // |t2+2|>>2
				+    "shra_r.w       $t6, $t6, 2                   \n"  // |t6+2|>>2
				+    "srl            $t8, $t3, 16                  \n"  // |X|X|15|14|
				+    "ins            $t3, $t7, 16, 16              \n"  // |29|28|13|12|
				+    "ins            $t7, $t8, 0, 16               \n"  // |31|30|15|14|
				+    "raddu.w.qb     $t3, $t3                      \n"  // |29+28+13+12|
				+    "raddu.w.qb     $t7, $t7                      \n"  // |31+30+15+14|
				+    "shra_r.w       $t3, $t3, 2                   \n"  // |t3+2|>>2
				+    "shra_r.w       $t7, $t7, 2                   \n"  // |t7+2|>>2
				+    "addiu          %[src_ptr], %[src_ptr], 16    \n"
				+    "addiu          %[t], %[t], 16                \n"
				+    "sb             $t0, 0(%[dst])                \n"
				+    "sb             $t4, 1(%[dst])                \n"
				+    "sb             $t1, 2(%[dst])                \n"
				+    "sb             $t5, 3(%[dst])                \n"
				+    "sb             $t2, 4(%[dst])                \n"
				+    "sb             $t6, 5(%[dst])                \n"
				+    "sb             $t3, 6(%[dst])                \n"
				+    "sb             $t7, 7(%[dst])                \n"
				+    "bgtz           $t9, 1b                       \n"
				+    " addiu         %[dst], %[dst], 8             \n"
				+
				+  "2:                                             \n"
				+    "andi           $t9, %[dst_width], 0x7        \n"  // x = residue
				+    "beqz           $t9, 3f                       \n"
				+    " nop                                         \n"
				+
				+    // Residue loop: two output pixels per pass, using unaligned loads.
				+    // NOTE(review): with an odd residue the last pass still stores two
				+    // bytes - confirm callers tolerate one byte past dst_width.
				+    "21:                                          \n"
				+    "lwr            $t1, 0(%[src_ptr])            \n"
				+    "lwl            $t1, 3(%[src_ptr])            \n"
				+    "lwr            $t2, 0(%[t])                  \n"
				+    "lwl            $t2, 3(%[t])                  \n"
				+    "srl            $t8, $t1, 16                  \n"
				+    "ins            $t1, $t2, 16, 16              \n"
				+    "ins            $t2, $t8, 0, 16               \n"
				+    "raddu.w.qb     $t1, $t1                      \n"
				+    "raddu.w.qb     $t2, $t2                      \n"
				+    "shra_r.w       $t1, $t1, 2                   \n"
				+    "shra_r.w       $t2, $t2, 2                   \n"
				+    "sb             $t1, 0(%[dst])                \n"
				+    "sb             $t2, 1(%[dst])                \n"
				+    "addiu          %[src_ptr], %[src_ptr], 4     \n"
				+    "addiu          $t9, $t9, -2                  \n"
				+    "addiu          %[t], %[t], 4                 \n"
				+    "bgtz           $t9, 21b                      \n"
				+    " addiu         %[dst], %[dst], 2             \n"
				+
				+  "3:                                             \n"
				+    ".set pop                                     \n"
				+
				+  : [src_ptr] "+r" (src_ptr),
				+    [dst] "+r" (dst), [t] "+r" (t)
				+  : [dst_width] "r" (dst_width)
				+  : "t0", "t1", "t2", "t3", "t4", "t5",
				+    "t6", "t7", "t8", "t9"
				+  );
				+}
			
 
				+// Point-sampling 1/4 downsample of one row: keeps every fourth source byte. src_stride is not used.
			
 
				+void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                         uint8* dst, int dst_width) {
			
 
				+  __asm__ __volatile__ (
			
 
				+      ".set push                                    \n"
			
 
				+      ".set noreorder                               \n"  // delay slots are filled by hand
			
 
				+
			
 
				+      "srl            $t9, %[dst_width], 3          \n"  // unrolled passes, 8 outputs each
			
 
				+      "beqz           $t9, 2f                       \n"  // fewer than 8 outputs: residue only
			
 
				+      " nop                                         \n"  // branch delay slot
			
 
				+
			
 
				+     "1:                                            \n"  // unrolled pass: 32 source bytes in, 8 out
			
 
				+      "lw             $t1, 0(%[src_ptr])            \n"  // |3|2|1|0|
			
 
				+      "lw             $t2, 4(%[src_ptr])            \n"  // |7|6|5|4|
			
 
				+      "lw             $t3, 8(%[src_ptr])            \n"  // |11|10|9|8|
			
 
				+      "lw             $t4, 12(%[src_ptr])           \n"  // |15|14|13|12|
			
 
				+      "lw             $t5, 16(%[src_ptr])           \n"  // |19|18|17|16|
			
 
				+      "lw             $t6, 20(%[src_ptr])           \n"  // |23|22|21|20|
			
 
				+      "lw             $t7, 24(%[src_ptr])           \n"  // |27|26|25|24|
			
 
				+      "lw             $t8, 28(%[src_ptr])           \n"  // |31|30|29|28|
			
 
				+      "precr.qb.ph    $t1, $t2, $t1                 \n"  // |6|4|2|0|
			
 
				+      "precr.qb.ph    $t2, $t4, $t3                 \n"  // |14|12|10|8|
			
 
				+      "precr.qb.ph    $t5, $t6, $t5                 \n"  // |22|20|18|16|
			
 
				+      "precr.qb.ph    $t6, $t8, $t7                 \n"  // |30|28|26|24|
			
 
				+      "precr.qb.ph    $t1, $t2, $t1                 \n"  // |12|8|4|0|
			
 
				+      "precr.qb.ph    $t5, $t6, $t5                 \n"  // |28|24|20|16|
			
 
				+      "addiu          %[src_ptr], %[src_ptr], 32    \n"
			
 
				+      "addiu          $t9, $t9, -1                  \n"
			
 
				+      "sw             $t1, 0(%[dst])                \n"
			
 
				+      "sw             $t5, 4(%[dst])                \n"
			
 
				+      "bgtz           $t9, 1b                       \n"
			
 
				+      " addiu         %[dst], %[dst], 8             \n"  // delay slot: advance dst on every pass
			
 
				+
			
 
				+    "2:                                             \n"
			
 
				+      "andi           $t9, %[dst_width], 7          \n"  // residue
			
 
				+      "beqz           $t9, 3f                       \n"
			
 
				+      " nop                                         \n"
			
 
				+
			
 
				+    "21:                                            \n"  // residue: one output byte per pass
			
 
				+      "lbu            $t1, 0(%[src_ptr])            \n"
			
 
				+      "addiu          %[src_ptr], %[src_ptr], 4     \n"  // skip the next three source bytes
			
 
				+      "addiu          $t9, $t9, -1                  \n"
			
 
				+      "sb             $t1, 0(%[dst])                \n"
			
 
				+      "bgtz           $t9, 21b                      \n"
			
 
				+      " addiu         %[dst], %[dst], 1             \n"
			
 
				+
			
 
				+    "3:                                             \n"
			
 
				+      ".set pop                                     \n"
			
 
				+      : [src_ptr] "+r" (src_ptr),
			
 
				+        [dst] "+r" (dst)
			
 
				+      : [dst_width] "r" (dst_width)
			
 
				+      : "t1", "t2", "t3", "t4", "t5",
			
 
				+        "t6", "t7", "t8", "t9"
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Box-filters a row down by 4: each output byte is the rounded average of a
			
 
				+// 4x4 block taken from src_ptr and the three following rows (src_stride
			
 
				+// apart). The main loop emits two pixels per pass; an odd trailing pixel is
			
 
				+// handled after the loop. The loop is skipped when dst_width < 2 so that no
			
 
				+// more than dst_width bytes are written.
			
 
				+// NOTE(review): the word loads presumably require 4-byte aligned rows -
			
 
				+// confirm at the call site.
			
 
				+void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                            uint8* dst, int dst_width) {
			
 
				+  intptr_t stride = src_stride;
			
 
				+  const uint8* s1 = src_ptr + stride;
			
 
				+  const uint8* s2 = s1 + stride;
			
 
				+  const uint8* s3 = s2 + stride;
			
 
				+
			
 
				+  __asm__ __volatile__ (
			
 
				+      ".set push                                  \n"
			
 
				+      ".set noreorder                             \n"
			
 
				+
			
 
				+      "srl           $t9, %[dst_width], 1         \n"  // pixel pairs
			
 
				+      "andi          $t8, %[dst_width], 1         \n"  // odd pixel flag
			
 
				+      "beqz          $t9, 3f                      \n"  // no pair: skip loop
			
 
				+      " nop                                       \n"
			
 
				+
			
 
				+     "1:                                          \n"
			
 
				+      "lw            $t0, 0(%[src_ptr])           \n"  // |3|2|1|0|
			
 
				+      "lw            $t1, 0(%[s1])                \n"  // |7|6|5|4|
			
 
				+      "lw            $t2, 0(%[s2])                \n"  // |11|10|9|8|
			
 
				+      "lw            $t3, 0(%[s3])                \n"  // |15|14|13|12|
			
 
				+      "lw            $t4, 4(%[src_ptr])           \n"  // |19|18|17|16|
			
 
				+      "lw            $t5, 4(%[s1])                \n"  // |23|22|21|20|
			
 
				+      "lw            $t6, 4(%[s2])                \n"  // |27|26|25|24|
			
 
				+      "lw            $t7, 4(%[s3])                \n"  // |31|30|29|28|
			
 
				+      "raddu.w.qb    $t0, $t0                     \n"  // |3 + 2 + 1 + 0|
			
 
				+      "raddu.w.qb    $t1, $t1                     \n"  // |7 + 6 + 5 + 4|
			
 
				+      "raddu.w.qb    $t2, $t2                     \n"  // |11 + 10 + 9 + 8|
			
 
				+      "raddu.w.qb    $t3, $t3                     \n"  // |15 + 14 + 13 + 12|
			
 
				+      "raddu.w.qb    $t4, $t4                     \n"  // |19 + 18 + 17 + 16|
			
 
				+      "raddu.w.qb    $t5, $t5                     \n"  // |23 + 22 + 21 + 20|
			
 
				+      "raddu.w.qb    $t6, $t6                     \n"  // |27 + 26 + 25 + 24|
			
 
				+      "raddu.w.qb    $t7, $t7                     \n"  // |31 + 30 + 29 + 28|
			
 
				+      "add           $t0, $t0, $t1                \n"
			
 
				+      "add           $t1, $t2, $t3                \n"
			
 
				+      "add           $t0, $t0, $t1                \n"
			
 
				+      "add           $t4, $t4, $t5                \n"
			
 
				+      "add           $t6, $t6, $t7                \n"
			
 
				+      "add           $t4, $t4, $t6                \n"
			
 
				+      "shra_r.w      $t0, $t0, 4                  \n"  // rounded sum / 16
			
 
				+      "shra_r.w      $t4, $t4, 4                  \n"
			
 
				+      "sb            $t0, 0(%[dst])               \n"
			
 
				+      "sb            $t4, 1(%[dst])               \n"
			
 
				+      "addiu         %[src_ptr], %[src_ptr], 8    \n"
			
 
				+      "addiu         %[s1], %[s1], 8              \n"
			
 
				+      "addiu         %[s2], %[s2], 8              \n"
			
 
				+      "addiu         %[s3], %[s3], 8              \n"
			
 
				+      "addiu         $t9, $t9, -1                 \n"
			
 
				+      "bgtz          $t9, 1b                      \n"
			
 
				+      " addiu        %[dst], %[dst], 2            \n"
			
 
				+
			
 
				+     "3:                                          \n"
			
 
				+      "beqz          $t8, 2f                      \n"
			
 
				+      " nop                                       \n"
			
 
				+
			
 
				+      "lw            $t0, 0(%[src_ptr])           \n"  // |3|2|1|0|
			
 
				+      "lw            $t1, 0(%[s1])                \n"  // |7|6|5|4|
			
 
				+      "lw            $t2, 0(%[s2])                \n"  // |11|10|9|8|
			
 
				+      "lw            $t3, 0(%[s3])                \n"  // |15|14|13|12|
			
 
				+      "raddu.w.qb    $t0, $t0                     \n"  // |3 + 2 + 1 + 0|
			
 
				+      "raddu.w.qb    $t1, $t1                     \n"  // |7 + 6 + 5 + 4|
			
 
				+      "raddu.w.qb    $t2, $t2                     \n"  // |11 + 10 + 9 + 8|
			
 
				+      "raddu.w.qb    $t3, $t3                     \n"  // |15 + 14 + 13 + 12|
			
 
				+      "add           $t0, $t0, $t1                \n"
			
 
				+      "add           $t1, $t2, $t3                \n"
			
 
				+      "add           $t0, $t0, $t1                \n"
			
 
				+      "shra_r.w      $t0, $t0, 4                  \n"
			
 
				+      "sb            $t0, 0(%[dst])               \n"
			
 
				+
			
 
				+      "2:                                         \n"
			
 
				+      ".set pop                                   \n"
			
 
				+
			
 
				+      : [src_ptr] "+r" (src_ptr),
			
 
				+        [dst] "+r" (dst),
			
 
				+        [s1] "+r" (s1),
			
 
				+        [s2] "+r" (s2),
			
 
				+        [s3] "+r" (s3)
			
 
				+      : [dst_width] "r" (dst_width)
			
 
				+      : "t0", "t1", "t2", "t3", "t4", "t5",
			
 
				+        "t6","t7", "t8", "t9",
			
 
				+        "memory"  // the asm reads the four source rows and writes *dst
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Point-samples one row down to 3/4 width: of every 4 source bytes, bytes
			
 
				+// 0, 1 and 3 are kept. Each pass turns 32 source bytes into 24 output bytes.
			
 
				+// src_stride is not used.
			
 
				+// The loop body runs before the count is tested and exits only when the
			
 
				+// remaining count is exactly zero, so dst_width must be a positive multiple
			
 
				+// of 24 - presumably guaranteed by the caller; confirm.
			
 
				+void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                          uint8* dst, int dst_width) {
			
 
				+  __asm__ __volatile__ (
			
 
				+      ".set push                                          \n"
			
 
				+      ".set noreorder                                     \n"
			
 
				+    "1:                                                   \n"
			
 
				+      "lw              $t1, 0(%[src_ptr])                 \n"  // |3|2|1|0|
			
 
				+      "lw              $t2, 4(%[src_ptr])                 \n"  // |7|6|5|4|
			
 
				+      "lw              $t3, 8(%[src_ptr])                 \n"  // |11|10|9|8|
			
 
				+      "lw              $t4, 12(%[src_ptr])                \n"  // |15|14|13|12|
			
 
				+      "lw              $t5, 16(%[src_ptr])                \n"  // |19|18|17|16|
			
 
				+      "lw              $t6, 20(%[src_ptr])                \n"  // |23|22|21|20|
			
 
				+      "lw              $t7, 24(%[src_ptr])                \n"  // |27|26|25|24|
			
 
				+      "lw              $t8, 28(%[src_ptr])                \n"  // |31|30|29|28|
			
 
				+      "precrq.qb.ph    $t0, $t2, $t4                      \n"  // |7|5|15|13|
			
 
				+      "precrq.qb.ph    $t9, $t6, $t8                      \n"  // |23|21|31|29|
			
 
				+      "addiu           %[dst_width], %[dst_width], -24    \n"
			
 
				+      "ins             $t1, $t1, 8, 16                    \n"  // |3|1|0|X|
			
 
				+      "ins             $t4, $t0, 8, 16                    \n"  // |X|15|13|12|
			
 
				+      "ins             $t5, $t5, 8, 16                    \n"  // |19|17|16|X|
			
 
				+      "ins             $t8, $t9, 8, 16                    \n"  // |X|31|29|28|
			
 
				+      "addiu           %[src_ptr], %[src_ptr], 32         \n"
			
 
				+      "packrl.ph       $t0, $t3, $t0                      \n"  // |9|8|7|5|
			
 
				+      "packrl.ph       $t9, $t7, $t9                      \n"  // |25|24|23|21|
			
 
				+      "prepend         $t1, $t2, 8                        \n"  // |4|3|1|0|
			
 
				+      "prepend         $t3, $t4, 24                       \n"  // |15|13|12|11|
			
 
				+      "prepend         $t5, $t6, 8                        \n"  // |20|19|17|16|
			
 
				+      "prepend         $t7, $t8, 24                       \n"  // |31|29|28|27|
			
 
				+      "sw              $t1, 0(%[dst])                     \n"
			
 
				+      "sw              $t0, 4(%[dst])                     \n"
			
 
				+      "sw              $t3, 8(%[dst])                     \n"
			
 
				+      "sw              $t5, 12(%[dst])                    \n"
			
 
				+      "sw              $t9, 16(%[dst])                    \n"
			
 
				+      "sw              $t7, 20(%[dst])                    \n"
			
 
				+      "bnez            %[dst_width], 1b                   \n"
			
 
				+      " addiu          %[dst], %[dst], 24                 \n"
			
 
				+      ".set pop                                           \n"
			
 
				+      : [src_ptr] "+r" (src_ptr),
			
 
				+        [dst] "+r" (dst),
			
 
				+        [dst_width] "+r" (dst_width)
			
 
				+      :
			
 
				+      : "t0", "t1", "t2", "t3", "t4", "t5",
			
 
				+        "t6","t7", "t8", "t9",
			
 
				+        "memory"  // the asm reads *src_ptr and writes *dst
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Filters two rows (S at src_ptr, T at src_ptr + src_stride) down to 3/4
			
 
				+// width, producing 3 output bytes from 4 source bytes per pass.
			
 
				+// Horizontally, per row: (3*p0 + p1 + 2) >> 2, (p1 + p2 + 1) >> 1 and
			
 
				+// (3*p3 + p2 + 2) >> 2. Vertically the two rows are then blended as
			
 
				+// (3*S + T + 2) >> 2.
			
 
				+// The loop body runs before the count is tested, so dst_width must be
			
 
				+// positive - presumably a multiple of 3; confirm at the call site.
			
 
				+void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* d, int dst_width) {
			
 
				+  __asm__ __volatile__ (
			
 
				+      ".set push                                         \n"
			
 
				+      ".set noreorder                                    \n"
			
 
				+      "repl.ph           $t3, 3                          \n"  // 0x00030003
			
 
				+
			
 
				+    "1:                                                  \n"
			
 
				+      "lw                $t0, 0(%[src_ptr])              \n"  // |S3|S2|S1|S0|
			
 
				+      "lwx               $t1, %[src_stride](%[src_ptr])  \n"  // |T3|T2|T1|T0|
			
 
				+      "rotr              $t2, $t0, 8                     \n"  // |S0|S3|S2|S1|
			
 
				+      "rotr              $t6, $t1, 8                     \n"  // |T0|T3|T2|T1|
			
 
				+      "muleu_s.ph.qbl    $t4, $t2, $t3                   \n"  // |S0*3|S3*3|
			
 
				+      "muleu_s.ph.qbl    $t5, $t6, $t3                   \n"  // |T0*3|T3*3|
			
 
				+      "andi              $t0, $t2, 0xFFFF                \n"  // |0|0|S2|S1|
			
 
				+      "andi              $t1, $t6, 0xFFFF                \n"  // |0|0|T2|T1|
			
 
				+      "raddu.w.qb        $t0, $t0                        \n"
			
 
				+      "raddu.w.qb        $t1, $t1                        \n"
			
 
				+      "shra_r.w          $t0, $t0, 1                     \n"
			
 
				+      "shra_r.w          $t1, $t1, 1                     \n"
			
 
				+      "preceu.ph.qbr     $t2, $t2                        \n"  // |0|S2|0|S1|
			
 
				+      "preceu.ph.qbr     $t6, $t6                        \n"  // |0|T2|0|T1|
			
 
				+      "rotr              $t2, $t2, 16                    \n"  // |0|S1|0|S2|
			
 
				+      "rotr              $t6, $t6, 16                    \n"  // |0|T1|0|T2|
			
 
				+      "addu.ph           $t2, $t2, $t4                   \n"
			
 
				+      "addu.ph           $t6, $t6, $t5                   \n"
			
 
				+      "sll               $t5, $t0, 1                     \n"
			
 
				+      "add               $t0, $t5, $t0                   \n"
			
 
				+      "shra_r.ph         $t2, $t2, 2                     \n"
			
 
				+      "shra_r.ph         $t6, $t6, 2                     \n"
			
 
				+      "shll.ph           $t4, $t2, 1                     \n"
			
 
				+      "addq.ph           $t4, $t4, $t2                   \n"
			
 
				+      "addu              $t0, $t0, $t1                   \n"
			
 
				+      "addiu             %[src_ptr], %[src_ptr], 4       \n"
			
 
				+      "shra_r.w          $t0, $t0, 2                     \n"
			
 
				+      "addu.ph           $t6, $t6, $t4                   \n"
			
 
				+      "shra_r.ph         $t6, $t6, 2                     \n"
			
 
				+      "srl               $t1, $t6, 16                    \n"
			
 
				+      "addiu             %[dst_width], %[dst_width], -3  \n"
			
 
				+      "sb                $t1, 0(%[d])                    \n"
			
 
				+      "sb                $t0, 1(%[d])                    \n"
			
 
				+      "sb                $t6, 2(%[d])                    \n"
			
 
				+      "bgtz              %[dst_width], 1b                \n"
			
 
				+      " addiu            %[d], %[d], 3                   \n"
			
 
				+    "3:                                                  \n"
			
 
				+      ".set pop                                          \n"
			
 
				+      : [src_ptr] "+r" (src_ptr),
			
 
				+        [src_stride] "+r" (src_stride),
			
 
				+        [d] "+r" (d),
			
 
				+        [dst_width] "+r" (dst_width)
			
 
				+      :
			
 
				+      : "t0", "t1", "t2", "t3",
			
 
				+        "t4", "t5", "t6",
			
 
				+        "memory"  // the asm reads both source rows and writes *d
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Filters two rows (S at src_ptr, T at src_ptr + src_stride) down to 3/4
			
 
				+// width, producing 3 output bytes from 4 source bytes per pass.
			
 
				+// Horizontally, per row: (3*p0 + p1 + 2) >> 2, (p1 + p2 + 1) >> 1 and
			
 
				+// (3*p3 + p2 + 2) >> 2. Vertically the two rows are then averaged with
			
 
				+// rounding, (S + T + 1) >> 1.
			
 
				+// The loop body runs before the count is tested, so dst_width must be
			
 
				+// positive - presumably a multiple of 3; confirm at the call site.
			
 
				+void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* d, int dst_width) {
			
 
				+  __asm__ __volatile__ (
			
 
				+      ".set push                                           \n"
			
 
				+      ".set noreorder                                      \n"
			
 
				+      "repl.ph           $t2, 3                            \n"  // 0x00030003
			
 
				+
			
 
				+    "1:                                                    \n"
			
 
				+      "lw                $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0|
			
 
				+      "lwx               $t1, %[src_stride](%[src_ptr])    \n"  // |T3|T2|T1|T0|
			
 
				+      "rotr              $t4, $t0, 8                       \n"  // |S0|S3|S2|S1|
			
 
				+      "rotr              $t6, $t1, 8                       \n"  // |T0|T3|T2|T1|
			
 
				+      "muleu_s.ph.qbl    $t3, $t4, $t2                     \n"  // |S0*3|S3*3|
			
 
				+      "muleu_s.ph.qbl    $t5, $t6, $t2                     \n"  // |T0*3|T3*3|
			
 
				+      "andi              $t0, $t4, 0xFFFF                  \n"  // |0|0|S2|S1|
			
 
				+      "andi              $t1, $t6, 0xFFFF                  \n"  // |0|0|T2|T1|
			
 
				+      "raddu.w.qb        $t0, $t0                          \n"
			
 
				+      "raddu.w.qb        $t1, $t1                          \n"
			
 
				+      "shra_r.w          $t0, $t0, 1                       \n"
			
 
				+      "shra_r.w          $t1, $t1, 1                       \n"
			
 
				+      "preceu.ph.qbr     $t4, $t4                          \n"  // |0|S2|0|S1|
			
 
				+      "preceu.ph.qbr     $t6, $t6                          \n"  // |0|T2|0|T1|
			
 
				+      "rotr              $t4, $t4, 16                      \n"  // |0|S1|0|S2|
			
 
				+      "rotr              $t6, $t6, 16                      \n"  // |0|T1|0|T2|
			
 
				+      "addu.ph           $t4, $t4, $t3                     \n"
			
 
				+      "addu.ph           $t6, $t6, $t5                     \n"
			
 
				+      "shra_r.ph         $t6, $t6, 2                       \n"
			
 
				+      "shra_r.ph         $t4, $t4, 2                       \n"
			
 
				+      "addu.ph           $t6, $t6, $t4                     \n"
			
 
				+      "addiu             %[src_ptr], %[src_ptr], 4         \n"
			
 
				+      "shra_r.ph         $t6, $t6, 1                       \n"
			
 
				+      "addu              $t0, $t0, $t1                     \n"
			
 
				+      "addiu             %[dst_width], %[dst_width], -3    \n"
			
 
				+      "shra_r.w          $t0, $t0, 1                       \n"
			
 
				+      "srl               $t1, $t6, 16                      \n"
			
 
				+      "sb                $t1, 0(%[d])                      \n"
			
 
				+      "sb                $t0, 1(%[d])                      \n"
			
 
				+      "sb                $t6, 2(%[d])                      \n"
			
 
				+      "bgtz              %[dst_width], 1b                  \n"
			
 
				+      " addiu            %[d], %[d], 3                     \n"
			
 
				+    "3:                                                    \n"
			
 
				+      ".set pop                                            \n"
			
 
				+      : [src_ptr] "+r" (src_ptr),
			
 
				+        [src_stride] "+r" (src_stride),
			
 
				+        [d] "+r" (d),
			
 
				+        [dst_width] "+r" (dst_width)
			
 
				+      :
			
 
				+      : "t0", "t1", "t2", "t3",
			
 
				+        "t4", "t5", "t6",
			
 
				+        "memory"  // the asm reads both source rows and writes *d
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Point-samples one row down to 3/8 width: each pass reads 32 source bytes
			
 
				+// and stores 12 of them (source indices 0, 3, 6, 8, 11, 14, 16, 19, 22, 24,
			
 
				+// 27 and 30, per the lane comments below). src_stride is not used.
			
 
				+// The loop body runs before the count is tested and repeats while at least
			
 
				+// 12 pixels remain, so dst_width is presumably a positive multiple of 12 -
			
 
				+// confirm at the call site.
			
 
				+void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                          uint8* dst, int dst_width) {
			
 
				+  __asm__ __volatile__ (
			
 
				+      ".set push                                     \n"
			
 
				+      ".set noreorder                                \n"
			
 
				+
			
 
				+    "1:                                              \n"
			
 
				+      "lw         $t0, 0(%[src_ptr])                 \n"  // |3|2|1|0|
			
 
				+      "lw         $t1, 4(%[src_ptr])                 \n"  // |7|6|5|4|
			
 
				+      "lw         $t2, 8(%[src_ptr])                 \n"  // |11|10|9|8|
			
 
				+      "lw         $t3, 12(%[src_ptr])                \n"  // |15|14|13|12|
			
 
				+      "lw         $t4, 16(%[src_ptr])                \n"  // |19|18|17|16|
			
 
				+      "lw         $t5, 20(%[src_ptr])                \n"  // |23|22|21|20|
			
 
				+      "lw         $t6, 24(%[src_ptr])                \n"  // |27|26|25|24|
			
 
				+      "lw         $t7, 28(%[src_ptr])                \n"  // |31|30|29|28|
			
 
				+      "wsbh       $t0, $t0                           \n"  // |2|3|0|1|
			
 
				+      "wsbh       $t6, $t6                           \n"  // |26|27|24|25|
			
 
				+      "srl        $t0, $t0, 8                        \n"  // |X|2|3|0|
			
 
				+      "srl        $t3, $t3, 16                       \n"  // |X|X|15|14|
			
 
				+      "srl        $t5, $t5, 16                       \n"  // |X|X|23|22|
			
 
				+      "srl        $t7, $t7, 16                       \n"  // |X|X|31|30|
			
 
				+      "ins        $t1, $t2, 24, 8                    \n"  // |8|6|5|4|
			
 
				+      "ins        $t6, $t5, 0, 8                     \n"  // |26|27|24|22|
			
 
				+      "ins        $t1, $t0, 0, 16                    \n"  // |8|6|3|0|
			
 
				+      "ins        $t6, $t7, 24, 8                    \n"  // |30|27|24|22|
			
 
				+      "prepend    $t2, $t3, 24                       \n"  // |X|15|14|11|
			
 
				+      "ins        $t4, $t4, 16, 8                    \n"  // |19|16|17|X|
			
 
				+      "ins        $t4, $t2, 0, 16                    \n"  // |19|16|14|11|
			
 
				+      "addiu      %[src_ptr], %[src_ptr], 32         \n"
			
 
				+      "addiu      %[dst_width], %[dst_width], -12    \n"
			
 
				+      "addiu      $t8,%[dst_width], -12              \n"  // room for another 12?
			
 
				+      "sw         $t1, 0(%[dst])                     \n"
			
 
				+      "sw         $t4, 4(%[dst])                     \n"
			
 
				+      "sw         $t6, 8(%[dst])                     \n"
			
 
				+      "bgez       $t8, 1b                            \n"
			
 
				+      " addiu     %[dst], %[dst], 12                 \n"
			
 
				+      ".set pop                                      \n"
			
 
				+      : [src_ptr] "+r" (src_ptr),
			
 
				+        [dst] "+r" (dst),
			
 
				+        [dst_width] "+r" (dst_width)
			
 
				+      :
			
 
				+      : "t0", "t1", "t2", "t3", "t4",
			
 
				+        "t5", "t6", "t7", "t8",
			
 
				+        "memory"  // the asm reads *src_ptr and writes *dst
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Box-filters two rows (S at src_ptr, T at src_ptr + src_stride) down to 3/8
			
 
				+// width: every 8 source columns yield 3 output bytes.
			
 
				+//   out[0] = (S0+S1+S2 + T0+T1+T2) * 0x2AAA >> 16
			
 
				+//   out[1] = (S3+S4+S5 + T3+T4+T5) * 0x2AAA >> 16
			
 
				+//   out[2] = (S6+S7 + T6+T7) >> 2
			
 
				+// 0x2AAA is roughly 65536 / 6, so the multiply-and-shift approximates a
			
 
				+// divide by the 6 samples summed.
			
 
				+// The loop body runs before the count is tested, so dst_width must be
			
 
				+// positive - presumably a multiple of 3; confirm at the call site.
			
 
				+void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width) {
			
 
				+  intptr_t stride = src_stride;
			
 
				+  const uint8* t = src_ptr + stride;
			
 
				+  const int c = 0x2AAA;
			
 
				+
			
 
				+  __asm__ __volatile__ (
			
 
				+      ".set push                                         \n"
			
 
				+      ".set noreorder                                    \n"
			
 
				+
			
 
				+    "1:                                                  \n"
			
 
				+      "lw              $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0|
			
 
				+      "lw              $t1, 4(%[src_ptr])                \n"  // |S7|S6|S5|S4|
			
 
				+      "lw              $t2, 0(%[t])                      \n"  // |T3|T2|T1|T0|
			
 
				+      "lw              $t3, 4(%[t])                      \n"  // |T7|T6|T5|T4|
			
 
				+      "rotr            $t1, $t1, 16                      \n"  // |S5|S4|S7|S6|
			
 
				+      "packrl.ph       $t4, $t1, $t3                     \n"  // |S7|S6|T7|T6|
			
 
				+      "packrl.ph       $t5, $t3, $t1                     \n"  // |T5|T4|S5|S4|
			
 
				+      "raddu.w.qb      $t4, $t4                          \n"  // S7+S6+T7+T6
			
 
				+      "raddu.w.qb      $t5, $t5                          \n"  // T5+T4+S5+S4
			
 
				+      "precrq.qb.ph    $t6, $t0, $t2                     \n"  // |S3|S1|T3|T1|
			
 
				+      "precrq.qb.ph    $t6, $t6, $t6                     \n"  // |S3|T3|S3|T3|
			
 
				+      "srl             $t4, $t4, 2                       \n"  // t4 / 4
			
 
				+      "srl             $t6, $t6, 16                      \n"  // |0|0|S3|T3|
			
 
				+      "raddu.w.qb      $t6, $t6                          \n"  // 0+0+S3+T3
			
 
				+      "addu            $t6, $t5, $t6                     \n"
			
 
				+      "mul             $t6, $t6, %[c]                    \n"  // t6 * 0x2AAA
			
 
				+      "sll             $t0, $t0, 8                       \n"  // |S2|S1|S0|0|
			
 
				+      "sll             $t2, $t2, 8                       \n"  // |T2|T1|T0|0|
			
 
				+      "raddu.w.qb      $t0, $t0                          \n"  // S2+S1+S0+0
			
 
				+      "raddu.w.qb      $t2, $t2                          \n"  // T2+T1+T0+0
			
 
				+      "addu            $t0, $t0, $t2                     \n"
			
 
				+      "mul             $t0, $t0, %[c]                    \n"  // t0 * 0x2AAA
			
 
				+      "addiu           %[src_ptr], %[src_ptr], 8         \n"
			
 
				+      "addiu           %[t], %[t], 8                     \n"
			
 
				+      "addiu           %[dst_width], %[dst_width], -3    \n"
			
 
				+      "addiu           %[dst_ptr], %[dst_ptr], 3         \n"
			
 
				+      "srl             $t6, $t6, 16                      \n"
			
 
				+      "srl             $t0, $t0, 16                      \n"
			
 
				+      "sb              $t4, -1(%[dst_ptr])               \n"
			
 
				+      "sb              $t6, -2(%[dst_ptr])               \n"
			
 
				+      "bgtz            %[dst_width], 1b                  \n"
			
 
				+      " sb             $t0, -3(%[dst_ptr])               \n"
			
 
				+      ".set pop                                          \n"
			
 
				+      : [src_ptr] "+r" (src_ptr),
			
 
				+        [dst_ptr] "+r" (dst_ptr),
			
 
				+        [t] "+r" (t),
			
 
				+        [dst_width] "+r" (dst_width)
			
 
				+      : [c] "r" (c)
			
 
				+      : "t0", "t1", "t2", "t3", "t4", "t5", "t6",
			
 
				+        "memory"  // the asm reads both source rows and writes *dst_ptr
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+// Box-filters three rows (S at src_ptr, T one stride below, R two strides
			
 
				+// below) down to 3/8 width: every 8 source columns yield 3 output bytes.
			
 
				+//   out[0] = (sum of columns 0..2 over S,T,R) * 0x1C71 >> 16
			
 
				+//   out[1] = (sum of columns 3..5 over S,T,R) * 0x1C71 >> 16
			
 
				+//   out[2] = (sum of columns 6..7 over S,T,R) * 0x2AAA >> 16
			
 
				+// 0x1C71 is roughly 65536 / 9 and 0x2AAA roughly 65536 / 6, matching the
			
 
				+// 9 and 6 samples summed.
			
 
				+// The loop body runs before the count is tested, so dst_width must be
			
 
				+// positive - presumably a multiple of 3; confirm at the call site.
			
 
				+void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
			
 
				+                                ptrdiff_t src_stride,
			
 
				+                                uint8* dst_ptr, int dst_width) {
			
 
				+  intptr_t stride = src_stride;
			
 
				+  const uint8* s1 = src_ptr + stride;
			
 
				+  stride += stride;
			
 
				+  const uint8* s2 = src_ptr + stride;
			
 
				+  const int c1 = 0x1C71;
			
 
				+  const int c2 = 0x2AAA;
			
 
				+
			
 
				+  __asm__ __volatile__ (
			
 
				+      ".set push                                         \n"
			
 
				+      ".set noreorder                                    \n"
			
 
				+
			
 
				+    "1:                                                  \n"
			
 
				+      "lw              $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0|
			
 
				+      "lw              $t1, 4(%[src_ptr])                \n"  // |S7|S6|S5|S4|
			
 
				+      "lw              $t2, 0(%[s1])                     \n"  // |T3|T2|T1|T0|
			
 
				+      "lw              $t3, 4(%[s1])                     \n"  // |T7|T6|T5|T4|
			
 
				+      "lw              $t4, 0(%[s2])                     \n"  // |R3|R2|R1|R0|
			
 
				+      "lw              $t5, 4(%[s2])                     \n"  // |R7|R6|R5|R4|
			
 
				+      "rotr            $t1, $t1, 16                      \n"  // |S5|S4|S7|S6|
			
 
				+      "packrl.ph       $t6, $t1, $t3                     \n"  // |S7|S6|T7|T6|
			
 
				+      "raddu.w.qb      $t6, $t6                          \n"  // S7+S6+T7+T6
			
 
				+      "packrl.ph       $t7, $t3, $t1                     \n"  // |T5|T4|S5|S4|
			
 
				+      "raddu.w.qb      $t7, $t7                          \n"  // T5+T4+S5+S4
			
 
				+      "sll             $t8, $t5, 16                      \n"  // |R5|R4|0|0|
			
 
				+      "raddu.w.qb      $t8, $t8                          \n"  // R5+R4
			
 
				+      "addu            $t7, $t7, $t8                     \n"
			
 
				+      "srl             $t8, $t5, 16                      \n"  // |0|0|R7|R6|
			
 
				+      "raddu.w.qb      $t8, $t8                          \n"  // R7 + R6
			
 
				+      "addu            $t6, $t6, $t8                     \n"
			
 
				+      "mul             $t6, $t6, %[c2]                   \n"  // t6 * 0x2AAA
			
 
				+      "precrq.qb.ph    $t8, $t0, $t2                     \n"  // |S3|S1|T3|T1|
			
 
				+      "precrq.qb.ph    $t8, $t8, $t4                     \n"  // |S3|T3|R3|R1|
			
 
				+      "srl             $t8, $t8, 8                       \n"  // |0|S3|T3|R3|
			
 
				+      "raddu.w.qb      $t8, $t8                          \n"  // S3 + T3 + R3
			
 
				+      "addu            $t7, $t7, $t8                     \n"
			
 
				+      "mul             $t7, $t7, %[c1]                   \n"  // t7 * 0x1C71
			
 
				+      "sll             $t0, $t0, 8                       \n"  // |S2|S1|S0|0|
			
 
				+      "sll             $t2, $t2, 8                       \n"  // |T2|T1|T0|0|
			
 
				+      "sll             $t4, $t4, 8                       \n"  // |R2|R1|R0|0|
			
 
				+      "raddu.w.qb      $t0, $t0                          \n"
			
 
				+      "raddu.w.qb      $t2, $t2                          \n"
			
 
				+      "raddu.w.qb      $t4, $t4                          \n"
			
 
				+      "addu            $t0, $t0, $t2                     \n"
			
 
				+      "addu            $t0, $t0, $t4                     \n"
			
 
				+      "mul             $t0, $t0, %[c1]                   \n"  // t0 * 0x1C71
			
 
				+      "addiu           %[src_ptr], %[src_ptr], 8         \n"
			
 
				+      "addiu           %[s1], %[s1], 8                   \n"
			
 
				+      "addiu           %[s2], %[s2], 8                   \n"
			
 
				+      "addiu           %[dst_width], %[dst_width], -3    \n"
			
 
				+      "addiu           %[dst_ptr], %[dst_ptr], 3         \n"
			
 
				+      "srl             $t6, $t6, 16                      \n"
			
 
				+      "srl             $t7, $t7, 16                      \n"
			
 
				+      "srl             $t0, $t0, 16                      \n"
			
 
				+      "sb              $t6, -1(%[dst_ptr])               \n"
			
 
				+      "sb              $t7, -2(%[dst_ptr])               \n"
			
 
				+      "bgtz            %[dst_width], 1b                  \n"
			
 
				+      " sb             $t0, -3(%[dst_ptr])               \n"
			
 
				+      ".set pop                                          \n"
			
 
				+      : [src_ptr] "+r" (src_ptr),
			
 
				+        [dst_ptr] "+r" (dst_ptr),
			
 
				+        [s1] "+r" (s1),
			
 
				+        [s2] "+r" (s2),
			
 
				+        [dst_width] "+r" (dst_width)
			
 
				+      : [c1] "r" (c1), [c2] "r" (c2)
			
 
				+      : "t0", "t1", "t2", "t3", "t4",
			
 
				+        "t5", "t6", "t7", "t8",
			
 
				+        "memory"  // the asm reads the three source rows and writes *dst_ptr
			
 
				+  );
			
 
				+}
			
 
				+
			
 
				+#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
--- a/src/jni/libyuv/source/scale_neon.cc
+++ b/src/jni/libyuv/source/scale_neon.cc
--- a/src/jni/libyuv/source/scale_neon64.cc
+++ b/src/jni/libyuv/source/scale_neon64.cc
--- a/src/jni/libyuv/source/scale_win.cc
+++ b/src/jni/libyuv/source/scale_win.cc
--- a/src/jni/libyuv/source/video_common.cc
+++ b/src/jni/libyuv/source/video_common.cc
@@ -0,0 +1,64 @@
 
				+/*
			
 
				+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
			
 
				+ *
			
 
				+ *  Use of this source code is governed by a BSD-style license
			
 
				+ *  that can be found in the LICENSE file in the root of the source
			
 
				+ *  tree. An additional intellectual property rights grant can be found
			
 
				+ *  in the file PATENTS. All contributing project authors may
			
 
				+ *  be found in the AUTHORS file in the root of the source tree.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#include "libyuv/video_common.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+namespace libyuv {
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+// Element count of a true array (not a pointer), as an int. Both the
			
 
				+// argument and the whole expansion are parenthesized so the macro stays
			
 
				+// correct when used inside a larger expression.
			
 
				+#define ARRAY_SIZE(x) ((int)(sizeof(x) / sizeof((x)[0])))
			
 
				+
			
 
				+// Pairs one alternate FourCC code with the canonical code used in its place.
			
 
				+struct FourCCAliasEntry {
			
 
				+  uint32 alias;      // code that may be supplied by a caller
			
 
				+  uint32 canonical;  // code CanonicalFourCC() returns for that alias
			
 
				+};
			
 
				+
			
 
				+// Alias table searched linearly by CanonicalFourCC(): each row is
			
 
				+// {alias, canonical}. Several aliases may share one canonical code.
			
 
				+static const struct FourCCAliasEntry kFourCCAliases[] = {
			
 
				+  {FOURCC_IYUV, FOURCC_I420},
			
 
				+  {FOURCC_YU16, FOURCC_I422},
			
 
				+  {FOURCC_YU24, FOURCC_I444},
			
 
				+  {FOURCC_YUYV, FOURCC_YUY2},
			
 
				+  {FOURCC_YUVS, FOURCC_YUY2},  // kCMPixelFormat_422YpCbCr8_yuvs
			
 
				+  {FOURCC_HDYC, FOURCC_UYVY},
			
 
				+  {FOURCC_2VUY, FOURCC_UYVY},  // kCMPixelFormat_422YpCbCr8
			
 
				+  {FOURCC_JPEG, FOURCC_MJPG},  // Note: JPEG has DHT while MJPG does not.
			
 
				+  {FOURCC_DMB1, FOURCC_MJPG},
			
 
				+  {FOURCC_BA81, FOURCC_BGGR},  // deprecated.
			
 
				+  {FOURCC_RGB3, FOURCC_RAW },
			
 
				+  {FOURCC_BGR3, FOURCC_24BG},
			
 
				+  {FOURCC_CM32, FOURCC_BGRA},  // kCMPixelFormat_32ARGB
			
 
				+  {FOURCC_CM24, FOURCC_RAW },  // kCMPixelFormat_24RGB
			
 
				+  {FOURCC_L555, FOURCC_RGBO},  // kCMPixelFormat_16LE555
			
 
				+  {FOURCC_L565, FOURCC_RGBP},  // kCMPixelFormat_16LE565
			
 
				+  {FOURCC_5551, FOURCC_RGBO},  // kCMPixelFormat_16LE5551
			
 
				+};
			
 
				+// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
			
 
				+//  {FOURCC_BGRA, FOURCC_ARGB},  // kCMPixelFormat_32BGRA
			
 
				+
			
 
				+// Resolves a FourCC code to its canonical form. The alias table is walked
			
 
				+// front to back and the first row whose alias equals |fourcc| supplies the
			
 
				+// result; a code with no row in the table is returned unchanged.
			
 
				+LIBYUV_API
			
 
				+uint32 CanonicalFourCC(uint32 fourcc) {
			
 
				+  const struct FourCCAliasEntry* entry = kFourCCAliases;
			
 
				+  const struct FourCCAliasEntry* const table_end =
			
 
				+      kFourCCAliases + ARRAY_SIZE(kFourCCAliases);
			
 
				+  while (entry != table_end) {
			
 
				+    if (entry->alias == fourcc) {
			
 
				+      return entry->canonical;
			
 
				+    }
			
 
				+    ++entry;
			
 
				+  }
			
 
				+  // No alias matched: the code is already canonical (or unknown).
			
 
				+  return fourcc;
			
 
				+}
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern "C"
			
 
				+}  // namespace libyuv
			
 
				+#endif
			
 
				+
			
--- a/src/jni/yuv_util/jni_onload.cc
+++ b/src/jni/yuv_util/jni_onload.cc
@@ -0,0 +1,58 @@
 
				+//
			
 
				+// Created by 葛昭友 on 2018/7/12.
			
 
				+//
			
 
				+
			
 
				+
			
 
				+#include <jni.h>
			
 
				+
			
 
				+#include "yuv_util.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#undef JNIEXPORT
			
 
				+#define JNIEXPORT __attribute__((visibility("default")))
			
 
				+
			
 
				+#include "./yuv_util.h"
			
 
				+
			
 
				+
			
 
				+namespace spark_live_pusher_jni {
			
 
				+    static JavaVM *jvm;
			
 
				+    static JNIEnv *jenv;
			
 
				+
			
 
				+
			
 
				+
			
 
				+    static JNINativeMethod yuv_util_methods[] = {
			
 
				+            {"RGBAToI420",           "([BIIZI)[B",            (void *) YuvUtil_RGBAToI420},
			
 
				+            {"ARGBToI420Scaled",     "([IIIZIIIII)[B",        (void *) YuvUtil_ARGBToI420Scaled},
			
 
				+            {"ARGBToI420",           "([IIIZI)[B",            (void *) YuvUtil_ARGBToI420},
			
 
				+            {"NV21ToI420Scaled",     "([BIIZIIIII)[B",        (void *) YuvUtil_NV21ToI420Scaled},
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+
			
 
				+    extern "C" jint JNIEXPORT JNI_OnLoad(JavaVM *vm, void *reserved) {
			
 
				+        jvm = vm;
			
 
				+
			
 
				+        if (jvm->GetEnv((void **) &jenv, JNI_VERSION_1_6) != JNI_OK) {
			
 
				+            LIBENC_LOGE("Env not got");
			
 
				+            return JNI_ERR;
			
 
				+        }
			
 
				+
			
 
				+        jclass clz = jenv->FindClass("com/spark/live/sdk/util/JniYuvUtil");
			
 
				+        if (clz == NULL) {
			
 
				+            LIBENC_LOGE("Class \"com/spark/live/sdk/util/JniYuvUtil\" not found");
			
 
				+            return JNI_ERR;
			
 
				+        }
			
 
				+
			
 
				+        if (jenv->RegisterNatives(clz, yuv_util_methods, LIBENC_ARRAY_ELEMS(yuv_util_methods))) {
			
 
				+            LIBENC_LOGE("methods not registered");
			
 
				+            return JNI_ERR;
			
 
				+        }
			
 
				+        return JNI_VERSION_1_6;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+
			
 
				+}  // namespace webrtc_jni
			
 
				+
			
--- a/src/jni/yuv_util/yuv_util.cc
+++ b/src/jni/yuv_util/yuv_util.cc