diff --git a/.gitignore b/.gitignore index 3e1b52e4..2a9930ee 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,7 @@ hs_err_pid* /.libs/ /findbugs/ /target/ + +# java files +*.class +*~ diff --git a/CHANGELOG.md b/CHANGELOG.md index 43b3bab7..241d7885 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Aparapi Changelog +## 2.0.0 + +* Fixed OpenCL compilation which would throw null kernel or JNI linking exceptions. +* work on config, mem override, mem align, sync with java-ucores +* Java for UCores is meant to be an accelerator 'friendly' Java version based on OpenJDK and obuildfactory. Accelerator users should consider using Java for UCores in conjunction with Aparapi UCores as it will automatically allocate memory on boundaries that can enhance PCIE transfer speeds. +* Added a more flexible way of per-platform config through ConfigSettings.cpp +* Support for Altera OpenCL V15 +* Support for platform select using Altera OpenCL V15 ICD +* Support for selection of binary/source flow for non-Altera platforms (usage examples below) +* Control dynamic binary/source/default flow through kernel (usage examples below) +* Device select for systems with multiple devices of the same type (usage examples below) +* New Configuration Settings file for better per platform settings control (ConfigSettings.h) +* Improved build system for Altera JNI +* Aparapi Range.getFactors bug fix – fixed a bug in the original Aparapi distribution. +* Altera ICD bug fix – only other accelerators were discovered + ## 1.3.3 * Fixed a bug where calling createProgram resulted in an exception. 
@@ -37,4 +53,4 @@ ## 1.1.0 -* Changed group id and package to com.aparapi +* Changed group id and package to com.aparapi \ No newline at end of file diff --git a/LICENSE b/LICENSE index e06d2081..ad410e11 100644 --- a/LICENSE +++ b/LICENSE @@ -198,5 +198,4 @@ Apache License distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. - + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md index 244b2c8a..07b7db2a 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,24 @@ Aparapi was originally a project conceived and developed by AMD corporation. It Aparapi Javadocs: [latest](http://www.javadoc.io/doc/com.aparapi/aparapi) - [1.3.2](http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.2) - [1.3.1](http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.1) - [1.3.0](http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.0) - [1.2.0](http://www.javadoc.io/doc/com.aparapi/aparapi/1.2.0) - [1.1.2](http://www.javadoc.io/doc/com.aparapi/aparapi/1.1.2) - [1.1.1](http://www.javadoc.io/doc/com.aparapi/aparapi/1.1.1) - [1.1.0](http://www.javadoc.io/doc/com.aparapi/aparapi/1.1.0) - [1.0.0](http://www.javadoc.io/doc/com.syncleus.aparapi/aparapi/1.0.0) +The original Aparapi framework was built with GPUs/APUs in mind. We believe that heterogeneous computing has a lot more to offer than just combining CPUs and GPUs together. In the past few years we have started to see OpenCL support for FPGAs (Altera/Xilinx) and DSPs (TI) and more types of computing devices will soon follow suit. + +Combining several types of devices and architectures is at the heart of heterogeneous computing's power efficiency advantage. 
We believe that in order to optimize performance per watt and accommodate the needs of today's high level programming requirements we need to be able to work with all these types of devices using a single high level code base. + +The framework has been tested and known to work on the following device types/OpenCL SDKs*: +1. CPUs (AMD/Intel) +2. GPUs (AMD/NVidia) +2. APUs (AMD) +3. FPGAs(Altera - Nallatech/Terasic boards) + +Changes from original work +----------------------------------- +1. Support for FPGA devices (right now Altera OpenCL is supported) +2. Support for multiple platforms (platform selection) +3. Support for accelerators and OpenCL binary file format and flow +3. Built against OpenJDK (to allow more control/flexibility in future Java/OpenCL integration) +4. Misc small changes - Improve profiling usability etc. + For detailed documentation see [Aparapi.com](http://Aparapi.com) or check out the [latest Javadocs](http://www.javadoc.io/doc/com.aparapi/aparapi). For support please use [Gitter](https://gitter.im/Syncleus/aparapi) or the [official Aparapi mailing list](https://groups.google.com/d/forum/aparapi). @@ -100,3 +118,63 @@ Kernel kernel = new Kernel() { Range range = Range.create(result.length); kernel.execute(range); ``` + +Project History +--------------- +To the best of my knowledge the original idea of trying to target FPGAs from Java using Aparapi was the brain child of Sai Rahul Chalamalasetti and Mitch Wright from HP Servers. + +I started writing the first implementation of the project (called APARAPI-FPGA's) while working as an intern at HP Servers in 2013. It started as a proof of concept but showed great promise. + +During that work I started envisioning a more general solution using Aparapi that will allow us to target any OpenCL device. i.e. not only GPUs or FPGAs. 
+ +The project culminated with two publications in Sept 2014 (FPL 2014 and FSP 2014) in which we described our findings and promised to release our initial work back to the open source community. + +Since then we received a lot of interest and requests to release the source and we have been planning to release our implementation for some time now. + +This project was left on the back burner for a long time, but a couple of months back I started to dedicate some time to rewrite the framework to what I originally envisioned it to be i.e. a general, programmer friendly, high level framework for heterogeneous devices. + +Two things were important to me working towards this public release: +1. I wanted to make sure it will be something that can be usable by regular Java programmers and not just release another difficult to understand research project that would require significant effort to make it work. +2. I wanted to implement the more general idea (not just an FPGA framework or GPU framework) + +The initial release is still far from being programmer "Friendly", but it is a step forward and I hope it will encourage high level programmers to experiment with "exotic" heterogeneous architectures. + +I welcome anyone who wants to be involved in improving this framework. + +If you use this work for academic purposes please reference the original FPL/FSP papers describing our initial Aparapi work (see below). + +References +----------------------------- + +The modified Aparapi framework that supports FPGAs was first introduced in the following papers: + +Oren Segal, Sai Rahul Chalamalasetti, Mitch Wright and Martin Margala. “High Level Programming Framework for FPGAs in the Data Center”, Field Programmable Logic and Applications (FPL), 2014 24th International Conference on. IEEE, 2014. +http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6927442 + +Segal, Oren, Martin Margala, Sai Rahul Chalamalasetti, and Mitch Wright. 
“High Level Programming for Heterogeneous Architectures.” arXiv preprint arXiv:1408.4964 (2014). +http://arxiv.org/abs/1408.4964 + +The original Aparapi project can be found at: +https://code.google.com/p/aparapi/ + +This work is part of the Heterogeneous computing research that is conducted by the Mora research group at UML: +http://mora.uml.edu/ + +Documentation +--------------- +For information on how to set up the environment and run some tests please check the [project root]/docs folder: +https://gitlab.com/mora/aparapi-ucores/tree/master/docs + +Wiki: +https://gitlab.com/mora/aparapi-ucores/wikis/home + +Screenshots: +https://gitlab.com/mora/aparapi-ucores/wikis/Screenshots + +Acknowledgments +--------------- +1. AMD for doing such a great job with Aparapi without which this project would be an order of magnitude more complex. + +2. HP Servers which believed back in 2012 that Java-OpenCL on FPGAs is something worth looking at. + +3. Altera, Nallatech and Terasic for contributing FPGA hardware for us to develop and test on. 
diff --git a/docs/APARAPIInstallationNotes.txt b/docs/APARAPIInstallationNotes.txt new file mode 100644 index 00000000..bda639ba --- /dev/null +++ b/docs/APARAPIInstallationNotes.txt @@ -0,0 +1,41 @@ +--------------------------------------------------------- +APARAPI Installation Notes for CentOS release 6.x (Tested on 6.3/4/5/6) +--------------------------------------------------------- + +* Note default CentOS repository versions of Java(1.6) and Ant(1.7) are not compatible with latest APARAPI svn src + need to download newer versions (see below) + +- install Java JDK + sudo yum install java-1.7.0-openjdk-devel + +- install ANT + Download latest ANT -> currently apache-ant-1.9.1-bin.tar.gz + sudo tar xvzf apache-ant-1.9.1-bin.tar.gz -C /opt + sudo ln -s /opt/apache-ant-1.9.1 /opt/ant + +- install AMD APP SDK + + - needed for build even if hardware not available + +- install git + +- install g++ + + sudo yum install gcc-c++ + +- get src from git repository + + git clone ... [project folder] + +- set environment vars + + source [project folder]/env/aparapiBuildEnv + +- build + + cd [project folder]/src/aparapi + + ant clean build dist + + + diff --git a/docs/AparapiUcoresBinaryFlow.txt b/docs/AparapiUcoresBinaryFlow.txt new file mode 100644 index 00000000..f40b79c3 --- /dev/null +++ b/docs/AparapiUcoresBinaryFlow.txt @@ -0,0 +1,34 @@ +Aparapi Ucores Binary Flow +-------------------------- + +This is currently used only for FPGAs but we plan to expand it for general manual optimization/compilation of OpenCL source code. + +A Binary flow is one where the Aparapi framework generates an OpenCL file while running and then tries to read and execute the binary compilation of that source file. + +Sequence of events +------------------ + +1. Aparapi Application is executed. Aparapi automatically generates an OpenCL source file describing the kernel (file name is the full java kernel class name.cl) + +2. 
Aparapi Application automatically tries to load the binary compiled file (full java kernel class name.aocx) + +3. If Aparapi Application finds the compiled file it executes the compiled kernel (runs the kernel on the accelerator) else it will revert to JTP mode in which case the application runs as specified without acceleration (JTP mode). A simulation mode if you will. + +How this works for FPGAs +------------------------ + +1. We first run the Application in CPU/GPU/ACC mode. Aparapi-Ucores will generate an OpenCL file (ClassXXXName.cl), but will not find the equivalent binary file (ClassXXXName.aocx). It will revert to another execution mode such as JTP. We can use JTP to check whether the kernel is logically correct. + +2. We feed the OpenCL file (ClassXXXName.cl) to an FPGA OpenCL compiler. + +Example: + + a. aoc ClassXXXName.cl -O3 --board pcie385n_a7 -v --report + + b. Wait until an optimized FPGA design is created...currently takes several hours + + c. Output of the compiler will be: ClassXXXName.aocx + + 3. Copy the binary output file (ClassXXXName.aocx) to the folder where the .cl file resides. + +4. Run application again in ACC/GPU mode. This time Aparapi will detect the ClassXXXName.aocx and use the FPGA as the accelerator. 
\ No newline at end of file diff --git a/docs/HowToBuildAPARAPI.txt b/docs/HowToBuildAPARAPI.txt new file mode 100644 index 00000000..bb332e06 --- /dev/null +++ b/docs/HowToBuildAPARAPI.txt @@ -0,0 +1,63 @@ + +--------------------------------------------------------- +How to build APARAPI from the cmd line after modifications to code +--------------------------------------------------------- +* first read APARAPI Installation Notes (APARAPIInstallationNotes.txt) for general instructions on getting the project setup and initial build steps + +- open new terminal window + +- set environment variables + + source [project folder]/env/AlteraV14Env + +- to build the APARAPI src tree + + cd [project folder]/src/aparapi + + ant clean build dist + + * this does not build the jni proxy with FPGA support automatically (see below for jni FPGA build) + +- to build the APARAPI jni proxy library (should run after initial build and after any changes to CPP source files) + + - goto jni dir + + cd [project folder]/src/aparapi/com.amd.aparapi.jni/ + + - to use ant to build standard jni version * + + ant + + - to use ant to build FPGA jni version * + + ant -f build_altera_ocl.xml + +* The output of the com.amd.aparapi.jni build is a dll stored in dist folder: + + [project folder]/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so + +If you have a system where you want to have both FPGA and standard versions available (i.e. 
with multi OpenCL hardware platforms) you can simply copy the dll's to different locations after the build and link to them through the java vm command line parameters: + +Example: + +Copy the fpga and standard dll's to the following locations respectively: +/src/aparapi/com.amd.aparapi.jni/dist.fpga/libaparapi_x86_64.so +/src/aparapi/com.amd.aparapi.jni/dist.std/libaparapi_x86_64.so + +To select one JNI version or the other at execution time, use the following parameters to the java command: +java -Djava.library.path=../../com.amd.aparapi.jni/dist.std [rest of cmd line] +java -Djava.library.path=../../com.amd.aparapi.jni/dist.fpga [rest of cmd line] + + + + + + + + + + + + + + diff --git a/docs/HowToRunAPARAPI.txt b/docs/HowToRunAPARAPI.txt new file mode 100644 index 00000000..2506b0e2 --- /dev/null +++ b/docs/HowToRunAPARAPI.txt @@ -0,0 +1,62 @@ + +--------------------------------------------------------- +How to run APARAPI tests from the cmd line +--------------------------------------------------------- + +- open new terminal window + +- set environment variables + + source [project folder]/env/AlteraV14Env + +- go to either samples/examples + + - samples dir -> + + cd [project folder]/src/aparapi/samples + + - examples dir-> + + cd [project folder]/src/aparapi/examples/ + +- to run any sample or example: + + - run the shell script file named after the sample/example name and specify type of run + + Format is*: + + sh [name].sh [ACC|CPU|JTP|GPU|SEQ] + + Examples: + + - run nbody simulation + cd [project folder]/src/aparapi/examples/nbody + sh nbody.sh JTP + sh nbody.sh CPU + sh nbody.sh GPU + sh nbody.sh ACC + + - run mandel + cd [project folder]/src/aparapi/samples/mandel/ + sh mandel.sh JTP + sh mandel.sh CPU + sh mandel.sh GPU + sh mandel.sh ACC + + +* For more sophisticated use cases with dual configs (fpga/std) you can use the following format: + + sh [name].std.sh [ACC|CPU|JTP|GPU|SEQ] + + sh [name].fpga.sh [ACC|CPU|JTP|GPU|SEQ] + +You can read more 
about dual config in HowToBuildAPARAPI.txt. + + + + + + + + + diff --git a/docs/WorkingWithMultiplePlatforms.txt b/docs/WorkingWithMultiplePlatforms.txt new file mode 100644 index 00000000..d55a415a --- /dev/null +++ b/docs/WorkingWithMultiplePlatforms.txt @@ -0,0 +1,46 @@ +Working with multiple platforms +------------------------------- + +The original version of Aparapi does not support platform selection (it just chooses the first available one). + +This means that in scenarios where you have multiple OpenCL platforms such as AMD, NVidia, Intel, Altera etc. you do not have control over what accelerator device will be used. + +One of the changes we made is to allow selection for platforms that support the OpenCL ICD model. +(https://www.khronos.org/news/permalink/opencl-installable-client-driver-icd-loader) + +In the spirit of other Aparapi configuration options for device selection this can be set using a configuration option. + +The platform configuration option is called: com.amd.aparapi.platformHint + +When this option is set it causes Aparapi to search for the string platformHint inside the available platform names and if it is found, it will select that platform over other available ones. + +You can use it on the command line in the following way: + +java -Dcom.amd.aparapi.platformHint=AMD .... + +or + +java -Dcom.amd.aparapi.platformHint=NVIDIA .... 
+ + +Nbody simulation example with platform selection +--------------------------------------------------------------------- + +The script nbody.std.no-opengles-platform-select.sh contains the following java command line: + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dcom.amd.aparapi.platformHint=$3 \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + +To choose an AMD GPU for example we would invoke the script in the following way(the third parameter): + +nbody.std.no-opengles-platform-select.sh GPU 1024 AMD + + diff --git a/env/AlteraV14Env b/env/AlteraV14Env new file mode 100644 index 00000000..501d6a4d --- /dev/null +++ b/env/AlteraV14Env @@ -0,0 +1,8 @@ +export QUARTUS_ROOTDIR=/home/sdev/altera/14.0/quartus +export PATH=$PATH:$QUARTUS_ROOTDIR/bin +export ALTERAOCLSDKROOT=/home/sdev/altera/14.0/hld +export LD_LIBRARY_PATH=$ALTERAOCLSDKROOT/host/linux64/lib:$ALTERAOCLSDKROOT/board/nalla_pcie/linux64/lib +export PATH=$PATH:$ALTERAOCLSDKROOT/bin +export AOCL_BOARD_PACKAGE_ROOT=/home/sdev/altera/14.0/hld/board/nalla_pcie +export LM_LICENSE_FILE=[your license file here] + diff --git a/env/aparapiBuildEnv b/env/aparapiBuildEnv new file mode 100644 index 00000000..20ee4815 --- /dev/null +++ b/env/aparapiBuildEnv @@ -0,0 +1,6 @@ +# Ant build environment +export ANT_HOME=/opt/ant +export PATH=${PATH}:${ANT_HOME}/bin + + + diff --git a/env/mahoutEnv b/env/mahoutEnv new file mode 100644 index 00000000..14a7683a --- /dev/null +++ b/env/mahoutEnv @@ -0,0 +1,10 @@ +# Generated file for hadoop/mahout env +export JAVA_HOME=/usr/lib/jvm/jre-1.7.0-openjdk.x86_64/ +#export HADOOP_HOME=/usr/local/hadoop-1.2.0/ +export HADOOP_DIR=/usr/local/hadoop-1.2.0 +export HADOOP_PREFIX=$HADOOP_DIR/ +export 
HADOOP_CONF_DIR=$HADOOP_DIR/conf +export MAHOUT_HOME=/usr/local/mahout-0.9/ +export MAHOUT_VERSION=0.9-SNAPSHOT +#export MAVEN_OPTS=-Xmx1024m + diff --git a/pom.xml b/pom.xml index 844b3a2a..0ae13583 100644 --- a/pom.xml +++ b/pom.xml @@ -11,7 +11,7 @@ com.aparapi aparapi - 1.3.3-SNAPSHOT + 2.0.0-SNAPSHOT jar @@ -69,7 +69,7 @@ com.aparapi aparapi-jni - 1.1.1-SNAPSHOT + 2.0.0-SNAPSHOT diff --git a/src/main/java/com/aparapi/Config.java b/src/main/java/com/aparapi/Config.java index 6d04663d..34a00298 100644 --- a/src/main/java/com/aparapi/Config.java +++ b/src/main/java/com/aparapi/Config.java @@ -97,6 +97,25 @@ public class Config extends ConfigJNI{ */ public static final String executionMode = System.getProperty(propPkgName + ".executionMode"); + //!!! oren change 2.15.15 -> allow choosing a platform when multiple platforms are available + // Currently aparapi does not offer a way to choose a platform + /** + * Allows the user to request a specific platform + * + * Usage -Dcom.amd.aparapi.platformHint={platform name search string} + * + */ + public static final String platformHint = System.getProperty(propPkgName + ".platformHint"); + + //!!! oren change 7.15.15 -> allow choosing a flow type + /** + * Allows the user to select a flow type + * + * Usage -Dcom.amd.aparapi.flowType={binary|source|default} + * + */ + public static final String flowType = System.getProperty(propPkgName + ".flowType"); + /** * Allows the user to request that the execution mode of each kernel invocation be reported to stdout. * @@ -221,6 +240,10 @@ public interface InstructionListener{ System.out.println(propPkgName + ".logLevel{OFF|FINEST|FINER|FINE|WARNING|SEVERE|ALL}=" + logger.getLevel()); System.out.println(propPkgName + ".enableProfiling{true|false}=" + enableProfiling); System.out.println(propPkgName + ".enableProfilingCSV{true|false}=" + enableProfilingCSV); + // !!! 
oren change + System.out.println(propPkgName + ".profilingFileNameFormatStr{format str}=" + profilingFileNameFormatStr); + System.out.println(propPkgName + ".flowType{source|binary|default}=" + flowType); + ////////////////// System.out.println(propPkgName + ".enableVerboseJNI{true|false}=" + enableVerboseJNI); System.out.println(propPkgName + ".enableVerboseJNIOpenCLResourceTracking{true|false}=" + enableVerboseJNIOpenCLResourceTracking); @@ -236,4 +259,10 @@ public interface InstructionListener{ public static String getLoggerName() { return logPropName; } + + // !!! oren change -> expose pkg name beyond inheritance + public static String getPkgName() { + return propPkgName; + } + } diff --git a/src/main/java/com/aparapi/Kernel.java b/src/main/java/com/aparapi/Kernel.java index ecdff920..6470f6db 100644 --- a/src/main/java/com/aparapi/Kernel.java +++ b/src/main/java/com/aparapi/Kernel.java @@ -373,17 +373,10 @@ public Kernel execute(Range _range) { * kernel.execute(values.length); * *

-<<<<<<< HEAD:src/main/java/com/aparapi/Kernel.java * Alternatively, the property com.aparapi.executionMode can be set to one of JTP,GPU,ACC,CPU,SEQ * when an application is launched. *

     *    java -classpath ....;aparapi.jar -Dcom.aparapi.executionMode=GPU MyApplication
-=======
-    * Alternatively, the property com.amd.aparapi.executionMode can be set to one of JTP,GPU,ACC,CPU,SEQ
-    * when an application is launched.
-    * 

-    *    java -classpath ....;aparapi.jar -Dcom.amd.aparapi.executionMode=GPU MyApplication
->>>>>>> b118aad... added method to set execution mode without any fallback:com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
     * 

* Generally setting the execution mode is not recommended (it is best to let Aparapi decide automatically) but the option * provides a way to compare a kernel's performance under multiple execution modes. @@ -500,6 +493,80 @@ public boolean isOpenCL() { } }; + //////////////////// + // !!! oren change -> add source/binary flow support to kernel + //////////////////// + public static enum FlowType + { + // flow type list + SOURCE(com.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_SOURCE_FLOW), + BINARY(com.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_BINARY_FLOW), + DEFAULT(com.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_DEFAULT_FLOW); + + // data store + int flowType; + + FlowType(int flowType) + { + setValue(flowType); + } + + FlowType(String flowTypeStr) + { + this.flowType = strToFlowType(flowTypeStr).getValue(); + } + + public int getValue() + { + return this.flowType; + } + + private void setValue(int flowType) + { + this.flowType = flowType; + } + + public static FlowType getDefaultFlowType() + { + // if set by user try get value else set to default + FlowType flowType = (Config.flowType==null) ? DEFAULT : strToFlowType(Config.flowType); + return flowType; + } + + public static FlowType strToFlowType(final String flowTypeStr) + { + try + { + FlowType flowType = valueOf(flowTypeStr.toUpperCase()); + return flowType; + } + catch (Exception e) + { + logger.info("!!! 
bad flow type => (" + flowTypeStr + ") => reverting to default platform flow!"); + throw e; + } + } + + } + + public FlowType getFlowType() { + return kernelFlowType; + } + + + public void setFlowType(FlowType kernelFlowType) { + this.kernelFlowType = kernelFlowType; + } + + public void setFlowType(String flowTypeStr) { + this.kernelFlowType = FlowType.strToFlowType(flowTypeStr); + } + + private FlowType kernelFlowType = FlowType.getDefaultFlowType(); + + + //////////////////// + private KernelRunner kernelRunner = null; private boolean autoCleanUpArrays = false; @@ -719,7 +786,8 @@ protected final int getGlobalId() { return getGlobalId(0); } - @OpenCLDelegate + +@OpenCLDelegate protected final int getGlobalId(int _dim) { return kernelState.getGlobalIds()[_dim]; } @@ -1074,7 +1142,23 @@ public Kernel clone() { } } + /** + * Init a kernel from an existing one. used in caching mechanisems to improve startup time (ex. SparkCL). + * + */ + public void init(Kernel kernel) { + + // create and init a copy of the kernel runner + kernelRunner = new KernelRunner(this); + if(kernel.kernelRunner!=null) + kernelRunner.init(kernel.kernelRunner); + // We need to be careful to also clone the KernelState + kernelState = new KernelState(kernel.kernelState); // Qualified copy constructor + } + + +/** * Delegates to either {@link java.lang.Math#acos(double)} (Java) or acos(float) (OpenCL). * * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable. diff --git a/src/main/java/com/aparapi/ProfileInfo.java b/src/main/java/com/aparapi/ProfileInfo.java index 00aadbff..e5217ea1 100644 --- a/src/main/java/com/aparapi/ProfileInfo.java +++ b/src/main/java/com/aparapi/ProfileInfo.java @@ -54,7 +54,9 @@ to national security controls as identified on the Commerce Control List (curren public class ProfileInfo{ - private enum TYPE { + // !!! 
oren change -> we need access to type at upper levels + //private +public static enum TYPE { R, X, W diff --git a/src/main/java/com/aparapi/Range.java b/src/main/java/com/aparapi/Range.java index 5ee22725..2f950e2c 100644 --- a/src/main/java/com/aparapi/Range.java +++ b/src/main/java/com/aparapi/Range.java @@ -66,7 +66,11 @@ public class Range extends RangeJNI{ public static final int THREADS_PER_CORE = 16; - public static final int MAX_OPENCL_GROUP_SIZE = 256; + // !!! oren change -> this value looks out dated and the mechanism probably needs revisiting !!! + // we already see evidence of improved performance for size==1024 on certain devices (ref: FPGA doc classification paper) + // for now we set it to 4X original value, but we should think about it more... + //public static final int MAX_OPENCL_GROUP_SIZE = 256; + public static final int MAX_OPENCL_GROUP_SIZE = 1024; public static final int MAX_GROUP_SIZE = Math.max(Runtime.getRuntime().availableProcessors() * THREADS_PER_CORE, MAX_OPENCL_GROUP_SIZE); @@ -120,6 +124,28 @@ public static Range create(Device _device, int _globalWidth, int _localWidth) { return (range); } + /** + * Create a range from an existing range and a device
+ * + * @param _device to be associated with range + * @param orgRange original range to copy from + * @return A new Range with the requested dimensions + */ + public static Range create(Device _device, Range orgRange) { + + switch(orgRange.getDims()) + { + case 1: + return create(_device,orgRange.globalSize_0,orgRange.localSize_0); + case 2: + return create2D(_device,orgRange.globalSize_0,orgRange.globalSize_1,orgRange.localSize_0,orgRange.localSize_1); + case 3: + return create3D(_device,orgRange.globalSize_0,orgRange.globalSize_1,orgRange.globalSize_2,orgRange.localSize_0,orgRange.localSize_1,orgRange.localSize_2); + default: + return null; + } + } + /** * Determine the set of factors for a given value. * @param _value The value we wish to factorize. @@ -128,11 +154,18 @@ public static Range create(Device _device, int _globalWidth, int _localWidth) { */ private static int[] getFactors(int _value, int _max) { - final int factors[] = new int[MAX_GROUP_SIZE]; + //final int factors[] = new int[MAX_GROUP_SIZE]; int factorIdx = 0; - for (int possibleFactor = 1; possibleFactor <= _max; possibleFactor++) { - if ((_value % possibleFactor) == 0) { + // !!! 
oren bug fix -> based on poz findings + // max can not be bigger then value and if factorIdx >= MAX_GROUP_SIZE we will have an access violation + final int GroupSizeLimit = Math.min(Math.min(_max,_value),MAX_GROUP_SIZE); + final int factors[] = new int[GroupSizeLimit]; + //for (int possibleFactor = 1; possibleFactor <= _max; possibleFactor++) + for (int possibleFactor = 1; possibleFactor <= GroupSizeLimit; possibleFactor++) + { + if ((_value % possibleFactor) == 0) + { factors[factorIdx++] = possibleFactor; } } diff --git a/src/main/java/com/aparapi/device/Device.java b/src/main/java/com/aparapi/device/Device.java index e43e90b2..7ea85557 100644 --- a/src/main/java/com/aparapi/device/Device.java +++ b/src/main/java/com/aparapi/device/Device.java @@ -15,168 +15,264 @@ */ package com.aparapi.device; -import com.aparapi.*; +import java.util.List; + import com.aparapi.internal.kernel.*; +import com.aparapi.Range; +import com.aparapi.device.OpenCLDevice.DeviceComparitor; +import com.aparapi.device.OpenCLDevice.DeviceSelector; +import com.aparapi.internal.opencl.OpenCLPlatform; public abstract class Device{ - public static enum TYPE { - UNKNOWN(Integer.MAX_VALUE), - GPU(2), - CPU(3), - JTP(5), - SEQ(6), - ACC(1), - ALT(4); - - /** Heuristic ranking of device types, lower is better. */ - public final int rank; - - TYPE(int rank) { - this.rank = rank; - } - }; - - /** @deprecated use {@link KernelManager#bestDevice()} - * @see com.aparapi.device - */ - @Deprecated - public static Device best() { - return KernelManager.instance().bestDevice(); + public static enum TYPE { + UNKNOWN(Integer.MAX_VALUE), + GPU(2), + CPU(3), + JTP(5), + SEQ(6), + ACC(1), + ALT(4); + + /** Heuristic ranking of device types, lower is better. 
*/ + public final int rank; + + TYPE(int rank) { + this.rank = rank; + } + }; + + /** @deprecated use {@link KernelManager#bestDevice()} + * @see com.aparapi.device + */ + @Deprecated + public static Device best() { + return KernelManager.instance().bestDevice(); + } + + /** + * @see com.aparapi.device + */ + @SuppressWarnings("deprecation") + @Deprecated + public static Device bestGPU() { + return firstGPU(); + } + + /** + * @see com.aparapi.device + */ + @Deprecated + public static Device first(final Device.TYPE _type) { + return KernelManager.DeprecatedMethods.firstDevice(_type); + } + + /** + * @see com.aparapi.device + */ + @SuppressWarnings("deprecation") + @Deprecated + public static Device firstGPU() { + return KernelManager.DeprecatedMethods.firstDevice(TYPE.GPU); + } + + /** + * @see com.aparapi.device + */ + @SuppressWarnings("deprecation") + @Deprecated + public static Device firstCPU() { + return KernelManager.DeprecatedMethods.firstDevice(TYPE.CPU); + } + + /** + * @see com.aparapi.device + */ + @Deprecated + public static Device bestACC() { + throw new UnsupportedOperationException(); + } + + protected TYPE type = TYPE.UNKNOWN; + + protected int maxWorkGroupSize; + + protected int maxWorkItemDimensions; + + protected int[] maxWorkItemSize = new int[] { + 0, + 0, + 0 + }; + + public abstract String getShortDescription(); + + public TYPE getType() { + return type; + } + + public void setType(TYPE type) { + this.type = type; + } + + public int getMaxWorkItemDimensions() { + return maxWorkItemDimensions; + } + + public void setMaxWorkItemDimensions(int _maxWorkItemDimensions) { + maxWorkItemDimensions = _maxWorkItemDimensions; + } + + public int getMaxWorkGroupSize() { + return maxWorkGroupSize; + } + + public void setMaxWorkGroupSize(int _maxWorkGroupSize) { + maxWorkGroupSize = _maxWorkGroupSize; + } + + public int[] getMaxWorkItemSize() { + return maxWorkItemSize; + } + + public void setMaxWorkItemSize(int[] maxWorkItemSize) { + this.maxWorkItemSize = 
maxWorkItemSize; + } + + public Range createRange(int _globalWidth) { + return (Range.create(this, _globalWidth)); + } + + public Range createRange(int _globalWidth, int _localWidth) { + return (Range.create(this, _globalWidth, _localWidth)); + } + + public Range createRange2D(int _globalWidth, int _globalHeight) { + return (Range.create2D(this, _globalWidth, _globalHeight)); + } + + public Range createRange2D(int _globalWidth, int _globalHeight, int _localWidth, int _localHeight) { + return (Range.create2D(this, _globalWidth, _globalHeight, _localWidth, _localHeight)); + } + + public Range createRange3D(int _globalWidth, int _globalHeight, int _globalDepth) { + return (Range.create3D(this, _globalWidth, _globalHeight, _globalDepth)); + } + + public Range createRange3D(int _globalWidth, int _globalHeight, int _globalDepth, int _localWidth, int _localHeight, + int _localDepth) { + return (Range.create3D(this, _globalWidth, _globalHeight, _globalDepth, _localWidth, _localHeight, _localDepth)); + } + + public abstract long getDeviceId(); + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof Device)) { + return false; + } + + Device device = (Device) o; + + return getDeviceId() == device.getDeviceId(); + } + + @Override + public int hashCode() { + return Long.valueOf(getDeviceId()).hashCode(); + } + + // !!! 
oren change -> get device using the tuple (platform, deviceType, id) + + public static Device getDevice(String platformName, Device.TYPE deviceType, int deviceId) + { + return getDevice(platformName,deviceType.name(),deviceId); } - /** - * @see com.aparapi.device - */ - @SuppressWarnings("deprecation") - @Deprecated - public static Device bestGPU() { - return firstGPU(); - } + // get first available device - /** - * @see com.aparapi.device - */ - @Deprecated - public static Device first(final Device.TYPE _type) { - return KernelManager.DeprecatedMethods.firstDevice(_type); + public static Device getDevice(String platformName, Device.TYPE deviceType) + { + return getDevice(platformName,deviceType.name(),0); } - /** - * @see com.aparapi.device - */ - @SuppressWarnings("deprecation") - @Deprecated - public static Device firstGPU() { - return KernelManager.DeprecatedMethods.firstDevice(TYPE.GPU); + public static Device getDevice(String platformName, String deviceTypeName) + { + return getDevice(platformName,deviceTypeName,0); } - /** - * @see com.aparapi.device - */ - @SuppressWarnings("deprecation") - @Deprecated - public static Device firstCPU() { - return KernelManager.DeprecatedMethods.firstDevice(TYPE.CPU); - } + public static Device getDevice(String platformName, String deviceTypeName, int deviceId) + { + List platforms = (new OpenCLPlatform()).getOpenCLPlatformsFilteredBy(platformName); //getOpenCLPlatforms(); - /** - * @see com.aparapi.device - */ - @Deprecated - public static Device bestACC() { - throw new UnsupportedOperationException(); - } + int platformc = 0; + for (OpenCLPlatform platform : platforms) + { + //if(platform.getName().contains(platformName)) + //{ - protected TYPE type = TYPE.UNKNOWN; + System.out.println("Platform " + platformc + "{"); - protected int maxWorkGroupSize; + System.out.println(" Name : \"" + platform.getName() + "\""); - protected int maxWorkItemDimensions; + System.out.println(" Vendor : \"" + platform.getVendor() + "\""); - 
protected int[] maxWorkItemSize = new int[] { - 0, - 0, - 0 - }; + System.out.println(" Version : \"" + platform.getVersion() + "\""); - public abstract String getShortDescription(); + List devices = platform.getOpenCLDevices(); - public TYPE getType() { - return type; - } + System.out.println(" Platform contains " + devices.size() + " OpenCL devices"); - public void setType(TYPE type) { - this.type = type; - } + int devicec = 0; - public int getMaxWorkItemDimensions() { - return maxWorkItemDimensions; - } + for (OpenCLDevice device : devices) + { + if( device.getType().name().equalsIgnoreCase(deviceTypeName)) + { - public void setMaxWorkItemDimensions(int _maxWorkItemDimensions) { - maxWorkItemDimensions = _maxWorkItemDimensions; - } + System.out.println(" Device " + devicec + "{"); - public int getMaxWorkGroupSize() { - return maxWorkGroupSize; - } + System.out.println(" Type : " + device.getType()); - public void setMaxWorkGroupSize(int _maxWorkGroupSize) { - maxWorkGroupSize = _maxWorkGroupSize; - } + System.out.println(" GlobalMemSize : " + device.getGlobalMemSize()); - public int[] getMaxWorkItemSize() { - return maxWorkItemSize; - } + System.out.println(" LocalMemSize : " + device.getLocalMemSize()); - public void setMaxWorkItemSize(int[] maxWorkItemSize) { - this.maxWorkItemSize = maxWorkItemSize; - } - - public Range createRange(int _globalWidth) { - return (Range.create(this, _globalWidth)); - } - - public Range createRange(int _globalWidth, int _localWidth) { - return (Range.create(this, _globalWidth, _localWidth)); - } + System.out.println(" MaxComputeUnits : " + device.getMaxComputeUnits()); - public Range createRange2D(int _globalWidth, int _globalHeight) { - return (Range.create2D(this, _globalWidth, _globalHeight)); - } + System.out.println(" MaxWorkGroupSizes : " + device.getMaxWorkGroupSize()); - public Range createRange2D(int _globalWidth, int _globalHeight, int _localWidth, int _localHeight) { - return (Range.create2D(this, _globalWidth, 
_globalHeight, _localWidth, _localHeight)); - } + System.out.println(" MaxWorkItemDimensions : " + device.getMaxWorkItemDimensions()); - public Range createRange3D(int _globalWidth, int _globalHeight, int _globalDepth) { - return (Range.create3D(this, _globalWidth, _globalHeight, _globalDepth)); - } + System.out.println(" }"); + + if(deviceId>0 && (devicec!=deviceId)) + { + System.out.println("!!! devicec!=deviceId(" + deviceId + ") => continue search !!!"); + continue; + } + + // close platform bracket + System.out.println("}"); - public Range createRange3D(int _globalWidth, int _globalHeight, int _globalDepth, int _localWidth, int _localHeight, - int _localDepth) { - return (Range.create3D(this, _globalWidth, _globalHeight, _globalDepth, _localWidth, _localHeight, _localDepth)); - } + return device; + } - public abstract long getDeviceId(); + devicec++; + } + System.out.println("Device type/id combination not found"); - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof Device)) { - return false; - } + System.out.println("}"); - Device device = (Device) o; + platformc++; - return getDeviceId() == device.getDeviceId(); - } + } - @Override - public int hashCode() { - return Long.valueOf(getDeviceId()).hashCode(); + //} + // return not found !!! 
+ return null; } } diff --git a/src/main/java/com/aparapi/device/JavaDevice.java b/src/main/java/com/aparapi/device/JavaDevice.java index eaeb8abe..c3046fdd 100644 --- a/src/main/java/com/aparapi/device/JavaDevice.java +++ b/src/main/java/com/aparapi/device/JavaDevice.java @@ -15,33 +15,33 @@ */ package com.aparapi.device; -public class JavaDevice extends Device { +public class JavaDevice extends Device{ - public static final JavaDevice THREAD_POOL = new JavaDevice(TYPE.JTP, "Java Thread Pool", -3); - public static final JavaDevice ALTERNATIVE_ALGORITHM = new JavaDevice(TYPE.ALT, "Java Alternative Algorithm", -2); - public static final JavaDevice SEQUENTIAL = new JavaDevice(TYPE.SEQ, "Java Sequential", -1); + public static final JavaDevice THREAD_POOL = new JavaDevice(TYPE.JTP, "Java Thread Pool", -3); + public static final JavaDevice ALTERNATIVE_ALGORITHM = new JavaDevice(TYPE.ALT, "Java Alternative Algorithm", -2); + public static final JavaDevice SEQUENTIAL = new JavaDevice(TYPE.SEQ, "Java Sequential", -1); - private final String name; - private final long deviceId; + private final String name; + private final long deviceId; - private JavaDevice(TYPE _type, String _name, long deviceId) { - this.deviceId = deviceId; - this.type = _type; - this.name = _name; - } + private JavaDevice(TYPE _type, String _name, long deviceId) { + this.deviceId = deviceId; + this.type = _type; + this.name = _name; + } - @Override - public String getShortDescription() { - return name; - } + @Override + public String getShortDescription() { + return name; + } - @Override - public long getDeviceId() { - return deviceId; - } + @Override + public long getDeviceId() { + return deviceId; + } - @Override - public String toString() { - return getShortDescription(); - } + @Override + public String toString() { + return getShortDescription(); + } } diff --git a/src/main/java/com/aparapi/device/OpenCLDevice.java b/src/main/java/com/aparapi/device/OpenCLDevice.java index fbd86e40..e8968722 100644 
--- a/src/main/java/com/aparapi/device/OpenCLDevice.java +++ b/src/main/java/com/aparapi/device/OpenCLDevice.java @@ -470,7 +470,10 @@ public static OpenCLDevice select(DeviceSelector _deviceSelector) { OpenCLDevice device = null; final OpenCLPlatform platform = new OpenCLPlatform(0, null, null, null); - for (final OpenCLPlatform p : platform.getOpenCLPlatforms()) { + //!!! oren change 2.15.15 -> allow choosing a platform when multiple platforms are available + // Currently aparapi does not offer a way to choose a platform + //for (final OpenCLPlatform p : platform.getOpenCLPlatforms()) { + for (final OpenCLPlatform p : platform.getOpenCLPlatformsFilteredByConfig()) { for (final OpenCLDevice d : p.getOpenCLDevices()) { device = _deviceSelector.select(d); if (device != null) { @@ -508,7 +511,10 @@ public static OpenCLDevice select(DeviceComparitor _deviceComparitor, Device.TYP OpenCLDevice device = null; final OpenCLPlatform platform = new OpenCLPlatform(0, null, null, null); - for (final OpenCLPlatform p : platform.getOpenCLPlatforms()) { + //!!! oren change 2.15.15 -> allow choosing a platform when multiple platforms are available + // Currently aparapi does not offer a way to choose a platform + //for (final OpenCLPlatform p : platform.getOpenCLPlatforms()) { + for (final OpenCLPlatform p : platform.getOpenCLPlatformsFilteredByConfig()) { for (final OpenCLDevice d : p.getOpenCLDevices()) { if (d.getType() != _type) continue; if (device == null) { diff --git a/src/main/java/com/aparapi/device/package-info.java b/src/main/java/com/aparapi/device/package-info.java index fd8989ab..40e46a38 100644 --- a/src/main/java/com/aparapi/device/package-info.java +++ b/src/main/java/com/aparapi/device/package-info.java @@ -14,21 +14,6 @@ * limitations under the License. */ /** - * Contains classes representing OpenCL-capable devices, and "virtual" (java) devices which execute kernels using java. - * - *

Various methods of {@link com.aparapi.device.Device} which selected devices of a particular type have been deprecated, - * as now the preferred mechanism for device selection is to rely on the {@link com.aparapi.internal.kernel.KernelManager} to - * select an appropriate device. Where a particular device is required to be used for a certain kernel, for such purposes as - * debugging or unit testing, this can be achieved by using - * {@link com.aparapi.internal.kernel.KernelManager#setKernelManager(com.aparapi.internal.kernel.KernelManager)} prior to - * invoking any Kernel executions, by overriding {@link com.aparapi.Kernel#isAllowDevice(com.aparapi.device.Device)} - * to veto/approve devices from the available devices for a given Kernel class, or (not recommended) by using - * {@link com.aparapi.internal.kernel.KernelManager#setPreferredDevices(com.aparapi.Kernel, java.util.LinkedHashSet)} to specify - * a particular device list for a given Kernel class. - * - *

In order to determine the Device which will be used to execute a particular Kernel, use {@link com.aparapi.Kernel#getTargetDevice()}. - * This can also be used immediately after execution to see on which device the kernel actually got executed (in case the execution failed - * and fell back to another device). * */ package com.aparapi.device; \ No newline at end of file diff --git a/src/main/java/com/aparapi/internal/jni/ConfigJNI.java b/src/main/java/com/aparapi/internal/jni/ConfigJNI.java index 91f4ab44..3fbe3c1c 100644 --- a/src/main/java/com/aparapi/internal/jni/ConfigJNI.java +++ b/src/main/java/com/aparapi/internal/jni/ConfigJNI.java @@ -44,6 +44,15 @@ public abstract class ConfigJNI{ */ @UsedByJNICode public static final boolean enableProfilingCSV = Boolean.getBoolean(propPkgName + ".enableProfilingCSV"); + //!!! oren change 2.15.19 -> Allows the user to set profile file name format + /** + * Allows the user to set profile file name format + * + * Usage -Dcom.amd.aparapi.profilingFileNameFormatStr={format string} + * + */ + @UsedByJNICode public static final String profilingFileNameFormatStr = System.getProperty(propPkgName + ".profilingFileNameFormatStr"); + /** * Allows the user to request that verbose JNI messages be dumped to stderr. * diff --git a/src/main/java/com/aparapi/internal/jni/KernelRunnerJNI.java b/src/main/java/com/aparapi/internal/jni/KernelRunnerJNI.java index 2ef6a54f..4a0ed0a7 100644 --- a/src/main/java/com/aparapi/internal/jni/KernelRunnerJNI.java +++ b/src/main/java/com/aparapi/internal/jni/KernelRunnerJNI.java @@ -304,6 +304,19 @@ public abstract class KernelRunnerJNI{ @UsedByJNICode protected static final int JNI_FLAG_USE_ACC = 1 << 5; + /** !!! oren change -> + * These flags indicate that we want to build source/binary i.e. use source/binary flow. + * + * Be careful changing final constants starting with JNI.
+ * + * @see com.aparapi.internal.annotation.UsedByJNICode + * + * @author oren + */ + @UsedByJNICode public static final int JNI_FLAG_SOURCE_FLOW = 1 << 0; + @UsedByJNICode public static final int JNI_FLAG_BINARY_FLOW = 1 << 1; + @UsedByJNICode public static final int JNI_FLAG_DEFAULT_FLOW = 1 << 2; + /* * Native methods */ @@ -330,7 +343,7 @@ public abstract class KernelRunnerJNI{ * can be passed empty) andused the cached binary. *

By passing an empty String as the _binaryKey, the entire JNI-side binary caching apparatus can be disabled. */ - protected native long buildProgramJNI(long _jniContextHandle, String _source, String _binaryKey); + protected native long buildProgramJNI(long _jniContextHandle, String _source, int _buildFlags); protected native int setArgsJNI(long _jniContextHandle, KernelArgJNI[] _args, int argc); diff --git a/src/main/java/com/aparapi/internal/jni/OpenCLJNI.java b/src/main/java/com/aparapi/internal/jni/OpenCLJNI.java index 189b5358..a2168006 100644 --- a/src/main/java/com/aparapi/internal/jni/OpenCLJNI.java +++ b/src/main/java/com/aparapi/internal/jni/OpenCLJNI.java @@ -15,9 +15,9 @@ */ package com.aparapi.internal.jni; -import com.aparapi.ProfileInfo; import java.util.List; +import com.aparapi.ProfileInfo; import com.aparapi.device.OpenCLDevice; import com.aparapi.internal.opencl.OpenCLArgDescriptor; import com.aparapi.internal.opencl.OpenCLKernel; @@ -32,12 +32,7 @@ public abstract class OpenCLJNI{ protected native List getPlatforms(); - public OpenCLProgram createProgram(OpenCLDevice context, String openCLSource) - { - return this.createProgram(context, openCLSource, null); - } - - protected native OpenCLProgram createProgram(OpenCLDevice context, String openCLSource, String binaryKey); + protected native OpenCLProgram createProgram(OpenCLDevice context, String openCLSource); protected native OpenCLKernel createKernelJNI(OpenCLProgram program, String kernelName, OpenCLArgDescriptor[] args); diff --git a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java index 66d34cf6..08458478 100644 --- a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java +++ b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java @@ -170,6 +170,16 @@ public KernelRunner(Kernel _kernel) { KernelManager.instance(); // ensures static initialization of KernelManager } + public void init(KernelRunner kernelRunner) 
{ + //this = super.clone(); + jniContextHandle = kernelRunner.jniContextHandle; + entryPoint = kernelRunner.entryPoint; + argc = kernelRunner.argc; + args = kernelRunner.args; + //puts = kernelRunner.puts; + capabilitiesSet = kernelRunner.capabilitiesSet; + } + /** * @see Kernel#cleanUpArrays(). */ @@ -1356,28 +1366,11 @@ else if (Config.enableShowGeneratedOpenCL) { // Send the string to OpenCL to compile it, or if the compiled binary is already cached on JNI side just empty string to use cached binary long handle; - if (BINARY_CACHING_DISABLED) { - handle = buildProgramJNI(jniContextHandle, openCL, ""); - } else { - synchronized (seenBinaryKeys) { - String binaryKey = kernel.getClass().getName() + ":" + device.getDeviceId(); - if (seenBinaryKeys.contains(binaryKey)) { - // use cached binary - logger.log(Level.INFO, "reusing cached binary for " + binaryKey); - handle = buildProgramJNI(jniContextHandle, "", binaryKey); - } - else { - // create and cache binary - logger.log(Level.INFO, "compiling new binary for " + binaryKey); - handle = buildProgramJNI(jniContextHandle, openCL, binaryKey); - seenBinaryKeys.add(binaryKey); - } - } - } - _settings.profile.onEvent(ProfilingEvent.OPENCL_COMPILED); - if (handle == 0) { + int buildFlags = kernel.getFlowType().getValue(); + if (buildProgramJNI(jniContextHandle, openCL,buildFlags) == 0) { return fallBackToNextDevice(_settings, "OpenCL compile failed"); } + _settings.profile.onEvent(ProfilingEvent.OPENCL_COMPILED); args = new KernelArg[entryPoint.getReferencedFields().size()]; int i = 0; diff --git a/src/main/java/com/aparapi/internal/model/Entrypoint.java b/src/main/java/com/aparapi/internal/model/Entrypoint.java index 287b406d..ce3e6451 100644 --- a/src/main/java/com/aparapi/internal/model/Entrypoint.java +++ b/src/main/java/com/aparapi/internal/model/Entrypoint.java @@ -174,7 +174,10 @@ public static Field getFieldFromClassHierarchy(Class _clazz, String _name) th try { field = _clazz.getDeclaredField(_name); final Class 
type = field.getType(); - if (type.isPrimitive() || type.isArray()) { + // !!! oren test - for alternative memory types + //if (type.isPrimitive() || type.isArray()) + if (type.isPrimitive() || type.isArray() || type.getName().contains("java.nio")) + { return field; } if (field.getAnnotation(Kernel.NoCL.class) != null) { diff --git a/src/main/java/com/aparapi/internal/opencl/OpenCLLoader.java b/src/main/java/com/aparapi/internal/opencl/OpenCLLoader.java index 63bfa6b5..46da8bfa 100644 --- a/src/main/java/com/aparapi/internal/opencl/OpenCLLoader.java +++ b/src/main/java/com/aparapi/internal/opencl/OpenCLLoader.java @@ -26,47 +26,46 @@ /** * This class is intended to be a singleton which determines if OpenCL is available upon startup of Aparapi */ -public class OpenCLLoader extends OpenCLJNI{ +public class OpenCLLoader extends OpenCLJNI { - private static final Logger logger = Logger.getLogger(Config.getLoggerName()); + private static final Logger logger = Logger.getLogger(Config.getLoggerName()); - private static boolean openCLAvailable = false; + private static boolean openCLAvailable = false; - private static final OpenCLLoader instance = new OpenCLLoader(); + private static final OpenCLLoader instance = new OpenCLLoader(); - static { - if (Config.useAgent) { - logger.fine("Using agent!"); - openCLAvailable = true; - } else { + static { + if (Config.useAgent) { + logger.fine("Using agent!"); + openCLAvailable = true; + } else { try { - NativeLoader.load(); - logger.info("Aparapi JNI loaded successfully."); - openCLAvailable = true; + NativeLoader.load(); + logger.info("Aparapi JNI loaded successfully."); + openCLAvailable = true; + } catch (final IOException e) { + logger.log(Level.SEVERE, "Check your environment. Failed to load aparapi native library " + + " or possibly failed to locate opencl native library (opencl.dll/opencl.so)." 
+ + " Ensure that OpenCL is in your PATH (windows) or in LD_LIBRARY_PATH (linux)."); } - catch (final IOException e) { - logger.log(Level.SEVERE, "Check your environment. Failed to load aparapi native library " - + " or possibly failed to locate opencl native library (opencl.dll/opencl.so)." - + " Ensure that OpenCL is in your PATH (windows) or in LD_LIBRARY_PATH (linux)."); - } - } - } + } + } - /** - * Retrieve a singleton instance of OpenCLLoader - * - * @return A singleton instance of OpenCLLoader - */ - protected static OpenCLLoader getInstance() { - return instance; - } + /** + * Retrieve a singleton instance of OpenCLLoader + * + * @return A singleton instance of OpenCLLoader + */ + protected static OpenCLLoader getInstance() { + return instance; + } - /** - * Retrieve the status of whether OpenCL was successfully loaded - * - * @return The status of whether OpenCL was successfully loaded - */ - public static boolean isOpenCLAvailable() { - return openCLAvailable; - } + /** + * Retrieve the status of whether OpenCL was successfully loaded + * + * @return The status of whether OpenCL was successfully loaded + */ + public static boolean isOpenCLAvailable() { + return openCLAvailable; + } } diff --git a/src/main/java/com/aparapi/internal/opencl/OpenCLPlatform.java b/src/main/java/com/aparapi/internal/opencl/OpenCLPlatform.java index ac2b5201..28f0b491 100644 --- a/src/main/java/com/aparapi/internal/opencl/OpenCLPlatform.java +++ b/src/main/java/com/aparapi/internal/opencl/OpenCLPlatform.java @@ -18,9 +18,12 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Iterator; +import java.util.List; import com.aparapi.device.OpenCLDevice; import com.aparapi.internal.jni.OpenCLJNI; +import com.aparapi.Config; public class OpenCLPlatform extends OpenCLJNI{ @@ -85,6 +88,48 @@ public static List getUncachedOpenCLPlatforms(){ return platforms; } + //!!! 
oren change 2.15.15 -> allow choosing a platform when multiple platforms are available + // Currently aparapi does not offer a way to choose a platform + public List getOpenCLPlatformsFilteredByConfig() + { + return getOpenCLPlatformsFilteredBy(Config.platformHint); + } + + public List getOpenCLPlatformsFilteredBy(String filter) + { + if (OpenCLLoader.isOpenCLAvailable()) + { + List platformList = (getPlatforms()); + if(filter==null) + { + System.out.println("Not Filtering Platforms. Platform filter is empty!"); + } + else + { + System.out.println("Filtering Platforms using: " + filter ); + for (Iterator iterator = platformList.iterator(); iterator.hasNext(); ) + { + String platformName = iterator.next().getName(); + if (filter.equals("*") || platformName.contains(filter)) + { + System.out.println("Adding Platform: " + platformName ); + } + else + { + System.out.println("Discarding Platform: " + platformName); + iterator.remove(); + } + + } + } + return (platformList); + } + else + { + return (new ArrayList()); + } + } + public String getName() { return (name); } diff --git a/wiki-collateral/ProfilingKernelsFormEclipseProject.zip b/wiki-collateral/ProfilingKernelsFormEclipseProject.zip new file mode 100644 index 00000000..28566548 Binary files /dev/null and b/wiki-collateral/ProfilingKernelsFormEclipseProject.zip differ