Saturday, November 14, 2009

Activator.CreateInstance(...) Performance



Recently I have been looking into performance profiling different methods of implementing an Object Factory pattern: something that can choose an appropriate concrete type and create it when a requested interface is given.  
A fundamental portion of this is to be able to register types with the factory, and in my case this needs to be done by an XML configuration file.
 Therefore the type is ultimately created by calling Type.GetType("FullyQualifiedTypeNameHere, AssemblyName").  This has bothered
me for some time and I have been waiting until I have time to performance test it.




I have an existing Object Factory that can create types when a requested interface is given.  You can register a concrete type against
an interface either by code or configuration, but mostly this needs to be done by the configuration xml file.  I wrote a test program
to time different methods of creating objects and was a little surprised by the results.




Here's the test program code:


namespace ActivatorCreateInstanceAlternatives {
    using System;
    using System.Collections.Generic;
    using System.Diagnostics;
    using System.Linq;
    using System.Reflection;

    /// <summary>
    /// Console micro-benchmark that times several ways of creating a <c>Client</c>:
    /// the <c>new</c> keyword, <c>Activator.CreateInstance</c> (with and without a cached
    /// <c>Type</c>), a cached <c>ConstructorInfo</c>, and <c>ObjectFactory</c>.
    /// Every figure printed is an average of <c>Stopwatch.ElapsedTicks</c> per created object.
    /// </summary>
    public class Program {
        /// <summary>Number of objects each technique creates per test set.</summary>
        private const int Iterations = 10000;

        /// <summary>Type name handed to <c>Type.GetType</c> by the reflection based techniques.</summary>
        private const string ClientTypeName = "ActivatorCreateInstanceAlternatives.Client";

        private static ConstructorInfo _constr;
        private static Type _type;

        // Every created client is stored here, keyed by its Id, once its timing has stopped.
        private static Dictionary<Guid, Client> allClients = new Dictionary<Guid, Client>();

        /// <summary>
        /// Runs both test sets and prints the average ticks per technique.
        /// Test One runs each technique in its own block of iterations; Test Two
        /// interleaves the techniques inside a single loop.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        public static void Main(string[] args) {
            Console.WriteLine("Test One - Loop thru each 10000 times (x,x,x,x,x.... y,y,y,y,y.... z,z,z,z....).");
            var objectFactoryTime = ObjectFactoryPerfTests();
            var newCreateTime = NewPerfTests();
            var activatorTime = ActivatorCreatePerfTests();
            var activatorCachedTime = ActivatorCreateCachedTypePerfTests();
            var cachedConstructorTime = CachedConstructorPerfTests();
            WriteResults(newCreateTime, activatorTime, activatorCachedTime, cachedConstructorTime, objectFactoryTime);

            Console.WriteLine("Test Two - For each 10,000 use each creation method (x,y,z, x,y,z, ...)");
            var objectFactoryTime2 = new List<double>(Iterations);
            var newCreateTime2 = new List<double>(Iterations);
            var activatorTime2 = new List<double>(Iterations);
            var activatorCachedTime2 = new List<double>(Iterations);
            var cachedConstructorTime2 = new List<double>(Iterations);

            for (int index = 0; index < Iterations; index++) {
                newCreateTime2.Add(NewPerfTest());
                activatorTime2.Add(ActivatorCreatePerfTest());
                activatorCachedTime2.Add(ActivatorCreateCachedTypePerfTest());
                cachedConstructorTime2.Add(CachedConstructorPerfTest());
                objectFactoryTime2.Add(ObjectFactoryPerfTest());
            }

            WriteResults(
                newCreateTime2.Average(),
                activatorTime2.Average(),
                activatorCachedTime2.Average(),
                cachedConstructorTime2.Average(),
                objectFactoryTime2.Average());
        }

        /// <summary>Prints one result line per technique followed by a spacer line.</summary>
        private static void WriteResults(double newCreate, double activator, double activatorCached, double cachedConstructor, double objectFactory) {
            Console.WriteLine("Baseline 'new' construct " + newCreate.ToString("N"));
            Console.WriteLine("Activator.CreateInstance " + activator.ToString("N"));
            Console.WriteLine("Cached Activator.Create  " + activatorCached.ToString("N"));
            Console.WriteLine("Cached constructor       " + cachedConstructor.ToString("N"));
            Console.WriteLine("Object Factory           " + objectFactory.ToString("N"));
            Console.WriteLine(" ");
        }

        /// <summary>Runs <paramref name="singleTest"/> <c>Iterations</c> times and returns the mean of its results.</summary>
        private static double AverageTicks(Func<double> singleTest) {
            var results = new List<double>(Iterations);
            for (int index = 0; index < Iterations; index++) {
                results.Add(singleTest());
            }

            return results.Average();
        }

        /// <summary>
        /// Checks and stores a freshly created client and returns the elapsed ticks.
        /// Callers stop the stopwatch before calling this, so neither the assertion
        /// nor the dictionary insert is part of the measurement.
        /// </summary>
        private static double Record(Stopwatch stopwatch, Client client) {
            Debug.Assert(client != null);
            allClients.Add(client.Id, client);
            return stopwatch.ElapsedTicks;
        }

        /// <summary>Resolves the client type by name, failing loudly when it cannot be found.</summary>
        private static Type ResolveClientType() {
            Type type = Type.GetType(ClientTypeName);
            if (type == null) {
                throw new InvalidOperationException("Could not resolve type '" + ClientTypeName + "'.");
            }

            return type;
        }

        /// <summary>Populates the cached type on first use so the single-shot test can be called on its own.</summary>
        private static void EnsureCachedType() {
            if (_type == null) {
                _type = ResolveClientType();
            }
        }

        /// <summary>Populates the cached parameterless constructor on first use.</summary>
        private static void EnsureCachedConstructor() {
            if (_constr != null) {
                return;
            }

            ConstructorInfo constructor = ResolveClientType().GetConstructor(Type.EmptyTypes);
            if (constructor == null) {
                throw new InvalidOperationException("Type '" + ClientTypeName + "' has no public parameterless constructor.");
            }

            _constr = constructor;
        }

        /// <summary>Average ticks for creating a client with the <c>new</c> keyword.</summary>
        private static double NewPerfTests() {
            return AverageTicks(NewPerfTest);
        }

        /// <summary>Times a single <c>new Client()</c>.</summary>
        private static double NewPerfTest() {
            var stopwatch = Stopwatch.StartNew();
            var client = new Client();
            stopwatch.Stop();
            return Record(stopwatch, client);
        }

        /// <summary>Average ticks for <c>Activator.CreateInstance</c> using the cached type.</summary>
        private static double ActivatorCreateCachedTypePerfTests() {
            EnsureCachedType();
            return AverageTicks(ActivatorCreateCachedTypePerfTest);
        }

        /// <summary>Times a single <c>Activator.CreateInstance</c> call on the cached type.</summary>
        private static double ActivatorCreateCachedTypePerfTest() {
            // Done before the stopwatch starts: the lookup must not be part of the cached measurement.
            EnsureCachedType();
            var stopwatch = Stopwatch.StartNew();
            var client = Activator.CreateInstance(_type) as Client;
            stopwatch.Stop();
            return Record(stopwatch, client);
        }

        /// <summary>Average ticks for <c>Activator.CreateInstance</c> with a type lookup on every call.</summary>
        private static double ActivatorCreatePerfTests() {
            return AverageTicks(ActivatorCreatePerfTest);
        }

        /// <summary>Times a single type lookup plus <c>Activator.CreateInstance</c>.</summary>
        private static double ActivatorCreatePerfTest() {
            var stopwatch = Stopwatch.StartNew();
            // The Type.GetType lookup is deliberately inside the timed region for this technique.
            var client = Activator.CreateInstance(Type.GetType(ClientTypeName)) as Client;
            stopwatch.Stop();
            return Record(stopwatch, client);
        }

        /// <summary>Average ticks for invoking the cached constructor.</summary>
        private static double CachedConstructorPerfTests() {
            EnsureCachedConstructor();
            return AverageTicks(CachedConstructorPerfTest);
        }

        /// <summary>Times a single invocation of the cached constructor.</summary>
        private static double CachedConstructorPerfTest() {
            EnsureCachedConstructor();
            var stopwatch = Stopwatch.StartNew();
            var client = _constr.Invoke(new object[] { }) as Client;
            stopwatch.Stop();
            return Record(stopwatch, client);
        }

        /// <summary>Average ticks for creating a client through <c>ObjectFactory</c>.</summary>
        private static double ObjectFactoryPerfTests() {
            return AverageTicks(ObjectFactoryPerfTest);
        }

        /// <summary>Times a single <c>ObjectFactory.Current.CreateInstance</c> call.</summary>
        private static double ObjectFactoryPerfTest() {
            var stopwatch = Stopwatch.StartNew();
            var client = ObjectFactory.Current.CreateInstance<IClient>() as Client;
            stopwatch.Stop();
            return Record(stopwatch, client);
        }
    }
}




Test Overview
Let me summarise this code into what I am trying to profile.  
There are two test sets, each will create 10,000 objects using each technique of creating an object.  
The first test set creates 10,000 using one technique then moves onto the next technique.  
The second set creates one object using each technique 10,000 times. This is just to check consistency of the times.




New keyword
The 'new' keyword is used as a baseline control for the test, I wouldn't expect anything to be consistently faster than this.

Activator.CreateInstance
The next test is Activator.CreateInstance(Type.GetType("SomeTypeNameHere")).  
This I am expecting to be consistently the slowest, as it has to find the type then create an instance of it for each of the 10,000 iterations.

Activator.CreateInstance with cached Type
This test will use Activator.CreateInstance without refetching the type each iteration (the type is cached into a class level field).

Cached Constructor
This test will simply get a ConstructorInfo object, cache it into a class level field and Invoke for each iteration through the loop.

Object Factory
This is my existing object factory.  This isn't really a fair comparison as the object factory is doing a little more work than
simply creating the object, (like argument preconditions etc) but it shouldn't be a million miles off the others.

Running the program 3 times I get the following results:
Run1:
Run2:
Run3:

Summary of results:
 Baseline 'new' construct 5.26ms 1
 Activator Type cached 5.72ms 2
 Cached Constructor 9.23ms 3
 Object Factory 9.57ms 4
 Activator without caching the type 26.43ms 5
It's safe to say (despite the slightly high results from run1) that using the 'new' keyword to create an instance of a known type is the fastest, as you would expect.

But in second place is Activator.CreateInstance when using an instance of a known Type cached into a class level field.  So, because my object factory uses this technique already (with the addition of some checks etc) it is not that far behind. 

Conclusion
I don't think my existing Object Factory is broken.  This shows that Activator.CreateInstance with a cached type is not that slow.  Considering the alternatives (Reflection.Emit, Dynamic Languages and others) and the complexity they can introduce, I think I'll leave it as is.  If you can see any serious flaws in this logic let me know.
-Ben

Monday, November 9, 2009

C# vs Vb.Net.... Fight!

16-November-2009


I need to have a bleat about some of my experiences converting Vb.Net 7 to C#3.5 and unfortunately to Vb.Net 9 as well. The conversion is for a massive existing Vb.Net solution in .Net1.1 that for various reasons must remain in Vb. In a perfect world redeveloping using new language constructs and in C# would be my preference. The good news is that there are some satellite web services and components that are small enough to justify conversion to C# and to drag the design kicking and screaming into this decade.


The debate rages between C# and Vb.Net fans and usually most people (who actually know what they are talking about) are polarised to one side or the other. I have used both, and for some time, have a slight preference for C#, but also there are some very nice positive aspects to Vb.Net. I coded exclusively in Vb and Vb.Net for about 7 years, and more recently 3 years in C#.






One major asset of the Vb.Net camp that I have been a fan of is its good layman readability. I have taken advantage of this repeatedly in the past and used Vb.Net code to show clients and BA's to help explain behaviours etc.

This can also be taken advantage of by learner developers, as the syntax seems a little more approachable and less intimidating than C#.

On this subject, a downside is that sometimes Vb's syntax is very ambiguous and difficult to analyse with automated (handcrafted or thirdparty) tools.
C# is often praised for its concise unambiguous syntax; one line of code can only be interpreted one way.

There is an abundance of tools available for C#, and analytical tools are easier to write for C# source code. Vb.net also has tools available, but not as many as C#, and they are a little harder to come by.



In my personal opinion I like the Vb.Net syntax of generic type modifiers, apart from the excessive use of parentheses in Vb and the 'Of' keyword. But I do like the 'As' modifier syntax directly after the generic declaration. For example compare these:
Public Sub MethodName(Of T As {Class, New, IDisposable})()
// In C# the constructor constraint is written new() and must be the last constraint listed.
public void MethodName<T>() where T : class, IDisposable, new() { }

Lambdas in C# are really nice. Vb's syntax is simply awful. Compare these:
list.ForEach(Function(x) Process(x))
list.ForEach(x => Process(x));
In actual fact this won't compile in Vb. Unless I'm missing something you cannot write a Lambda action (one that does not return a value) in Vb. Using Lambda actions is very handy for event handling in C# without having to write loads of methods to handle events. 

And in a similar vein, there are no multi-line lambdas in Vb.Net. Stink.
set.All(x => { Process(x); return true; }); 
I tend to use this syntax a lot (when changing the method to invoke is not an option), on anything other than List<T> where there is no .ForEach method. 

Historically using Vb.Net without turning on Option Explicit has been frowned upon; at least in my circle of contacts.  Now with Vb.Net usage of implicitly typed variables is not possible with this on.  Which seems odd to me. I need to investigate this further as I thought Option Explicit only prevented implicit casting from one type to another, not type inference during declaration. So right now as far as I am aware in my conversions I have not been able to replicate this in Vb:
var results = from x in someList where condition(x) select new {Name = x.ToString(), From = x.GetType().Name};

This one really made me tear my hair out. It seems you cannot invoke an Action delegate that is stored in a property. Consider this:
' Holder type exposing a single delegate-typed property (an Action taking one object).
Public Class Class1
    Public Property MyDelegate As Action(Of object)
End Class
Inside some other method...
' Shows the different ways of invoking the Action stored in Class1.MyDelegate.
' The first call is the form that is reported not to work; the two after it are the workarounds.
Public Sub Method1(director As Class1)
    director.MyDelegate(New Object()) ' Will not work
    'You must do this...
    director.MyDelegate()(New Object()) 'this will work - but is a very nasty confusing syntax - looks like a method call or array access
    'Or
    Dim action As Action(Of Object) = director.MyDelegate
    action(New Object()) ' this will work too.
End Sub
Annoying! - C# scores another point here for sure.



The built-in support for XML in Vb.Net is quite convenient, if a little weird. It's definitely more concise and readable than C#.

Dim testXml = <test>
                  <product title="Prod1"/>
                  <product title="Prod2"/>
                  <product title="Prod3"/>
              </test>
Dim queryA = From t In testXml...<product> _
             Select New With {.Title = t.@title} '... is any descendant

Auto-Properties quite simply rock. For example:
/// <summary>Minimal example type built entirely from auto-implemented properties.</summary>
public class Class1 {
    /// <summary>Gets or sets the string value.</summary>
    public string MyString { get; set; }

    /// <summary>Gets or sets the integer value.</summary>
    public int MyInteger { get; set; }
}
In comparison to the Vb conversion:
Public Class Class1 
    Private _myString as String
    Private _myInt as Integer
    Public Property MyString as String
        Get
            Return _myString
        End Get
    End Property
    ...


I really like Optional method arguments in Vb.  It means the number of overloads for a method is reduced, resulting in more concise code. Optional arguments are not currently supported in C#, and can only be emulated by implementing an overload.

Public Function Foo(x As Integer, Optional y as Integer = 1) as Integer ...
Note: I believe optional parameters are coming in C# 4.
It has to be said that I hate the excessive use of parentheses in Vb. You can almost hear the chief Vb designer say "if in doubt, throw in a bracket".  A crazy example of this is Properties that can take arguments!  Why?! What's the difference then between a property and a method? It's commonly accepted that the whole point of a property is a simple state attribute of an object; it should not change state, and should not contain a significant amount of work. C# definitely wins a clarity point here.



I cannot help but like the convenience of in-line Vb dates:
Dim MyDate As Date = #2/29/2008#
Compared to C#
DateTime myDate = new DateTime(2008, 2, 29); // Or
DateTime myDate2 = DateTime.Parse("2008/2/29"); // Parse is a static method, so no 'new'

C# has collection initialisers.
var theList = new List<string>() {"abc", "def", "ghi"};
Vb has no such convenience :-(. Although I believe this is coming in Vb 10.
In Summary, and again this is based on my own personal "frustration-Richter-scale". C# tops Vb with 8 points to 5.  Hence my personal preference for C#.  Each to their own. 

Thursday, November 5, 2009

Dynamic Unit Test Data

5-November-2009

Ever written a test and found yourself copying and pasting it multiple times to pass in different pieces of test data?  Particularly if you have multiple classes that transform the same input data.  I am currently working on some client side libraries that work off transferred Data Transfer Objects (dtos).  So most of the client classes take these dtos as input.  One such class is a Json Adaptor that converts any dto to a string and back again (internally it uses the .Net Json serialiser, DataContractJsonSerializer, with some more goodness injected).

To test this class I need to give it a wide range of test data. Including many different types of data, and with each type instantiated with a wide range of values.  I created a library of test data in the Data namespace, with each type in this namespace offering many different instances of the type.


namespace TestData {
    using System;
    using System.Collections.Generic;
    using System.Linq;

    /// <summary>
    /// Library of address test data. Each entry pairs a dto instance with the XML and JSON
    /// strings registered for it, keyed by <c>OutputFormat</c>.
    /// </summary>
    public static class AddressDtoDataLibrary {
        private static readonly IList<TestDataGroup<Dto.AddressDto>> Data = new List<TestDataGroup<Dto.AddressDto>>();

        /// <summary>Builds every test data group once, each with its own freshly generated id.</summary>
        static AddressDtoDataLibrary() {
            Guid id = Guid.NewGuid();
            var serialised = new Dictionary<OutputFormat, string>() {
                { OutputFormat.Xml, GetInput1Xml(id) },
                { OutputFormat.Json, GetInput1Json(id) }
            };
            Data.Add(new TestDataGroup<Dto.AddressDto>(GetInput1(id), serialised));

            id = Guid.NewGuid();
            serialised = new Dictionary<OutputFormat, string>() {
                { OutputFormat.Xml, GetInput2Xml(id) },
                { OutputFormat.Json, GetInput2Json(id) },
            };
            // The group type must match the element type of Data (Dto.AddressDto) to be addable.
            Data.Add(new TestDataGroup<Dto.AddressDto>(GetInput2(id), serialised));

            // etc
        }

        /// <summary>Gets a new array containing every registered test data group.</summary>
        public static TestDataGroup<Dto.AddressDto>[] DataArray {
            get {
                return Data.ToArray();
            }
        }

        /// <summary>Creates the first sample address.</summary>
        /// <param name="id">Value used for Id and Version, and as the prefix of Sid.</param>
        /// <returns>The populated address dto.</returns>
        internal static Dto.AddressDto GetInput1(Guid id) {
            return new Dto.AddressDto() {
                City = "Auckland",
                Country = "New Zealand",
                CreatedOn = new DateTime(2000, 2, 29),
                Id = id,
                Line1 = "24 Rodney Street",
                Line2 = string.Empty,
                // Uses the dto's type name so the value matches the "@AddressDto" suffix
                // hard-coded in GetInput1Json and GetInput1Xml.
                Sid = string.Format("{0}@{1}", id, typeof(Dto.AddressDto).Name),
                State = "Auckland",
                Suburb = "Birkenhead",
                Updated = new DateTime(2007, 12, 31, 7, 41, 59),
                Version = id,
                Zip = "2801"
            };
        }

        /// <summary>Returns the JSON string registered for the first sample address.</summary>
        /// <param name="id">Id embedded in the Id, Sid and Version values.</param>
        internal static string GetInput1Json(Guid id) {
            // NOTE(review): the date literals carry a +1300 offset, so they presumably assume a
            // machine in that time zone - confirm before running elsewhere.
            return @"{""City"":""Auckland"",""Country"":""New Zealand"",""CreatedOn"":""\/Date(951735600000+1300)\/"",""Id"":""" + id + @""",""Line1"":""24 Rodney Street"",""Line2"":"""",""Sid"":""" + id + @"@AddressDto"",""State"":""Auckland"",""Suburb"":""Birkenhead"",""Updated"":""\/Date(1199040119000+1300)\/"",""Version"":""" + id + @""",""Zip"":""2801""}";
        }

        /// <summary>Returns the XML string registered for the first sample address.</summary>
        /// <param name="id">Id embedded in the Id, Sid and Version elements.</param>
        internal static string GetInput1Xml(Guid id) {
            return @"<AddressDto>
                <City>Auckland</City>
                <Country>New Zealand</Country>
                <CreatedOn>2000-02-29T00:00:00</CreatedOn>
                <Id>" + id + @"</Id>
                <Line1>24 Rodney Street</Line1>
                <Line2></Line2>
                <Sid>" + id + @"@AddressDto</Sid>
                <State>Auckland</State>
                <Suburb>Birkenhead</Suburb>
                <Updated>2007-12-31T07:41:59</Updated>
                <Version>" + id + @"</Version>
                <Zip>2801</Zip>
            </AddressDto>";
        }
    }
}
I could use the NUnit [Values] attribute, but this requires me to hard code each set of data for each test method. Not to mention it's an uglier syntax. Maybe acceptable for basic scalar types and strings, but not for objects imho. If I add more test data, it won't be applied to existing tests.
If I did go down this track it would look like this:


// Hypothetical shape of the test if each data set were hard-coded through [Values].
// NOTE(review): C# attribute arguments have to be compile-time constants, so the
// DataArray expressions below are presumably illustrative only - confirm before reuse.
[Test]
public void TestJsonSerialiser(
    [Values(TestData.AddressDtoDataLibrary.DataArray[0].DtoObject)] IDto dtoObject, 
    [Values(TestData.AddressDtoDataLibrary.DataArray[0].Serialised[OutputFormat.Json])] string serialised) {
    // test code here
}

Using the TestCaseSource attribute allows you to use a method to gather the test data programmatically.

Here's the test fixture:



    /// <summary>
    /// Test fixture whose cases are generated from the static <c>DataArray</c> property of each
    /// type listed in <c>dataSourceClasses</c>: one case per (type, index) pair.
    /// </summary>
    [TestFixture]
    public class HelperTest {
        private static readonly IEnumerable<Type> dataSourceClasses = new[] {
             typeof(Data.AddressDto), 
             typeof(Data.ClientIdentityAddressDto),
             typeof(Data.ClientIdentityDto),
             typeof(Data.CreateIdentityResponseDto),
             typeof(Data.PingResponseDto),
             typeof(Data.ScreenNameAvailabilityDto),
             typeof(Data.UpdateResponseDto)
        };

        // Referenced by name from the TestCaseSource attribute below; each element is an
        // object[] { Type, int } matching the test method's parameters.
        private static readonly object[] TestDataSourceArray;

        // Assigned here rather than in a field initialiser so that dataSourceClasses is
        // guaranteed to be populated first.
        static HelperTest() {
            TestDataSourceArray = PopulateTestDataArray();
        }

        /// <summary>Builds one { type, index } argument pair for every test data entry of every listed type.</summary>
        /// <returns>The argument pairs, in the order the types are listed.</returns>
        private static object[] PopulateTestDataArray() {
            return dataSourceClasses
                .SelectMany(type => Enumerable
                    .Range(0, HowMuchTestDataDoesThisClassHave(type))
                    .Select(index => new object[] { type, index }))
                .ToArray();
        }

        /// <summary>Counts the entries in the static <c>DataArray</c> property of <paramref name="type"/>.</summary>
        /// <param name="type">Type expected to expose a static <c>DataArray</c> property.</param>
        /// <returns>The array length, or 0 when the property is missing or does not yield an object array.</returns>
        private static int HowMuchTestDataDoesThisClassHave(Type type) {
            var property = type.GetProperty("DataArray", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static);
            if (property == null) {
                return 0;
            }

            var array = property.GetValue(null, new object[] { }) as object[];
            return array == null ? 0 : array.Length;
        }

        /// <summary>Serialises the selected test dto and checks that some output is produced.</summary>
        /// <param name="dtoType">Test data type the dto is read from.</param>
        /// <param name="index">Position of the dto within that type's <c>DataArray</c>.</param>
        [Test]
        [TestCaseSource("TestDataSourceArray")]
        public void TestJsonSerialise(Type dtoType, int index) {
            var dto = Data.TestDataHelper.GetTestDataItem(dtoType, index);
            Assert.IsNotNull(dto);

            var serialiser = new JsonSerialiser();
            var result = serialiser.Serialise(dto);
            Debug.WriteLine(result);

            Assert.IsNotNullOrEmpty(result);
        }
    }

Suffice to say the GetTestDataItem method in the Test gets the actual test data input from the type passed in. This type argument has a static property that presents an array of test data sets.  Each set contains the object, the matching xml serialisation string, and the matching json string.


/// <summary>Reflection helper that reads individual dtos out of a test data type.</summary>
public static class TestDataHelper {
    /// <summary>
    /// Reads the dto at <paramref name="index"/> from the static <c>DataArray</c> property of
    /// <paramref name="type"/>, via the <c>DtoObject</c> property of the selected entry.
    /// </summary>
    /// <param name="type">Type exposing a static <c>DataArray</c> property.</param>
    /// <param name="index">Zero-based position within <c>DataArray</c>.</param>
    /// <returns>
    /// The dto, or null when <paramref name="type"/> is null, <paramref name="index"/> is out of
    /// range, <c>DataArray</c> does not yield an object array, or <c>DtoObject</c> is not an <c>IDto</c>.
    /// </returns>
    /// <exception cref="NotSupportedException">
    /// A required property is missing, the selected entry is null, or reading a property failed.
    /// </exception>
    public static IDto GetTestDataItem(Type type, int index) {
        if (type == null || index < 0) {
            return null;
        }

        try {
            var arrayProperty = type.GetProperty("DataArray", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static);
            if (arrayProperty == null) {
                throw CreateNotSupported(type, null);
            }

            var array = arrayProperty.GetValue(null, new object[] { }) as object[];
            if (array == null || index >= array.Length) {
                return null;
            }

            var testDataGroup = array[index];
            if (testDataGroup == null) {
                throw CreateNotSupported(type, null);
            }

            var dtoProperty = testDataGroup.GetType().GetProperty("DtoObject", BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic);
            if (dtoProperty == null) {
                throw CreateNotSupported(type, null);
            }

            return dtoProperty.GetValue(testDataGroup, new object[] { }) as IDto;
        } catch (NotSupportedException) {
            // Already the exception type callers expect; do not wrap it a second time.
            throw;
        } catch (Exception ex) {
            // Any other reflection failure is reported the same way, keeping the cause attached.
            throw CreateNotSupported(type, ex);
        }
    }

    /// <summary>Creates the exception reported when the expected test data members are unusable.</summary>
    private static NotSupportedException CreateNotSupported(Type type, Exception inner) {
        return new NotSupportedException("The test data dto object " + type.Name + " does not have a DataArray or DtoObject property implemented.", inner);
    }
}

A little crude using reflection, (not cool in heavily used production code) but it works like a charm for testing.

Killed more than two birds with one test (especially since the test data was already written :-) ...

The test output from Nunit looks like this: